]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
[zdf] Add chapter extraction (#2198)
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
d92f5d5a 5import calendar
109dd3b2 6import copy
fe93e2c4 7import datetime
adbc4ec4 8import functools
a5c56234 9import hashlib
0ca96d48 10import itertools
c5e8d7af 11import json
720c3099 12import math
c4417ddb 13import os.path
d77ab8e2 14import random
c5e8d7af 15import re
46383212 16import sys
8a784c74 17import time
e0df6211 18import traceback
adbc4ec4 19import threading
c5e8d7af 20
b05654f0 21from .common import InfoExtractor, SearchInfoExtractor
4bb4a188 22from ..compat import (
edf3e38e 23 compat_chr,
29f7c58a 24 compat_HTTPError,
c5e8d7af 25 compat_parse_qs,
545cc85d 26 compat_str,
7fd002c0 27 compat_urllib_parse_unquote_plus,
15707c7e 28 compat_urllib_parse_urlencode,
7c80519c 29 compat_urllib_parse_urlparse,
7c61bd36 30 compat_urlparse,
4bb4a188 31)
545cc85d 32from ..jsinterp import JSInterpreter
4bb4a188 33from ..utils import (
720c3099 34 bug_reports_message,
c5e8d7af 35 clean_html,
d92f5d5a 36 datetime_from_str,
11f9be09 37 dict_get,
358de58c 38 error_to_compat_str,
c5e8d7af 39 ExtractorError,
2d30521a 40 float_or_none,
11f9be09 41 format_field,
dd27fd17 42 int_or_none,
641ad5d8 43 is_html,
34921b43 44 join_nonempty,
94278f72 45 mimetype2ext,
9c0d7f49 46 network_exceptions,
a6213a49 47 NO_DEFAULT,
11f9be09 48 orderedSet,
6310acf5 49 parse_codecs,
49bd8c66 50 parse_count,
7c80519c 51 parse_duration,
7ea65411 52 parse_iso8601,
4dfbf869 53 parse_qs,
dca3ff4a 54 qualities,
c0ac49bc 55 remove_end,
3995d37d 56 remove_start,
cf7e015f 57 smuggle_url,
dbdaaa23 58 str_or_none,
c93d53f5 59 str_to_int,
f3aa3c3f 60 strftime_or_none,
7c365c21 61 traverse_obj,
556dbe7f 62 try_get,
c5e8d7af
PH
63 unescapeHTML,
64 unified_strdate,
cf7e015f 65 unsmuggle_url,
8bdd16b4 66 update_url_query,
21c340b8 67 url_or_none,
fe93e2c4 68 urljoin,
7c365c21 69 variadic,
c5e8d7af
PH
70)
71
5f6a1245 72
def get_first(obj, keys, **kwargs):
    """Look up ``keys`` inside every element of ``obj`` and return a single result.

    The lookup path is ``keys`` prefixed with ``...`` and is handed to
    traverse_obj together with ``get_all=False``; any extra keyword
    arguments are forwarded unchanged.
    """
    branch_path = (..., *variadic(keys))
    return traverse_obj(obj, branch_path, **kwargs, get_all=False)
75
76
# Built-in Innertube client configurations, keyed by client name.
# Entries may omit INNERTUBE_API_KEY, INNERTUBE_HOST and REQUIRE_JS_PLAYER;
# build_innertube_clients() fills those in with defaults and also adds
# 'hl' and 'priority' to every entry.
# any clients starting with _ cannot be explicitly requested by the user
INNERTUBE_CLIENTS = {
    'web': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20210622.10.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1
    },
    'web_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_EMBEDDED_PLAYER',
                'clientVersion': '1.20210620.0.1',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 56
    },
    'web_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_REMIX',
                'clientVersion': '1.20210621.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
    },
    'web_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_CREATOR',
                'clientVersion': '1.20210621.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
    },
    'android': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID',
                'clientVersion': '16.20',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
        'REQUIRE_JS_PLAYER': False
    },
    'android_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_EMBEDDED_PLAYER',
                'clientVersion': '16.20',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
        'REQUIRE_JS_PLAYER': False
    },
    'android_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_MUSIC',
                'clientVersion': '4.32',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
        'REQUIRE_JS_PLAYER': False
    },
    'android_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_CREATOR',
                'clientVersion': '21.24.100',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
        'REQUIRE_JS_PLAYER': False
    },
    # ios has HLS live streams
    # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680
    'ios': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS',
                'clientVersion': '16.20',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
        'REQUIRE_JS_PLAYER': False
    },
    'ios_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MESSAGES_EXTENSION',
                'clientVersion': '16.20',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
        'REQUIRE_JS_PLAYER': False
    },
    'ios_music': {
        'INNERTUBE_API_KEY': 'AIzaSyDK3iBpDP9nHVTk2qL73FLJICfOC3c51Og',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MUSIC',
                'clientVersion': '4.32',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
        'REQUIRE_JS_PLAYER': False
    },
    'ios_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_CREATOR',
                'clientVersion': '21.24.100',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
        'REQUIRE_JS_PLAYER': False
    },
    # mweb has 'ultralow' formats
    # See: https://github.com/yt-dlp/yt-dlp/pull/557
    'mweb': {
        'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'MWEB',
                'clientVersion': '2.20210721.07.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 2
    },
}
223
224
def build_innertube_clients():
    """Complete every INNERTUBE_CLIENTS entry in place and add age-gate variants.

    Each entry receives default values for the API key, host and
    REQUIRE_JS_PLAYER, a default 'hl' of 'en' in its client context, and a
    'priority' computed from the client family (the part of the name before
    the first underscore). Base clients additionally get a deep-copied
    '<name>_agegate' entry with an EMBED client screen.
    """
    embed_context = {
        'embedUrl': 'https://google.com',  # Can be any valid URL
    }
    primary = ('android', 'web', 'ios', 'mweb')
    rank = qualities(tuple(reversed(primary)))
    fallbacks = (
        ('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8'),
        ('INNERTUBE_HOST', 'www.youtube.com'),
        ('REQUIRE_JS_PLAYER', True),
    )

    # Iterate over a snapshot: the *_agegate entries are inserted during the loop
    for name, cfg in list(INNERTUBE_CLIENTS.items()):
        for option, value in fallbacks:
            cfg.setdefault(option, value)
        innertube_context = cfg['INNERTUBE_CONTEXT']
        innertube_context['client'].setdefault('hl', 'en')
        family = name.split('_', 1)[0]
        cfg['priority'] = 10 * rank(family)

        if name in primary:
            variant = copy.deepcopy(cfg)
            INNERTUBE_CLIENTS[f'{name}_agegate'] = variant
            variant['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
            variant['INNERTUBE_CONTEXT']['thirdParty'] = embed_context
            variant['priority'] -= 1
        elif name.endswith('_embedded'):
            innertube_context['thirdParty'] = embed_context
            cfg['priority'] -= 2
        else:
            cfg['priority'] -= 3
249
250
251build_innertube_clients()
252
253
de7f3446 254class YoutubeBaseInfoExtractor(InfoExtractor):
b2e8bc1b 255 """Provide base functions for Youtube extractors"""
e00eb564 256
3462ffa8 257 _RESERVED_NAMES = (
3cd786db 258 r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'
3619f78d 259 r'shorts|movies|results|shared|hashtag|trending|feed|feeds|'
260 r'browse|oembed|get_video_info|iframe_api|s/player|'
cd7c66cf 261 r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')
3462ffa8 262
3619f78d 263 _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
264
b2e8bc1b 265 _NETRC_MACHINE = 'youtube'
3619f78d 266
b2e8bc1b
JMF
267 # If True it will raise an error if no login info is provided
268 _LOGIN_REQUIRED = False
269
d9190e44
RH
270 _INVIDIOUS_SITES = (
271 # invidious-redirect websites
272 r'(?:www\.)?redirect\.invidious\.io',
273 r'(?:(?:www|dev)\.)?invidio\.us',
274 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
275 r'(?:www\.)?invidious\.pussthecat\.org',
276 r'(?:www\.)?invidious\.zee\.li',
277 r'(?:www\.)?invidious\.ethibox\.fr',
278 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
279 # youtube-dl invidious instances list
280 r'(?:(?:www|no)\.)?invidiou\.sh',
281 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
282 r'(?:www\.)?invidious\.kabi\.tk',
283 r'(?:www\.)?invidious\.mastodon\.host',
284 r'(?:www\.)?invidious\.zapashcanon\.fr',
285 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
286 r'(?:www\.)?invidious\.tinfoil-hat\.net',
287 r'(?:www\.)?invidious\.himiko\.cloud',
288 r'(?:www\.)?invidious\.reallyancient\.tech',
289 r'(?:www\.)?invidious\.tube',
290 r'(?:www\.)?invidiou\.site',
291 r'(?:www\.)?invidious\.site',
292 r'(?:www\.)?invidious\.xyz',
293 r'(?:www\.)?invidious\.nixnet\.xyz',
294 r'(?:www\.)?invidious\.048596\.xyz',
295 r'(?:www\.)?invidious\.drycat\.fr',
296 r'(?:www\.)?inv\.skyn3t\.in',
297 r'(?:www\.)?tube\.poal\.co',
298 r'(?:www\.)?tube\.connect\.cafe',
299 r'(?:www\.)?vid\.wxzm\.sx',
300 r'(?:www\.)?vid\.mint\.lgbt',
301 r'(?:www\.)?vid\.puffyan\.us',
302 r'(?:www\.)?yewtu\.be',
303 r'(?:www\.)?yt\.elukerio\.org',
304 r'(?:www\.)?yt\.lelux\.fi',
305 r'(?:www\.)?invidious\.ggc-project\.de',
306 r'(?:www\.)?yt\.maisputain\.ovh',
307 r'(?:www\.)?ytprivate\.com',
308 r'(?:www\.)?invidious\.13ad\.de',
309 r'(?:www\.)?invidious\.toot\.koeln',
310 r'(?:www\.)?invidious\.fdn\.fr',
311 r'(?:www\.)?watch\.nettohikari\.com',
312 r'(?:www\.)?invidious\.namazso\.eu',
313 r'(?:www\.)?invidious\.silkky\.cloud',
314 r'(?:www\.)?invidious\.exonip\.de',
315 r'(?:www\.)?invidious\.riverside\.rocks',
316 r'(?:www\.)?invidious\.blamefran\.net',
317 r'(?:www\.)?invidious\.moomoo\.de',
318 r'(?:www\.)?ytb\.trom\.tf',
319 r'(?:www\.)?yt\.cyberhost\.uk',
320 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
321 r'(?:www\.)?qklhadlycap4cnod\.onion',
322 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
323 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
324 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
325 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
326 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
327 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
328 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
329 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
330 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
331 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
332 )
333
b2e8bc1b 334 def _login(self):
83317f69 335 """
336 Attempt to log in to YouTube.
83317f69 337 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
338 """
9d5d4d64 339
982ee69a
MB
340 if (self._LOGIN_REQUIRED
341 and self.get_param('cookiefile') is None
342 and self.get_param('cookiesfrombrowser') is None):
9d5d4d64 343 self.raise_login_required(
344 'Login details are needed to download this content', method='cookies')
68217024 345 username, password = self._get_login_info()
9d5d4d64 346 if username:
24b0a72b 347 self.report_warning(f'Cannot login to YouTube using username and password. {self._LOGIN_HINTS["cookies"]}')
b2e8bc1b 348
cce889b9 349 def _initialize_consent(self):
350 cookies = self._get_cookies('https://www.youtube.com/')
351 if cookies.get('__Secure-3PSID'):
352 return
353 consent_id = None
354 consent = cookies.get('CONSENT')
355 if consent:
356 if 'YES' in consent.value:
357 return
358 consent_id = self._search_regex(
359 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
360 if not consent_id:
361 consent_id = random.randint(100, 999)
362 self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
8d81f3e3 363
f3aa3c3f 364 def _initialize_pref(self):
365 cookies = self._get_cookies('https://www.youtube.com/')
366 pref_cookie = cookies.get('PREF')
367 pref = {}
368 if pref_cookie:
369 try:
370 pref = dict(compat_urlparse.parse_qsl(pref_cookie.value))
371 except ValueError:
372 self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
373 pref.update({'hl': 'en'})
374 self._set_cookie('.youtube.com', name='PREF', value=compat_urllib_parse_urlencode(pref))
375
b2e8bc1b 376 def _real_initialize(self):
f3aa3c3f 377 self._initialize_pref()
cce889b9 378 self._initialize_consent()
24b0a72b 379 self._login()
c5e8d7af 380
a0566bbf 381 _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
29f7c58a 382 _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
383 _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
a0566bbf 384
000c15a4 385 def _get_default_ytcfg(self, client='web'):
386 return copy.deepcopy(INNERTUBE_CLIENTS[client])
109dd3b2 387
000c15a4 388 def _get_innertube_host(self, client='web'):
389 return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']
109dd3b2 390
000c15a4 391 def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
109dd3b2 392 # try_get but with fallback to default ytcfg client values when present
393 _func = lambda y: try_get(y, getter, expected_type)
394 return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
395
000c15a4 396 def _extract_client_name(self, ytcfg, default_client='web'):
3619f78d 397 return self._ytcfg_get_safe(
398 ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
399 lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), compat_str, default_client)
109dd3b2 400
000c15a4 401 def _extract_client_version(self, ytcfg, default_client='web'):
3619f78d 402 return self._ytcfg_get_safe(
403 ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
404 lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), compat_str, default_client)
109dd3b2 405
000c15a4 406 def _extract_api_key(self, ytcfg=None, default_client='web'):
109dd3b2 407 return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)
408
000c15a4 409 def _extract_context(self, ytcfg=None, default_client='web'):
f3aa3c3f 410 context = get_first(
411 (ytcfg, self._get_default_ytcfg(default_client)), 'INNERTUBE_CONTEXT', expected_type=dict)
412 # Enforce language for extraction
413 traverse_obj(context, 'client', expected_type=dict, default={})['hl'] = 'en'
109dd3b2 414 return context
415
cf87314d 416 _SAPISID = None
417
109dd3b2 418 def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
a5c56234 419 time_now = round(time.time())
cf87314d 420 if self._SAPISID is None:
421 yt_cookies = self._get_cookies('https://www.youtube.com')
422 # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
423 # See: https://github.com/yt-dlp/yt-dlp/issues/393
424 sapisid_cookie = dict_get(
425 yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
426 if sapisid_cookie and sapisid_cookie.value:
427 self._SAPISID = sapisid_cookie.value
428 self.write_debug('Extracted SAPISID cookie')
429 # SAPISID cookie is required if not already present
430 if not yt_cookies.get('SAPISID'):
431 self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
432 self._set_cookie(
433 '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
434 else:
435 self._SAPISID = False
436 if not self._SAPISID:
437 return None
1974e99f 438 # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
439 sapisidhash = hashlib.sha1(
cf87314d 440 f'{time_now} {self._SAPISID} {origin}'.encode('utf-8')).hexdigest()
1974e99f 441 return f'SAPISIDHASH {time_now}_{sapisidhash}'
a5c56234
M
442
443 def _call_api(self, ep, query, video_id, fatal=True, headers=None,
f4f751af 444 note='Downloading API JSON', errnote='Unable to download API page',
000c15a4 445 context=None, api_key=None, api_hostname=None, default_client='web'):
f4f751af 446
109dd3b2 447 data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
8bdd16b4 448 data.update(query)
11f9be09 449 real_headers = self.generate_api_headers(default_client=default_client)
f4f751af 450 real_headers.update({'content-type': 'application/json'})
451 if headers:
452 real_headers.update(headers)
545cc85d 453 return self._download_json(
109dd3b2 454 'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
a5c56234 455 video_id=video_id, fatal=fatal, note=note, errnote=errnote,
f4f751af 456 data=json.dumps(data).encode('utf8'), headers=real_headers,
457 query={'key': api_key or self._extract_api_key()})
458
ac56cf38 459 def extract_yt_initial_data(self, item_id, webpage, fatal=True):
460 data = self._search_regex(
461 (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
462 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data', fatal=fatal)
463 if data:
464 return self._parse_json(data, item_id, fatal=fatal)
0c148415 465
99e9e001 466 @staticmethod
467 def _extract_session_index(*data):
468 """
469 Index of current account in account list.
470 See: https://github.com/yt-dlp/yt-dlp/pull/519
471 """
472 for ytcfg in data:
473 session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
474 if session_index is not None:
475 return session_index
476
477 # Deprecated?
478 def _extract_identity_token(self, ytcfg=None, webpage=None):
a1c5d2ca
M
479 if ytcfg:
480 token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
481 if token:
482 return token
99e9e001 483 if webpage:
484 return self._search_regex(
485 r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
486 'identity token', default=None, fatal=False)
a1c5d2ca
M
487
488 @staticmethod
fe93e2c4 489 def _extract_account_syncid(*args):
8ea3f7b9 490 """
491 Extract syncId required to download private playlists of secondary channels
fe93e2c4 492 @params response and/or ytcfg
8ea3f7b9 493 """
fe93e2c4 494 for data in args:
495 # ytcfg includes channel_syncid if on secondary channel
496 delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
497 if delegated_sid:
498 return delegated_sid
499 sync_ids = (try_get(
500 data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
e6f21b3d 501 lambda x: x['DATASYNC_ID']), compat_str) or '').split('||')
fe93e2c4 502 if len(sync_ids) >= 2 and sync_ids[1]:
503 # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
504 # and just "user_syncid||" for primary channel. We only want the channel_syncid
505 return sync_ids[0]
a1c5d2ca 506
ac56cf38 507 @staticmethod
508 def _extract_visitor_data(*args):
509 """
510 Extracts visitorData from an API response or ytcfg
511 Appears to be used to track session state
512 """
9222c381 513 return get_first(
514 args, (('VISITOR_DATA', ('INNERTUBE_CONTEXT', 'client', 'visitorData'), ('responseContext', 'visitorData'))),
515 expected_type=str)
ac56cf38 516
99e9e001 517 @property
518 def is_authenticated(self):
519 return bool(self._generate_sapisidhash_header())
520
11f9be09 521 def extract_ytcfg(self, video_id, webpage):
8c54a305 522 if not webpage:
523 return {}
29f7c58a 524 return self._parse_json(
525 self._search_regex(
526 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
f4f751af 527 default='{}'), video_id, fatal=False) or {}
528
11f9be09 529 def generate_api_headers(
99e9e001 530 self, *, ytcfg=None, account_syncid=None, session_index=None,
531 visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
532
11f9be09 533 origin = 'https://' + (api_hostname if api_hostname else self._get_innertube_host(default_client))
f4f751af 534 headers = {
109dd3b2 535 'X-YouTube-Client-Name': compat_str(
11f9be09 536 self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)),
537 'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
99e9e001 538 'Origin': origin,
539 'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
540 'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
ac56cf38 541 'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg)
99e9e001 542 }
543 if session_index is None:
314ee305 544 session_index = self._extract_session_index(ytcfg)
545 if account_syncid or session_index is not None:
546 headers['X-Goog-AuthUser'] = session_index if session_index is not None else 0
99e9e001 547
109dd3b2 548 auth = self._generate_sapisidhash_header(origin)
f4f751af 549 if auth is not None:
550 headers['Authorization'] = auth
109dd3b2 551 headers['X-Origin'] = origin
99e9e001 552 return {h: v for h, v in headers.items() if v is not None}
29f7c58a 553
2d6659b9 554 @staticmethod
555 def _build_api_continuation_query(continuation, ctp=None):
556 query = {
557 'continuation': continuation
558 }
559 # TODO: Inconsistency with clickTrackingParams.
560 # Currently we have a fixed ctp contained within context (from ytcfg)
561 # and a ctp in root query for continuation.
562 if ctp:
563 query['clickTracking'] = {'clickTrackingParams': ctp}
564 return query
565
2d6659b9 566 @classmethod
567 def _extract_next_continuation_data(cls, renderer):
568 next_continuation = try_get(
569 renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
570 lambda x: x['continuation']['reloadContinuationData']), dict)
571 if not next_continuation:
572 return
573 continuation = next_continuation.get('continuation')
574 if not continuation:
575 return
576 ctp = next_continuation.get('clickTrackingParams')
fe93e2c4 577 return cls._build_api_continuation_query(continuation, ctp)
2d6659b9 578
579 @classmethod
580 def _extract_continuation_ep_data(cls, continuation_ep: dict):
581 if isinstance(continuation_ep, dict):
582 continuation = try_get(
583 continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
584 if not continuation:
585 return
586 ctp = continuation_ep.get('clickTrackingParams')
fe93e2c4 587 return cls._build_api_continuation_query(continuation, ctp)
2d6659b9 588
589 @classmethod
590 def _extract_continuation(cls, renderer):
591 next_continuation = cls._extract_next_continuation_data(renderer)
592 if next_continuation:
593 return next_continuation
fe93e2c4 594
2d6659b9 595 contents = []
596 for key in ('contents', 'items'):
597 contents.extend(try_get(renderer, lambda x: x[key], list) or [])
fe93e2c4 598
2d6659b9 599 for content in contents:
600 if not isinstance(content, dict):
601 continue
602 continuation_ep = try_get(
603 content, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
604 lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
605 dict)
606 continuation = cls._extract_continuation_ep_data(continuation_ep)
607 if continuation:
608 return continuation
609
fe93e2c4 610 @classmethod
611 def _extract_alerts(cls, data):
109dd3b2 612 for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
613 if not isinstance(alert_dict, dict):
614 continue
615 for alert in alert_dict.values():
616 alert_type = alert.get('type')
617 if not alert_type:
618 continue
052e1350 619 message = cls._get_text(alert, 'text')
109dd3b2 620 if message:
621 yield alert_type, message
622
c0ac49bc 623 def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
109dd3b2 624 errors = []
625 warnings = []
626 for alert_type, alert_message in alerts:
641ad5d8 627 if alert_type.lower() == 'error' and fatal:
109dd3b2 628 errors.append([alert_type, alert_message])
629 else:
630 warnings.append([alert_type, alert_message])
631
632 for alert_type, alert_message in (warnings + errors[:-1]):
c0ac49bc 633 self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message), only_once=only_once)
109dd3b2 634 if errors:
635 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
636
637 def _extract_and_report_alerts(self, data, *args, **kwargs):
638 return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
639
47193e02 640 def _extract_badges(self, renderer: dict):
641 badges = set()
642 for badge in try_get(renderer, lambda x: x['badges'], list) or []:
643 label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
644 if label:
645 badges.add(label.lower())
646 return badges
647
    @staticmethod
    def _get_text(data, *path_list, max_runs=None):
        """Return the first non-empty text found in ``data`` at any of ``path_list``.

        With no path given, ``data`` itself is inspected. For every candidate
        item the 'simpleText' string is preferred; otherwise the 'text'
        values of its 'runs' list (or of the item itself when it is a list)
        are joined.

        @param max_runs   when truthy, only the first ``max_runs`` runs are joined
        @returns          the text, or None when nothing non-empty was found
        """
        for path in path_list or [None]:
            if path is None:
                obj = [data]
            else:
                obj = traverse_obj(data, path, default=[])
                # A path without `...` or list/tuple keys is treated as yielding a single object
                if not any(key is ... or isinstance(key, (list, tuple)) for key in variadic(path)):
                    obj = [obj]
            for item in obj:
                text = try_get(item, lambda x: x['simpleText'], compat_str)
                if text:
                    return text
                runs = try_get(item, lambda x: x['runs'], list) or []
                # The item may itself be the list of runs
                if not runs and isinstance(item, list):
                    runs = item

                # max_runs of None/0 keeps all runs
                runs = runs[:min(len(runs), max_runs or len(runs))]
                text = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str, default=[]))
                if text:
                    return text
47193e02 669
a709d873 670 @staticmethod
671 def _extract_thumbnails(data, *path_list):
672 """
673 Extract thumbnails from thumbnails dict
674 @param path_list: path list to level that contains 'thumbnails' key
675 """
676 thumbnails = []
677 for path in path_list or [()]:
678 for thumbnail in traverse_obj(data, (*variadic(path), 'thumbnails', ...), default=[]):
679 thumbnail_url = url_or_none(thumbnail.get('url'))
680 if not thumbnail_url:
681 continue
682 # Sometimes youtube gives a wrong thumbnail URL. See:
683 # https://github.com/yt-dlp/yt-dlp/issues/233
684 # https://github.com/ytdl-org/youtube-dl/issues/28023
685 if 'maxresdefault' in thumbnail_url:
686 thumbnail_url = thumbnail_url.split('?')[0]
687 thumbnails.append({
688 'url': thumbnail_url,
689 'height': int_or_none(thumbnail.get('height')),
690 'width': int_or_none(thumbnail.get('width')),
691 })
692 return thumbnails
693
f3aa3c3f 694 @staticmethod
695 def extract_relative_time(relative_time_text):
696 """
697 Extracts a relative time from string and converts to dt object
698 e.g. 'streamed 6 days ago', '5 seconds ago (edited)'
699 """
700 mobj = re.search(r'(?P<time>\d+)\s*(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?\s*ago', relative_time_text)
701 if mobj:
702 try:
703 return datetime_from_str('now-%s%s' % (mobj.group('time'), mobj.group('unit')), precision='auto')
704 except ValueError:
705 return None
706
707 def _extract_time_text(self, renderer, *path_list):
708 text = self._get_text(renderer, *path_list) or ''
709 dt = self.extract_relative_time(text)
710 timestamp = None
711 if isinstance(dt, datetime.datetime):
712 timestamp = calendar.timegm(dt.timetuple())
713 if text and timestamp is None:
714 self.report_warning('Cannot parse localized time text' + bug_reports_message(), only_once=True)
715 return timestamp, text
716
109dd3b2 717 def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
718 ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
000c15a4 719 default_client='web'):
109dd3b2 720 response = None
721 last_error = None
722 count = -1
723 retries = self.get_param('extractor_retries', 3)
724 if check_get_keys is None:
725 check_get_keys = []
726 while count < retries:
727 count += 1
728 if last_error:
c0ac49bc 729 self.report_warning('%s. Retrying ...' % remove_end(last_error, '.'))
109dd3b2 730 try:
731 response = self._call_api(
732 ep=ep, fatal=True, headers=headers,
733 video_id=item_id, query=query,
734 context=self._extract_context(ytcfg, default_client),
735 api_key=self._extract_api_key(ytcfg, default_client),
736 api_hostname=api_hostname, default_client=default_client,
737 note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
738 except ExtractorError as e:
9c0d7f49 739 if isinstance(e.cause, network_exceptions):
641ad5d8 740 if isinstance(e.cause, compat_HTTPError) and not is_html(e.cause.read(512)):
741 e.cause.seek(0)
742 yt_error = try_get(
743 self._parse_json(e.cause.read().decode(), item_id, fatal=False),
744 lambda x: x['error']['message'], compat_str)
745 if yt_error:
746 self._report_alerts([('ERROR', yt_error)], fatal=False)
109dd3b2 747 # Downloading page may result in intermittent 5xx HTTP error
748 # Sometimes a 404 is also recieved. See: https://github.com/ytdl-org/youtube-dl/issues/28289
9c0d7f49 749 # We also want to catch all other network exceptions since errors in later pages can be troublesome
750 # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
751 if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
526d74ec 752 last_error = error_to_compat_str(e.cause or e.msg)
9c0d7f49 753 if count < retries:
754 continue
109dd3b2 755 if fatal:
756 raise
757 else:
758 self.report_warning(error_to_compat_str(e))
759 return
760
761 else:
109dd3b2 762 try:
ac56cf38 763 self._extract_and_report_alerts(response, only_once=True)
109dd3b2 764 except ExtractorError as e:
c0ac49bc 765 # YouTube servers may return errors we want to retry on in a 200 OK response
766 # See: https://github.com/yt-dlp/yt-dlp/issues/839
767 if 'unknown error' in e.msg.lower():
768 last_error = e.msg
769 continue
109dd3b2 770 if fatal:
771 raise
772 self.report_warning(error_to_compat_str(e))
773 return
774 if not check_get_keys or dict_get(response, check_get_keys):
775 break
776 # Youtube sometimes sends incomplete data
777 # See: https://github.com/ytdl-org/youtube-dl/issues/28194
778 last_error = 'Incomplete data received'
779 if count >= retries:
780 if fatal:
781 raise ExtractorError(last_error)
782 else:
783 self.report_warning(last_error)
784 return
785 return response
786
9297939e 787 @staticmethod
788 def is_music_url(url):
789 return re.match(r'https?://music\.youtube\.com/', url) is not None
790
def _extract_video(self, renderer):
    """Convert a single video renderer into a ``url``-type result dict.

    ``renderer`` must support ``.get`` and is passed on to the text, badge,
    thumbnail and traversal helpers (presumably a videoRenderer-style dict
    from YouTube's API -- confirm against callers).

    The returned dict delegates to ``YoutubeIE`` through its ``ie_key`` and
    carries the id, watch URL, title, description, duration, view count,
    uploader, channel id, thumbnails, upload date, live status, release
    timestamp and availability derived from the renderer.
    """
    video_id = renderer.get('videoId')
    title = self._get_text(renderer, 'title')
    description = self._get_text(renderer, 'descriptionSnippet')

    # Prefer lengthText; otherwise use the text of the time-status thumbnail overlay
    length_text = self._get_text(
        renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text'))
    duration = parse_duration(length_text)

    # Drop all whitespace so the leading "1,234"-style number can be captured
    compact_views = re.sub(r'\s', '', self._get_text(renderer, 'viewCountText') or '')
    view_count = str_to_int(self._search_regex(
        r'^([\d,]+)', compact_views, 'view count', default=None))

    uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
    channel_id = traverse_obj(
        renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
        expected_type=str, get_all=False)
    timestamp, time_text = self._extract_time_text(renderer, 'publishedTimeText')
    scheduled_timestamp = str_to_int(
        traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))
    overlay_style = traverse_obj(
        renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
        get_all=False, expected_type=str)
    badges = self._extract_badges(renderer)
    thumbnails = self._extract_thumbnails(renderer, 'thumbnail')

    # Precedence: a scheduled start time wins, then a "streamed" published
    # text, then a LIVE overlay style or a "live now" badge.
    if scheduled_timestamp is not None:
        live_status = 'is_upcoming'
    elif 'streamed' in time_text.lower():
        live_status = 'was_live'
    elif overlay_style == 'LIVE' or 'live now' in badges:
        live_status = 'is_live'
    else:
        live_status = None

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': f'https://www.youtube.com/watch?v={video_id}',
        'title': title,
        'description': description,
        'duration': duration,
        'view_count': view_count,
        'uploader': uploader,
        'channel_id': channel_id,
        'thumbnails': thumbnails,
        'upload_date': strftime_or_none(timestamp, '%Y%m%d'),
        'live_status': live_status,
        'release_timestamp': scheduled_timestamp,
        'availability': self._availability(
            needs_premium='premium' in badges,
            needs_subscription='members only' in badges),
    }
832
0c148415 833
360e1ca5 834class YoutubeIE(YoutubeBaseInfoExtractor):
96565c7e 835 IE_DESC = 'YouTube'
cb7dfeea 836 _VALID_URL = r"""(?x)^
c5e8d7af 837 (
edb53e2d 838 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 839 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
840 (?:www\.)?deturl\.com/www\.youtube\.com|
841 (?:www\.)?pwnyoutube\.com|
842 (?:www\.)?hooktube\.com|
843 (?:www\.)?yourepeat\.com|
844 tube\.majestyc\.net|
845 %(invidious)s|
846 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
847 (?:.*?\#/)? # handle anchor (#/) redirect urls
848 (?: # the various things that can precede the ID:
8fc54b12 849 (?:(?:v|embed|e|shorts)/(?!videoseries)) # v/ or embed/ or e/ or shorts/
c5e8d7af 850 |(?: # or the v= param in all its forms
f7000f3a 851 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 852 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 853 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
854 v=
855 )
f4b05232 856 ))
cbaed4bb
S
857 |(?:
858 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
859 vid\.plus| # or vid.plus/xxxx
860 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 861 %(invidious)s
cbaed4bb 862 )/
edb53e2d 863 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 864 )
c5e8d7af 865 )? # all until now is optional -> you can pass the naked ID
201c1459 866 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
c5e8d7af 867 (?(1).+)? # if we found the ID, everything can follow
9297939e 868 (?:\#|$)""" % {
d9190e44 869 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
bc2ca1bb 870 }
e40c758c 871 _PLAYER_INFO_RE = (
cc2db878 872 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
873 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 874 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 875 )
2c62dc26 876 _formats = {
c2d3cb4c 877 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
878 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
879 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
880 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
881 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
882 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
883 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
884 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 885 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 886 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
887 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
888 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
889 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
890 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
891 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 892 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 893 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
894 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 895
896
897 # 3D videos
c2d3cb4c 898 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
899 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
900 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
901 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 902 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
903 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
904 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 905
96fb5605 906 # Apple HTTP Live Streaming
11f12195 907 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 908 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
909 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
910 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
911 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
912 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 913 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
914 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
915
916 # DASH mp4 video
d23028a8
S
917 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
918 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
919 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
920 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
921 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 922 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
923 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
924 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
925 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
926 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
927 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
928 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 929
f6f1fc92 930 # Dash mp4 audio
d23028a8
S
931 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
932 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
933 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
934 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
935 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
936 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
937 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
938
939 # Dash webm
d23028a8
S
940 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
941 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
942 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
943 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
944 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
945 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
946 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
947 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
948 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
949 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
950 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
951 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
952 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
953 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
954 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 955 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
956 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
957 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
958 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
959 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
960 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
961 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
962
963 # Dash webm audio
d23028a8
S
964 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
965 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 966
0857baad 967 # Dash webm audio with opus inside
d23028a8
S
968 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
969 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
970 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 971
ce6b9a2d
PH
972 # RTMP (unnamed)
973 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
974
975 # av01 video only formats sometimes served with "unknown" codecs
9b5fa9ee
TOH
976 '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
977 '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
978 '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
979 '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
980 '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
981 '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
982 '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
983 '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
c5e8d7af 984 }
29f7c58a 985 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 986
fd5c4aab
S
987 _GEO_BYPASS = False
988
78caa52a 989 IE_NAME = 'youtube'
2eb88d95
PH
990 _TESTS = [
991 {
2d3d2997 992 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
993 'info_dict': {
994 'id': 'BaW_jenozKc',
995 'ext': 'mp4',
3867038a 996 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
997 'uploader': 'Philipp Hagemeister',
998 'uploader_id': 'phihag',
ec85ded8 999 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
ff9f925b 1000 'channel': 'Philipp Hagemeister',
dd4c4492
S
1001 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1002 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 1003 'upload_date': '20121002',
ff9f925b 1004 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
4bc3a23e 1005 'categories': ['Science & Technology'],
3867038a 1006 'tags': ['youtube-dl'],
556dbe7f 1007 'duration': 10,
dbdaaa23 1008 'view_count': int,
3e7c1224 1009 'like_count': int,
ff9f925b 1010 # 'dislike_count': int,
1011 'availability': 'public',
1012 'playable_in_embed': True,
1013 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1014 'live_status': 'not_live',
1015 'age_limit': 0,
7c80519c 1016 'start_time': 1,
297a564b 1017 'end_time': 9,
2eb88d95 1018 }
0e853ca4 1019 },
fccd3771 1020 {
4bc3a23e
PH
1021 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1022 'note': 'Embed-only video (#1746)',
1023 'info_dict': {
1024 'id': 'yZIXLfi8CZQ',
1025 'ext': 'mp4',
1026 'upload_date': '20120608',
1027 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1028 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1029 'uploader': 'SET India',
94bfcd23 1030 'uploader_id': 'setindia',
ec85ded8 1031 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 1032 'age_limit': 18,
545cc85d 1033 },
1034 'skip': 'Private video',
fccd3771 1035 },
11b56058 1036 {
8bdd16b4 1037 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
1038 'note': 'Use the first video ID in the URL',
1039 'info_dict': {
1040 'id': 'BaW_jenozKc',
1041 'ext': 'mp4',
3867038a 1042 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
1043 'uploader': 'Philipp Hagemeister',
1044 'uploader_id': 'phihag',
ec85ded8 1045 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 1046 'upload_date': '20121002',
3867038a 1047 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
11b56058 1048 'categories': ['Science & Technology'],
3867038a 1049 'tags': ['youtube-dl'],
556dbe7f 1050 'duration': 10,
dbdaaa23 1051 'view_count': int,
11b56058
PM
1052 'like_count': int,
1053 'dislike_count': int,
34a7de29
S
1054 },
1055 'params': {
1056 'skip_download': True,
1057 },
11b56058 1058 },
dd27fd17 1059 {
2d3d2997 1060 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
1061 'note': '256k DASH audio (format 141) via DASH manifest',
1062 'info_dict': {
1063 'id': 'a9LDPn-MO4I',
1064 'ext': 'm4a',
1065 'upload_date': '20121002',
1066 'uploader_id': '8KVIDEO',
ec85ded8 1067 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
1068 'description': '',
1069 'uploader': '8KVIDEO',
1070 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 1071 },
4bc3a23e
PH
1072 'params': {
1073 'youtube_include_dash_manifest': True,
1074 'format': '141',
4919603f 1075 },
de3c7fe0 1076 'skip': 'format 141 not served anymore',
dd27fd17 1077 },
8bdd16b4 1078 # DASH manifest with encrypted signature
1079 {
1080 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1081 'info_dict': {
1082 'id': 'IB3lcPjvWLA',
1083 'ext': 'm4a',
1084 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1085 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1086 'duration': 244,
1087 'uploader': 'AfrojackVEVO',
1088 'uploader_id': 'AfrojackVEVO',
1089 'upload_date': '20131011',
cc2db878 1090 'abr': 129.495,
8bdd16b4 1091 },
1092 'params': {
1093 'youtube_include_dash_manifest': True,
1094 'format': '141/bestaudio[ext=m4a]',
1095 },
1096 },
65c2fde2 1097 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
c522adb1 1098 {
65c2fde2 1099 'note': 'Embed allowed age-gate video',
2d3d2997 1100 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
1101 'info_dict': {
1102 'id': 'HtVdAasjOgU',
1103 'ext': 'mp4',
1104 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 1105 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 1106 'duration': 142,
c522adb1
JMF
1107 'uploader': 'The Witcher',
1108 'uploader_id': 'WitcherGame',
ec85ded8 1109 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 1110 'upload_date': '20140605',
34952f09 1111 'age_limit': 18,
c522adb1
JMF
1112 },
1113 },
65c2fde2 1114 {
1115 'note': 'Age-gate video with embed allowed in public site',
1116 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1117 'info_dict': {
1118 'id': 'HsUATh_Nc2U',
1119 'ext': 'mp4',
1120 'title': 'Godzilla 2 (Official Video)',
1121 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1122 'upload_date': '20200408',
1123 'uploader_id': 'FlyingKitty900',
1124 'uploader': 'FlyingKitty',
1125 'age_limit': 18,
1126 },
1127 },
1128 {
1129 'note': 'Age-gate video embedable only with clientScreen=EMBED',
1130 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1131 'info_dict': {
1132 'id': 'Tq92D6wQ1mg',
1133 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
3619f78d 1134 'ext': 'mp4',
1135 'upload_date': '20191227',
65c2fde2 1136 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1137 'uploader': 'Projekt Melody',
1138 'description': 'md5:17eccca93a786d51bc67646756894066',
1139 'age_limit': 18,
1140 },
1141 },
1142 {
1143 'note': 'Non-Agegated non-embeddable video',
1144 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1145 'info_dict': {
1146 'id': 'MeJVWBSsPAY',
1147 'ext': 'mp4',
1148 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1149 'uploader': 'Herr Lurik',
1150 'uploader_id': 'st3in234',
1151 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1152 'upload_date': '20130730',
1153 },
1154 },
1155 {
1156 'note': 'Non-bypassable age-gated video',
1157 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1158 'only_matching': True,
1159 },
8bdd16b4 1160 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1161 # YouTube Red ad is not captured for creator
1162 {
1163 'url': '__2ABJjxzNo',
1164 'info_dict': {
1165 'id': '__2ABJjxzNo',
1166 'ext': 'mp4',
1167 'duration': 266,
1168 'upload_date': '20100430',
1169 'uploader_id': 'deadmau5',
1170 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 1171 'creator': 'deadmau5',
1172 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 1173 'uploader': 'deadmau5',
1174 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 1175 'alt_title': 'Some Chords',
8bdd16b4 1176 },
1177 'expected_warnings': [
1178 'DASH manifest missing',
1179 ]
1180 },
067aa17e 1181 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
1182 {
1183 'url': 'lqQg6PlCWgI',
1184 'info_dict': {
1185 'id': 'lqQg6PlCWgI',
1186 'ext': 'mp4',
556dbe7f 1187 'duration': 6085,
90227264 1188 'upload_date': '20150827',
cbe2bd91 1189 'uploader_id': 'olympic',
ec85ded8 1190 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 1191 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
11f9be09 1192 'uploader': 'Olympics',
cbe2bd91
PH
1193 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1194 },
1195 'params': {
1196 'skip_download': 'requires avconv',
e52a40ab 1197 }
cbe2bd91 1198 },
6271f1ca
PH
1199 # Non-square pixels
1200 {
1201 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1202 'info_dict': {
1203 'id': '_b-2C3KPAM0',
1204 'ext': 'mp4',
1205 'stretched_ratio': 16 / 9.,
556dbe7f 1206 'duration': 85,
6271f1ca
PH
1207 'upload_date': '20110310',
1208 'uploader_id': 'AllenMeow',
ec85ded8 1209 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 1210 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 1211 'uploader': '孫ᄋᄅ',
6271f1ca
PH
1212 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1213 },
06b491eb
S
1214 },
1215 # url_encoded_fmt_stream_map is empty string
1216 {
1217 'url': 'qEJwOuvDf7I',
1218 'info_dict': {
1219 'id': 'qEJwOuvDf7I',
f57b7835 1220 'ext': 'webm',
06b491eb
S
1221 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1222 'description': '',
1223 'upload_date': '20150404',
1224 'uploader_id': 'spbelect',
1225 'uploader': 'Наблюдатели Петербурга',
1226 },
1227 'params': {
1228 'skip_download': 'requires avconv',
e323cf3f
S
1229 },
1230 'skip': 'This live event has ended.',
06b491eb 1231 },
067aa17e 1232 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
1233 {
1234 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1235 'info_dict': {
1236 'id': 'FIl7x6_3R5Y',
eb6793ba 1237 'ext': 'webm',
da77d856
S
1238 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1239 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 1240 'duration': 220,
da77d856
S
1241 'upload_date': '20150625',
1242 'uploader_id': 'dorappi2000',
ec85ded8 1243 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 1244 'uploader': 'dorappi2000',
eb6793ba 1245 'formats': 'mincount:31',
da77d856 1246 },
eb6793ba 1247 'skip': 'not actual anymore',
2ee8f5d8 1248 },
8a1a26ce
YCH
1249 # DASH manifest with segment_list
1250 {
1251 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1252 'md5': '8ce563a1d667b599d21064e982ab9e31',
1253 'info_dict': {
1254 'id': 'CsmdDsKjzN8',
1255 'ext': 'mp4',
17ee98e1 1256 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
1257 'uploader': 'Airtek',
1258 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1259 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1260 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1261 },
1262 'params': {
1263 'youtube_include_dash_manifest': True,
1264 'format': '135', # bestvideo
be49068d
S
1265 },
1266 'skip': 'This live event has ended.',
2ee8f5d8 1267 },
cf7e015f
S
1268 {
1269 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 1270 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 1271 'info_dict': {
545cc85d 1272 'id': 'jvGDaLqkpTg',
1273 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1274 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
1275 },
1276 'playlist': [{
1277 'info_dict': {
545cc85d 1278 'id': 'jvGDaLqkpTg',
cf7e015f 1279 'ext': 'mp4',
545cc85d 1280 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1281 'description': 'md5:e03b909557865076822aa169218d6a5d',
1282 'duration': 10643,
1283 'upload_date': '20161111',
1284 'uploader': 'Team PGP',
1285 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1286 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1287 },
1288 }, {
1289 'info_dict': {
545cc85d 1290 'id': '3AKt1R1aDnw',
cf7e015f 1291 'ext': 'mp4',
545cc85d 1292 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1293 'description': 'md5:e03b909557865076822aa169218d6a5d',
1294 'duration': 10991,
1295 'upload_date': '20161111',
1296 'uploader': 'Team PGP',
1297 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1298 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1299 },
1300 }, {
1301 'info_dict': {
545cc85d 1302 'id': 'RtAMM00gpVc',
cf7e015f 1303 'ext': 'mp4',
545cc85d 1304 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1305 'description': 'md5:e03b909557865076822aa169218d6a5d',
1306 'duration': 10995,
1307 'upload_date': '20161111',
1308 'uploader': 'Team PGP',
1309 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1310 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1311 },
1312 }, {
1313 'info_dict': {
545cc85d 1314 'id': '6N2fdlP3C5U',
cf7e015f 1315 'ext': 'mp4',
545cc85d 1316 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1317 'description': 'md5:e03b909557865076822aa169218d6a5d',
1318 'duration': 10990,
1319 'upload_date': '20161111',
1320 'uploader': 'Team PGP',
1321 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1322 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1323 },
1324 }],
1325 'params': {
1326 'skip_download': True,
1327 },
65c2fde2 1328 'skip': 'Not multifeed anymore',
cbaed4bb 1329 },
f9f49d87 1330 {
067aa17e 1331 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
1332 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1333 'info_dict': {
1334 'id': 'gVfLd0zydlo',
1335 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1336 },
1337 'playlist_count': 2,
be49068d 1338 'skip': 'Not multifeed anymore',
f9f49d87 1339 },
cbaed4bb 1340 {
2d3d2997 1341 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 1342 'only_matching': True,
0e49d9a6 1343 },
6d4fc66b 1344 {
2d3d2997 1345 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
1346 'only_matching': True,
1347 },
0e49d9a6 1348 {
067aa17e 1349 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 1350 # Also tests cut-off URL expansion in video description (see
067aa17e
S
1351 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1352 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
1353 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1354 'info_dict': {
1355 'id': 'lsguqyKfVQg',
1356 'ext': 'mp4',
1357 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
11f9be09 1358 'alt_title': 'Dark Walk',
0e49d9a6 1359 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 1360 'duration': 133,
0e49d9a6
LL
1361 'upload_date': '20151119',
1362 'uploader_id': 'IronSoulElf',
ec85ded8 1363 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 1364 'uploader': 'IronSoulElf',
11f9be09 1365 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1366 'track': 'Dark Walk',
1367 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
92bc97d3 1368 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
1369 },
1370 'params': {
1371 'skip_download': True,
1372 },
1373 },
61f92af1 1374 {
067aa17e 1375 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
1376 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1377 'only_matching': True,
1378 },
313dfc45
LL
1379 {
1380 # Video with yt:stretch=17:0
1381 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1382 'info_dict': {
1383 'id': 'Q39EVAstoRM',
1384 'ext': 'mp4',
1385 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1386 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1387 'upload_date': '20151107',
1388 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1389 'uploader': 'CH GAMER DROID',
1390 },
1391 'params': {
1392 'skip_download': True,
1393 },
be49068d 1394 'skip': 'This video does not exist.',
313dfc45 1395 },
201c1459 1396 {
1397 # Video with incomplete 'yt:stretch=16:'
1398 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1399 'only_matching': True,
1400 },
7caf9830
S
1401 {
1402 # Video licensed under Creative Commons
1403 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1404 'info_dict': {
1405 'id': 'M4gD1WSo5mA',
1406 'ext': 'mp4',
1407 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1408 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1409 'duration': 721,
7caf9830
S
1410 'upload_date': '20150127',
1411 'uploader_id': 'BerkmanCenter',
ec85ded8 1412 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 1413 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
1414 'license': 'Creative Commons Attribution license (reuse allowed)',
1415 },
1416 'params': {
1417 'skip_download': True,
1418 },
1419 },
fd050249
S
1420 {
1421 # Channel-like uploader_url
1422 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1423 'info_dict': {
1424 'id': 'eQcmzGIKrzg',
1425 'ext': 'mp4',
1426 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 1427 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1428 'duration': 4060,
fd050249 1429 'upload_date': '20151119',
eb6793ba 1430 'uploader': 'Bernie Sanders',
fd050249 1431 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1432 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
1433 'license': 'Creative Commons Attribution license (reuse allowed)',
1434 },
1435 'params': {
1436 'skip_download': True,
1437 },
1438 },
040ac686
S
1439 {
1440 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1441 'only_matching': True,
7f29cf54
S
1442 },
1443 {
067aa17e 1444 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1445 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1446 'only_matching': True,
6496ccb4
S
1447 },
1448 {
1449 # Rental video preview
1450 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1451 'info_dict': {
1452 'id': 'uGpuVWrhIzE',
1453 'ext': 'mp4',
1454 'title': 'Piku - Trailer',
1455 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1456 'upload_date': '20150811',
1457 'uploader': 'FlixMatrix',
1458 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1459 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1460 'license': 'Standard YouTube License',
1461 },
1462 'params': {
1463 'skip_download': True,
1464 },
eb6793ba 1465 'skip': 'This video is not available.',
022a5d66 1466 },
12afdc2a
S
1467 {
1468 # YouTube Red video with episode data
1469 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1470 'info_dict': {
1471 'id': 'iqKdEhx-dD4',
1472 'ext': 'mp4',
1473 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1474 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1475 'duration': 2085,
12afdc2a
S
1476 'upload_date': '20170118',
1477 'uploader': 'Vsauce',
1478 'uploader_id': 'Vsauce',
1479 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1480 'series': 'Mind Field',
1481 'season_number': 1,
1482 'episode_number': 1,
1483 },
1484 'params': {
1485 'skip_download': True,
1486 },
1487 'expected_warnings': [
1488 'Skipping DASH manifest',
1489 ],
1490 },
c7121fa7
S
1491 {
1492 # The following content has been identified by the YouTube community
1493 # as inappropriate or offensive to some audiences.
1494 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1495 'info_dict': {
1496 'id': '6SJNVb0GnPI',
1497 'ext': 'mp4',
1498 'title': 'Race Differences in Intelligence',
1499 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1500 'duration': 965,
1501 'upload_date': '20140124',
1502 'uploader': 'New Century Foundation',
1503 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1504 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1505 },
1506 'params': {
1507 'skip_download': True,
1508 },
545cc85d 1509 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1510 },
022a5d66
S
1511 {
1512 # itag 212
1513 'url': '1t24XAntNCY',
1514 'only_matching': True,
fd5c4aab
S
1515 },
1516 {
1517 # geo restricted to JP
1518 'url': 'sJL6WA-aGkQ',
1519 'only_matching': True,
1520 },
cd5a74a2
S
1521 {
1522 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1523 'only_matching': True,
1524 },
bc2ca1bb 1525 {
1526 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1527 'only_matching': True,
1528 },
1529 {
1530 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1531 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1532 'only_matching': True,
1533 },
825cd268
RA
1534 {
1535 # DRM protected
1536 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1537 'only_matching': True,
4fe54c12
S
1538 },
1539 {
1540 # Video with unsupported adaptive stream type formats
1541 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1542 'info_dict': {
1543 'id': 'Z4Vy8R84T1U',
1544 'ext': 'mp4',
1545 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1546 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1547 'duration': 433,
1548 'upload_date': '20130923',
1549 'uploader': 'Amelia Putri Harwita',
1550 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1551 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1552 'formats': 'maxcount:10',
1553 },
1554 'params': {
1555 'skip_download': True,
1556 'youtube_include_dash_manifest': False,
1557 },
5429d6a9 1558 'skip': 'not actual anymore',
5caabd3c 1559 },
1560 {
822b9d9c 1561 # Youtube Music Auto-generated description
5caabd3c 1562 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1563 'info_dict': {
1564 'id': 'MgNrAu2pzNs',
1565 'ext': 'mp4',
1566 'title': 'Voyeur Girl',
1567 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1568 'upload_date': '20190312',
5429d6a9
S
1569 'uploader': 'Stephen - Topic',
1570 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1571 'artist': 'Stephen',
1572 'track': 'Voyeur Girl',
1573 'album': 'it\'s too much love to know my dear',
1574 'release_date': '20190313',
1575 'release_year': 2019,
1576 },
1577 'params': {
1578 'skip_download': True,
1579 },
1580 },
66b48727
RA
1581 {
1582 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1583 'only_matching': True,
1584 },
011e75e6
S
1585 {
1586 # invalid -> valid video id redirection
1587 'url': 'DJztXj2GPfl',
1588 'info_dict': {
1589 'id': 'DJztXj2GPfk',
1590 'ext': 'mp4',
1591 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1592 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1593 'upload_date': '20090125',
1594 'uploader': 'Prochorowka',
1595 'uploader_id': 'Prochorowka',
1596 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1597 'artist': 'Panjabi MC',
1598 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1599 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1600 },
1601 'params': {
1602 'skip_download': True,
1603 },
545cc85d 1604 'skip': 'Video unavailable',
ea74e00b
DP
1605 },
1606 {
1607 # empty description results in an empty string
1608 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1609 'info_dict': {
1610 'id': 'x41yOUIvK2k',
1611 'ext': 'mp4',
1612 'title': 'IMG 3456',
1613 'description': '',
1614 'upload_date': '20170613',
1615 'uploader_id': 'ElevageOrVert',
1616 'uploader': 'ElevageOrVert',
1617 },
1618 'params': {
1619 'skip_download': True,
1620 },
1621 },
a0566bbf 1622 {
29f7c58a 1623 # with '};' inside yt initial data (see [1])
1624 # see [2] for an example with '};' inside ytInitialPlayerResponse
1625 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1626 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1627 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1628 'info_dict': {
1629 'id': 'CHqg6qOn4no',
1630 'ext': 'mp4',
1631 'title': 'Part 77 Sort a list of simple types in c#',
1632 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1633 'upload_date': '20130831',
1634 'uploader_id': 'kudvenkat',
1635 'uploader': 'kudvenkat',
1636 },
1637 'params': {
1638 'skip_download': True,
1639 },
1640 },
29f7c58a 1641 {
1642 # another example of '};' in ytInitialData
1643 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1644 'only_matching': True,
1645 },
1646 {
1647 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1648 'only_matching': True,
1649 },
545cc85d 1650 {
cc2db878 1651 # https://github.com/ytdl-org/youtube-dl/pull/28094
1652 'url': 'OtqTfy26tG0',
1653 'info_dict': {
1654 'id': 'OtqTfy26tG0',
1655 'ext': 'mp4',
1656 'title': 'Burn Out',
1657 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1658 'upload_date': '20141120',
1659 'uploader': 'The Cinematic Orchestra - Topic',
1660 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1661 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1662 'artist': 'The Cinematic Orchestra',
1663 'track': 'Burn Out',
1664 'album': 'Every Day',
1665 'release_data': None,
1666 'release_year': None,
1667 },
1668 'params': {
1669 'skip_download': True,
1670 },
545cc85d 1671 },
bc2ca1bb 1672 {
1673 # controversial video, only works with bpctr when authenticated with cookies
1674 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1675 'only_matching': True,
1676 },
a1a7907b 1677 {
1678 # controversial video, requires bpctr/contentCheckOk
1679 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
1680 'info_dict': {
1681 'id': 'SZJvDhaSDnc',
1682 'ext': 'mp4',
1683 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
1684 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
1685 'uploader': 'CBS This Morning',
11f9be09 1686 'uploader_id': 'CBSThisMorning',
a1a7907b 1687 'upload_date': '20140716',
1688 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7'
1689 }
1690 },
f7ad7160 1691 {
1692 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1693 'url': 'cBvYw8_A0vQ',
1694 'info_dict': {
1695 'id': 'cBvYw8_A0vQ',
1696 'ext': 'mp4',
1697 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1698 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1699 'upload_date': '20201120',
1700 'uploader': 'Walk around Japan',
1701 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1702 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1703 },
1704 'params': {
1705 'skip_download': True,
1706 },
0fb983f6 1707 }, {
1708 # Has multiple audio streams
1709 'url': 'WaOKSUlf4TM',
1710 'only_matching': True
9297939e 1711 }, {
1712 # Requires Premium: has format 141 when requested using YTM url
1713 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1714 'only_matching': True
1715 }, {
120916da 1716 # multiple subtitles with same lang_code
1717 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1718 'only_matching': True,
109dd3b2 1719 }, {
1720 # Force use android client fallback
1721 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1722 'info_dict': {
1723 'id': 'YOelRv7fMxY',
11f9be09 1724 'title': 'DIGGING A SECRET TUNNEL Part 1',
109dd3b2 1725 'ext': '3gp',
1726 'upload_date': '20210624',
1727 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1728 'uploader': 'colinfurze',
11f9be09 1729 'uploader_id': 'colinfurze',
109dd3b2 1730 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
11f9be09 1731 'description': 'md5:b5096f56af7ccd7a555c84db81738b22'
109dd3b2 1732 },
1733 'params': {
1734 'format': '17', # 3gp format available on android
1735 'extractor_args': {'youtube': {'player_client': ['android']}},
1736 },
120916da 1737 },
109dd3b2 1738 {
1739 # Skip download of additional client configs (remix client config in this case)
1740 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1741 'only_matching': True,
1742 'params': {
1743 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1744 },
8fc54b12 1745 }, {
1746 # shorts
1747 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
1748 'only_matching': True,
9222c381 1749 }, {
1750 'note': 'Storyboards',
1751 'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',
1752 'info_dict': {
1753 'id': '5KLPxDtMqe8',
1754 'ext': 'mhtml',
1755 'format_id': 'sb0',
1756 'title': 'Your Brain is Plastic',
1757 'uploader_id': 'scishow',
1758 'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',
1759 'upload_date': '20140324',
1760 'uploader': 'SciShow',
1761 }, 'params': {'format': 'mhtml', 'skip_download': True}
1762 }
2eb88d95
PH
1763 ]
1764
@classmethod
def suitable(cls, url):
    """Return False for URLs with a non-empty ``list`` query value, else defer to the parent class."""
    # Function-scoped import, as in the original block
    from ..utils import parse_qs

    list_values = parse_qs(url).get('list', [None])
    if list_values[0]:
        return False
    return super(YoutubeIE, cls).suitable(url)
1773
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Run the parent initialiser, then create the two empty per-instance caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # _code_cache: player id -> downloaded player source (see _load_player)
    # _player_cache: tuple key -> extracted signature/nsig function or decrypted value
    self._code_cache, self._player_cache = {}, {}
e0df6211 1778
adbc4ec4 1779 def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data):
adbc4ec4
THD
1780 lock = threading.Lock()
1781
1782 is_live = True
185bf310 1783 start_time = time.time()
adbc4ec4
THD
1784 formats = [f for f in formats if f.get('is_from_start')]
1785
185bf310 1786 def refetch_manifest(format_id, delay):
1787 nonlocal formats, start_time, is_live
1788 if time.time() <= start_time + delay:
adbc4ec4
THD
1789 return
1790
1791 _, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
1792 video_details = traverse_obj(
1793 prs, (..., 'videoDetails'), expected_type=dict, default=[])
1794 microformats = traverse_obj(
1795 prs, (..., 'microformat', 'playerMicroformatRenderer'),
1796 expected_type=dict, default=[])
1797 _, is_live, _, formats = self._list_formats(video_id, microformats, video_details, prs, player_url)
185bf310 1798 start_time = time.time()
adbc4ec4 1799
185bf310 1800 def mpd_feed(format_id, delay):
adbc4ec4
THD
1801 """
1802 @returns (manifest_url, manifest_stream_number, is_live) or None
1803 """
1804 with lock:
185bf310 1805 refetch_manifest(format_id, delay)
adbc4ec4
THD
1806
1807 f = next((f for f in formats if f['format_id'] == format_id), None)
1808 if not f:
185bf310 1809 if not is_live:
1810 self.to_screen(f'{video_id}: Video is no longer live')
1811 else:
1812 self.report_warning(
1813 f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}')
adbc4ec4
THD
1814 return None
1815 return f['manifest_url'], f['manifest_stream_number'], is_live
1816
1817 for f in formats:
1818 f['protocol'] = 'http_dash_segments_generator'
1819 f['fragments'] = functools.partial(
1820 self._live_dash_fragments, f['format_id'], live_start_time, mpd_feed)
1821
1822 def _live_dash_fragments(self, format_id, live_start_time, mpd_feed, ctx):
1823 FETCH_SPAN, MAX_DURATION = 5, 432000
1824
1825 mpd_url, stream_number, is_live = None, None, True
1826
1827 begin_index = 0
1828 download_start_time = ctx.get('start') or time.time()
1829
1830 lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION
1831 if lack_early_segments:
1832 self.report_warning(bug_reports_message(
1833 'Starting download from the last 120 hours of the live stream since '
1834 'YouTube does not have data before that. If you think this is wrong,'), only_once=True)
1835 lack_early_segments = True
1836
1837 known_idx, no_fragment_score, last_segment_url = begin_index, 0, None
1838 fragments, fragment_base_url = None, None
1839
1840 def _extract_sequence_from_mpd(refresh_sequence):
1841 nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
1842 # Obtain from MPD's maximum seq value
1843 old_mpd_url = mpd_url
185bf310 1844 last_error = ctx.pop('last_error', None)
1845 expire_fast = last_error and isinstance(last_error, compat_HTTPError) and last_error.code == 403
1846 mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
1847 or (mpd_url, stream_number, False))
1848 if not refresh_sequence:
1849 if expire_fast and not is_live:
1850 return False, last_seq
1851 elif old_mpd_url == mpd_url:
1852 return True, last_seq
adbc4ec4
THD
1853 try:
1854 fmts, _ = self._extract_mpd_formats_and_subtitles(
1855 mpd_url, None, note=False, errnote=False, fatal=False)
1856 except ExtractorError:
1857 fmts = None
1858 if not fmts:
1859 no_fragment_score += 1
1860 return False, last_seq
1861 fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)
1862 fragments = fmt_info['fragments']
1863 fragment_base_url = fmt_info['fragment_base_url']
1864 assert fragment_base_url
1865
1866 _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
1867 return True, _last_seq
1868
1869 while is_live:
1870 fetch_time = time.time()
1871 if no_fragment_score > 30:
1872 return
1873 if last_segment_url:
1874 # Obtain from "X-Head-Seqnum" header value from each segment
1875 try:
1876 urlh = self._request_webpage(
1877 last_segment_url, None, note=False, errnote=False, fatal=False)
1878 except ExtractorError:
1879 urlh = None
1880 last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))
1881 if last_seq is None:
1882 no_fragment_score += 1
1883 last_segment_url = None
1884 continue
1885 else:
185bf310 1886 should_continue, last_seq = _extract_sequence_from_mpd(True)
1887 if not should_continue:
adbc4ec4
THD
1888 continue
1889
1890 if known_idx > last_seq:
1891 last_segment_url = None
1892 continue
1893
1894 last_seq += 1
1895
1896 if begin_index < 0 and known_idx < 0:
1897 # skip from the start when it's negative value
1898 known_idx = last_seq + begin_index
1899 if lack_early_segments:
1900 known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))
1901 try:
1902 for idx in range(known_idx, last_seq):
1903 # do not update sequence here or you'll get skipped some part of it
185bf310 1904 should_continue, _ = _extract_sequence_from_mpd(False)
1905 if not should_continue:
adbc4ec4
THD
1906 known_idx = idx - 1
1907 raise ExtractorError('breaking out of outer loop')
1908 last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)
1909 yield {
1910 'url': last_segment_url,
1911 }
1912 if known_idx == last_seq:
1913 no_fragment_score += 5
1914 else:
1915 no_fragment_score = 0
1916 known_idx = last_seq
1917 except ExtractorError:
1918 continue
1919
1920 time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))
1921
def _extract_player_url(self, *ytcfgs, webpage=None):
    """
    Return the player JS URL found in the given ytcfg dicts as an absolute URL.

    Looks at 'PLAYER_JS_URL' first and then at the 'jsUrl' entries under
    'WEB_PLAYER_CONTEXT_CONFIGS'. Returns None when nothing is found.
    `webpage` is accepted but not used by this block.
    """
    player_url = traverse_obj(
        ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
        get_all=False, expected_type=compat_str)
    if not player_url:
        return None
    # Protocol-relative URL: only the scheme is missing
    if player_url.startswith('//'):
        return 'https:' + player_url
    if re.match(r'https?://', player_url):
        return player_url
    # Anything else is resolved against the YouTube origin
    return compat_urlparse.urljoin('https://www.youtube.com', player_url)
1934
def _download_player_url(self, video_id, fatal=False):
    """
    Build the player base.js URL from the player version found in the iframe API JS.

    Returns None when the download or the version search yields nothing
    (possible when `fatal` is False).
    """
    iframe_api_js = self._download_webpage(
        'https://www.youtube.com/iframe_api',
        note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
    if not iframe_api_js:
        return None
    player_version = self._search_regex(
        r'player\\?/([0-9a-fA-F]{8})\\?/', iframe_api_js, 'player version', fatal=fatal)
    if not player_version:
        return None
    return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'
1944
60064c53
PH
1945 def _signature_cache_id(self, example_sig):
1946 """ Return a string representation of a signature """
78caa52a 1947 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
60064c53 1948
e40c758c
S
@classmethod
def _extract_player_info(cls, player_url):
    """
    Return the 'id' group of the first pattern in cls._PLAYER_INFO_RE that matches player_url.

    Raises ExtractorError when no pattern matches.
    """
    # Lazily try the patterns in order; match objects are always truthy
    id_m = next(filter(None, (
        re.search(player_re, player_url) for player_re in cls._PLAYER_INFO_RE)), None)
    if id_m is None:
        raise ExtractorError('Cannot identify player %r' % player_url)
    return id_m.group('id')
e40c758c 1958
def _load_player(self, video_id, player_url, fatal=True):
    """
    Return the player source for player_url, downloading it on a cache miss.

    Results are kept in self._code_cache keyed by player id; an empty
    download result is not cached and yields None.
    """
    player_id = self._extract_player_info(player_url)
    if player_id in self._code_cache:
        return self._code_cache[player_id]

    code = self._download_webpage(
        player_url, video_id, fatal=fatal,
        note='Downloading player ' + player_id,
        errnote='Download of %s failed' % player_url)
    if not code:
        return None
    self._code_cache[player_id] = code
    return code
109dd3b2 1969
def _extract_signature_function(self, video_id, player_url, example_sig):
    """
    Return a callable that maps an encrypted signature string to its decrypted form.

    A spec stored in the 'youtube-sigfuncs' cache section is used when
    present; otherwise the function is parsed out of the player JS and the
    index spec derived from it is stored for later use.
    """
    player_id = self._extract_player_info(player_url)

    # Read from filesystem cache
    func_id = 'js_%s_%s' % (
        player_id, self._signature_cache_id(example_sig))
    assert os.path.basename(func_id) == func_id

    stored_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
    if stored_spec is not None:
        # The spec is a list of source indices, one per output character
        return lambda s: ''.join(s[i] for i in stored_spec)

    code = self._load_player(video_id, player_url)
    if not code:
        return None

    res = self._parse_sig_js(code)

    # Run the function over a string of distinct characters so each output
    # character reveals the input position it came from
    probe = ''.join(map(compat_chr, range(len(example_sig))))
    fresh_spec = [ord(c) for c in res(probe)]

    self._downloader.cache.store('youtube-sigfuncs', func_id, fresh_spec)
    return res
83799698 1992
def _print_sig_code(self, func, example_sig):
    """
    Print Python source equivalent to signature function `func`.

    Does nothing unless the 'youtube_print_sig_code' param is set. The
    source is derived by running `func` over a probe string as long as
    `example_sig` and expressing the resulting index list as slices.
    """
    if not self.get_param('youtube_print_sig_code'):
        return

    def gen_sig_code(idxs):
        def _genslice(start, end, step):
            stop = end + step
            return 's[%s%s%s]' % (
                str(start) if start != 0 else '',
                (':%d' % stop) if stop >= 0 else ':',
                (':%d' % step) if step != 1 else '')

        run_step = None
        # Quelch pyflakes warnings - run_start will be set when run_step is set
        run_start = '(Never used)'
        for prev, cur in zip(idxs[:-1], idxs[1:]):
            delta = cur - prev
            if run_step is None:
                if delta in (-1, 1):
                    # A run of consecutive indices starts at `prev`
                    run_step, run_start = delta, prev
                else:
                    yield 's[%d]' % prev
            elif delta != run_step:
                # The current run ends at `prev`
                yield _genslice(run_start, prev, run_step)
                run_step = None
        if run_step is None:
            yield 's[%d]' % cur
        else:
            yield _genslice(run_start, cur, run_step)

    probe = ''.join(map(compat_chr, range(len(example_sig))))
    cache_spec = [ord(c) for c in func(probe)]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    part_lengths = ', '.join(compat_str(len(p)) for p in example_sig.split('.'))
    signature_id_tuple = '(%s)' % (part_lengths)
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 2034
e0df6211
PH
def _parse_sig_js(self, jscode):
    """
    Find the signature function in the player JS and return a Python callable for it.

    The returned callable takes the signature string and passes it to the
    interpreted JS function as its single argument.
    """
    name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        name_patterns, jscode, 'Initial JS player signature function name', group='sig')

    initial_function = JSInterpreter(jscode).extract_function(funcname)

    def decrypt(s):
        return initial_function([s])

    return decrypt
2058
def _decrypt_signature(self, s, video_id, player_url):
    """Turn the encrypted s field into a working signature"""

    if player_url is None:
        raise ExtractorError('Cannot decrypt signature without player_url')

    try:
        # Functions are cached per (player URL, signature shape)
        cache_key = (player_url, self._signature_cache_id(s))
        try:
            func = self._player_cache[cache_key]
        except KeyError:
            func = self._player_cache[cache_key] = self._extract_signature_function(
                video_id, player_url, s)
        self._print_sig_code(func, s)
        return func(s)
    except Exception as e:
        # Any failure is re-raised as ExtractorError carrying the traceback text
        raise ExtractorError('Signature extraction failed: ' + traceback.format_exc(), cause=e)
2077
def _decrypt_nsig(self, s, video_id, player_url):
    """Turn the encrypted n field into a working signature"""
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')
    # Make the player URL absolute before it is used as part of a cache key
    if player_url.startswith('//'):
        player_url = 'https:' + player_url
    elif re.match(r'https?://', player_url) is None:
        player_url = compat_urlparse.urljoin(
            'https://www.youtube.com', player_url)

    cache = self._player_cache
    value_key = ('nsig_value', s)
    if value_key in cache:
        # This exact n value has been decrypted before
        return cache[value_key]

    try:
        func_key = ('nsig', player_url)
        if func_key not in cache:
            cache[func_key] = self._extract_n_function(video_id, player_url)
        decrypted = cache[value_key] = cache[func_key](s)
        self.write_debug(f'Decrypted nsig {s} => {decrypted}')
        return decrypted
    except Exception as e:
        raise ExtractorError(traceback.format_exc(), cause=e, video_id=video_id)
404f611f 2102
def _extract_n_function_name(self, jscode):
    """Return the name of the n function as found in the player JS."""
    name_patterns = (
        r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]{3})\([a-zA-Z0-9]\)',
    )
    return self._search_regex(
        name_patterns, jscode, 'Initial JS player n function name', group='nfunc')
2107
def _extract_n_function(self, video_id, player_url):
    """
    Return a callable that applies the player's n function to a string.

    The function code is taken from the 'youtube-nsig' cache section when
    available; otherwise it is extracted from the player JS and stored there.
    """
    player_id = self._extract_player_info(player_url)
    func_code = self._downloader.cache.load('youtube-nsig', player_id)

    if not func_code:
        jscode = self._load_player(video_id, player_url)
        funcname = self._extract_n_function_name(jscode)
        jsi = JSInterpreter(jscode)
        func_code = jsi.extract_function_code(funcname)
        self._downloader.cache.store('youtube-nsig', player_id, func_code)
    else:
        jsi = JSInterpreter(func_code)

    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    def run_n_function(s):
        # The function is rebuilt from its code on every call, as in the original lambda
        return jsi.extract_function_from_code(*func_code)([s])

    return run_n_function
e0df6211 2125
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    The 'STS' entry of `ytcfg` is preferred; the player JS is searched only
    when that yields nothing. Without a player_url this raises
    ExtractorError if `fatal`, otherwise it warns and returns None.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return None

    code = self._load_player(video_id, player_url, fatal=fatal)
    if not code:
        return sts
    return int_or_none(self._search_regex(
        r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
        'JS player signature timestamp', group='sts', fatal=fatal))
2149
def _mark_watched(self, video_id, player_responses):
    """
    Request the playback-tracking URL from the player responses so the video is marked as watched.

    Warns and returns when no usable URL is present. The request itself is
    non-fatal. Returns None.
    """
    playback_url = get_first(
        player_responses, ('playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
        expected_type=url_or_none)
    if not playback_url:
        self.report_warning('Unable to mark watched')
        return
    parsed_playback_url = compat_urlparse.urlparse(playback_url)
    qs = compat_urlparse.parse_qs(parsed_playback_url.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    # randrange(256) yields 0..255, so masking with 63 selects each of the 64
    # symbols equally often (randint(0, 256) would also yield 256, which maps to index 0)
    cpn = ''.join(CPN_ALPHABET[random.randrange(256) & 63] for _ in range(16))

    qs.update({
        'ver': ['2'],
        'cpn': [cpn],
    })
    playback_url = compat_urlparse.urlunparse(
        parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
2175
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """Return a list of YouTube embed URLs / video ids found in an HTML page."""
    # Embedded YouTube player
    embed_re = r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(?:\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
        (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1'''
    entries = [unescapeHTML(mobj.group('url')) for mobj in re.finditer(embed_re, webpage)]

    # lazyYT YouTube embed
    lazy_ids = re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
    entries.extend(unescapeHTML(lazy_id) for lazy_id in lazy_ids)

    # Wordpress "YouTube Video Importer" plugin
    importer_re = r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)'''
    # findall returns one tuple per match; the last group is the video id
    entries.extend(groups[-1] for groups in re.findall(importer_re, webpage))

    return entries
2207
@staticmethod
def _extract_url(webpage):
    """Return the first entry produced by YoutubeIE._extract_urls, or None if there is none."""
    return next(iter(YoutubeIE._extract_urls(webpage)), None)
2212
97665381
PH
@classmethod
def extract_id(cls, url):
    """
    Return the 'id' group of cls._VALID_URL matched (in verbose mode) against url.

    Raises ExtractorError when the URL does not match.
    """
    mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
    if mobj is not None:
        return mobj.group('id')
    raise ExtractorError('Invalid URL: %s' % url)
c5e8d7af 2219
def _extract_chapters_from_json(self, data, duration):
    """Build the chapter list from the chaptered player bar renderer in `data`."""
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
    )
    chapter_list = traverse_obj(data, chapters_path, expected_type=list)

    def chapter_time(chapter):
        # timeRangeStartMillis is divided by 1000 via scale
        return float_or_none(
            traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000)

    def chapter_title(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    return self._extract_chapters(
        chapter_list, chapter_time=chapter_time, chapter_title=chapter_title, duration=duration)
2234
def _extract_chapters_from_engagement_panel(self, data, duration):
    """
    Build the chapter list from the macro markers lists in the engagement panels of `data`.

    The first panel that yields a non-empty chapter list wins; an empty list
    is returned when none does.
    """
    content_list = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    for contents in content_list:
        chapters = self._extract_chapters(
            traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
            chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []
2250
2251 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
84213ea8 2252 chapters = []
7c365c21 2253 last_chapter = {'start_time': 0}
2254 for idx, chapter in enumerate(chapter_list or []):
2255 title = chapter_title(chapter)
84213ea8
S
2256 start_time = chapter_time(chapter)
2257 if start_time is None:
2258 continue
7c365c21 2259 last_chapter['end_time'] = start_time
2260 if start_time < last_chapter['start_time']:
2261 if idx == 1:
2262 chapters.pop()
2263 self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])
2264 else:
2265 self.report_warning(f'Invalid start time for chapter "{title}"')
2266 continue
2267 last_chapter = {'start_time': start_time, 'title': title}
2268 chapters.append(last_chapter)
2269 last_chapter['end_time'] = duration
84213ea8
S
2270 return chapters
2271
def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
    """
    Search `webpage` for `regex` and parse the captured text as JSON (non-fatally).

    The pattern followed by self._YT_INITIAL_BOUNDARY_RE is tried before the
    bare pattern; '{}' is parsed when neither matches.
    """
    patterns = (
        r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
        regex)
    raw_json = self._search_regex(patterns, webpage, name, default='{}')
    return self._parse_json(raw_json, video_id, fatal=False)
84213ea8 2276
a1c5d2ca
M
2277 def _extract_comment(self, comment_renderer, parent=None):
2278 comment_id = comment_renderer.get('commentId')
2279 if not comment_id:
2280 return
fe93e2c4 2281
052e1350 2282 text = self._get_text(comment_renderer, 'contentText')
fe93e2c4 2283
49bd8c66 2284 # note: timestamp is an estimate calculated from the current time and time_text
f3aa3c3f 2285 timestamp, time_text = self._extract_time_text(comment_renderer, 'publishedTimeText')
052e1350 2286 author = self._get_text(comment_renderer, 'authorText')
a1c5d2ca
M
2287 author_id = try_get(comment_renderer,
2288 lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
fe93e2c4 2289
49bd8c66 2290 votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
2291 lambda x: x['likeCount']), compat_str)) or 0
a1c5d2ca
M
2292 author_thumbnail = try_get(comment_renderer,
2293 lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)
2294
2295 author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
97524332 2296 is_favorited = 'creatorHeart' in (try_get(
2297 comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
a1c5d2ca
M
2298 return {
2299 'id': comment_id,
2300 'text': text,
d92f5d5a 2301 'timestamp': timestamp,
a1c5d2ca
M
2302 'time_text': time_text,
2303 'like_count': votes,
97524332 2304 'is_favorited': is_favorited,
a1c5d2ca
M
2305 'author': author,
2306 'author_id': author_id,
2307 'author_thumbnail': author_thumbnail,
2308 'author_is_uploader': author_is_uploader,
2309 'parent': parent or 'root'
2310 }
2311
def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """Generator over comment dicts reachable from ``root_continuation_data``.

    Pages through the 'next' endpoint by following continuations. It recurses
    into reply threads (with ``parent`` set to the parent comment id) and
    shares ``tracker`` between the recursive calls so the counters cover the
    whole comment section.
    """

    def get_single_config_arg(name):
        return self._configuration_arg(name, [''])[0]

    def extract_header(contents):
        """Return the continuation of the requested sort order, if any."""
        _continuation = None
        for content in contents:
            header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
            expected_comment_count = parse_count(self._get_text(
                header_renderer, 'countText', 'commentsCount', max_runs=1))

            if expected_comment_count:
                tracker['est_total'] = expected_comment_count
                self.to_screen(f'Downloading ~{expected_comment_count} comments')
            comment_sort_index = int(get_single_config_arg('comment_sort') != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index],
                dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = (
                self._extract_continuation_ep_data(sort_continuation_ep)
                or self._extract_continuation(sort_menu_item))
            if not _continuation:
                continue

            sort_text = str_or_none(sort_menu_item.get('title'))
            if not sort_text:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text.lower())
            break
        return _continuation

    def extract_thread(contents):
        """Yield the comments of one page, followed by their replies."""
        if not parent:
            tracker['current_page_thread'] = 0
        for content in contents:
            if not parent and tracker['total_parent_comments'] >= max_parents:
                # A bare None tells the consuming loop below to stop
                yield
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            comment_renderer = get_first(
                (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                expected_type=dict, default={})

            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue

            tracker['running_total'] += 1
            counter_key = 'total_reply_comments' if parent else 'total_parent_comments'
            tracker[counter_key] += 1
            yield comment

            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)
            if not comment_replies_renderer:
                continue

            tracker['current_page_thread'] += 1
            comment_entries_iter = self._comment_entries(
                comment_replies_renderer, ytcfg, video_id,
                parent=comment.get('id'), tracker=tracker)
            reply_limit = min(
                max_replies_per_thread,
                max(0, max_replies - tracker['total_reply_comments']))
            yield from itertools.islice(comment_entries_iter, reply_limit)

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = {
            'running_total': 0,
            'est_total': 0,
            'current_page_thread': 0,
            'total_parent_comments': 0,
            'total_reply_comments': 0,
        }

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
    if max_depth:
        self._downloader.deprecation_warning(
            '[youtube] max_comment_depth extractor argument is deprecated. Set max replies in the max-comments extractor argument instead.')
    if max_depth == 1 and parent:
        return

    # Up to four limits may be given; missing ones are padded with '' and
    # become sys.maxsize through the int_or_none default
    max_comments, max_parents, max_replies, max_replies_per_thread, *_ = (
        int_or_none(p, default=sys.maxsize)
        for p in self._configuration_arg('max_comments', ) + [''] * 4)

    continuation = self._extract_continuation(root_continuation_data)
    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent:
        self.report_warning(message, video_id=video_id)

    response = None
    is_first_continuation = parent is None

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        comment_prog_str = f"({tracker['running_total']}/{tracker['est_total']})"
        if page_num != 0:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)
        elif is_first_continuation:
            note_prefix = 'Downloading comment section API JSON'
        else:
            note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                tracker['current_page_thread'], comment_prog_str)

        response = self._extract_response(
            item_id=None, query=continuation,
            ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
            check_get_keys='onResponseReceivedEndpoints')

        continuation_contents = traverse_obj(
            response, 'onResponseReceivedEndpoints', expected_type=list, default=[])

        continuation = None
        for continuation_section in continuation_contents:
            continuation_items = traverse_obj(
                continuation_section,
                (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
                get_all=False, expected_type=list) or []
            if is_first_continuation:
                # The first response only carries the header (sort menu)
                continuation = extract_header(continuation_items)
                is_first_continuation = False
                if continuation:
                    break
                continue

            for entry in extract_thread(continuation_items):
                if not entry:
                    return
                yield entry
            continuation = self._extract_continuation({'contents': continuation_items})
            if continuation:
                break
a1c5d2ca 2448
def _get_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction"""
    def _real_comment_extract(contents):
        # Pick the first item section that is identified as the comment section
        renderer = None
        for item in traverse_obj(contents, (..., 'itemSectionRenderer'), default={}):
            if item.get('sectionIdentifier') == 'comment-item-section':
                renderer = item
                break
        yield from self._comment_entries(renderer, ytcfg, video_id)

    # First value of the 'max_comments' extractor argument caps the total
    first_limit = self._configuration_arg('max_comments', [''])[0]
    max_comments = int_or_none(first_limit)
    return itertools.islice(_real_comment_extract(contents), 0, max_comments)
a1c5d2ca 2459
109dd3b2 2460 @staticmethod
99e9e001 2461 def _get_checkok_params():
2462 return {'contentCheckOk': True, 'racyCheckOk': True}
2463
@classmethod
def _generate_player_context(cls, sts=None):
    """Build the playback-context part of a player request.

    ``sts`` is stored as 'signatureTimestamp' when it is not None. The
    check-ok parameters are merged into the top level of the result.
    """
    playback_context = {'html5Preference': 'HTML5_PREF_WANTS'}
    if sts is not None:
        playback_context['signatureTimestamp'] = sts

    player_context = {
        'playbackContext': {
            'contentPlaybackContext': playback_context
        },
    }
    player_context.update(cls._get_checkok_params())
    return player_context
2477
e7e94f2a
D
@staticmethod
def _is_agegated(player_response):
    """Return True if ``player_response`` looks age-gated.

    A response counts as age-gated when its playabilityStatus has a
    'desktopLegacyAgeGateReason', or when its 'status' or 'reason' contains
    one of the known age-gate markers.
    """
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    # The markers are lower-case while status values are upper-case constants
    # (compare the 'UNPLAYABLE' check in _is_unplayable), so the comparison has
    # to be case-insensitive. Non-string values are skipped, not searched.
    return any(
        expected in reason.lower()
        for reason in reasons if isinstance(reason, str)
        for expected in AGE_GATE_REASONS)
2489
@staticmethod
def _is_unplayable(player_response):
    """Return True if the playability status of the response is 'UNPLAYABLE'."""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'
9275f62c 2493
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr):
    """Request the 'player' endpoint for ``video_id`` as the given client.

    The signature timestamp is only looked up when ``player_url`` is set.
    Returns the response, or None if the response is falsy.
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)

    sts = None
    if player_url:
        sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)

    headers = self.generate_api_headers(
        ytcfg=player_ytcfg, account_syncid=syncid,
        session_index=session_index, default_client=client)

    yt_query = {'videoId': video_id, **self._generate_player_context(sts)}
    client_label = client.replace('_', ' ').strip()
    response = self._extract_response(
        item_id=video_id, ep='player', query=yt_query,
        ytcfg=player_ytcfg, headers=headers, fatal=True,
        default_client=client,
        note=f'Downloading {client_label} player API JSON')
    return response or None
2510
def _get_requested_clients(self, url, smuggled_data):
    """Resolve the 'player_client' extractor argument into a list of client names.

    Accepts explicit client names plus the keywords 'default' and 'all';
    unknown names are skipped with a warning. When nothing valid was
    requested the default clients are used. For music URLs the '<client>_music'
    variant of every requested client is added where such a client exists.
    The result is passed through orderedSet.
    """
    default = ['android', 'web']
    # Clients whose name starts with '_' are not selectable
    allowed_clients = sorted(
        (client for client in INNERTUBE_CLIENTS if not client.startswith('_')),
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)

    requested_clients = []
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'default':
            requested_clients.extend(default)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        requested_clients = default

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Collect the additions first: extending a list from a generator that
        # iterates the same list would mutate it while it is being iterated
        music_clients = [
            f'{client}_music' for client in requested_clients
            if f'{client}_music' in INNERTUBE_CLIENTS]
        requested_clients.extend(music_clients)

    return orderedSet(requested_clients)
cf7e015f 2534
c0bc527b
M
def _extract_player_ytcfg(self, client, video_id):
    """Download the page holding the ytcfg of ``client`` and extract it.

    Only 'web_music' and 'web_embedded' have such a page; an empty dict is
    returned for every other client and when no ytcfg could be extracted.
    """
    if client == 'web_music':
        url = 'https://music.youtube.com'
    elif client == 'web_embedded':
        url = f'https://www.youtube.com/embed/{video_id}?html5=1'
    else:
        return {}

    webpage = self._download_webpage(
        url, video_id, fatal=False, note=f'Downloading {client} config')
    ytcfg = self.extract_ytcfg(video_id, webpage)
    return ytcfg or {}
2544
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
    """Collect player responses for ``clients`` and return ``(responses, player_url)``.

    The clients are tried in the given order. '_agegate' / '_creator'
    variants may be queued while the loop runs. The last ExtractorError is
    re-raised if no response at all was collected, otherwise it is reported
    as a warning.
    """
    initial_pr = None
    if webpage:
        initial_pr = self._extract_yt_initial_variable(
            webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
            video_id, 'initial player response')

    original_clients = clients
    # Reversed so that pop() hands the clients out in the requested order
    pending_clients = clients[::-1]
    prs = []

    def append_client(client_name):
        if client_name in INNERTUBE_CLIENTS and client_name not in original_clients:
            pending_clients.append(client_name)

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr:
        stripped_pr = dict(initial_pr)
        stripped_pr['streamingData'] = None
        prs.append(stripped_pr)

    last_error = None
    tried_iframe_fallback = False
    player_url = None
    while pending_clients:
        client = pending_clients.pop()
        player_ytcfg = master_ytcfg if client == 'web' else {}
        if 'configs' not in self._configuration_arg('player_skip'):
            player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg

        player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        if require_js_player and not (player_url or tried_iframe_fallback):
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            if client == 'web' and initial_pr:
                # The webpage already contained the web player response
                pr = initial_pr
            else:
                pr = self._extract_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg,
                    player_url if require_js_player else None, initial_pr)
        except ExtractorError as e:
            # Only the most recent error is kept; older ones become warnings
            if last_error:
                self.report_warning(last_error)
            last_error = e
            continue

        if pr:
            prs.append(pr)

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if client.endswith('_agegate') and self._is_unplayable(pr) and self.is_authenticated:
            append_client(client.replace('_agegate', '_creator'))
        elif self._is_agegated(pr):
            append_client(f'{client}_agegate')

    if last_error:
        if not prs:
            raise last_error
        self.report_warning(last_error)
    return prs, player_url
11f9be09 2611
def _extract_formats(self, streaming_data, video_id, player_url, is_live):
    """Generator over format dicts built from ``streaming_data``.

    First yields the https formats listed under 'formats' and
    'adaptiveFormats', then the formats of the HLS and DASH manifests
    referenced by each streaming-data entry. A manifest format is dropped
    when the same itag was already seen for that protocol.
    """
    # itags maps a seen itag to its protocol; stream_ids is used for membership only
    itags, stream_ids = {}, set()
    itag_qualities, res_qualities = {}, {}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            # `or ''` also covers an explicit None value
            quality = (fmt.get('audioQuality') or '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            sc = compat_parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not (sc and fmt_url and encrypted_sig):
                continue
            if not player_url:
                continue
            signature = self._decrypt_signature(encrypted_sig, video_id, player_url)
            sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
            fmt_url += '&' + sp + '=' + signature

        query = parse_qs(fmt_url)
        throttled = False
        if query.get('n'):
            try:
                fmt_url = update_url_query(fmt_url, {
                    'n': self._decrypt_nsig(query['n'][0], video_id, player_url)})
            except ExtractorError as e:
                self.report_warning(
                    f'nsig extraction failed: You may experience throttling for some formats\n'
                    f'n = {query["n"][0]} ; player = {player_url}\n{e}', only_once=True)
                throttled = True

        if itag:
            itags[itag] = 'https'
            stream_ids.add(stream_id)

        tbr = float_or_none(
            fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': join_nonempty(
                '%s%s' % (audio_track.get('displayName') or '',
                          ' (default)' if audio_track.get('audioIsDefault') else ''),
                # quality may still be None here (no quality, audioQuality or qualityLabel)
                fmt.get('qualityLabel') or (quality or '').replace('audio_quality_', ''),
                throttled and 'THROTTLED', delim=', '),
            'source_preference': -10 if throttled else -1,
            'fps': int_or_none(fmt.get('fps')) or None,
            'height': height,
            'quality': q(quality),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': (audio_track.get('id') or '').split('.')[0],
            'language_preference': 1 if audio_track.get('audioIsDefault') else -1,
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        yield dct

    live_from_start = is_live and self.get_param('live_from_start')
    # Work on a copy so that appending 'hls' never alters the list handed out
    # by _configuration_arg
    skip_manifests = list(self._configuration_arg('skip'))
    if not self.get_param('youtube_include_hls_manifest', True):
        skip_manifests.append('hls')
    get_dash = 'dash' not in skip_manifests and (
        not is_live or live_from_start or self._configuration_arg('include_live_dash'))
    get_hls = not live_from_start and 'hls' not in skip_manifests

    def process_manifest_format(f, proto, itag):
        """Set format_id/quality on ``f``; return False if it is a duplicate."""
        if itag in itags:
            if itags[itag] == proto or f'{itag}-{proto}' in itags:
                return False
            itag = f'{itag}-{proto}'
        if itag:
            f['format_id'] = itag
            itags[itag] = proto

        # Prefer the quality recorded for the itag, then the one for the height
        f['quality'] = next((
            q(qdict[val])
            for val, qdict in ((f.get('format_id', '').split('-')[0], itag_qualities), (f.get('height'), res_qualities))
            if val in qdict), -1)
        return True

    for sd in streaming_data:
        hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
                if process_manifest_format(f, 'hls', self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)):
                    yield f

        dash_manifest_url = get_dash and sd.get('dashManifestUrl')
        if dash_manifest_url:
            for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
                if process_manifest_format(f, 'dash', f['format_id']):
                    f['filesize'] = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                    if live_from_start:
                        f['is_from_start'] = True

                    yield f
11f9be09 2764
720c3099 2765 def _extract_storyboard(self, player_responses, duration):
2766 spec = get_first(
2767 player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
2768 if not spec:
2769 return
2770 base_url = spec.pop()
2771 L = len(spec) - 1
2772 for i, args in enumerate(spec):
2773 args = args.split('#')
2774 counts = list(map(int_or_none, args[:5]))
2775 if len(args) != 8 or not all(counts):
2776 self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
2777 continue
2778 width, height, frame_count, cols, rows = counts
2779 N, sigh = args[6:]
2780
2781 url = base_url.replace('$L', str(L - i)).replace('$N', N) + f'&sigh={sigh}'
2782 fragment_count = frame_count / (cols * rows)
2783 fragment_duration = duration / fragment_count
2784 yield {
2785 'format_id': f'sb{i}',
2786 'format_note': 'storyboard',
2787 'ext': 'mhtml',
2788 'protocol': 'mhtml',
2789 'acodec': 'none',
2790 'vcodec': 'none',
2791 'url': url,
2792 'width': width,
2793 'height': height,
2794 'fragments': [{
2795 'path': url.replace('$M', str(j)),
2796 'duration': min(fragment_duration, duration - (j * fragment_duration)),
2797 } for j in range(math.ceil(fragment_count))],
2798 }
2799
def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
    """Download the watch page (unless skipped) and the player responses.

    Returns ``(webpage, master_ytcfg, player_responses, player_url)``;
    ``webpage`` stays None when 'webpage' is listed in the 'player_skip'
    extractor argument.
    """
    skip_webpage = 'webpage' in self._configuration_arg('player_skip')
    webpage = None
    if not skip_webpage:
        webpage = self._download_webpage(
            webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)

    # Fall back to the default ytcfg when none can be extracted from the page
    master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()

    requested_clients = self._get_requested_clients(url, smuggled_data)
    player_responses, player_url = self._extract_player_responses(
        requested_clients, video_id, webpage, master_ytcfg)

    return webpage, master_ytcfg, player_responses, player_url
2813
def _list_formats(self, video_id, microformats, video_details, player_responses, player_url):
    """Work out the live state and extract all formats.

    Returns ``(live_broadcast_details, is_live, streaming_data, formats)``.
    ``is_live`` is taken from the video details and falls back to
    'isLiveNow' of the live broadcast details when that is None.
    """
    live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))

    is_live = get_first(video_details, 'isLive')
    if is_live is None:
        is_live = get_first(live_broadcast_details, 'isLiveNow')

    streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
    format_iter = self._extract_formats(streaming_data, video_id, player_url, is_live)
    formats = list(format_iter)

    return live_broadcast_details, is_live, streaming_data, formats
2824
2825 def _real_extract(self, url):
2826 url, smuggled_data = unsmuggle_url(url, {})
2827 video_id = self._match_id(url)
2828
2829 base_url = self.http_scheme() + '//www.youtube.com/'
2830 webpage_url = base_url + 'watch?v=' + video_id
2831
2832 webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
2833
11f9be09 2834 playability_statuses = traverse_obj(
2835 player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])
2836
2837 trailer_video_id = get_first(
2838 playability_statuses,
2839 ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
2840 expected_type=str)
2841 if trailer_video_id:
2842 return self.url_result(
2843 trailer_video_id, self.ie_key(), trailer_video_id)
2844
2845 search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
2846 if webpage else (lambda x: None))
2847
2848 video_details = traverse_obj(
2849 player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
2850 microformats = traverse_obj(
2851 player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
2852 expected_type=dict, default=[])
2853 video_title = (
2854 get_first(video_details, 'title')
2855 or self._get_text(microformats, (..., 'title'))
2856 or search_meta(['og:title', 'twitter:title', 'title']))
2857 video_description = get_first(video_details, 'shortDescription')
2858
d89257f3 2859 multifeed_metadata_list = get_first(
2860 player_responses,
2861 ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
2862 expected_type=str)
2863 if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):
2864 if self.get_param('noplaylist'):
11f9be09 2865 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
d89257f3 2866 else:
2867 entries = []
2868 feed_ids = []
2869 for feed in multifeed_metadata_list.split(','):
2870 # Unquote should take place before split on comma (,) since textual
2871 # fields may contain comma as well (see
2872 # https://github.com/ytdl-org/youtube-dl/issues/8536)
2873 feed_data = compat_parse_qs(
2874 compat_urllib_parse_unquote_plus(feed))
2875
2876 def feed_entry(name):
2877 return try_get(
2878 feed_data, lambda x: x[name][0], compat_str)
2879
2880 feed_id = feed_entry('id')
2881 if not feed_id:
2882 continue
2883 feed_title = feed_entry('title')
2884 title = video_title
2885 if feed_title:
2886 title += ' (%s)' % feed_title
2887 entries.append({
2888 '_type': 'url_transparent',
2889 'ie_key': 'Youtube',
2890 'url': smuggle_url(
2891 '%swatch?v=%s' % (base_url, feed_data['id'][0]),
2892 {'force_singlefeed': True}),
2893 'title': title,
2894 })
2895 feed_ids.append(feed_id)
2896 self.to_screen(
2897 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2898 % (', '.join(feed_ids), video_id))
2899 return self.playlist_result(
2900 entries, video_id, video_title, video_description)
11f9be09 2901
adbc4ec4 2902 live_broadcast_details, is_live, streaming_data, formats = self._list_formats(video_id, microformats, video_details, player_responses, player_url)
bf1317d2 2903
545cc85d 2904 if not formats:
11f9be09 2905 if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
88acdbc2 2906 self.report_drm(video_id)
11f9be09 2907 pemr = get_first(
2908 playability_statuses,
2909 ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
2910 reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
2911 subreason = clean_html(self._get_text(pemr, 'subreason') or '')
545cc85d 2912 if subreason:
545cc85d 2913 if subreason == 'The uploader has not made this video available in your country.':
11f9be09 2914 countries = get_first(microformats, 'availableCountries')
545cc85d 2915 if not countries:
2916 regions_allowed = search_meta('regionsAllowed')
2917 countries = regions_allowed.split(',') if regions_allowed else None
b7da73eb 2918 self.raise_geo_restricted(subreason, countries, metadata_available=True)
11f9be09 2919 reason += f'. {subreason}'
545cc85d 2920 if reason:
b7da73eb 2921 self.raise_no_formats(reason, expected=True)
bf1317d2 2922
11f9be09 2923 keywords = get_first(video_details, 'keywords', expected_type=list) or []
545cc85d 2924 if not keywords and webpage:
2925 keywords = [
2926 unescapeHTML(m.group('content'))
2927 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2928 for keyword in keywords:
2929 if keyword.startswith('yt:stretch='):
201c1459 2930 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2931 if mobj:
2932 # NB: float is intentional for forcing float division
2933 w, h = (float(v) for v in mobj.groups())
2934 if w > 0 and h > 0:
2935 ratio = w / h
2936 for f in formats:
2937 if f.get('vcodec') != 'none':
2938 f['stretched_ratio'] = ratio
2939 break
a709d873 2940 thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))
ff2751ac 2941 thumbnail_url = search_meta(['og:image', 'twitter:image'])
2942 if thumbnail_url:
2943 thumbnails.append({
2944 'url': thumbnail_url,
ff2751ac 2945 })
fccf5021 2946 original_thumbnails = thumbnails.copy()
2947
0ba692ac 2948 # The best resolution thumbnails sometimes does not appear in the webpage
2949 # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
cca80fe6 2950 # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
e820fbaa 2951 thumbnail_names = [
2952 'maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3',
cca80fe6 2953 'hqdefault', 'hq1', 'hq2', 'hq3', '0',
2954 'mqdefault', 'mq1', 'mq2', 'mq3',
2955 'default', '1', '2', '3'
2956 ]
cca80fe6 2957 n_thumbnail_names = len(thumbnail_names)
0ba692ac 2958 thumbnails.extend({
2959 'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
2960 video_id=video_id, name=name, ext=ext,
2961 webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
cca80fe6 2962 } for name in thumbnail_names for ext in ('webp', 'jpg'))
0ba692ac 2963 for thumb in thumbnails:
cca80fe6 2964 i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
0ba692ac 2965 thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
ff2751ac 2966 self._remove_duplicate_formats(thumbnails)
fccf5021 2967 self._downloader._sort_thumbnails(original_thumbnails)
545cc85d 2968
7ea65411 2969 category = get_first(microformats, 'category') or search_meta('genre')
2970 channel_id = str_or_none(
2971 get_first(video_details, 'channelId')
2972 or get_first(microformats, 'externalChannelId')
2973 or search_meta('channelId'))
2974 duration = int_or_none(
2975 get_first(video_details, 'lengthSeconds')
2976 or get_first(microformats, 'lengthSeconds')
2977 or parse_duration(search_meta('duration'))) or None
2978 owner_profile_url = get_first(microformats, 'ownerProfileUrl')
2979
2980 live_content = get_first(video_details, 'isLiveContent')
2981 is_upcoming = get_first(video_details, 'isUpcoming')
2982 if is_live is None:
2983 if is_upcoming or live_content is False:
2984 is_live = False
2985 if is_upcoming is None and (live_content or is_live):
2986 is_upcoming = False
adbc4ec4
THD
2987 live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
2988 live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
2989 if not duration and live_end_time and live_start_time:
2990 duration = live_end_time - live_start_time
2991
2992 if is_live and self.get_param('live_from_start'):
2993 self._prepare_live_from_start_formats(formats, video_id, live_start_time, url, webpage_url, smuggled_data)
7ea65411 2994
720c3099 2995 formats.extend(self._extract_storyboard(player_responses, duration))
2996
2997 # Source is given priority since formats that throttle are given lower source_preference
2998 # When throttling issue is fully fixed, remove this
2999 self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang', 'proto'))
3000
545cc85d 3001 info = {
3002 'id': video_id,
39ca3b5c 3003 'title': video_title,
545cc85d 3004 'formats': formats,
3005 'thumbnails': thumbnails,
fccf5021 3006 # The best thumbnail that we are sure exists. Prevents unnecessary
3007 # URL checking if user don't care about getting the best possible thumbnail
3008 'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
545cc85d 3009 'description': video_description,
3010 'upload_date': unified_strdate(
11f9be09 3011 get_first(microformats, 'uploadDate')
545cc85d 3012 or search_meta('uploadDate')),
11f9be09 3013 'uploader': get_first(video_details, 'author'),
545cc85d 3014 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
3015 'uploader_url': owner_profile_url,
3016 'channel_id': channel_id,
11f9be09 3017 'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
545cc85d 3018 'duration': duration,
3019 'view_count': int_or_none(
11f9be09 3020 get_first((video_details, microformats), (..., 'viewCount'))
545cc85d 3021 or search_meta('interactionCount')),
11f9be09 3022 'average_rating': float_or_none(get_first(video_details, 'averageRating')),
545cc85d 3023 'age_limit': 18 if (
11f9be09 3024 get_first(microformats, 'isFamilySafe') is False
545cc85d 3025 or search_meta('isFamilyFriendly') == 'false'
3026 or search_meta('og:restrictions:age') == '18+') else 0,
3027 'webpage_url': webpage_url,
3028 'categories': [category] if category else None,
3029 'tags': keywords,
11f9be09 3030 'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
7ea65411 3031 'is_live': is_live,
3032 'was_live': (False if is_live or is_upcoming or live_content is False
3033 else None if is_live is None or is_upcoming is None
3034 else live_content),
3035 'live_status': 'is_upcoming' if is_upcoming else None, # rest will be set by YoutubeDL
adbc4ec4 3036 'release_timestamp': live_start_time,
545cc85d 3037 }
b477fc13 3038
3944e7af 3039 pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
545cc85d 3040 if pctr:
ecdc9049 3041 def get_lang_code(track):
3042 return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
3043 or track.get('languageCode'))
3044
3045 # Converted into dicts to remove duplicates
3046 captions = {
3047 get_lang_code(sub): sub
3048 for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
3049 translation_languages = {
3050 lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
3051 for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}
3052
774d79cc 3053 def process_language(container, base_url, lang_code, sub_name, query):
120916da 3054 lang_subs = container.setdefault(lang_code, [])
545cc85d 3055 for fmt in self._SUBTITLE_FORMATS:
3056 query.update({
3057 'fmt': fmt,
3058 })
3059 lang_subs.append({
3060 'ext': fmt,
3061 'url': update_url_query(base_url, query),
774d79cc 3062 'name': sub_name,
545cc85d 3063 })
7e72694b 3064
ecdc9049 3065 subtitles, automatic_captions = {}, {}
3066 for lang_code, caption_track in captions.items():
3067 base_url = caption_track.get('baseUrl')
545cc85d 3068 if not base_url:
3069 continue
ecdc9049 3070 lang_name = self._get_text(caption_track, 'name', max_runs=1)
545cc85d 3071 if caption_track.get('kind') != 'asr':
545cc85d 3072 if not lang_code:
3073 continue
3074 process_language(
ecdc9049 3075 subtitles, base_url, lang_code, lang_name, {})
3076 if not caption_track.get('isTranslatable'):
3077 continue
3944e7af 3078 for trans_code, trans_name in translation_languages.items():
3079 if not trans_code:
545cc85d 3080 continue
ecdc9049 3081 if caption_track.get('kind') != 'asr':
3082 trans_code += f'-{lang_code}'
3083 trans_name += format_field(lang_name, template=' from %s')
545cc85d 3084 process_language(
ecdc9049 3085 automatic_captions, base_url, trans_code, trans_name, {'tlang': trans_code})
3086 info['automatic_captions'] = automatic_captions
3087 info['subtitles'] = subtitles
7e72694b 3088
545cc85d 3089 parsed_url = compat_urllib_parse_urlparse(url)
3090 for component in [parsed_url.fragment, parsed_url.query]:
3091 query = compat_parse_qs(component)
3092 for k, v in query.items():
3093 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
3094 d_k += '_time'
3095 if d_k not in info and k in s_ks:
3096 info[d_k] = parse_duration(query[k][0])
822b9d9c
RA
3097
3098 # Youtube Music Auto-generated description
822b9d9c 3099 if video_description:
38d70284 3100 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
822b9d9c 3101 if mobj:
822b9d9c
RA
3102 release_year = mobj.group('release_year')
3103 release_date = mobj.group('release_date')
3104 if release_date:
3105 release_date = release_date.replace('-', '')
3106 if not release_year:
545cc85d 3107 release_year = release_date[:4]
3108 info.update({
3109 'album': mobj.group('album'.strip()),
3110 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
3111 'track': mobj.group('track').strip(),
3112 'release_date': release_date,
cc2db878 3113 'release_year': int_or_none(release_year),
545cc85d 3114 })
7e72694b 3115
545cc85d 3116 initial_data = None
3117 if webpage:
3118 initial_data = self._extract_yt_initial_variable(
3119 webpage, self._YT_INITIAL_DATA_RE, video_id,
3120 'yt initial data')
3121 if not initial_data:
99e9e001 3122 query = {'videoId': video_id}
3123 query.update(self._get_checkok_params())
109dd3b2 3124 initial_data = self._extract_response(
3125 item_id=video_id, ep='next', fatal=False,
99e9e001 3126 ytcfg=master_ytcfg, query=query,
3127 headers=self.generate_api_headers(ytcfg=master_ytcfg),
109dd3b2 3128 note='Downloading initial data API JSON')
545cc85d 3129
c60ee3a2 3130 try:
3131 # This will error if there is no livechat
3132 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
ecdc9049 3133 info.setdefault('subtitles', {})['live_chat'] = [{
c60ee3a2 3134 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
3135 'video_id': video_id,
3136 'ext': 'json',
f6745c49 3137 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
c60ee3a2 3138 }]
3139 except (KeyError, IndexError, TypeError):
3140 pass
545cc85d 3141
3142 if initial_data:
7c365c21 3143 info['chapters'] = (
3144 self._extract_chapters_from_json(initial_data, duration)
3145 or self._extract_chapters_from_engagement_panel(initial_data, duration)
3146 or None)
545cc85d 3147
3148 contents = try_get(
3149 initial_data,
3150 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
3151 list) or []
3152 for content in contents:
3153 vpir = content.get('videoPrimaryInfoRenderer')
3154 if vpir:
3155 stl = vpir.get('superTitleLink')
3156 if stl:
fe93e2c4 3157 stl = self._get_text(stl)
545cc85d 3158 if try_get(
3159 vpir,
3160 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
3161 info['location'] = stl
3162 else:
3163 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
3164 if mobj:
3165 info.update({
3166 'series': mobj.group(1),
3167 'season_number': int(mobj.group(2)),
3168 'episode_number': int(mobj.group(3)),
3169 })
3170 for tlb in (try_get(
3171 vpir,
3172 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
3173 list) or []):
3174 tbr = tlb.get('toggleButtonRenderer') or {}
3175 for getter, regex in [(
3176 lambda x: x['defaultText']['accessibility']['accessibilityData'],
3177 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
3178 lambda x: x['accessibility'],
3179 lambda x: x['accessibilityData']['accessibilityData'],
3180 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
3181 label = (try_get(tbr, getter, dict) or {}).get('label')
3182 if label:
3183 mobj = re.match(regex, label)
3184 if mobj:
3185 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
3186 break
3187 sbr_tooltip = try_get(
3188 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
3189 if sbr_tooltip:
3190 like_count, dislike_count = sbr_tooltip.split(' / ')
3191 info.update({
3192 'like_count': str_to_int(like_count),
3193 'dislike_count': str_to_int(dislike_count),
3194 })
3195 vsir = content.get('videoSecondaryInfoRenderer')
3196 if vsir:
052e1350 3197 info['channel'] = self._get_text(vsir, ('owner', 'videoOwnerRenderer', 'title'))
545cc85d 3198 rows = try_get(
3199 vsir,
3200 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
3201 list) or []
3202 multiple_songs = False
3203 for row in rows:
3204 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
3205 multiple_songs = True
3206 break
3207 for row in rows:
3208 mrr = row.get('metadataRowRenderer') or {}
3209 mrr_title = mrr.get('title')
3210 if not mrr_title:
3211 continue
052e1350 3212 mrr_title = self._get_text(mrr, 'title')
3213 mrr_contents_text = self._get_text(mrr, ('contents', 0))
545cc85d 3214 if mrr_title == 'License':
3215 info['license'] = mrr_contents_text
3216 elif not multiple_songs:
3217 if mrr_title == 'Album':
3218 info['album'] = mrr_contents_text
3219 elif mrr_title == 'Artist':
3220 info['artist'] = mrr_contents_text
3221 elif mrr_title == 'Song':
3222 info['track'] = mrr_contents_text
3223
3224 fallbacks = {
3225 'channel': 'uploader',
3226 'channel_id': 'uploader_id',
3227 'channel_url': 'uploader_url',
3228 }
3229 for to, frm in fallbacks.items():
3230 if not info.get(to):
3231 info[to] = info.get(frm)
3232
3233 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
3234 v = info.get(s_k)
3235 if v:
3236 info[d_k] = v
b84071c0 3237
11f9be09 3238 is_private = get_first(video_details, 'isPrivate', expected_type=bool)
3239 is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
c224251a 3240 is_membersonly = None
b28f8d24 3241 is_premium = None
c224251a
M
3242 if initial_data and is_private is not None:
3243 is_membersonly = False
b28f8d24 3244 is_premium = False
47193e02 3245 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
3246 badge_labels = set()
3247 for content in contents:
3248 if not isinstance(content, dict):
3249 continue
3250 badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
3251 for badge_label in badge_labels:
3252 if badge_label.lower() == 'members only':
3253 is_membersonly = True
3254 elif badge_label.lower() == 'premium':
3255 is_premium = True
3256 elif badge_label.lower() == 'unlisted':
3257 is_unlisted = True
c224251a 3258
c224251a
M
3259 info['availability'] = self._availability(
3260 is_private=is_private,
b28f8d24 3261 needs_premium=is_premium,
c224251a
M
3262 needs_subscription=is_membersonly,
3263 needs_auth=info['age_limit'] >= 18,
3264 is_unlisted=None if is_private is None else is_unlisted)
3265
a2160aa4 3266 info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)
4ea3be0a 3267
11f9be09 3268 self.mark_watched(video_id, player_responses)
d77ab8e2 3269
545cc85d 3270 return info
c5e8d7af 3271
a61fd4cf 3272
a6213a49 3273class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
5f6a1245 3274
def _extract_channel_id(self, webpage):
    """Return the channel ID advertised by a webpage.

    The 'channelId' meta tag is tried first. If it is missing, a channel
    URL is looked up in the OpenGraph/app-link/Twitter meta tags and the
    ID is parsed from its '/channel/<id>' path component.

    @param webpage: HTML of the page to inspect
    """
    channel_id = self._html_search_meta(
        'channelId', webpage, 'channel id', default=None)
    if channel_id:
        return channel_id
    channel_url = self._html_search_meta(
        ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
         'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
         'twitter:app:url:googleplay'), webpage, 'channel url')
    # The quantifier has to be inside the group: a repeated group such as
    # `([^/?#&])+` only keeps its last repetition, i.e. a single character
    return self._search_regex(
        r'https?://(?:www\.)?youtube\.com/channel/([^/?#&]+)',
        channel_url, 'channel id')
15f6397c 3287
@staticmethod
def _extract_basic_item_renderer(item):
    """Return the first dict value of `item` stored under a recognised renderer key.

    A key is recognised when it is one of the basic renderer names or has
    the form 'grid...Renderer'. Returns None when nothing matches.
    """
    basic_keys = ('playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer')
    for name, candidate in item.items():
        if not isinstance(candidate, dict):
            continue
        is_grid_renderer = name.startswith('grid') and name.endswith('Renderer')
        if name in basic_keys or is_grid_renderer:
            return candidate
    return None
8bdd16b4 3301
def _grid_entries(self, grid_renderer):
    """Yield one result per usable item in grid_renderer['items'].

    Each item is classified, in this order, as a playlist, a video, a
    channel, or a generic navigation endpoint handled by the first
    suitable extractor. Items matching none of these are skipped.
    """
    for grid_item in grid_renderer['items']:
        if not isinstance(grid_item, dict):
            continue
        renderer = self._extract_basic_item_renderer(grid_item)
        if not isinstance(renderer, dict):
            continue
        title = self._get_text(renderer, 'title')

        playlist_id = renderer.get('playlistId')
        video_id = renderer.get('videoId')
        channel_id = renderer.get('channelId')
        if playlist_id:
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=title)
        elif video_id:
            yield self._extract_video(renderer)
        elif channel_id:
            yield self.url_result(
                'https://www.youtube.com/channel/%s' % channel_id,
                ie=YoutubeTabIE.ie_key(), video_title=title)
        else:
            # Fall back to whatever URL the navigation endpoint points at
            endpoint_path = try_get(
                renderer,
                lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
                compat_str)
            endpoint_url = urljoin('https://www.youtube.com/', endpoint_path)
            if not endpoint_url:
                continue
            handler = next(
                (ie for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE) if ie.suitable(endpoint_url)),
                None)
            if handler is not None:
                yield self.url_result(
                    endpoint_url, ie=handler.ie_key(),
                    video_id=handler._match_id(endpoint_url), video_title=title)
8bdd16b4 3341
def _shelf_entries_from_content(self, shelf_renderer):
    """Yield the entries embedded directly in a shelf's 'content' dict.

    Only 'gridRenderer' and 'expandedShelfContentsRenderer' content is
    processed; 'horizontalListRenderer' content is not supported yet.
    """
    shelf_content = shelf_renderer.get('content')
    if not isinstance(shelf_content, dict):
        return
    grid = shelf_content.get('gridRenderer') or shelf_content.get('expandedShelfContentsRenderer')
    if grid:
        # TODO: add support for nested playlists so each shelf is processed
        # as separate playlist
        # TODO: this includes only first N items
        yield from self._grid_entries(grid)
    # TODO: handle shelf_content['horizontalListRenderer']
8bdd16b4 3357
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield a URL result for the shelf endpoint, then the shelf's inline entries.

    With skip_channels set, a shelf whose URL contains '/channels?'
    yields nothing at all.
    """
    endpoint_path = try_get(
        shelf_renderer,
        lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Links to other channels are skipped by URL; checking
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        yield self.url_result(
            shelf_url, video_title=self._get_text(shelf_renderer, 'title'))
    # A shelf may carry no URL of its own, so its content is extracted as well
    yield from self._shelf_entries_from_content(shelf_renderer)
c5e8d7af 3374
def _playlist_entries(self, video_list_renderer):
    """Yield an extracted video for every playlist item that has a videoId."""
    for list_item in video_list_renderer['contents']:
        if not isinstance(list_item, dict):
            continue
        video_renderer = (
            list_item.get('playlistVideoRenderer')
            or list_item.get('playlistPanelVideoRenderer'))
        if isinstance(video_renderer, dict) and video_renderer.get('videoId'):
            yield self._extract_video(video_renderer)
07aeced6 3386
def _rich_entries(self, rich_grid_renderer):
    """Yield the video held in content.videoRenderer, if it has a videoId."""
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
3394
def _video_entry(self, video_renderer):
    """Return the extracted video for a renderer, or None if it has no videoId."""
    if not video_renderer.get('videoId'):
        return None
    return self._extract_video(video_renderer)
dacb3a86 3399
def _post_thread_entries(self, post_thread_renderer):
    """Yield the media referenced by a community post.

    In order: the attached video, the attached playlist, and every
    YouTube video linked from the post text (except a link to the
    already-yielded attached video).
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # Attached video
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        attached_entry = self._extract_video(attached_video)
        if attached_entry:
            yield attached_entry

    # Attached playlist
    playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # Video links inside the post text
    for text_run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(text_run, dict):
            continue
        link_url = try_get(
            text_run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not link_url or not YoutubeIE.suitable(link_url):
            continue
        linked_id = YoutubeIE._match_id(link_url)
        if linked_id != video_id:
            yield self.url_result(link_url, ie=YoutubeIE.ie_key(), video_id=linked_id)
dacb3a86 3435
def _post_thread_continuation_entries(self, post_thread_continuation):
    """Yield the entries of every post thread in a continuation response.

    @param post_thread_continuation: dict whose 'contents' list holds
                                     'backstagePostThreadRenderer' items
    """
    contents = post_thread_continuation.get('contents')
    if not isinstance(contents, list):
        return
    for content in contents:
        # Skip malformed items instead of failing on `.get`, as the other
        # entry generators of this class do
        if not isinstance(content, dict):
            continue
        renderer = content.get('backstagePostThreadRenderer')
        if not isinstance(renderer, dict):
            continue
        for entry in self._post_thread_entries(renderer):
            yield entry
07aeced6 3446
39ed931e 3447 r''' # unused
3448 def _rich_grid_entries(self, contents):
3449 for content in contents:
3450 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3451 if video_renderer:
3452 entry = self._video_entry(video_renderer)
3453 if entry:
3454 yield entry
3455 '''
def _extract_entries(self, parent_renderer, continuation_list):
    """Yield the entries found in parent_renderer['contents'].

    @param parent_renderer: dict with a 'contents' list of renderer items
    @param continuation_list: one-element list used as an output parameter;
                              it is reset to [None] and its first element
                              is overwritten with the continuation found
                              while iterating
    """
    # continuation_list is modified in-place with continuation_list = [continuation_token]
    continuation_list[:] = [None]
    contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
    for content in contents:
        if not isinstance(content, dict):
            continue
        is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
        if not is_renderer:
            # Not an item section: only rich items are handled here
            renderer = content.get('richItemRenderer')
            if renderer:
                for entry in self._rich_entries(renderer):
                    yield entry
                continuation_list[0] = self._extract_continuation(parent_renderer)
            continue
        isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
        for isr_content in isr_contents:
            if not isinstance(isr_content, dict):
                continue

            # Maps a renderer key to the generator/callable producing its entries
            known_renderers = {
                'playlistVideoListRenderer': self._playlist_entries,
                'gridRenderer': self._grid_entries,
                'shelfRenderer': lambda x: self._shelf_entries(x),
                'backstagePostThreadRenderer': self._post_thread_entries,
                'videoRenderer': lambda x: [self._video_entry(x)],
                'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
                'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
            }
            # Only the first known renderer of each item is processed
            for key, renderer in isr_content.items():
                if key not in known_renderers:
                    continue
                for entry in known_renderers[key](renderer):
                    if entry:
                        yield entry
                continuation_list[0] = self._extract_continuation(renderer)
                break

        # Fall back to a continuation attached to the item section itself
        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(is_renderer)

    # Last resort: a continuation attached to the parent renderer
    if not continuation_list[0]:
        continuation_list[0] = self._extract_continuation(parent_renderer)
3499
3500 def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
3501 continuation_list = [None]
3502 extract_entries = lambda x: self._extract_entries(x, continuation_list)
29f7c58a 3503 tab_content = try_get(tab, lambda x: x['content'], dict)
3504 if not tab_content:
3505 return
3462ffa8 3506 parent_renderer = (
29f7c58a 3507 try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
3508 or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
70d5c17b 3509 for entry in extract_entries(parent_renderer):
3510 yield entry
3462ffa8 3511 continuation = continuation_list[0]
d069eca7 3512
8bdd16b4 3513 for page_num in itertools.count(1):
3514 if not continuation:
3515 break
99e9e001 3516 headers = self.generate_api_headers(
3517 ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
79360d99 3518 response = self._extract_response(
3519 item_id='%s page %s' % (item_id, page_num),
fe93e2c4 3520 query=continuation, headers=headers, ytcfg=ytcfg,
79360d99 3521 check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
a5c56234
M
3522
3523 if not response:
8bdd16b4 3524 break
ac56cf38 3525 # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
3526 # See: https://github.com/ytdl-org/youtube-dl/issues/28702
3527 visitor_data = self._extract_visitor_data(response) or visitor_data
ebf1b291 3528
69184e41 3529 known_continuation_renderers = {
3530 'playlistVideoListContinuation': self._playlist_entries,
3531 'gridContinuation': self._grid_entries,
3532 'itemSectionContinuation': self._post_thread_continuation_entries,
3533 'sectionListContinuation': extract_entries, # for feeds
3534 }
8bdd16b4 3535 continuation_contents = try_get(
69184e41 3536 response, lambda x: x['continuationContents'], dict) or {}
3537 continuation_renderer = None
3538 for key, value in continuation_contents.items():
3539 if key not in known_continuation_renderers:
3462ffa8 3540 continue
69184e41 3541 continuation_renderer = value
3542 continuation_list = [None]
3543 for entry in known_continuation_renderers[key](continuation_renderer):
3544 yield entry
3545 continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
3546 break
3547 if continuation_renderer:
3548 continue
c5e8d7af 3549
a1b535bd 3550 known_renderers = {
3551 'gridPlaylistRenderer': (self._grid_entries, 'items'),
3552 'gridVideoRenderer': (self._grid_entries, 'items'),
d61fc646 3553 'gridChannelRenderer': (self._grid_entries, 'items'),
a1b535bd 3554 'playlistVideoRenderer': (self._playlist_entries, 'contents'),
cd7c66cf 3555 'itemSectionRenderer': (extract_entries, 'contents'), # for feeds
9ba5705a 3556 'richItemRenderer': (extract_entries, 'contents'), # for hashtag
26fe8ffe 3557 'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
a1b535bd 3558 }
cce889b9 3559 on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
8bdd16b4 3560 continuation_items = try_get(
cce889b9 3561 on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
a1b535bd 3562 continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
3563 video_items_renderer = None
3564 for key, value in continuation_item.items():
3565 if key not in known_renderers:
8bdd16b4 3566 continue
a1b535bd 3567 video_items_renderer = {known_renderers[key][1]: continuation_items}
9ba5705a 3568 continuation_list = [None]
a1b535bd 3569 for entry in known_renderers[key][0](video_items_renderer):
3570 yield entry
9ba5705a 3571 continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
a1b535bd 3572 break
3573 if video_items_renderer:
3574 continue
8bdd16b4 3575 break
9558dcec 3576
@staticmethod
def _extract_selected_tab(tabs):
    """Return the renderer of the tab flagged as selected.

    Raises ExtractorError when no tab has 'selected' set to True.
    """
    for candidate in tabs:
        tab_renderer = dict_get(candidate, ('tabRenderer', 'expandableTabRenderer')) or {}
        if tab_renderer.get('selected') is True:
            return tab_renderer
    raise ExtractorError('Unable to find selected tab')
b82f815f 3585
@classmethod
def _extract_uploader(cls, data):
    """Return the uploader fields found in the playlist sidebar.

    The result holds 'uploader', 'uploader_id' and 'uploader_url', with
    keys whose value is None left out; it is empty when the sidebar
    names no owner.
    """
    sidebar = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner = try_get(
        sidebar, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if not owner:
        return {}

    def browse_field(name):
        return try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint'][name], compat_str)

    fields = {
        'uploader': owner.get('text'),
        'uploader_id': browse_field('browseId'),
        'uploader_url': urljoin('https://www.youtube.com/', browse_field('canonicalBaseUrl')),
    }
    return {name: value for name, value in fields.items() if value is not None}
8bdd16b4 3600
def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """Build the playlist result for the selected tab of a channel/playlist page.

    @param item_id: fallback playlist ID when the metadata provides none
    @param ytcfg: passed through to the entry generator
    @param data: page data holding 'metadata', 'header' and sidebar info
    @param tabs: list of tab dicts; the selected one supplies the entries
    """
    playlist_id = title = description = channel_url = channel_name = channel_id = None
    tags = []

    selected_tab = self._extract_selected_tab(tabs)
    # Channel metadata is preferred; playlist metadata is the fallback
    renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if renderer:
        channel_name = renderer.get('title')
        channel_url = renderer.get('channelUrl')
        channel_id = renderer.get('externalId')
    else:
        renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    if renderer:
        title = renderer.get('title')
        description = renderer.get('description', '')
        # Stays None for playlist metadata, where no channel_id was read
        playlist_id = channel_id
        tags = renderer.get('keywords', '').split()

    thumbnails = (
        self._extract_thumbnails(renderer, 'avatar')
        or self._extract_thumbnails(
            self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
            ('thumbnailRenderer', 'playlistVideoThumbnailRenderer', 'thumbnail')))

    if playlist_id is None:
        playlist_id = item_id
    if title is None:
        title = (
            try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
            or playlist_id)
    # Append the tab's own title/expanded text to the playlist title
    title += format_field(selected_tab, 'title', ' - %s')
    title += format_field(selected_tab, 'expandedText', ' - %s')
    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
    }
    availability = self._extract_availability(data)
    if availability:
        metadata['availability'] = availability
    # Without channel metadata, take the uploader from the sidebar instead
    if not channel_id:
        metadata.update(self._extract_uploader(data))
    # The channel fields mirror the (possibly updated) uploader fields
    metadata.update({
        'channel': metadata['uploader'],
        'channel_id': metadata['uploader_id'],
        'channel_url': metadata['uploader_url']})
    return self.playlist_result(
        self._entries(
            selected_tab, playlist_id, ytcfg,
            self._extract_account_syncid(ytcfg, data),
            self._extract_visitor_data(data, ytcfg)),
        **metadata)
73c4ac2c 3661
def _extract_mix_playlist(self, playlist, playlist_id, data, ytcfg):
    """Yield the videos of a Mix playlist, requesting further pages as needed.

    Each page is requested from the 'next' endpoint starting at the last
    video already seen. Extraction stops when a page is missing or empty,
    adds nothing new, or returns to the first video of the Mix.

    @param playlist: playlist renderer dict of the first page
    @param playlist_id: ID of the Mix, sent with every page request
    @param data: page data used to derive account/visitor headers
    @param ytcfg: passed through to header generation and API requests
    """
    first_id = last_id = response = None
    for page_num in itertools.count(1):
        # The continuation response may lack a playlist; stop rather than
        # subscripting None inside _playlist_entries
        if not playlist:
            return
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last video yielded from the previous page
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        for video in videos[start:]:
            if video['id'] == first_id:
                self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                return
            yield video
        first_id = first_id or videos[0]['id']
        last_id = videos[-1]['id']
        # The last item need not be a playlistPanelVideoRenderer; fall back
        # to an empty dict so the defaults below apply
        watch_endpoint = try_get(
            playlist,
            lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
cd7c66cf 3696
def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """Return a result for a playlist embedded in a watch page.

    When the playlist endpoint resolves to a URL different from `url`,
    a URL result delegating to the tab extractor is returned. Otherwise
    the playlist is extracted in place as a Mix.
    """
    title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], compat_str)
    playlist_id = playlist.get('playlistId') or item_id

    endpoint_path = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, endpoint_path)

    is_mix = not playlist_url or playlist_url == url
    if is_mix:
        return self.playlist_result(
            self._extract_mix_playlist(playlist, playlist_id, data, ytcfg),
            playlist_id=playlist_id, playlist_title=title)

    # Everything except mix playlists is delegated to the tab-based playlist URL
    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
c5e8d7af 3714
def _extract_availability(self, data):
    """
    Work out the availability of a playlist/tab from its sidebar labels.
    Note: nothing is assumed to be public unless YouTube says so explicitly
    @param data: response
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    labels = self._extract_badges(sidebar)

    # Own playlists viewed while authenticated show a visibility dropdown
    # rather than a badge; its selected entry is treated like a badge
    dropdown_entries = try_get(
        sidebar,
        lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'],
        list) or []
    for dropdown_entry in dropdown_entries:
        selected = try_get(
            dropdown_entry, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool)
        if not selected:
            continue
        selected_label = self._get_text(dropdown_entry, ('privacyDropdownItemRenderer', 'label'))
        if selected_label:
            labels.add(selected_label.lower())
            break

    is_private = is_unlisted = None
    for current in labels:
        if current == 'unlisted':
            is_unlisted = True
        elif current == 'private':
            is_private = True
        elif current == 'public':
            is_unlisted = is_private = False
    return self._availability(is_private, False, False, False, is_unlisted)
3746
@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """Return the first truthy `info_renderer` value among the playlist sidebar items.

    Returns None when the sidebar is missing or no item carries it.
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    found = (
        try_get(sidebar_item, lambda x: x[info_renderer], expected_type)
        for sidebar_item in sidebar_items)
    return next((value for value in found if value), None)
3755
def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Request the playlist again so that unavailable videos are included.
    Uses the browse endpoint of the 'show unavailable videos' menu item when
    it exists, and built-in default parameters otherwise.
    Returns None when the playlist sidebar is missing.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not sidebar:
        return

    endpoint = {}
    menu_entries = try_get(
        sidebar, lambda x: x['menu']['menuRenderer']['items'], list) or []
    for menu_entry in menu_entries:
        if not isinstance(menu_entry, dict):
            continue
        nav_renderer = menu_entry.get('menuNavigationItemRenderer')
        caption = try_get(
            nav_renderer, lambda x: x['text']['simpleText'], compat_str)
        if caption and caption.lower() == 'show unavailable videos':
            endpoint = try_get(
                nav_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            break

    headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        visitor_data=self._extract_visitor_data(data, ytcfg))
    query = {
        'params': endpoint.get('params') or 'wgYCCAA=',
        'browseId': endpoint.get('browseId') or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')
358de58c 3791
def _extract_webpage(self, url, item_id, fatal=True):
    """Download `url` and extract its initial data, retrying on transient failures.

    Up to `extractor_retries` (default 3) additional attempts are made when:
      * the download fails with a network error that is not an HTTP 403/429, or
      * the extracted data has neither 'contents' nor 'currentVideoEndpoint'.
    With fatal=True the last error is raised; otherwise it is reported as a
    warning and whatever was obtained so far is returned.

    Returns a (webpage, data) tuple.
    """
    max_retries = self.get_param('extractor_retries', 3)
    webpage = data = last_error = None
    attempt = -1
    while attempt < max_retries:
        attempt += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if last_error:
            self.report_warning('%s. Retrying ...' % last_error)
        try:
            webpage = self._download_webpage(
                url, item_id,
                note='Downloading webpage%s' % (' (retry #%d)' % attempt if attempt else '',))
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as e:
            cause = e.cause
            # Network errors are retried, except HTTP 403/429 responses
            retryable = isinstance(cause, network_exceptions) and (
                not isinstance(cause, compat_HTTPError) or cause.code not in (403, 429))
            if retryable:
                last_error = error_to_compat_str(cause or e.msg)
                if attempt < max_retries:
                    continue
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            break

        # Download and parsing succeeded (the except branch never falls through)
        try:
            self._extract_and_report_alerts(data)
        except ExtractorError as e:
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            break

        if dict_get(data, ('contents', 'currentVideoEndpoint')):
            break

        last_error = 'Incomplete yt initial data received'
        if attempt >= max_retries:
            if fatal:
                raise ExtractorError(last_error)
            self.report_warning(last_error)
            break

    return webpage, data
3837
def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """Obtain the page data for `url`, from the webpage if possible, else from the API.

    Unless 'webpage' is listed in the `skip` extractor argument, the webpage is
    downloaded first (and `ytcfg` is taken from it when not supplied). If that
    yields no data, the tab endpoint API is queried instead.

    Returns a (data, ytcfg) tuple. Raises ExtractorError when authenticated, no
    ytcfg is available, `fatal` is set and the 'authcheck' skip is not given.
    """
    data = None
    if 'webpage' not in self._configuration_arg('skip'):
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)
    if data:
        return data, ytcfg

    # No usable webpage data: fall back to the API
    if not ytcfg and self.is_authenticated:
        msg = 'Playlists that require authentication may not extract correctly without a successful webpage download.'
        if 'authcheck' not in self._configuration_arg('skip') and fatal:
            raise ExtractorError(
                msg + ' If you are not downloading private content, or your cookies are only for the first account and channel,'
                      ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
                expected=True)
        self.report_warning(msg, only_once=True)
    api_data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return api_data, ytcfg
3854
def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """Resolve `url` through the 'navigation/resolve_url' API and fetch the endpoint it maps to.

    A resolved 'browseEndpoint' is fetched from the 'browse' API and a
    'watchEndpoint' from the 'next' API (checked in that order). If neither is
    present, ExtractorError is raised when `fatal` is set; otherwise a warning
    is reported and None is returned.
    """
    api_headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolved = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=api_headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)
    for endpoint_key, api_ep in (('browseEndpoint', 'browse'), ('watchEndpoint', 'next')):
        endpoint_params = try_get(resolved, lambda x: x['endpoint'][endpoint_key], dict)
        if endpoint_params:
            return self._extract_response(
                item_id=item_id, query=endpoint_params, ep=api_ep, headers=api_headers,
                ytcfg=ytcfg, fatal=fatal, default_client=default_client,
                check_get_keys=('contents', 'currentVideoEndpoint'))

    err_note = 'Failed to resolve url (does the playlist exist?)'
    if not fatal:
        self.report_warning(err_note, item_id)
        return None
    raise ExtractorError(err_note, expected=True)
3872
@staticmethod
def _smuggle_data(entries, data):
    """Lazily yield every entry, smuggling `data` into its 'url' whenever `data` is truthy.

    Entries are modified in place; with falsy `data` they pass through unchanged.
    """
    for item in entries:
        if data:
            item['url'] = smuggle_url(item['url'], data)
        yield item
3879
# Default `params` value sent with search requests when the caller passes none
_SEARCH_PARAMS = None

def _search_results(self, query, params=NO_DEFAULT):
    """Yield search result entries for `query`, following continuations page by page.

    `params` falls back to `_SEARCH_PARAMS` when left at NO_DEFAULT and is only
    sent when truthy. Paging stops once a page yields no further continuation.
    """
    payload = {'query': query}
    search_params = self._SEARCH_PARAMS if params is NO_DEFAULT else params
    if search_params:
        payload['params'] = search_params

    # One-element list so that _extract_entries can hand back the next continuation
    continuation = [None]
    page_num = 0
    while True:
        page_num += 1
        payload.update(continuation[0] or {})
        response = self._extract_response(
            item_id='query "%s" page %s' % (query, page_num), ep='search', query=payload,
            check_get_keys=('contents', 'onResponseReceivedCommands'))
        section_contents = try_get(
            response,
            (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
             lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
            list)
        yield from self._extract_entries({'contents': section_contents}, continuation)
        if not continuation[0]:
            return
3902
3903
3904class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
3905 IE_DESC = 'YouTube Tabs'
3906 _VALID_URL = r'''(?x:
3907 https?://
3908 (?:\w+\.)?
3909 (?:
3910 youtube(?:kids)?\.com|
3911 %(invidious)s
3912 )/
3913 (?:
3914 (?P<channel_type>channel|c|user|browse)/|
3915 (?P<not_channel>
3916 feed/|hashtag/|
3917 (?:playlist|watch)\?.*?\blist=
3918 )|
3919 (?!(?:%(reserved_names)s)\b) # Direct URLs
3920 )
3921 (?P<id>[^/?\#&]+)
3922 )''' % {
3923 'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,
3924 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
3925 }
3926 IE_NAME = 'youtube:tab'
3927
3928 _TESTS = [{
3929 'note': 'playlists, multipage',
3930 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
3931 'playlist_mincount': 94,
3932 'info_dict': {
3933 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3934 'title': 'Игорь Клейнер - Playlists',
3935 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3936 'uploader': 'Игорь Клейнер',
3937 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3938 },
3939 }, {
3940 'note': 'playlists, multipage, different order',
3941 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3942 'playlist_mincount': 94,
3943 'info_dict': {
3944 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3945 'title': 'Игорь Клейнер - Playlists',
3946 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3947 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3948 'uploader': 'Игорь Клейнер',
3949 },
3950 }, {
3951 'note': 'playlists, series',
3952 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
3953 'playlist_mincount': 5,
3954 'info_dict': {
3955 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3956 'title': '3Blue1Brown - Playlists',
3957 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3958 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3959 'uploader': '3Blue1Brown',
3960 },
3961 }, {
3962 'note': 'playlists, singlepage',
3963 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3964 'playlist_mincount': 4,
3965 'info_dict': {
3966 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3967 'title': 'ThirstForScience - Playlists',
3968 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
3969 'uploader': 'ThirstForScience',
3970 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3971 }
3972 }, {
3973 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3974 'only_matching': True,
3975 }, {
3976 'note': 'basic, single video playlist',
3977 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3978 'info_dict': {
3979 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3980 'uploader': 'Sergey M.',
3981 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3982 'title': 'youtube-dl public playlist',
3983 },
3984 'playlist_count': 1,
3985 }, {
3986 'note': 'empty playlist',
3987 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3988 'info_dict': {
3989 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3990 'uploader': 'Sergey M.',
3991 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3992 'title': 'youtube-dl empty playlist',
3993 },
3994 'playlist_count': 0,
3995 }, {
3996 'note': 'Home tab',
3997 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
3998 'info_dict': {
3999 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4000 'title': 'lex will - Home',
4001 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4002 'uploader': 'lex will',
4003 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4004 },
4005 'playlist_mincount': 2,
4006 }, {
4007 'note': 'Videos tab',
4008 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
4009 'info_dict': {
4010 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4011 'title': 'lex will - Videos',
4012 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4013 'uploader': 'lex will',
4014 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4015 },
4016 'playlist_mincount': 975,
4017 }, {
4018 'note': 'Videos tab, sorted by popular',
4019 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
4020 'info_dict': {
4021 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4022 'title': 'lex will - Videos',
4023 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4024 'uploader': 'lex will',
4025 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4026 },
4027 'playlist_mincount': 199,
4028 }, {
4029 'note': 'Playlists tab',
4030 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
4031 'info_dict': {
4032 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4033 'title': 'lex will - Playlists',
4034 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4035 'uploader': 'lex will',
4036 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4037 },
4038 'playlist_mincount': 17,
4039 }, {
4040 'note': 'Community tab',
4041 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
4042 'info_dict': {
4043 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4044 'title': 'lex will - Community',
4045 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4046 'uploader': 'lex will',
4047 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4048 },
4049 'playlist_mincount': 18,
4050 }, {
4051 'note': 'Channels tab',
4052 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
4053 'info_dict': {
4054 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4055 'title': 'lex will - Channels',
4056 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4057 'uploader': 'lex will',
4058 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4059 },
4060 'playlist_mincount': 12,
4061 }, {
4062 'note': 'Search tab',
4063 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
4064 'playlist_mincount': 40,
4065 'info_dict': {
4066 'id': 'UCYO_jab_esuFRV4b17AJtAw',
4067 'title': '3Blue1Brown - Search - linear algebra',
4068 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
4069 'uploader': '3Blue1Brown',
4070 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
4071 },
4072 }, {
4073 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4074 'only_matching': True,
4075 }, {
4076 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4077 'only_matching': True,
4078 }, {
4079 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4080 'only_matching': True,
4081 }, {
4082 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
4083 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
4084 'info_dict': {
4085 'title': '29C3: Not my department',
4086 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
4087 'uploader': 'Christiaan008',
4088 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
4089 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
4090 },
4091 'playlist_count': 96,
4092 }, {
4093 'note': 'Large playlist',
4094 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
4095 'info_dict': {
4096 'title': 'Uploads from Cauchemar',
4097 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
4098 'uploader': 'Cauchemar',
4099 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
4100 },
4101 'playlist_mincount': 1123,
4102 }, {
4103 'note': 'even larger playlist, 8832 videos',
4104 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
4105 'only_matching': True,
4106 }, {
4107 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
4108 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
4109 'info_dict': {
4110 'title': 'Uploads from Interstellar Movie',
4111 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
4112 'uploader': 'Interstellar Movie',
4113 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4114 },
4115 'playlist_mincount': 21,
4116 }, {
4117 'note': 'Playlist with "show unavailable videos" button',
4118 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
4119 'info_dict': {
4120 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
4121 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
4122 'uploader': 'Phim Siêu Nhân Nhật Bản',
4123 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
4124 },
4125 'playlist_mincount': 200,
4126 }, {
4127 'note': 'Playlist with unavailable videos in page 7',
4128 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
4129 'info_dict': {
4130 'title': 'Uploads from BlankTV',
4131 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
4132 'uploader': 'BlankTV',
4133 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
4134 },
4135 'playlist_mincount': 1000,
4136 }, {
4137 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
4138 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
4139 'info_dict': {
4140 'title': 'Data Analysis with Dr Mike Pound',
4141 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
4142 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
4143 'uploader': 'Computerphile',
4144 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
4145 },
4146 'playlist_mincount': 11,
4147 }, {
4148 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
4149 'only_matching': True,
4150 }, {
4151 'note': 'Playlist URL that does not actually serve a playlist',
4152 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
4153 'info_dict': {
4154 'id': 'FqZTN594JQw',
4155 'ext': 'webm',
4156 'title': "Smiley's People 01 detective, Adventure Series, Action",
4157 'uploader': 'STREEM',
4158 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
4159 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
4160 'upload_date': '20150526',
4161 'license': 'Standard YouTube License',
4162 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
4163 'categories': ['People & Blogs'],
4164 'tags': list,
4165 'view_count': int,
4166 'like_count': int,
4167 'dislike_count': int,
4168 },
4169 'params': {
4170 'skip_download': True,
4171 },
4172 'skip': 'This video is not available.',
4173 'add_ie': [YoutubeIE.ie_key()],
4174 }, {
4175 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
4176 'only_matching': True,
4177 }, {
4178 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
4179 'only_matching': True,
4180 }, {
4181 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
4182 'info_dict': {
4183 'id': '3yImotZU3tw', # This will keep changing
4184 'ext': 'mp4',
4185 'title': compat_str,
4186 'uploader': 'Sky News',
4187 'uploader_id': 'skynews',
4188 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
4189 'upload_date': r're:\d{8}',
4190 'description': compat_str,
4191 'categories': ['News & Politics'],
4192 'tags': list,
4193 'like_count': int,
4194 'dislike_count': int,
4195 },
4196 'params': {
4197 'skip_download': True,
4198 },
4199 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
4200 }, {
4201 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
4202 'info_dict': {
4203 'id': 'a48o2S1cPoo',
4204 'ext': 'mp4',
4205 'title': 'The Young Turks - Live Main Show',
4206 'uploader': 'The Young Turks',
4207 'uploader_id': 'TheYoungTurks',
4208 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
4209 'upload_date': '20150715',
4210 'license': 'Standard YouTube License',
4211 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
4212 'categories': ['News & Politics'],
4213 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
4214 'like_count': int,
4215 'dislike_count': int,
4216 },
4217 'params': {
4218 'skip_download': True,
4219 },
4220 'only_matching': True,
4221 }, {
4222 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
4223 'only_matching': True,
4224 }, {
4225 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
4226 'only_matching': True,
4227 }, {
4228 'note': 'A channel that is not live. Should raise error',
4229 'url': 'https://www.youtube.com/user/numberphile/live',
4230 'only_matching': True,
4231 }, {
4232 'url': 'https://www.youtube.com/feed/trending',
4233 'only_matching': True,
4234 }, {
4235 'url': 'https://www.youtube.com/feed/library',
4236 'only_matching': True,
4237 }, {
4238 'url': 'https://www.youtube.com/feed/history',
4239 'only_matching': True,
4240 }, {
4241 'url': 'https://www.youtube.com/feed/subscriptions',
4242 'only_matching': True,
4243 }, {
4244 'url': 'https://www.youtube.com/feed/watch_later',
4245 'only_matching': True,
4246 }, {
4247 'note': 'Recommended - redirects to home page.',
4248 'url': 'https://www.youtube.com/feed/recommended',
4249 'only_matching': True,
4250 }, {
4251 'note': 'inline playlist with not always working continuations',
4252 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
4253 'only_matching': True,
4254 }, {
4255 'url': 'https://www.youtube.com/course',
4256 'only_matching': True,
4257 }, {
4258 'url': 'https://www.youtube.com/zsecurity',
4259 'only_matching': True,
4260 }, {
4261 'url': 'http://www.youtube.com/NASAgovVideo/videos',
4262 'only_matching': True,
4263 }, {
4264 'url': 'https://www.youtube.com/TheYoungTurks/live',
4265 'only_matching': True,
4266 }, {
4267 'url': 'https://www.youtube.com/hashtag/cctv9',
4268 'info_dict': {
4269 'id': 'cctv9',
4270 'title': '#cctv9',
4271 },
4272 'playlist_mincount': 350,
4273 }, {
4274 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
4275 'only_matching': True,
4276 }, {
4277 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
4278 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
4279 'only_matching': True
4280 }, {
4281 'note': '/browse/ should redirect to /channel/',
4282 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
4283 'only_matching': True
4284 }, {
4285 'note': 'VLPL, should redirect to playlist?list=PL...',
4286 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
4287 'info_dict': {
4288 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
4289 'uploader': 'NoCopyrightSounds',
4290 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
4291 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
4292 'title': 'NCS Releases',
4293 },
4294 'playlist_mincount': 166,
4295 }, {
4296 'note': 'Topic, should redirect to playlist?list=UU...',
4297 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
4298 'info_dict': {
4299 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
4300 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
4301 'title': 'Uploads from Royalty Free Music - Topic',
4302 'uploader': 'Royalty Free Music - Topic',
4303 },
4304 'expected_warnings': [
4305 'A channel/user page was given',
4306 'The URL does not have a videos tab',
4307 ],
4308 'playlist_mincount': 101,
4309 }, {
4310 'note': 'Topic without a UU playlist',
4311 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
4312 'info_dict': {
4313 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
4314 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
4315 },
4316 'expected_warnings': [
4317 'A channel/user page was given',
4318 'The URL does not have a videos tab',
4319 'Falling back to channel URL',
4320 ],
4321 'playlist_mincount': 9,
4322 }, {
4323 'note': 'Youtube music Album',
4324 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
4325 'info_dict': {
4326 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
4327 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
4328 },
4329 'playlist_count': 50,
4330 }, {
4331 'note': 'unlisted single video playlist',
4332 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
4333 'info_dict': {
4334 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
4335 'uploader': 'colethedj',
4336 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
4337 'title': 'yt-dlp unlisted playlist test',
4338 'availability': 'unlisted'
4339 },
4340 'playlist_count': 1,
4341 }, {
4342 'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
4343 'url': 'https://www.youtube.com/feed/recommended',
4344 'info_dict': {
4345 'id': 'recommended',
4346 'title': 'recommended',
4347 },
4348 'playlist_mincount': 50,
4349 'params': {
4350 'skip_download': True,
4351 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
4352 },
4353 }, {
4354 'note': 'API Fallback: /videos tab, sorted by oldest first',
4355 'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',
4356 'info_dict': {
4357 'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
4358 'title': 'Cody\'sLab - Videos',
4359 'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',
4360 'uploader': 'Cody\'sLab',
4361 'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
4362 },
4363 'playlist_mincount': 650,
4364 'params': {
4365 'skip_download': True,
4366 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
4367 },
4368 }, {
4369 'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
4370 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
4371 'info_dict': {
4372 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
4373 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
4374 'title': 'Uploads from Royalty Free Music - Topic',
4375 'uploader': 'Royalty Free Music - Topic',
4376 },
4377 'expected_warnings': [
4378 'A channel/user page was given',
4379 'The URL does not have a videos tab',
4380 ],
4381 'playlist_mincount': 101,
4382 'params': {
4383 'skip_download': True,
4384 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
4385 },
4386 }]
4387
@classmethod
def suitable(cls, url):
    """Reject URLs that YoutubeIE accepts; otherwise defer to the inherited check."""
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
9297939e 4392
def _real_extract(self, url):
    """Extract `url`, then propagate the smuggled data to every resulting entry URL.

    URLs recognised by `is_music_url` are flagged with 'is_music_url' in the
    smuggled data before extraction.
    """
    url, smuggled_data = unsmuggle_url(url, {})
    if self.is_music_url(url):
        smuggled_data['is_music_url'] = True

    result = self.__real_extract(url, smuggled_data)
    if result.get('entries'):
        # Wrap lazily so the entries are only touched as they are consumed
        result['entries'] = self._smuggle_data(result['entries'], smuggled_data)
    return result
4401
37e57a9f 4402 _URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$')
fe03a6cd 4403
def __real_extract(self, url, smuggled_data):
    """Resolve a tab, channel or playlist URL into its final result.

    Steps, in order:
      1. Normalise the host to www.youtube.com and split the URL with _URL_RE.
      2. Rewrite YouTube Music channel ids (VL..., MP..., /browse/) and redirect
         bare channel URLs to their /videos tab unless the
         'no-youtube-channel-redirect' compat option is set.
      3. Handle watch URLs carrying a video id and/or a playlist id.
      4. Download the page data and dispatch on its shape: tabs, an inline
         playlist, or a single video.

    Raises ExtractorError when the page cannot be recognised, when an album
    cannot be resolved, or when a /live tab did not redirect to a video.
    """
    item_id = self._match_id(url)
    url = compat_urlparse.urlunparse(
        compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    def get_mobj(url):
        # Named groups of _URL_RE for `url`; groups that did not participate become ''
        groups = self._URL_RE.match(url).groupdict()
        return {name: '' if value is None else value for name, value in groups.items()}

    redirect_warning = None
    mobj = get_mobj(url)
    # Youtube returns incomplete data if tabname is not lower case
    pre, tab, post = mobj['pre'], mobj['tab'].lower(), mobj['post']
    is_channel = not mobj['not_channel']
    if is_channel and smuggled_data.get('is_music_url'):
        if item_id[:2] == 'VL':  # Youtube music VL channels have an equivalent playlist
            item_id = item_id[2:]
            pre, tab, post, is_channel = f'https://www.youtube.com/playlist?list={item_id}', '', '', False
        elif item_id[:2] == 'MP':  # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
            mdata = self._extract_tab_endpoint(
                f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
            murl = traverse_obj(
                mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                get_all=False, expected_type=compat_str)
            if not murl:
                raise ExtractorError('Failed to resolve album to playlist')
            return self.url_result(murl, ie=YoutubeTabIE.ie_key())
        elif mobj['channel_type'] == 'browse':  # Youtube music /browse/ should be changed to /channel/
            pre = f'https://www.youtube.com/channel/{item_id}'

    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # Home URLs should redirect to /videos/
        redirect_warning = ('A channel/user page was given. All the channel\'s videos will be downloaded. '
                            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    url = ''.join((pre, tab, post))
    mobj = get_mobj(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
        url = f'https://www.youtube.com/playlist?list={playlist_id}'
        mobj = get_mobj(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen(f'Downloading just video {video_id} because of --no-playlist')
            return self.url_result(
                f'https://www.youtube.com/watch?v={video_id}',
                ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen(f'Downloading playlist {playlist_id}; add --no-playlist to just download video {video_id}')

    data, ytcfg = self._extract_data(url, item_id)

    tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        tab_name = selected_tab.get('title', '')
        if 'no-youtube-channel-redirect' not in compat_opts:
            if mobj['tab'] == '/live':
                # Live tab should have redirected to the video
                raise ExtractorError('The channel is not currently live', expected=True)
            wanted_tab = mobj['tab'][1:]
            if mobj['tab'] == '/videos' and tab_name.lower() != wanted_tab:
                redirect_warning = f'The URL does not have a {wanted_tab} tab'
                if not mobj['not_channel'] and item_id[:2] == 'UC':
                    # Topic channels don't have /videos. Use the equivalent playlist instead
                    pl_id = f'UU{item_id[2:]}'
                    pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
                    try:
                        data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True)
                    except ExtractorError:
                        redirect_warning += ' and the playlist redirect gave error'
                    else:
                        item_id, url, tab_name = pl_id, pl_url, wanted_tab
                        redirect_warning += f'. Redirecting to playlist {pl_id} instead'
                if tab_name.lower() != wanted_tab:
                    redirect_warning += f'. {tab_name} tab is being downloaded instead'

    if redirect_warning:
        self.report_warning(redirect_warning)
    self.write_debug(f'Final URL: {url}')

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)

    # `data` may have been replaced above, so the tabs are looked up again
    tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
    if tabs:
        return self._extract_from_tabs(item_id, ytcfg, data, tabs)

    playlist = traverse_obj(
        data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

    video_id = traverse_obj(
        data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
    if not video_id:
        raise ExtractorError('Unable to recognize tab page')

    if mobj['tab'] != '/live':  # live tab is expected to redirect to video
        self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
    return self.url_result(
        f'https://www.youtube.com/watch?v={video_id}',
        ie=YoutubeIE.ie_key(), video_id=video_id)
c5e8d7af 4515
c5e8d7af 4516
8bdd16b4 4517class YoutubePlaylistIE(InfoExtractor):
96565c7e 4518 IE_DESC = 'YouTube playlists'
8bdd16b4 4519 _VALID_URL = r'''(?x)(?:
4520 (?:https?://)?
4521 (?:\w+\.)?
4522 (?:
4523 (?:
4524 youtube(?:kids)?\.com|
d9190e44 4525 %(invidious)s
8bdd16b4 4526 )
4527 /.*?\?.*?\blist=
4528 )?
4529 (?P<id>%(playlist_id)s)
d9190e44
RH
4530 )''' % {
4531 'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
4532 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
4533 }
8bdd16b4 4534 IE_NAME = 'youtube:playlist'
cdc628a4 4535 _TESTS = [{
8bdd16b4 4536 'note': 'issue #673',
4537 'url': 'PLBB231211A4F62143',
cdc628a4 4538 'info_dict': {
8bdd16b4 4539 'title': '[OLD]Team Fortress 2 (Class-based LP)',
4540 'id': 'PLBB231211A4F62143',
4541 'uploader': 'Wickydoo',
4542 'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
11f9be09 4543 'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
8bdd16b4 4544 },
4545 'playlist_mincount': 29,
4546 }, {
4547 'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
4548 'info_dict': {
4549 'title': 'YDL_safe_search',
4550 'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
4551 },
4552 'playlist_count': 2,
4553 'skip': 'This playlist is private',
9558dcec 4554 }, {
8bdd16b4 4555 'note': 'embedded',
4556 'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
4557 'playlist_count': 4,
9558dcec 4558 'info_dict': {
8bdd16b4 4559 'title': 'JODA15',
4560 'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
4561 'uploader': 'milan',
4562 'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
9558dcec 4563 }
cdc628a4 4564 }, {
8bdd16b4 4565 'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
11f9be09 4566 'playlist_mincount': 654,
8bdd16b4 4567 'info_dict': {
4568 'title': '2018 Chinese New Singles (11/6 updated)',
4569 'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
4570 'uploader': 'LBK',
4571 'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
11f9be09 4572 'description': 'md5:da521864744d60a198e3a88af4db0d9d',
8bdd16b4 4573 }
daa0df9e 4574 }, {
29f7c58a 4575 'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
4576 'only_matching': True,
4577 }, {
4578 # music album playlist
4579 'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
4580 'only_matching': True,
4581 }]
4582
@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle *url*.

    URLs accepted by YoutubeTabIE, and URLs whose query string carries a
    non-empty ``v`` parameter, are rejected. Every other URL is decided by
    the inherited ``suitable`` check.
    """
    if YoutubeTabIE.suitable(url):
        return False
    # NOTE(review): parse_qs is also imported at module level; this
    # function-scope import presumably keeps the method self-contained --
    # confirm before removing it.
    from ..utils import parse_qs
    first_video_id = parse_qs(url).get('v', [None])[0]
    return False if first_video_id else super(YoutubePlaylistIE, cls).suitable(url)
29f7c58a 4592
def _real_extract(self, url):
    """Hand a playlist URL (or bare playlist ID) over to YoutubeTabIE.

    The input is rewritten to a https://www.youtube.com/playlist URL that
    keeps the original query string; when the input has no query string,
    ``list=<playlist id>`` is used instead.
    """
    playlist_id = self._match_id(url)
    # Evaluated on the incoming URL, before it is rewritten below
    music = YoutubeBaseInfoExtractor.is_music_url(url)
    query = parse_qs(url) or {'list': playlist_id}
    playlist_url = update_url_query('https://www.youtube.com/playlist', query)
    if music:
        # Attach the music flag to the rewritten URL via smuggle_url
        playlist_url = smuggle_url(playlist_url, {'is_music_url': True})
    return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
29f7c58a 4602
4603
class YoutubeYtBeIE(InfoExtractor):
    """Handle youtu.be short links that also carry a ``list=`` parameter.

    The link is rewritten to the equivalent youtube.com/watch URL and
    delegated to YoutubeTabIE.
    """
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the full watch URL for the short link and pass it to YoutubeTabIE."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        watch_query = {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        }
        watch_url = update_url_query('https://www.youtube.com/watch', watch_query)
        # The playlist ID (not the video ID) is what gets passed as video_id
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 4642
4643
class YoutubeYtUserIE(InfoExtractor):
    """Expand the ``ytuser:<name>`` shorthand into the user's videos page URL."""
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the /user/<name>/videos URL."""
        user_id = self._match_id(url)
        videos_url = 'https://www.youtube.com/user/%s/videos' % user_id
        return self.url_result(videos_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 4657
b05654f0 4658
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Map the ``:ytfav`` keyword (and its spelling variants) to the ``LL`` playlist."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    # Accepts :ytfav, :ytfavs, :ytfavorite(s) and :ytfavourite(s)
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed ``list=LL`` playlist URL."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
4676
4677
class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor registered under the ``ytsearch`` search key."""
    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube search'
    _SEARCH_KEY = 'ytsearch'
    # Search filter parameter: videos only
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'
    _TESTS = []
b05654f0 4684
a61fd4cf 4685
class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor registered under the ``ytsearchdate`` search key."""
    # Name is derived from the plain search extractor's name
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    # Search filter parameter: videos only, sorted by date
    _SEARCH_PARAMS = 'CAISAhAB'
75dff0ee 4691
c9ae7b95 4692
class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Extract results from youtube.com/results search URLs.

    The search terms come from the ``search_query`` (or ``q``) parameter and
    the optional ``sp`` parameter is forwarded to the search.
    """
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a playlist of search results, using the query as both id and title."""
        params = parse_qs(url)
        # 'search_query' takes precedence over 'q' when both are present
        search_terms = params.get('search_query') or params.get('q')
        query = search_terms[0]
        # First 'sp' value, or None when the URL has no 'sp' parameter
        sp = params.get('sp', (None,))[0]
        entries = self._search_results(query, sp)
        return self.playlist_result(entries, query, query)
3462ffa8 4721
4722
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Common base for the feed extractors.
    Each subclass has to provide a _FEED_NAME attribute; it is used to build
    both IE_NAME and the https://www.youtube.com/feed/<name> URL.
    """
    _LOGIN_REQUIRED = True
    # Presumably reset so that YoutubeTabIE's test cases are not inherited
    _TESTS = []

    @property
    def IE_NAME(self):
        feed = self._FEED_NAME
        return 'youtube:%s' % feed

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with this feed's URL; *url* itself is not used."""
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
4739
4740
class YoutubeWatchLaterIE(InfoExtractor):
    """Map the ``:ytwatchlater`` keyword to the ``WL`` playlist."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed ``list=WL`` playlist URL."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 4753
4754
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the 'recommended' feed."""
    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _FEED_NAME = 'recommended'
    # Overrides the base class setting of True
    _LOGIN_REQUIRED = False
    # Matches the bare youtube.com home page as well as :ytrec / :ytrecommended
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
1ed5b5c9 4770
1ed5b5c9 4771
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the 'subscriptions' feed."""
    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _FEED_NAME = 'subscriptions'
    # Accepts :ytsub, :ytsubs, :ytsubscription and :ytsubscriptions
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
1ed5b5c9 4783
1ed5b5c9 4784
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the 'history' feed."""
    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _FEED_NAME = 'history'
    # Accepts :ythis and :ythistory
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
1ed5b5c9
JMF
4793
4794
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch YouTube watch URLs that lost their video ID.

    This typically happens when an unquoted URL is cut at the first ``&`` by
    the shell. The extractor never extracts anything; it only raises an
    explanatory, expected error.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    # Matches a watch URL with at most one known non-video parameter and
    # nothing after it, or an attribution_link URL with only its 'a' parameter
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError explaining how to quote the URL.

        Raises:
            ExtractorError: unconditionally, with ``expected=True``.
        """
        # The suggested command names yt-dlp, the program this file belongs to.
        # The space before the final period keeps the period from being
        # copied as part of the example video ID.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            'or simply yt-dlp BaW_jenozKc .',
            expected=True)
772fd5cc
PH
4842
4843
class YoutubeClipIE(InfoExtractor):
    """Placeholder for youtube.com/clip/ URLs: warn, then defer to the Generic extractor."""
    IE_NAME = 'youtube:clip'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/'

    def _real_extract(self, url):
        """Warn that clips are unsupported and pass *url* on unchanged."""
        warning = 'YouTube clips are not currently supported. The entire video will be downloaded instead'
        self.report_warning(warning)
        return self.url_result(url, 'Generic')
4852
4853
772fd5cc
PH
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose ``v`` parameter is shorter than a full video ID.

    The pattern only accepts IDs of 1 to 10 characters at the very end of the
    URL; extraction always fails with an explanatory, expected error.
    """
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the partial ID and the URL."""
        partial_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (partial_id, url)
        raise ExtractorError(message, expected=True)