]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
[PatreonUser] Do not capture RSS URLs
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
2d6659b9 5import base64
d92f5d5a 6import calendar
109dd3b2 7import copy
fe93e2c4 8import datetime
a5c56234 9import hashlib
0ca96d48 10import itertools
c5e8d7af 11import json
720c3099 12import math
c4417ddb 13import os.path
d77ab8e2 14import random
c5e8d7af 15import re
8a784c74 16import time
e0df6211 17import traceback
c5e8d7af 18
b05654f0 19from .common import InfoExtractor, SearchInfoExtractor
4bb4a188 20from ..compat import (
edf3e38e 21 compat_chr,
29f7c58a 22 compat_HTTPError,
c5e8d7af 23 compat_parse_qs,
545cc85d 24 compat_str,
7fd002c0 25 compat_urllib_parse_unquote_plus,
15707c7e 26 compat_urllib_parse_urlencode,
7c80519c 27 compat_urllib_parse_urlparse,
7c61bd36 28 compat_urlparse,
4bb4a188 29)
545cc85d 30from ..jsinterp import JSInterpreter
4bb4a188 31from ..utils import (
720c3099 32 bug_reports_message,
2d6659b9 33 bytes_to_intlist,
c5e8d7af 34 clean_html,
d92f5d5a 35 datetime_from_str,
11f9be09 36 dict_get,
358de58c 37 error_to_compat_str,
c5e8d7af 38 ExtractorError,
2d30521a 39 float_or_none,
11f9be09 40 format_field,
dd27fd17 41 int_or_none,
2d6659b9 42 intlist_to_bytes,
641ad5d8 43 is_html,
34921b43 44 join_nonempty,
94278f72 45 mimetype2ext,
9c0d7f49 46 network_exceptions,
11f9be09 47 orderedSet,
6310acf5 48 parse_codecs,
49bd8c66 49 parse_count,
7c80519c 50 parse_duration,
7ea65411 51 parse_iso8601,
4dfbf869 52 parse_qs,
dca3ff4a 53 qualities,
c0ac49bc 54 remove_end,
3995d37d 55 remove_start,
cf7e015f 56 smuggle_url,
dbdaaa23 57 str_or_none,
c93d53f5 58 str_to_int,
7c365c21 59 traverse_obj,
556dbe7f 60 try_get,
c5e8d7af
PH
61 unescapeHTML,
62 unified_strdate,
cf7e015f 63 unsmuggle_url,
8bdd16b4 64 update_url_query,
21c340b8 65 url_or_none,
fe93e2c4 66 urljoin,
7c365c21 67 variadic,
c5e8d7af
PH
68)
69
5f6a1245 70
def get_first(obj, keys, **kwargs):
    """Look up ``keys`` below every element of ``obj`` with ``traverse_obj``.

    The lookup path is ``...`` followed by ``keys`` (a single key or a
    sequence of keys, normalised with ``variadic``).  ``get_all=False`` is
    always passed; all other keyword arguments are forwarded unchanged.
    """
    branch_path = (..., *variadic(keys))
    return traverse_obj(obj, branch_path, **kwargs, get_all=False)
73
74
000c15a4 75# any clients starting with _ cannot be explicitly requested by the user
# Built-in InnerTube client table, keyed by client name.
# Every entry carries an INNERTUBE_CONTEXT (clientName / clientVersion) and the
# numeric INNERTUBE_CONTEXT_CLIENT_NAME.  INNERTUBE_API_KEY, INNERTUBE_HOST and
# REQUIRE_JS_PLAYER are optional here: build_innertube_clients() fills in their
# defaults and also adds a 'priority' value to each entry.
76INNERTUBE_CLIENTS = {
77 'web': {
78 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
79 'INNERTUBE_CONTEXT': {
80 'client': {
81 'clientName': 'WEB',
82 'clientVersion': '2.20210622.10.00',
83 }
84 },
85 'INNERTUBE_CONTEXT_CLIENT_NAME': 1
86 },
87 'web_embedded': {
88 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
89 'INNERTUBE_CONTEXT': {
90 'client': {
91 'clientName': 'WEB_EMBEDDED_PLAYER',
92 'clientVersion': '1.20210620.0.1',
93 },
94 },
95 'INNERTUBE_CONTEXT_CLIENT_NAME': 56
96 },
97 'web_music': {
98 'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
99 'INNERTUBE_HOST': 'music.youtube.com',
100 'INNERTUBE_CONTEXT': {
101 'client': {
102 'clientName': 'WEB_REMIX',
103 'clientVersion': '1.20210621.00.00',
104 }
105 },
106 'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
107 },
e7e94f2a
D
108 'web_creator': {
109 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
110 'INNERTUBE_CONTEXT': {
111 'client': {
112 'clientName': 'WEB_CREATOR',
113 'clientVersion': '1.20210621.00.00',
114 }
115 },
116 'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
117 },
000c15a4 118 'android': {
119 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
120 'INNERTUBE_CONTEXT': {
121 'client': {
122 'clientName': 'ANDROID',
123 'clientVersion': '16.20',
124 }
125 },
126 'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
b6de707d 127 'REQUIRE_JS_PLAYER': False
000c15a4 128 },
129 'android_embedded': {
130 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
131 'INNERTUBE_CONTEXT': {
132 'client': {
133 'clientName': 'ANDROID_EMBEDDED_PLAYER',
134 'clientVersion': '16.20',
135 },
136 },
b6de707d 137 'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
138 'REQUIRE_JS_PLAYER': False
000c15a4 139 },
140 'android_music': {
141 'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
142 'INNERTUBE_HOST': 'music.youtube.com',
143 'INNERTUBE_CONTEXT': {
144 'client': {
145 'clientName': 'ANDROID_MUSIC',
146 'clientVersion': '4.32',
147 }
148 },
149 'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
b6de707d 150 'REQUIRE_JS_PLAYER': False
000c15a4 151 },
e7e94f2a
D
152 'android_creator': {
153 'INNERTUBE_CONTEXT': {
154 'client': {
155 'clientName': 'ANDROID_CREATOR',
156 'clientVersion': '21.24.100',
157 },
158 },
b6de707d 159 'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
160 'REQUIRE_JS_PLAYER': False
e7e94f2a 161 },
3619f78d 162 # ios has HLS live streams
163 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680
000c15a4 164 'ios': {
165 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
166 'INNERTUBE_CONTEXT': {
167 'client': {
168 'clientName': 'IOS',
169 'clientVersion': '16.20',
170 }
171 },
b6de707d 172 'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
173 'REQUIRE_JS_PLAYER': False
000c15a4 174 },
175 'ios_embedded': {
176 'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
177 'INNERTUBE_CONTEXT': {
178 'client': {
179 'clientName': 'IOS_MESSAGES_EXTENSION',
180 'clientVersion': '16.20',
181 },
182 },
b6de707d 183 'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
184 'REQUIRE_JS_PLAYER': False
000c15a4 185 },
186 'ios_music': {
187 'INNERTUBE_API_KEY': 'AIzaSyDK3iBpDP9nHVTk2qL73FLJICfOC3c51Og',
188 'INNERTUBE_HOST': 'music.youtube.com',
189 'INNERTUBE_CONTEXT': {
190 'client': {
191 'clientName': 'IOS_MUSIC',
192 'clientVersion': '4.32',
193 },
194 },
b6de707d 195 'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
196 'REQUIRE_JS_PLAYER': False
000c15a4 197 },
e7e94f2a
D
198 'ios_creator': {
199 'INNERTUBE_CONTEXT': {
200 'client': {
201 'clientName': 'IOS_CREATOR',
202 'clientVersion': '21.24.100',
203 },
204 },
b6de707d 205 'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
206 'REQUIRE_JS_PLAYER': False
e7e94f2a 207 },
3619f78d 208 # mweb has 'ultralow' formats
209 # See: https://github.com/yt-dlp/yt-dlp/pull/557
000c15a4 210 'mweb': {
211 'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
212 'INNERTUBE_CONTEXT': {
213 'client': {
214 'clientName': 'MWEB',
215 'clientVersion': '2.20210721.07.00',
216 }
217 },
218 'INNERTUBE_CONTEXT_CLIENT_NAME': 2
219 },
220}
221
222
def build_innertube_clients():
    """Complete every entry of ``INNERTUBE_CLIENTS`` in place.

    For each configured client this fills in the default API key, host,
    ``REQUIRE_JS_PLAYER`` flag and ``hl`` language, and assigns a numeric
    ``priority`` derived from the client's base name (the part before the
    first underscore).  Each base client additionally gets a
    ``<name>_agegate`` copy that uses the embed screen and a third-party
    context; ``*_embedded`` clients get the third-party context too.
    """
    embed_context = {
        'embedUrl': 'https://google.com',  # Can be any valid URL
    }
    base_clients = ('android', 'web', 'ios', 'mweb')
    rank = qualities(base_clients[::-1])
    defaults = (
        ('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8'),
        ('INNERTUBE_HOST', 'www.youtube.com'),
        ('REQUIRE_JS_PLAYER', True),
    )

    # Iterate over a snapshot: the "_agegate" variants are inserted into the
    # table while looping and must not be processed themselves.
    for name, cfg in tuple(INNERTUBE_CLIENTS.items()):
        for key, fallback in defaults:
            cfg.setdefault(key, fallback)
        cfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
        base_name = name.split('_', 1)[0]
        cfg['priority'] = 10 * rank(base_name)

        if name in base_clients:
            agegate_cfg = copy.deepcopy(cfg)
            agegate_cfg['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
            agegate_cfg['INNERTUBE_CONTEXT']['thirdParty'] = embed_context
            agegate_cfg['priority'] -= 1
            INNERTUBE_CLIENTS[f'{name}_agegate'] = agegate_cfg
        elif name.endswith('_embedded'):
            cfg['INNERTUBE_CONTEXT']['thirdParty'] = embed_context
            cfg['priority'] -= 2
        else:
            cfg['priority'] -= 3
247
248
249build_innertube_clients()
250
251
de7f3446 252class YoutubeBaseInfoExtractor(InfoExtractor):
b2e8bc1b 253 """Provide base functions for Youtube extractors"""
e00eb564 254
3462ffa8 255 _RESERVED_NAMES = (
3cd786db 256 r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'
3619f78d 257 r'shorts|movies|results|shared|hashtag|trending|feed|feeds|'
258 r'browse|oembed|get_video_info|iframe_api|s/player|'
cd7c66cf 259 r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')
3462ffa8 260
3619f78d 261 _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
262
b2e8bc1b 263 _NETRC_MACHINE = 'youtube'
3619f78d 264
b2e8bc1b
JMF
265 # If True it will raise an error if no login info is provided
266 _LOGIN_REQUIRED = False
267
d9190e44
RH
268 _INVIDIOUS_SITES = (
269 # invidious-redirect websites
270 r'(?:www\.)?redirect\.invidious\.io',
271 r'(?:(?:www|dev)\.)?invidio\.us',
272 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
273 r'(?:www\.)?invidious\.pussthecat\.org',
274 r'(?:www\.)?invidious\.zee\.li',
275 r'(?:www\.)?invidious\.ethibox\.fr',
276 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
277 # youtube-dl invidious instances list
278 r'(?:(?:www|no)\.)?invidiou\.sh',
279 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
280 r'(?:www\.)?invidious\.kabi\.tk',
281 r'(?:www\.)?invidious\.mastodon\.host',
282 r'(?:www\.)?invidious\.zapashcanon\.fr',
283 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
284 r'(?:www\.)?invidious\.tinfoil-hat\.net',
285 r'(?:www\.)?invidious\.himiko\.cloud',
286 r'(?:www\.)?invidious\.reallyancient\.tech',
287 r'(?:www\.)?invidious\.tube',
288 r'(?:www\.)?invidiou\.site',
289 r'(?:www\.)?invidious\.site',
290 r'(?:www\.)?invidious\.xyz',
291 r'(?:www\.)?invidious\.nixnet\.xyz',
292 r'(?:www\.)?invidious\.048596\.xyz',
293 r'(?:www\.)?invidious\.drycat\.fr',
294 r'(?:www\.)?inv\.skyn3t\.in',
295 r'(?:www\.)?tube\.poal\.co',
296 r'(?:www\.)?tube\.connect\.cafe',
297 r'(?:www\.)?vid\.wxzm\.sx',
298 r'(?:www\.)?vid\.mint\.lgbt',
299 r'(?:www\.)?vid\.puffyan\.us',
300 r'(?:www\.)?yewtu\.be',
301 r'(?:www\.)?yt\.elukerio\.org',
302 r'(?:www\.)?yt\.lelux\.fi',
303 r'(?:www\.)?invidious\.ggc-project\.de',
304 r'(?:www\.)?yt\.maisputain\.ovh',
305 r'(?:www\.)?ytprivate\.com',
306 r'(?:www\.)?invidious\.13ad\.de',
307 r'(?:www\.)?invidious\.toot\.koeln',
308 r'(?:www\.)?invidious\.fdn\.fr',
309 r'(?:www\.)?watch\.nettohikari\.com',
310 r'(?:www\.)?invidious\.namazso\.eu',
311 r'(?:www\.)?invidious\.silkky\.cloud',
312 r'(?:www\.)?invidious\.exonip\.de',
313 r'(?:www\.)?invidious\.riverside\.rocks',
314 r'(?:www\.)?invidious\.blamefran\.net',
315 r'(?:www\.)?invidious\.moomoo\.de',
316 r'(?:www\.)?ytb\.trom\.tf',
317 r'(?:www\.)?yt\.cyberhost\.uk',
318 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
319 r'(?:www\.)?qklhadlycap4cnod\.onion',
320 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
321 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
322 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
323 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
324 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
325 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
326 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
327 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
328 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
329 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
330 )
331
b2e8bc1b 332 def _login(self):
83317f69 333 """
334 Attempt to log in to YouTube.
83317f69 335 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
336 """
9d5d4d64 337
982ee69a
MB
338 if (self._LOGIN_REQUIRED
339 and self.get_param('cookiefile') is None
340 and self.get_param('cookiesfrombrowser') is None):
9d5d4d64 341 self.raise_login_required(
342 'Login details are needed to download this content', method='cookies')
68217024 343 username, password = self._get_login_info()
9d5d4d64 344 if username:
24b0a72b 345 self.report_warning(f'Cannot login to YouTube using username and password. {self._LOGIN_HINTS["cookies"]}')
b2e8bc1b 346
cce889b9 347 def _initialize_consent(self):
348 cookies = self._get_cookies('https://www.youtube.com/')
349 if cookies.get('__Secure-3PSID'):
350 return
351 consent_id = None
352 consent = cookies.get('CONSENT')
353 if consent:
354 if 'YES' in consent.value:
355 return
356 consent_id = self._search_regex(
357 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
358 if not consent_id:
359 consent_id = random.randint(100, 999)
360 self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
8d81f3e3 361
b2e8bc1b 362 def _real_initialize(self):
cce889b9 363 self._initialize_consent()
24b0a72b 364 self._login()
c5e8d7af 365
a0566bbf 366 _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
29f7c58a 367 _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
368 _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
a0566bbf 369
def _get_default_ytcfg(self, client='web'):
    """Return a deep copy of the built-in ``INNERTUBE_CLIENTS`` entry for ``client``."""
    builtin_cfg = INNERTUBE_CLIENTS[client]
    return copy.deepcopy(builtin_cfg)
109dd3b2 372
def _get_innertube_host(self, client='web'):
    """Return the ``INNERTUBE_HOST`` configured for ``client`` in ``INNERTUBE_CLIENTS``."""
    client_cfg = INNERTUBE_CLIENTS[client]
    return client_cfg['INNERTUBE_HOST']
109dd3b2 375
def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
    """``try_get`` on ``ytcfg``, falling back to the built-in config of ``default_client``.

    The fallback is used whenever the value obtained from ``ytcfg`` is falsy.
    """
    found = try_get(ytcfg, getter, expected_type)
    if found:
        return found
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)
380
def _extract_client_name(self, ytcfg, default_client='web'):
    """Return the client name from ``ytcfg`` (or from the default client config).

    ``INNERTUBE_CLIENT_NAME`` is tried first, then the ``clientName`` stored
    in the InnerTube context.
    """
    getters = (
        lambda x: x['INNERTUBE_CLIENT_NAME'],
        lambda x: x['INNERTUBE_CONTEXT']['client']['clientName'],
    )
    return self._ytcfg_get_safe(ytcfg, getters, compat_str, default_client)
109dd3b2 385
def _extract_client_version(self, ytcfg, default_client='web'):
    """Return the client version from ``ytcfg`` (or from the default client config).

    ``INNERTUBE_CLIENT_VERSION`` is tried first, then the ``clientVersion``
    stored in the InnerTube context.
    """
    getters = (
        lambda x: x['INNERTUBE_CLIENT_VERSION'],
        lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion'],
    )
    return self._ytcfg_get_safe(ytcfg, getters, compat_str, default_client)
109dd3b2 390
def _extract_api_key(self, ytcfg=None, default_client='web'):
    """Return ``INNERTUBE_API_KEY`` from ``ytcfg``, or from the default client config."""
    def key_getter(cfg):
        return cfg['INNERTUBE_API_KEY']

    return self._ytcfg_get_safe(ytcfg, key_getter, compat_str, default_client)
393
def _extract_context(self, ytcfg=None, default_client='web'):
    """Return the InnerTube request context to use for ``ytcfg``.

    A non-empty ``INNERTUBE_CONTEXT`` dict in ``ytcfg`` is returned as is.
    Otherwise the context of the built-in ``default_client`` config is used;
    when ``ytcfg`` is non-empty, its client name/version (and
    ``VISITOR_DATA``, if present) are written into that context.
    """
    def context_of(cfg):
        return try_get(cfg, lambda x: x['INNERTUBE_CONTEXT'], dict)

    supplied = context_of(ytcfg)
    if supplied:
        return supplied

    fallback = context_of(self._get_default_ytcfg(default_client))
    if not ytcfg:
        return fallback

    # Recreate the client context (required)
    client = fallback['client']
    client.update({
        'clientVersion': self._extract_client_version(ytcfg, default_client),
        'clientName': self._extract_client_name(ytcfg, default_client),
    })
    visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
    if visitor_data:
        client['visitorData'] = visitor_data
    return fallback
413
cf87314d 414 _SAPISID = None
415
109dd3b2 416 def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
a5c56234 417 time_now = round(time.time())
cf87314d 418 if self._SAPISID is None:
419 yt_cookies = self._get_cookies('https://www.youtube.com')
420 # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
421 # See: https://github.com/yt-dlp/yt-dlp/issues/393
422 sapisid_cookie = dict_get(
423 yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
424 if sapisid_cookie and sapisid_cookie.value:
425 self._SAPISID = sapisid_cookie.value
426 self.write_debug('Extracted SAPISID cookie')
427 # SAPISID cookie is required if not already present
428 if not yt_cookies.get('SAPISID'):
429 self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
430 self._set_cookie(
431 '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
432 else:
433 self._SAPISID = False
434 if not self._SAPISID:
435 return None
1974e99f 436 # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
437 sapisidhash = hashlib.sha1(
cf87314d 438 f'{time_now} {self._SAPISID} {origin}'.encode('utf-8')).hexdigest()
1974e99f 439 return f'SAPISIDHASH {time_now}_{sapisidhash}'
a5c56234
M
440
441 def _call_api(self, ep, query, video_id, fatal=True, headers=None,
f4f751af 442 note='Downloading API JSON', errnote='Unable to download API page',
000c15a4 443 context=None, api_key=None, api_hostname=None, default_client='web'):
f4f751af 444
109dd3b2 445 data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
8bdd16b4 446 data.update(query)
11f9be09 447 real_headers = self.generate_api_headers(default_client=default_client)
f4f751af 448 real_headers.update({'content-type': 'application/json'})
449 if headers:
450 real_headers.update(headers)
545cc85d 451 return self._download_json(
109dd3b2 452 'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
a5c56234 453 video_id=video_id, fatal=fatal, note=note, errnote=errnote,
f4f751af 454 data=json.dumps(data).encode('utf8'), headers=real_headers,
455 query={'key': api_key or self._extract_api_key()})
456
def extract_yt_initial_data(self, item_id, webpage, fatal=True):
    """Find the ``ytInitialData`` JSON object in ``webpage`` and return it parsed.

    The pattern anchored by ``_YT_INITIAL_BOUNDARY_RE`` is tried before the
    bare ``_YT_INITIAL_DATA_RE``.  Returns None when nothing was found.
    """
    patterns = (
        r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
        self._YT_INITIAL_DATA_RE,
    )
    raw_data = self._search_regex(patterns, webpage, 'yt initial data', fatal=fatal)
    if not raw_data:
        return None
    return self._parse_json(raw_data, item_id, fatal=fatal)
0c148415 463
@staticmethod
def _extract_session_index(*data):
    """
    Index of current account in account list.
    Returns the first ``SESSION_INDEX`` among the given ytcfgs that
    ``int_or_none`` can convert, or None if there is none.
    See: https://github.com/yt-dlp/yt-dlp/pull/519
    """
    candidates = (
        int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
        for ytcfg in data)
    return next((index for index in candidates if index is not None), None)
474
475 # Deprecated?
def _extract_identity_token(self, ytcfg=None, webpage=None):
    """Return the identity token from ``ytcfg['ID_TOKEN']`` or, failing that, from ``webpage``.

    Returns None when neither source yields a token.
    """
    token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str) if ytcfg else None
    if token:
        return token
    if not webpage:
        return None
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None, fatal=False)
a1c5d2ca
M
485
@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg
    """
    datasync_getters = (
        lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
        lambda x: x['DATASYNC_ID'],
    )
    for source in args:
        # ytcfg includes channel_syncid if on secondary channel
        delegated = try_get(source, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
        if delegated:
            return delegated
        datasync_id = try_get(source, datasync_getters, compat_str) or ''
        parts = datasync_id.split('||')
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        if len(parts) >= 2 and parts[1]:
            return parts[0]
a1c5d2ca 504
@staticmethod
def _extract_visitor_data(*args):
    """
    Extracts visitorData from an API response or ytcfg
    Appears to be used to track session state

    Three alternative locations are checked in each argument:
    ``VISITOR_DATA``, ``INNERTUBE_CONTEXT.client.visitorData`` and
    ``responseContext.visitorData``.
    """
    alternatives = (
        'VISITOR_DATA',
        ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
        ('responseContext', 'visitorData'),
    )
    # The alternatives must reach get_first() as ONE path element.  Passing
    # the bare tuple (the former ``((a, b, c))`` is not a 1-tuple) lets
    # variadic() unpack it, turning the alternatives into consecutive steps.
    return get_first(args, [alternatives], expected_type=str)
ac56cf38 514
@property
def is_authenticated(self):
    """Whether ``_generate_sapisidhash_header()`` yields an authorization value."""
    auth_header = self._generate_sapisidhash_header()
    return bool(auth_header)
518
def extract_ytcfg(self, video_id, webpage):
    """Parse the argument of the first ``ytcfg.set({...});`` call in ``webpage``.

    Returns an empty dict when ``webpage`` is empty, no call is found, or
    the JSON cannot be parsed.
    """
    if not webpage:
        return {}
    raw_cfg = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
        default='{}')
    parsed = self._parse_json(raw_cfg, video_id, fatal=False)
    return parsed or {}
526
def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """Build the HTTP headers for an InnerTube API request.

    Values not passed explicitly are taken from ``ytcfg`` (with the built-in
    config of ``default_client`` as fallback for client name/version).
    ``X-Goog-AuthUser`` is added when a session index is known or an
    ``account_syncid`` was passed; ``Authorization``/``X-Origin`` are added
    when a SAPISIDHASH can be generated.  Headers whose value is None are
    left out of the returned dict.
    """
    host = api_hostname if api_hostname else self._get_innertube_host(default_client)
    origin = 'https://' + host
    client_name = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)
    headers = {
        'X-YouTube-Client-Name': compat_str(client_name),
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
        'Origin': origin,
        'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
        'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
        'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg)
    }

    if session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if session_index is not None:
        headers['X-Goog-AuthUser'] = session_index
    elif account_syncid:
        # An explicit syncid without a known session index falls back to index 0
        headers['X-Goog-AuthUser'] = 0

    auth = self._generate_sapisidhash_header(origin)
    if auth is not None:
        headers['Authorization'] = auth
        headers['X-Origin'] = origin
    return {name: value for name, value in headers.items() if value is not None}
29f7c58a 551
2d6659b9 552 @staticmethod
553 def _build_api_continuation_query(continuation, ctp=None):
554 query = {
555 'continuation': continuation
556 }
557 # TODO: Inconsistency with clickTrackingParams.
558 # Currently we have a fixed ctp contained within context (from ytcfg)
559 # and a ctp in root query for continuation.
560 if ctp:
561 query['clickTracking'] = {'clickTrackingParams': ctp}
562 return query
563
@classmethod
def _extract_next_continuation_data(cls, renderer):
    """Build a continuation query from ``nextContinuationData``/``reloadContinuationData``.

    Returns None when ``renderer`` has neither, or the data carries no
    ``continuation`` token.
    """
    continuation_data = try_get(
        renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
                   lambda x: x['continuation']['reloadContinuationData']), dict)
    token = continuation_data.get('continuation') if continuation_data else None
    if not token:
        return None
    return cls._build_api_continuation_query(
        token, continuation_data.get('clickTrackingParams'))
2d6659b9 576
@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """Build a continuation query from a continuation endpoint dict.

    The token is read from ``continuationCommand.token``; returns None when
    ``continuation_ep`` is not a dict or has no token.
    """
    if not isinstance(continuation_ep, dict):
        return None
    token = try_get(
        continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
    if not token:
        return None
    return cls._build_api_continuation_query(
        token, continuation_ep.get('clickTrackingParams'))
2d6659b9 586
@classmethod
def _extract_continuation(cls, renderer):
    """Return the continuation query for ``renderer``, or None if there is none.

    The renderer's own next/reload continuation data takes precedence;
    otherwise the first usable ``continuationItemRenderer`` among its
    ``contents`` and ``items`` entries is used.
    """
    direct = cls._extract_next_continuation_data(renderer)
    if direct:
        return direct

    entries = [
        entry
        for key in ('contents', 'items')
        for entry in try_get(renderer, lambda x: x[key], list) or []]

    for entry in entries:
        if not isinstance(entry, dict):
            continue
        endpoint = try_get(
            entry, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
                    lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
            dict)
        query = cls._extract_continuation_ep_data(endpoint)
        if query:
            return query
    return None
607
@classmethod
def _extract_alerts(cls, data):
    """Yield ``(alert_type, message)`` for every alert in ``data['alerts']``.

    Each list entry is expected to be a dict of alert renderers.  Entries
    and renderers that are not dicts, that have no ``type`` or whose
    ``text`` yields no message are skipped.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # A malformed (non-dict) renderer would make .get() raise
            # AttributeError; skip it like malformed entries above.
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = cls._get_text(alert, 'text')
            if message:
                yield alert_type, message
620
c0ac49bc 621 def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
109dd3b2 622 errors = []
623 warnings = []
624 for alert_type, alert_message in alerts:
641ad5d8 625 if alert_type.lower() == 'error' and fatal:
109dd3b2 626 errors.append([alert_type, alert_message])
627 else:
628 warnings.append([alert_type, alert_message])
629
630 for alert_type, alert_message in (warnings + errors[:-1]):
c0ac49bc 631 self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message), only_once=only_once)
109dd3b2 632 if errors:
633 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
634
635 def _extract_and_report_alerts(self, data, *args, **kwargs):
636 return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
637
def _extract_badges(self, renderer: dict):
    """Return the set of lower-cased ``metadataBadgeRenderer`` labels in ``renderer['badges']``."""
    labels = (
        try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
        for badge in try_get(renderer, lambda x: x['badges'], list) or [])
    return {label.lower() for label in labels if label}
645
@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """Return the first non-empty text found in ``data`` under any of ``path_list``.

    Without paths, ``data`` itself is examined.  For every candidate object
    its ``simpleText`` is preferred; otherwise the ``text`` of its ``runs``
    (or of the object itself, if it is a list) are concatenated, using at
    most ``max_runs`` runs when given.  Returns None if no text is found.
    """
    for path in path_list or [None]:
        if path is None:
            candidates = [data]
        else:
            candidates = traverse_obj(data, path, default=[])
            is_branching = any(
                key is ... or isinstance(key, (list, tuple)) for key in variadic(path))
            if not is_branching:
                # A non-branching path yields a single object, not a list of them
                candidates = [candidates]

        for item in candidates:
            simple_text = try_get(item, lambda x: x['simpleText'], compat_str)
            if simple_text:
                return simple_text

            runs = try_get(item, lambda x: x['runs'], list) or []
            if not runs and isinstance(item, list):
                runs = item

            limit = min(len(runs), max_runs or len(runs))
            joined = ''.join(traverse_obj(runs[:limit], (..., 'text'), expected_type=str, default=[]))
            if joined:
                return joined
47193e02 667
# Call the API endpoint `ep` through _call_api() and return its response,
# retrying up to the 'extractor_retries' param (default 3) times.
# A retry happens on network errors other than HTTP 403/429, on alerts whose
# message contains 'unknown error', and when `check_get_keys` is non-empty but
# dict_get() finds none of those keys in the response.
# With fatal=False, failures are reported as warnings and None is returned.
109dd3b2 668 def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
669 ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
000c15a4 670 default_client='web'):
109dd3b2 671 response = None
672 last_error = None
673 count = -1
674 retries = self.get_param('extractor_retries', 3)
675 if check_get_keys is None:
676 check_get_keys = []
# count starts at -1, so the first pass is attempt 0 and up to `retries` retries follow
677 while count < retries:
678 count += 1
679 if last_error:
c0ac49bc 680 self.report_warning('%s. Retrying ...' % remove_end(last_error, '.'))
109dd3b2 681 try:
682 response = self._call_api(
683 ep=ep, fatal=True, headers=headers,
684 video_id=item_id, query=query,
685 context=self._extract_context(ytcfg, default_client),
686 api_key=self._extract_api_key(ytcfg, default_client),
687 api_hostname=api_hostname, default_client=default_client,
688 note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
689 except ExtractorError as e:
9c0d7f49 690 if isinstance(e.cause, network_exceptions):
# Surface the error message of a non-HTML HTTP error body as a (non-fatal) alert
641ad5d8 691 if isinstance(e.cause, compat_HTTPError) and not is_html(e.cause.read(512)):
692 e.cause.seek(0)
693 yt_error = try_get(
694 self._parse_json(e.cause.read().decode(), item_id, fatal=False),
695 lambda x: x['error']['message'], compat_str)
696 if yt_error:
697 self._report_alerts([('ERROR', yt_error)], fatal=False)
109dd3b2 698 # Downloading page may result in intermittent 5xx HTTP error
699 # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
9c0d7f49 700 # We also want to catch all other network exceptions since errors in later pages can be troublesome
701 # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
702 if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
526d74ec 703 last_error = error_to_compat_str(e.cause or e.msg)
9c0d7f49 704 if count < retries:
705 continue
109dd3b2 706 if fatal:
707 raise
708 else:
709 self.report_warning(error_to_compat_str(e))
710 return
711
712 else:
109dd3b2 713 try:
ac56cf38 714 self._extract_and_report_alerts(response, only_once=True)
109dd3b2 715 except ExtractorError as e:
c0ac49bc 716 # YouTube servers may return errors we want to retry on in a 200 OK response
717 # See: https://github.com/yt-dlp/yt-dlp/issues/839
# NOTE(review): on the last attempt this `continue` ends the loop, so the
# response is returned without raising even when fatal is set - confirm intended
718 if 'unknown error' in e.msg.lower():
719 last_error = e.msg
720 continue
109dd3b2 721 if fatal:
722 raise
723 self.report_warning(error_to_compat_str(e))
724 return
725 if not check_get_keys or dict_get(response, check_get_keys):
726 break
727 # Youtube sometimes sends incomplete data
728 # See: https://github.com/ytdl-org/youtube-dl/issues/28194
729 last_error = 'Incomplete data received'
730 if count >= retries:
731 if fatal:
732 raise ExtractorError(last_error)
733 else:
734 self.report_warning(last_error)
735 return
736 return response
737
@staticmethod
def is_music_url(url):
    """Tell whether ``url`` starts with ``http(s)://music.youtube.com/``."""
    return bool(re.match(r'https?://music\.youtube\.com/', url))
741
def _extract_video(self, renderer):
    """Turn a video renderer dict into a ``url``-type result handled by ``YoutubeIE``.

    Title, description, duration, view count and uploader are read from the
    renderer's text fields; fields that are absent end up as None.
    """
    video_id = renderer.get('videoId')
    title = self._get_text(renderer, 'title')
    description = self._get_text(renderer, 'descriptionSnippet')

    length_text = self._get_text(
        renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text'))
    duration = parse_duration(length_text)

    # Only the leading "1,234"-style number of the view count text is used
    raw_views = self._get_text(renderer, 'viewCountText') or ''
    view_digits = self._search_regex(
        r'^([\d,]+)', re.sub(r'\s', '', raw_views),
        'view count', default=None)
    view_count = str_to_int(view_digits)

    uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': f'https://www.youtube.com/watch?v={video_id}',
        'title': title,
        'description': description,
        'duration': duration,
        'view_count': view_count,
        'uploader': uploader,
    }
766
0c148415 767
360e1ca5 768class YoutubeIE(YoutubeBaseInfoExtractor):
96565c7e 769 IE_DESC = 'YouTube'
cb7dfeea 770 _VALID_URL = r"""(?x)^
c5e8d7af 771 (
edb53e2d 772 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 773 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
774 (?:www\.)?deturl\.com/www\.youtube\.com|
775 (?:www\.)?pwnyoutube\.com|
776 (?:www\.)?hooktube\.com|
777 (?:www\.)?yourepeat\.com|
778 tube\.majestyc\.net|
779 %(invidious)s|
780 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
781 (?:.*?\#/)? # handle anchor (#/) redirect urls
782 (?: # the various things that can precede the ID:
8fc54b12 783 (?:(?:v|embed|e|shorts)/(?!videoseries)) # v/ or embed/ or e/ or shorts/
c5e8d7af 784 |(?: # or the v= param in all its forms
f7000f3a 785 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 786 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 787 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
788 v=
789 )
f4b05232 790 ))
cbaed4bb
S
791 |(?:
792 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
793 vid\.plus| # or vid.plus/xxxx
794 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 795 %(invidious)s
cbaed4bb 796 )/
edb53e2d 797 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 798 )
c5e8d7af 799 )? # all until now is optional -> you can pass the naked ID
201c1459 800 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
c5e8d7af 801 (?(1).+)? # if we found the ID, everything can follow
9297939e 802 (?:\#|$)""" % {
d9190e44 803 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
bc2ca1bb 804 }
e40c758c 805 _PLAYER_INFO_RE = (
cc2db878 806 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
807 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 808 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 809 )
# Static table of known per-format properties, keyed by the format id as a
# string (the inline comments call these ids "itags"; '_rtmp' is the one
# non-numeric key). Each value lists only the fields that are fixed for that
# id — e.g. itag 36 omits 'height'/'abr' and itag 138 omits 'height' because
# those vary between videos.
# NOTE(review): how these entries are combined with the stream data reported
# by the site is decided by code outside this chunk — confirm there before
# assuming any value here takes precedence.
2c62dc26 810 _formats = {
c2d3cb4c 811 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
812 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
813 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
814 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
815 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
816 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
817 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
818 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 819 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 820 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
821 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
822 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
823 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
824 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
825 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 826 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 827 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
828 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 829
830
# Every 3D entry below carries 'preference': -20 and every HLS entry
# 'preference': -10; no other section sets 'preference'.
# NOTE(review): presumably a negative preference ranks these below the
# regular formats — confirm against the format-sorting code.
831 # 3D videos
c2d3cb4c 832 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
833 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
834 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
835 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 836 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
837 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
838 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 839
96fb5605 840 # Apple HTTP Live Streaming
11f12195 841 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 842 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
843 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
844 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
845 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
846 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 847 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
848 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
849
# From here on each DASH entry describes a single stream type: entries noted
# 'DASH video' list only 'vcodec', entries noted 'DASH audio' only 'acodec'.
850 # DASH mp4 video
d23028a8
S
851 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
852 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
853 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
854 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
855 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 856 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
857 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
858 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
859 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
860 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
861 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
862 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 863
f6f1fc92 864 # Dash mp4 audio
d23028a8
S
865 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
866 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
867 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
868 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
869 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
870 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
871 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
872
873 # Dash webm
d23028a8
S
874 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
875 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
876 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
877 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
878 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
879 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
880 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
881 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
882 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
883 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
884 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
885 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
886 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
887 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
888 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
# NOTE(review): the '272' entry fixes 'height' at 2160 even though the comment
# below says some itag 272 streams are 7680x4320 — unlike itag 36/138, the
# varying field is not left out here.
4c6b4764 889 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
890 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
891 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
892 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
893 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
894 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
895 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
896
897 # Dash webm audio
d23028a8
S
898 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
899 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 900
0857baad 901 # Dash webm audio with opus inside
d23028a8
S
902 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
903 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
904 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 905
ce6b9a2d
PH
906 # RTMP (unnamed)
907 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
908
909 # av01 video only formats sometimes served with "unknown" codecs
9b5fa9ee
TOH
910 '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
911 '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
912 '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
913 '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
914 '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
915 '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
916 '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
917 '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
c5e8d7af 918 }
# Tuple of subtitle format names.
# NOTE(review): presumably the formats offered for each subtitle track, in
# this order — the consumer is outside this chunk; confirm there.
29f7c58a 919 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 920
fd5c4aab
S
# NOTE(review): presumably switches off the base extractor's geo-bypass
# mechanism for this extractor — confirm against InfoExtractor in .common.
921 _GEO_BYPASS = False
922
# Name string for this extractor.
78caa52a 923 IE_NAME = 'youtube'
2eb88d95
PH
924 _TESTS = [
925 {
2d3d2997 926 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
927 'info_dict': {
928 'id': 'BaW_jenozKc',
929 'ext': 'mp4',
3867038a 930 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
931 'uploader': 'Philipp Hagemeister',
932 'uploader_id': 'phihag',
ec85ded8 933 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
934 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
935 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 936 'upload_date': '20121002',
3867038a 937 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
4bc3a23e 938 'categories': ['Science & Technology'],
3867038a 939 'tags': ['youtube-dl'],
556dbe7f 940 'duration': 10,
dbdaaa23 941 'view_count': int,
3e7c1224
PH
942 'like_count': int,
943 'dislike_count': int,
7c80519c 944 'start_time': 1,
297a564b 945 'end_time': 9,
2eb88d95 946 }
0e853ca4 947 },
fccd3771 948 {
4bc3a23e
PH
949 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
950 'note': 'Embed-only video (#1746)',
951 'info_dict': {
952 'id': 'yZIXLfi8CZQ',
953 'ext': 'mp4',
954 'upload_date': '20120608',
955 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
956 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
957 'uploader': 'SET India',
94bfcd23 958 'uploader_id': 'setindia',
ec85ded8 959 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 960 'age_limit': 18,
545cc85d 961 },
962 'skip': 'Private video',
fccd3771 963 },
11b56058 964 {
8bdd16b4 965 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
966 'note': 'Use the first video ID in the URL',
967 'info_dict': {
968 'id': 'BaW_jenozKc',
969 'ext': 'mp4',
3867038a 970 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
971 'uploader': 'Philipp Hagemeister',
972 'uploader_id': 'phihag',
ec85ded8 973 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 974 'upload_date': '20121002',
3867038a 975 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
11b56058 976 'categories': ['Science & Technology'],
3867038a 977 'tags': ['youtube-dl'],
556dbe7f 978 'duration': 10,
dbdaaa23 979 'view_count': int,
11b56058
PM
980 'like_count': int,
981 'dislike_count': int,
34a7de29
S
982 },
983 'params': {
984 'skip_download': True,
985 },
11b56058 986 },
dd27fd17 987 {
2d3d2997 988 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
989 'note': '256k DASH audio (format 141) via DASH manifest',
990 'info_dict': {
991 'id': 'a9LDPn-MO4I',
992 'ext': 'm4a',
993 'upload_date': '20121002',
994 'uploader_id': '8KVIDEO',
ec85ded8 995 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
996 'description': '',
997 'uploader': '8KVIDEO',
998 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 999 },
4bc3a23e
PH
1000 'params': {
1001 'youtube_include_dash_manifest': True,
1002 'format': '141',
4919603f 1003 },
de3c7fe0 1004 'skip': 'format 141 not served anymore',
dd27fd17 1005 },
8bdd16b4 1006 # DASH manifest with encrypted signature
1007 {
1008 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1009 'info_dict': {
1010 'id': 'IB3lcPjvWLA',
1011 'ext': 'm4a',
1012 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1013 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1014 'duration': 244,
1015 'uploader': 'AfrojackVEVO',
1016 'uploader_id': 'AfrojackVEVO',
1017 'upload_date': '20131011',
cc2db878 1018 'abr': 129.495,
8bdd16b4 1019 },
1020 'params': {
1021 'youtube_include_dash_manifest': True,
1022 'format': '141/bestaudio[ext=m4a]',
1023 },
1024 },
65c2fde2 1025 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
c522adb1 1026 {
65c2fde2 1027 'note': 'Embed allowed age-gate video',
2d3d2997 1028 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
1029 'info_dict': {
1030 'id': 'HtVdAasjOgU',
1031 'ext': 'mp4',
1032 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 1033 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 1034 'duration': 142,
c522adb1
JMF
1035 'uploader': 'The Witcher',
1036 'uploader_id': 'WitcherGame',
ec85ded8 1037 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 1038 'upload_date': '20140605',
34952f09 1039 'age_limit': 18,
c522adb1
JMF
1040 },
1041 },
65c2fde2 1042 {
1043 'note': 'Age-gate video with embed allowed in public site',
1044 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1045 'info_dict': {
1046 'id': 'HsUATh_Nc2U',
1047 'ext': 'mp4',
1048 'title': 'Godzilla 2 (Official Video)',
1049 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1050 'upload_date': '20200408',
1051 'uploader_id': 'FlyingKitty900',
1052 'uploader': 'FlyingKitty',
1053 'age_limit': 18,
1054 },
1055 },
1056 {
1057 'note': 'Age-gate video embedable only with clientScreen=EMBED',
1058 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1059 'info_dict': {
1060 'id': 'Tq92D6wQ1mg',
1061 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
3619f78d 1062 'ext': 'mp4',
1063 'upload_date': '20191227',
65c2fde2 1064 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1065 'uploader': 'Projekt Melody',
1066 'description': 'md5:17eccca93a786d51bc67646756894066',
1067 'age_limit': 18,
1068 },
1069 },
1070 {
1071 'note': 'Non-Agegated non-embeddable video',
1072 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1073 'info_dict': {
1074 'id': 'MeJVWBSsPAY',
1075 'ext': 'mp4',
1076 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1077 'uploader': 'Herr Lurik',
1078 'uploader_id': 'st3in234',
1079 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1080 'upload_date': '20130730',
1081 },
1082 },
1083 {
1084 'note': 'Non-bypassable age-gated video',
1085 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1086 'only_matching': True,
1087 },
8bdd16b4 1088 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1089 # YouTube Red ad is not captured for creator
1090 {
1091 'url': '__2ABJjxzNo',
1092 'info_dict': {
1093 'id': '__2ABJjxzNo',
1094 'ext': 'mp4',
1095 'duration': 266,
1096 'upload_date': '20100430',
1097 'uploader_id': 'deadmau5',
1098 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 1099 'creator': 'deadmau5',
1100 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 1101 'uploader': 'deadmau5',
1102 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 1103 'alt_title': 'Some Chords',
8bdd16b4 1104 },
1105 'expected_warnings': [
1106 'DASH manifest missing',
1107 ]
1108 },
067aa17e 1109 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
1110 {
1111 'url': 'lqQg6PlCWgI',
1112 'info_dict': {
1113 'id': 'lqQg6PlCWgI',
1114 'ext': 'mp4',
556dbe7f 1115 'duration': 6085,
90227264 1116 'upload_date': '20150827',
cbe2bd91 1117 'uploader_id': 'olympic',
ec85ded8 1118 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 1119 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
11f9be09 1120 'uploader': 'Olympics',
cbe2bd91
PH
1121 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1122 },
1123 'params': {
1124 'skip_download': 'requires avconv',
e52a40ab 1125 }
cbe2bd91 1126 },
6271f1ca
PH
1127 # Non-square pixels
1128 {
1129 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1130 'info_dict': {
1131 'id': '_b-2C3KPAM0',
1132 'ext': 'mp4',
1133 'stretched_ratio': 16 / 9.,
556dbe7f 1134 'duration': 85,
6271f1ca
PH
1135 'upload_date': '20110310',
1136 'uploader_id': 'AllenMeow',
ec85ded8 1137 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 1138 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 1139 'uploader': '孫ᄋᄅ',
6271f1ca
PH
1140 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1141 },
06b491eb
S
1142 },
1143 # url_encoded_fmt_stream_map is empty string
1144 {
1145 'url': 'qEJwOuvDf7I',
1146 'info_dict': {
1147 'id': 'qEJwOuvDf7I',
f57b7835 1148 'ext': 'webm',
06b491eb
S
1149 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1150 'description': '',
1151 'upload_date': '20150404',
1152 'uploader_id': 'spbelect',
1153 'uploader': 'Наблюдатели Петербурга',
1154 },
1155 'params': {
1156 'skip_download': 'requires avconv',
e323cf3f
S
1157 },
1158 'skip': 'This live event has ended.',
06b491eb 1159 },
067aa17e 1160 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
1161 {
1162 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1163 'info_dict': {
1164 'id': 'FIl7x6_3R5Y',
eb6793ba 1165 'ext': 'webm',
da77d856
S
1166 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1167 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 1168 'duration': 220,
da77d856
S
1169 'upload_date': '20150625',
1170 'uploader_id': 'dorappi2000',
ec85ded8 1171 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 1172 'uploader': 'dorappi2000',
eb6793ba 1173 'formats': 'mincount:31',
da77d856 1174 },
eb6793ba 1175 'skip': 'not actual anymore',
2ee8f5d8 1176 },
8a1a26ce
YCH
1177 # DASH manifest with segment_list
1178 {
1179 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1180 'md5': '8ce563a1d667b599d21064e982ab9e31',
1181 'info_dict': {
1182 'id': 'CsmdDsKjzN8',
1183 'ext': 'mp4',
17ee98e1 1184 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
1185 'uploader': 'Airtek',
1186 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1187 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1188 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1189 },
1190 'params': {
1191 'youtube_include_dash_manifest': True,
1192 'format': '135', # bestvideo
be49068d
S
1193 },
1194 'skip': 'This live event has ended.',
2ee8f5d8 1195 },
cf7e015f
S
1196 {
1197 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 1198 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 1199 'info_dict': {
545cc85d 1200 'id': 'jvGDaLqkpTg',
1201 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1202 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
1203 },
1204 'playlist': [{
1205 'info_dict': {
545cc85d 1206 'id': 'jvGDaLqkpTg',
cf7e015f 1207 'ext': 'mp4',
545cc85d 1208 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1209 'description': 'md5:e03b909557865076822aa169218d6a5d',
1210 'duration': 10643,
1211 'upload_date': '20161111',
1212 'uploader': 'Team PGP',
1213 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1214 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1215 },
1216 }, {
1217 'info_dict': {
545cc85d 1218 'id': '3AKt1R1aDnw',
cf7e015f 1219 'ext': 'mp4',
545cc85d 1220 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1221 'description': 'md5:e03b909557865076822aa169218d6a5d',
1222 'duration': 10991,
1223 'upload_date': '20161111',
1224 'uploader': 'Team PGP',
1225 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1226 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1227 },
1228 }, {
1229 'info_dict': {
545cc85d 1230 'id': 'RtAMM00gpVc',
cf7e015f 1231 'ext': 'mp4',
545cc85d 1232 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1233 'description': 'md5:e03b909557865076822aa169218d6a5d',
1234 'duration': 10995,
1235 'upload_date': '20161111',
1236 'uploader': 'Team PGP',
1237 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1238 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1239 },
1240 }, {
1241 'info_dict': {
545cc85d 1242 'id': '6N2fdlP3C5U',
cf7e015f 1243 'ext': 'mp4',
545cc85d 1244 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1245 'description': 'md5:e03b909557865076822aa169218d6a5d',
1246 'duration': 10990,
1247 'upload_date': '20161111',
1248 'uploader': 'Team PGP',
1249 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1250 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1251 },
1252 }],
1253 'params': {
1254 'skip_download': True,
1255 },
65c2fde2 1256 'skip': 'Not multifeed anymore',
cbaed4bb 1257 },
f9f49d87 1258 {
067aa17e 1259 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
1260 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1261 'info_dict': {
1262 'id': 'gVfLd0zydlo',
1263 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1264 },
1265 'playlist_count': 2,
be49068d 1266 'skip': 'Not multifeed anymore',
f9f49d87 1267 },
cbaed4bb 1268 {
2d3d2997 1269 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 1270 'only_matching': True,
0e49d9a6 1271 },
6d4fc66b 1272 {
2d3d2997 1273 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
1274 'only_matching': True,
1275 },
0e49d9a6 1276 {
067aa17e 1277 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 1278 # Also tests cut-off URL expansion in video description (see
067aa17e
S
1279 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1280 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
1281 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1282 'info_dict': {
1283 'id': 'lsguqyKfVQg',
1284 'ext': 'mp4',
1285 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
11f9be09 1286 'alt_title': 'Dark Walk',
0e49d9a6 1287 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 1288 'duration': 133,
0e49d9a6
LL
1289 'upload_date': '20151119',
1290 'uploader_id': 'IronSoulElf',
ec85ded8 1291 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 1292 'uploader': 'IronSoulElf',
11f9be09 1293 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1294 'track': 'Dark Walk',
1295 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
92bc97d3 1296 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
1297 },
1298 'params': {
1299 'skip_download': True,
1300 },
1301 },
61f92af1 1302 {
067aa17e 1303 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
1304 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1305 'only_matching': True,
1306 },
313dfc45
LL
1307 {
1308 # Video with yt:stretch=17:0
1309 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1310 'info_dict': {
1311 'id': 'Q39EVAstoRM',
1312 'ext': 'mp4',
1313 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1314 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1315 'upload_date': '20151107',
1316 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1317 'uploader': 'CH GAMER DROID',
1318 },
1319 'params': {
1320 'skip_download': True,
1321 },
be49068d 1322 'skip': 'This video does not exist.',
313dfc45 1323 },
201c1459 1324 {
1325 # Video with incomplete 'yt:stretch=16:'
1326 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1327 'only_matching': True,
1328 },
7caf9830
S
1329 {
1330 # Video licensed under Creative Commons
1331 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1332 'info_dict': {
1333 'id': 'M4gD1WSo5mA',
1334 'ext': 'mp4',
1335 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1336 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1337 'duration': 721,
7caf9830
S
1338 'upload_date': '20150127',
1339 'uploader_id': 'BerkmanCenter',
ec85ded8 1340 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 1341 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
1342 'license': 'Creative Commons Attribution license (reuse allowed)',
1343 },
1344 'params': {
1345 'skip_download': True,
1346 },
1347 },
fd050249
S
1348 {
1349 # Channel-like uploader_url
1350 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1351 'info_dict': {
1352 'id': 'eQcmzGIKrzg',
1353 'ext': 'mp4',
1354 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 1355 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1356 'duration': 4060,
fd050249 1357 'upload_date': '20151119',
eb6793ba 1358 'uploader': 'Bernie Sanders',
fd050249 1359 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1360 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
1361 'license': 'Creative Commons Attribution license (reuse allowed)',
1362 },
1363 'params': {
1364 'skip_download': True,
1365 },
1366 },
040ac686
S
1367 {
1368 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1369 'only_matching': True,
7f29cf54
S
1370 },
1371 {
067aa17e 1372 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1373 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1374 'only_matching': True,
6496ccb4
S
1375 },
1376 {
1377 # Rental video preview
1378 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1379 'info_dict': {
1380 'id': 'uGpuVWrhIzE',
1381 'ext': 'mp4',
1382 'title': 'Piku - Trailer',
1383 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1384 'upload_date': '20150811',
1385 'uploader': 'FlixMatrix',
1386 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1387 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1388 'license': 'Standard YouTube License',
1389 },
1390 'params': {
1391 'skip_download': True,
1392 },
eb6793ba 1393 'skip': 'This video is not available.',
022a5d66 1394 },
12afdc2a
S
1395 {
1396 # YouTube Red video with episode data
1397 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1398 'info_dict': {
1399 'id': 'iqKdEhx-dD4',
1400 'ext': 'mp4',
1401 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1402 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1403 'duration': 2085,
12afdc2a
S
1404 'upload_date': '20170118',
1405 'uploader': 'Vsauce',
1406 'uploader_id': 'Vsauce',
1407 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1408 'series': 'Mind Field',
1409 'season_number': 1,
1410 'episode_number': 1,
1411 },
1412 'params': {
1413 'skip_download': True,
1414 },
1415 'expected_warnings': [
1416 'Skipping DASH manifest',
1417 ],
1418 },
c7121fa7
S
1419 {
1420 # The following content has been identified by the YouTube community
1421 # as inappropriate or offensive to some audiences.
1422 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1423 'info_dict': {
1424 'id': '6SJNVb0GnPI',
1425 'ext': 'mp4',
1426 'title': 'Race Differences in Intelligence',
1427 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1428 'duration': 965,
1429 'upload_date': '20140124',
1430 'uploader': 'New Century Foundation',
1431 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1432 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1433 },
1434 'params': {
1435 'skip_download': True,
1436 },
545cc85d 1437 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1438 },
022a5d66
S
1439 {
1440 # itag 212
1441 'url': '1t24XAntNCY',
1442 'only_matching': True,
fd5c4aab
S
1443 },
1444 {
1445 # geo restricted to JP
1446 'url': 'sJL6WA-aGkQ',
1447 'only_matching': True,
1448 },
cd5a74a2
S
1449 {
1450 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1451 'only_matching': True,
1452 },
bc2ca1bb 1453 {
1454 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1455 'only_matching': True,
1456 },
1457 {
1458 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1459 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1460 'only_matching': True,
1461 },
825cd268
RA
1462 {
1463 # DRM protected
1464 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1465 'only_matching': True,
4fe54c12
S
1466 },
1467 {
1468 # Video with unsupported adaptive stream type formats
1469 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1470 'info_dict': {
1471 'id': 'Z4Vy8R84T1U',
1472 'ext': 'mp4',
1473 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1474 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1475 'duration': 433,
1476 'upload_date': '20130923',
1477 'uploader': 'Amelia Putri Harwita',
1478 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1479 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1480 'formats': 'maxcount:10',
1481 },
1482 'params': {
1483 'skip_download': True,
1484 'youtube_include_dash_manifest': False,
1485 },
5429d6a9 1486 'skip': 'not actual anymore',
5caabd3c 1487 },
1488 {
822b9d9c 1489 # Youtube Music Auto-generated description
5caabd3c 1490 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1491 'info_dict': {
1492 'id': 'MgNrAu2pzNs',
1493 'ext': 'mp4',
1494 'title': 'Voyeur Girl',
1495 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1496 'upload_date': '20190312',
5429d6a9
S
1497 'uploader': 'Stephen - Topic',
1498 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1499 'artist': 'Stephen',
1500 'track': 'Voyeur Girl',
1501 'album': 'it\'s too much love to know my dear',
1502 'release_date': '20190313',
1503 'release_year': 2019,
1504 },
1505 'params': {
1506 'skip_download': True,
1507 },
1508 },
66b48727
RA
1509 {
1510 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1511 'only_matching': True,
1512 },
011e75e6
S
1513 {
1514 # invalid -> valid video id redirection
1515 'url': 'DJztXj2GPfl',
1516 'info_dict': {
1517 'id': 'DJztXj2GPfk',
1518 'ext': 'mp4',
1519 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1520 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1521 'upload_date': '20090125',
1522 'uploader': 'Prochorowka',
1523 'uploader_id': 'Prochorowka',
1524 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1525 'artist': 'Panjabi MC',
1526 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1527 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1528 },
1529 'params': {
1530 'skip_download': True,
1531 },
545cc85d 1532 'skip': 'Video unavailable',
ea74e00b
DP
1533 },
1534 {
1535 # empty description results in an empty string
1536 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1537 'info_dict': {
1538 'id': 'x41yOUIvK2k',
1539 'ext': 'mp4',
1540 'title': 'IMG 3456',
1541 'description': '',
1542 'upload_date': '20170613',
1543 'uploader_id': 'ElevageOrVert',
1544 'uploader': 'ElevageOrVert',
1545 },
1546 'params': {
1547 'skip_download': True,
1548 },
1549 },
a0566bbf 1550 {
29f7c58a 1551 # with '};' inside yt initial data (see [1])
1552 # see [2] for an example with '};' inside ytInitialPlayerResponse
1553 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1554 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1555 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1556 'info_dict': {
1557 'id': 'CHqg6qOn4no',
1558 'ext': 'mp4',
1559 'title': 'Part 77 Sort a list of simple types in c#',
1560 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1561 'upload_date': '20130831',
1562 'uploader_id': 'kudvenkat',
1563 'uploader': 'kudvenkat',
1564 },
1565 'params': {
1566 'skip_download': True,
1567 },
1568 },
29f7c58a 1569 {
1570 # another example of '};' in ytInitialData
1571 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1572 'only_matching': True,
1573 },
1574 {
1575 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1576 'only_matching': True,
1577 },
545cc85d 1578 {
cc2db878 1579 # https://github.com/ytdl-org/youtube-dl/pull/28094
1580 'url': 'OtqTfy26tG0',
1581 'info_dict': {
1582 'id': 'OtqTfy26tG0',
1583 'ext': 'mp4',
1584 'title': 'Burn Out',
1585 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1586 'upload_date': '20141120',
1587 'uploader': 'The Cinematic Orchestra - Topic',
1588 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1589 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1590 'artist': 'The Cinematic Orchestra',
1591 'track': 'Burn Out',
1592 'album': 'Every Day',
1593 'release_data': None,
1594 'release_year': None,
1595 },
1596 'params': {
1597 'skip_download': True,
1598 },
545cc85d 1599 },
bc2ca1bb 1600 {
1601 # controversial video, only works with bpctr when authenticated with cookies
1602 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1603 'only_matching': True,
1604 },
a1a7907b 1605 {
1606 # controversial video, requires bpctr/contentCheckOk
1607 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
1608 'info_dict': {
1609 'id': 'SZJvDhaSDnc',
1610 'ext': 'mp4',
1611 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
1612 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
1613 'uploader': 'CBS This Morning',
11f9be09 1614 'uploader_id': 'CBSThisMorning',
a1a7907b 1615 'upload_date': '20140716',
1616 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7'
1617 }
1618 },
f7ad7160 1619 {
1620 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1621 'url': 'cBvYw8_A0vQ',
1622 'info_dict': {
1623 'id': 'cBvYw8_A0vQ',
1624 'ext': 'mp4',
1625 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1626 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1627 'upload_date': '20201120',
1628 'uploader': 'Walk around Japan',
1629 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1630 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1631 },
1632 'params': {
1633 'skip_download': True,
1634 },
0fb983f6 1635 }, {
1636 # Has multiple audio streams
1637 'url': 'WaOKSUlf4TM',
1638 'only_matching': True
9297939e 1639 }, {
1640 # Requires Premium: has format 141 when requested using YTM url
1641 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1642 'only_matching': True
1643 }, {
120916da 1644 # multiple subtitles with same lang_code
1645 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1646 'only_matching': True,
109dd3b2 1647 }, {
1648 # Force use android client fallback
1649 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1650 'info_dict': {
1651 'id': 'YOelRv7fMxY',
11f9be09 1652 'title': 'DIGGING A SECRET TUNNEL Part 1',
109dd3b2 1653 'ext': '3gp',
1654 'upload_date': '20210624',
1655 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1656 'uploader': 'colinfurze',
11f9be09 1657 'uploader_id': 'colinfurze',
109dd3b2 1658 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
11f9be09 1659 'description': 'md5:b5096f56af7ccd7a555c84db81738b22'
109dd3b2 1660 },
1661 'params': {
1662 'format': '17', # 3gp format available on android
1663 'extractor_args': {'youtube': {'player_client': ['android']}},
1664 },
120916da 1665 },
109dd3b2 1666 {
1667 # Skip download of additional client configs (remix client config in this case)
1668 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1669 'only_matching': True,
1670 'params': {
1671 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1672 },
8fc54b12 1673 }, {
1674 # shorts
1675 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
1676 'only_matching': True,
9222c381 1677 }, {
1678 'note': 'Storyboards',
1679 'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',
1680 'info_dict': {
1681 'id': '5KLPxDtMqe8',
1682 'ext': 'mhtml',
1683 'format_id': 'sb0',
1684 'title': 'Your Brain is Plastic',
1685 'uploader_id': 'scishow',
1686 'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',
1687 'upload_date': '20140324',
1688 'uploader': 'SciShow',
1689 }, 'params': {'format': 'mhtml', 'skip_download': True}
1690 }
2eb88d95
PH
1691 ]
1692
@classmethod
def suitable(cls, url):
    """Reject URLs that carry a non-empty ``list`` query parameter.

    Any other URL is handed to the parent class' ``suitable`` check.
    """
    # NOTE(review): the function-scope import is kept on purpose; presumably
    # this method may be evaluated where the module-level imports are not
    # available - confirm before hoisting it.
    from ..utils import parse_qs

    list_values = parse_qs(url).get('list', [None])
    if list_values[0]:
        return False
    return super(YoutubeIE, cls).suitable(url)
1701
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Initialise the parent extractor and create the per-instance caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Decryption functions and decrypted nsig values, keyed by tuples
    self._player_cache = {}
    # Downloaded player JS source, keyed by player id
    self._code_cache = {}
e0df6211 1706
def _extract_player_url(self, *ytcfgs, webpage=None):
    """Return an absolute player JS URL found in any of ``ytcfgs``.

    Looks for ``PLAYER_JS_URL`` first and ``WEB_PLAYER_CONTEXT_CONFIGS/*/jsUrl``
    second. Returns None when no URL is found. ``webpage`` is accepted but
    not used by this method.
    """
    url = traverse_obj(
        ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
        get_all=False, expected_type=compat_str)
    if not url:
        return
    # Protocol-relative URL
    if url.startswith('//'):
        return 'https:' + url
    # Already absolute
    if re.match(r'https?://', url):
        return url
    # Site-relative path
    return compat_urlparse.urljoin('https://www.youtube.com', url)
1719
def _download_player_url(self, video_id, fatal=False):
    """Build the player JS URL from the version advertised by the iframe API.

    Returns None when the iframe API script cannot be downloaded or the
    player version cannot be found in it.
    """
    iframe_js = self._download_webpage(
        'https://www.youtube.com/iframe_api',
        note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
    if not iframe_js:
        return None
    version = self._search_regex(
        r'player\\?/([0-9a-fA-F]{8})\\?/', iframe_js, 'player version', fatal=fatal)
    if not version:
        return None
    return f'https://www.youtube.com/s/player/{version}/player_ias.vflset/en_US/base.js'
1729
60064c53
PH
def _signature_cache_id(self, example_sig):
    """ Return a string representation of a signature """
    # The id is the dot-joined lengths of the dot-separated signature parts
    part_lengths = [compat_str(len(part)) for part in example_sig.split('.')]
    return '.'.join(part_lengths)
60064c53 1733
e40c758c
S
@classmethod
def _extract_player_info(cls, player_url):
    """Return the player id captured by the first matching ``_PLAYER_INFO_RE``.

    Raises ExtractorError when none of the patterns matches ``player_url``.
    """
    for pattern in cls._PLAYER_INFO_RE:
        match = re.search(pattern, player_url)
        if match:
            return match.group('id')
    raise ExtractorError('Cannot identify player %r' % player_url)
e40c758c 1743
404f611f 1744 def _load_player(self, video_id, player_url, fatal=True):
109dd3b2 1745 player_id = self._extract_player_info(player_url)
1746 if player_id not in self._code_cache:
1276a43a 1747 code = self._download_webpage(
109dd3b2 1748 player_url, video_id, fatal=fatal,
1749 note='Downloading player ' + player_id,
1750 errnote='Download of %s failed' % player_url)
1276a43a 1751 if code:
1752 self._code_cache[player_id] = code
404f611f 1753 return self._code_cache.get(player_id)
109dd3b2 1754
def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable that transforms an encrypted signature string.

    The transformation is cached on disk (section ``youtube-sigfuncs``) as a
    list of source indices, keyed by player id and signature shape. On a
    cache miss the function is extracted from the player JS and its index
    list is stored. Returns None if no player code is available.
    """
    player_id = self._extract_player_info(player_url)

    # Read from filesystem cache
    func_id = 'js_%s_%s' % (player_id, self._signature_cache_id(example_sig))
    assert os.path.basename(func_id) == func_id

    cache = self._downloader.cache
    cached_spec = cache.load('youtube-sigfuncs', func_id)
    if cached_spec is not None:
        return lambda s: ''.join(s[i] for i in cached_spec)

    code = self._load_player(video_id, player_url)
    if not code:
        return None

    sig_func = self._parse_sig_js(code)

    # Run the function over a string of distinct characters so the output
    # reveals which input position each output character came from
    probe = ''.join(map(compat_chr, range(len(example_sig))))
    new_spec = [ord(c) for c in sig_func(probe)]

    cache.store('youtube-sigfuncs', func_id, new_spec)
    return sig_func
83799698 1777
def _print_sig_code(self, func, example_sig):
    """Print Python code equivalent to ``func`` for signatures shaped like ``example_sig``.

    Does nothing unless the ``youtube_print_sig_code`` parameter is set.
    """
    if not self.get_param('youtube_print_sig_code'):
        return

    def format_slice(first, last, stride):
        # Render a run of consecutive indices as one slice expression
        lower = '' if first == 0 else str(first)
        stop = last + stride
        upper = (':%d' % stop) if stop >= 0 else ':'
        stride_part = '' if stride == 1 else (':%d' % stride)
        return 's[%s%s%s]' % (lower, upper, stride_part)

    def gen_sig_code(idxs):
        step = None
        # Placeholder only - start is always assigned whenever step is set
        start = '(Never used)'
        for prev, i in zip(idxs[:-1], idxs[1:]):
            delta = i - prev
            if step is None:
                if delta in (-1, 1):
                    # A run of consecutive indices begins at prev
                    step, start = delta, prev
                else:
                    yield 's[%d]' % prev
                continue
            if delta == step:
                # Still inside the current run
                continue
            yield format_slice(start, prev, step)
            step = None
        if step is None:
            yield 's[%d]' % i
        else:
            yield format_slice(start, i, step)

    probe = ''.join(map(compat_chr, range(len(example_sig))))
    index_spec = [ord(c) for c in func(probe)]
    expr_code = ' + '.join(gen_sig_code(index_spec))
    part_lengths = ', '.join(compat_str(len(p)) for p in example_sig.split('.'))
    signature_id_tuple = '(%s)' % part_lengths
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1819
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Locate the signature function in the player JS and wrap it.

    Returns a callable taking the signature string and returning the result
    of the interpreted JS function.
    """
    name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        name_patterns, jscode, 'Initial JS player signature function name', group='sig')

    interpreter = JSInterpreter(jscode)
    initial_function = interpreter.extract_function(funcname)

    def run_signature_function(s):
        # The interpreted function receives its arguments as a list
        return initial_function([s])

    return run_signature_function
1843
def _decrypt_signature(self, s, video_id, player_url):
    """Turn the encrypted s field into a working signature"""
    if player_url is None:
        raise ExtractorError('Cannot decrypt signature without player_url')

    try:
        # One decryption function per (player, signature shape)
        cache_key = (player_url, self._signature_cache_id(s))
        if cache_key not in self._player_cache:
            self._player_cache[cache_key] = self._extract_signature_function(
                video_id, player_url, s)
        decrypt = self._player_cache[cache_key]
        self._print_sig_code(decrypt, s)
        return decrypt(s)
    except Exception as e:
        raise ExtractorError('Signature extraction failed: ' + traceback.format_exc(), cause=e)
1862
def _decrypt_nsig(self, s, video_id, player_url):
    """Turn the encrypted n field into a working signature"""
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')

    # Make the player URL absolute
    if player_url.startswith('//'):
        player_url = 'https:' + player_url
    elif not re.match(r'https?://', player_url):
        player_url = compat_urlparse.urljoin('https://www.youtube.com', player_url)

    # Previously decrypted values are served straight from the cache
    value_key = ('nsig_value', s)
    if value_key in self._player_cache:
        return self._player_cache[value_key]

    try:
        func_key = ('nsig', player_url)
        if func_key not in self._player_cache:
            self._player_cache[func_key] = self._extract_n_function(video_id, player_url)
        decrypt = self._player_cache[func_key]
        self._player_cache[value_key] = decrypt(s)
        result = self._player_cache[value_key]
        self.write_debug(f'Decrypted nsig {s} => {result}')
        return self._player_cache[value_key]
    except Exception as e:
        raise ExtractorError(traceback.format_exc(), cause=e, video_id=video_id)
404f611f 1887
def _extract_n_function_name(self, jscode):
    """Return the name of the n-parameter function found in the player JS."""
    name_patterns = (
        r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]{3})\([a-zA-Z0-9]\)',
    )
    return self._search_regex(
        name_patterns, jscode, 'Initial JS player n function name', group='nfunc')
1892
def _extract_n_function(self, video_id, player_url):
    """Return a callable that applies the player's n-parameter function.

    The extracted function code is cached on disk (section ``youtube-nsig``)
    by player id; on a cache miss it is extracted from the player JS.
    """
    player_id = self._extract_player_info(player_url)
    cache = self._downloader.cache
    func_code = cache.load('youtube-nsig', player_id)

    if not func_code:
        jscode = self._load_player(video_id, player_url)
        funcname = self._extract_n_function_name(jscode)
        jsi = JSInterpreter(jscode)
        func_code = jsi.extract_function_code(funcname)
        cache.store('youtube-nsig', player_id, func_code)
    else:
        jsi = JSInterpreter(func_code)

    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    def run_n_function(s):
        # The function object is rebuilt from the stored code on every call
        return jsi.extract_function_from_code(*func_code)([s])

    return run_n_function
e0df6211 1910
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    The ``STS`` entry of ``ytcfg`` is preferred; otherwise the value is
    searched for in the player JS. Without a player URL this raises
    ExtractorError when ``fatal`` is set and otherwise warns and returns None.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return

    code = self._load_player(video_id, player_url, fatal=fatal)
    if code:
        sts = int_or_none(self._search_regex(
            r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
            'JS player signature timestamp', group='sts', fatal=fatal))
    return sts
1934
def _mark_watched(self, video_id, player_responses):
    """Request the playback-tracking URL so the video is marked as watched.

    The URL is taken from ``playbackTracking/videostatsPlaybackUrl/baseUrl``
    of the player responses. A warning is reported and nothing is requested
    when it is missing. The request itself is non-fatal.
    """
    playback_url = get_first(
        player_responses, ('playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
        expected_type=url_or_none)
    if not playback_url:
        self.report_warning('Unable to mark watched')
        return
    parsed_playback_url = compat_urlparse.urlparse(playback_url)
    qs = compat_urlparse.parse_qs(parsed_playback_url.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    # random.choice picks each of the 64 symbols with equal probability;
    # the previous randint(0, 256) & 63 included 256 and so favoured 'a'
    cpn = ''.join(random.choice(CPN_ALPHABET) for _ in range(16))

    qs.update({
        'ver': ['2'],
        'cpn': [cpn],
    })
    playback_url = compat_urlparse.urlunparse(
        parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1960
66c9fa36
S
1961 @staticmethod
1962 def _extract_urls(webpage):
1963 # Embedded YouTube player
1964 entries = [
1965 unescapeHTML(mobj.group('url'))
1966 for mobj in re.finditer(r'''(?x)
1967 (?:
1968 <iframe[^>]+?src=|
1969 data-video-url=|
1970 <embed[^>]+?src=|
1971 embedSWF\(?:\s*|
1972 <object[^>]+data=|
1973 new\s+SWFObject\(
1974 )
1975 (["\'])
1976 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
f2332f18 1977 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
66c9fa36
S
1978 \1''', webpage)]
1979
1980 # lazyYT YouTube embed
1981 entries.extend(list(map(
1982 unescapeHTML,
1983 re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
1984
1985 # Wordpress "YouTube Video Importer" plugin
1986 matches = re.findall(r'''(?x)<div[^>]+
1987 class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
1988 data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
1989 entries.extend(m[-1] for m in matches)
1990
1991 return entries
1992
@staticmethod
def _extract_url(webpage):
    """Return the first embed found by ``_extract_urls``, or None if there is none."""
    return next(iter(YoutubeIE._extract_urls(webpage)), None)
1997
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the ``id`` group of ``_VALID_URL`` matched against ``url``.

    Raises ExtractorError when the URL does not match.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group('id')
    raise ExtractorError('Invalid URL: %s' % url)
c5e8d7af 2004
def _extract_chapters_from_json(self, data, duration):
    """Build the chapter list from the chaptered player bar in ``data``."""
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
    )

    def start_seconds(chapter):
        # timeRangeStartMillis is scaled down by 1000
        return float_or_none(
            traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000)

    def title_of(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    chapter_list = traverse_obj(data, chapters_path, expected_type=list)
    return self._extract_chapters(
        chapter_list,
        chapter_time=start_seconds,
        chapter_title=title_of,
        duration=duration)
2019
def _extract_chapters_from_engagement_panel(self, data, duration):
    """Return the chapters of the first engagement panel that yields any.

    Each macro-markers list in ``data`` is tried in order. An empty list is
    returned when none of them produces chapters.
    """
    content_list = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    for contents in content_list:
        chapters = self._extract_chapters(
            traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
            chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []
2035
2036 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
84213ea8 2037 chapters = []
7c365c21 2038 last_chapter = {'start_time': 0}
2039 for idx, chapter in enumerate(chapter_list or []):
2040 title = chapter_title(chapter)
84213ea8
S
2041 start_time = chapter_time(chapter)
2042 if start_time is None:
2043 continue
7c365c21 2044 last_chapter['end_time'] = start_time
2045 if start_time < last_chapter['start_time']:
2046 if idx == 1:
2047 chapters.pop()
2048 self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])
2049 else:
2050 self.report_warning(f'Invalid start time for chapter "{title}"')
2051 continue
2052 last_chapter = {'start_time': start_time, 'title': title}
2053 chapters.append(last_chapter)
2054 last_chapter['end_time'] = duration
84213ea8
S
2055 return chapters
2056
def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
    """Find and parse a JSON blob in ``webpage``.

    ``regex`` is tried with the ``_YT_INITIAL_BOUNDARY_RE`` suffix first and
    then on its own. ``'{}'`` is parsed when neither matches, and parsing
    is non-fatal.
    """
    patterns = (
        r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
        regex,
    )
    raw_json = self._search_regex(patterns, webpage, name, default='{}')
    return self._parse_json(raw_json, video_id, fatal=False)
84213ea8 2061
@staticmethod
def parse_time_text(time_text):
    """
    Parse the comment time text
    time_text is in the format 'X units ago (edited)'

    Returns None when the text has fewer than three space-separated words
    or when the date helper raises ValueError.
    """
    words = time_text.split(' ')
    if len(words) < 3:
        return None
    amount, unit = words[0], words[1]
    try:
        return datetime_from_str('now-%s%s' % (amount, unit), precision='auto')
    except ValueError:
        return None
d92f5d5a 2074
a1c5d2ca
M
def _extract_comment(self, comment_renderer, parent=None):
    """Convert a comment renderer dict into a comment info dict.

    Returns None when the renderer carries no ``commentId``. ``parent`` is
    the id of the parent comment; ``'root'`` is used when it is not given.
    ``timestamp`` is None when the published-time text cannot be parsed.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return

    text = self._get_text(comment_renderer, 'contentText')

    # note: timestamp is an estimate calculated from the current time and time_text
    time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
    time_text_dt = self.parse_time_text(time_text)
    # Default to None: previously the name was left unbound (and building
    # the result raised UnboundLocalError) whenever parsing failed
    timestamp = None
    if isinstance(time_text_dt, datetime.datetime):
        timestamp = calendar.timegm(time_text_dt.timetuple())
    author = self._get_text(comment_renderer, 'authorText')
    author_id = try_get(
        comment_renderer,
        lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)

    votes = parse_count(try_get(
        comment_renderer,
        (lambda x: x['voteCount']['simpleText'], lambda x: x['likeCount']),
        compat_str)) or 0
    author_thumbnail = try_get(
        comment_renderer,
        lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)

    author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
    is_favorited = 'creatorHeart' in (try_get(
        comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
    return {
        'id': comment_id,
        'text': text,
        'timestamp': timestamp,
        'time_text': time_text,
        'like_count': votes,
        'is_favorited': is_favorited,
        'author': author,
        'author_id': author_id,
        'author_thumbnail': author_thumbnail,
        'author_is_uploader': author_is_uploader,
        'parent': parent or 'root'
    }
2112
def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, comment_counts=None):
    """Yield comment dicts for a comment section or a reply thread.

    Follows continuations starting from ``root_continuation_data`` and
    recurses into reply threads with ``parent`` set to the id of the parent
    comment. ``comment_counts`` is a shared, mutated list:
    [comments so far, estimated total, current comment thread number].
    """

    def extract_header(contents):
        # Reads the expected comment count and picks the continuation of
        # the requested sort order from the comments header
        _continuation = None
        for content in contents:
            comments_header_renderer = try_get(content, lambda x: x['commentsHeaderRenderer'])
            expected_comment_count = parse_count(self._get_text(
                comments_header_renderer, 'countText', 'commentsCount', max_runs=1))

            if expected_comment_count:
                comment_counts[1] = expected_comment_count
                self.to_screen('Downloading ~%d comments' % expected_comment_count)
            sort_mode_str = self._configuration_arg('comment_sort', [''])[0]
            comment_sort_index = int(sort_mode_str != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = sort_menu_item.get('title')
            if isinstance(sort_text, compat_str):
                sort_text = sort_text.lower()
            else:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text)
            break
        return _continuation

    def extract_thread(contents):
        if not parent:
            comment_counts[2] = 0
        for content in contents:
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            # ``dict`` is the expected type of the result; it used to be
            # passed inside the getter tuple, where it acted as a second
            # getter instead of a type check
            comment_renderer = try_get(
                comment_thread_renderer, lambda x: x['comment']['commentRenderer'], dict) or try_get(
                content, lambda x: x['commentRenderer'], dict)

            if not comment_renderer:
                continue
            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue
            comment_counts[0] += 1
            yield comment
            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                comment_counts[2] += 1
                yield from self._comment_entries(
                    comment_replies_renderer, ytcfg, video_id,
                    parent=comment.get('id'), comment_counts=comment_counts)

    # YouTube comments have a max depth of 2
    max_depth = int_or_none(self._configuration_arg('max_comment_depth', [''])[0]) or float('inf')
    if max_depth == 1 and parent:
        return
    if not comment_counts:
        # comment so far, est. total comments, current comment thread #
        comment_counts = [0, 0, 0]

    continuation = self._extract_continuation(root_continuation_data)
    if continuation and len(continuation['continuation']) < 27:
        self.write_debug('Detected old API continuation token. Generating new API compatible token.')
        continuation_token = self._generate_comment_continuation(video_id)
        continuation = self._build_api_continuation_query(continuation_token, None)

    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent:
        self.report_warning(message, video_id=video_id)

    visitor_data = None
    is_first_continuation = parent is None

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=visitor_data)
        comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                    comment_counts[2], comment_prog_str)
        else:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)

        response = self._extract_response(
            item_id=None, query=continuation,
            ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
            check_get_keys=('onResponseReceivedEndpoints', 'continuationContents'))
        if not response:
            break
        # Keep the last known visitor data if the response carries none
        visitor_data = try_get(
            response,
            lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
            compat_str) or visitor_data

        continuation_contents = dict_get(response, ('onResponseReceivedEndpoints', 'continuationContents'))

        continuation = None
        if isinstance(continuation_contents, list):
            for continuation_section in continuation_contents:
                if not isinstance(continuation_section, dict):
                    continue
                continuation_items = try_get(
                    continuation_section,
                    (lambda x: x['reloadContinuationItemsCommand']['continuationItems'],
                     lambda x: x['appendContinuationItemsAction']['continuationItems']),
                    list) or []
                if is_first_continuation:
                    continuation = extract_header(continuation_items)
                    is_first_continuation = False
                    if continuation:
                        break
                    continue
                # Count from 1 so that ``count`` is the number of entries
                # received; a zero-based index made a single-comment page
                # look like an empty one
                count = 0
                for count, entry in enumerate(extract_thread(continuation_items), 1):
                    yield entry
                continuation = self._extract_continuation({'contents': continuation_items})
                if continuation:
                    # Sometimes YouTube provides a continuation without any comments
                    # In most cases we end up just downloading these with very little comments to come.
                    if count == 0:
                        if not parent:
                            self.report_warning('No comments received - assuming end of comments')
                        continuation = None
                    break

        # Deprecated response structure
        elif isinstance(continuation_contents, dict):
            known_continuation_renderers = ('itemSectionContinuation', 'commentRepliesContinuation')
            for key, continuation_renderer in continuation_contents.items():
                if key not in known_continuation_renderers:
                    continue
                if not isinstance(continuation_renderer, dict):
                    continue
                if is_first_continuation:
                    header_continuation_items = [continuation_renderer.get('header') or {}]
                    continuation = extract_header(header_continuation_items)
                    is_first_continuation = False
                    if continuation:
                        break

                # Sometimes YouTube provides a continuation without any comments
                # In most cases we end up just downloading these with very little comments to come.
                # ``count`` is the number of entries received (counted from 1)
                count = 0
                for count, entry in enumerate(extract_thread(continuation_renderer.get('contents') or {}), 1):
                    yield entry
                continuation = self._extract_continuation(continuation_renderer)
                if count == 0:
                    if not parent:
                        self.report_warning('No comments received - assuming end of comments')
                    continuation = None
                break
a1c5d2ca 2280
@staticmethod
def _generate_comment_continuation(video_id):
    """
    Generates initial comment section continuation token from given video id
    """
    encoded_id = base64.b64encode(video_id.encode('utf-8'))
    # Fixed base64 fragments with the encoded video id spliced in twice
    segments = ('Eg0SCw==', encoded_id, 'GAYyJyIRIgs=', encoded_id, 'MAB4AjAAQhBjb21tZW50cy1zZWN0aW9u')
    token_ints = []
    for segment in segments:
        token_ints.extend(bytes_to_intlist(base64.b64decode(segment)))
    return base64.b64encode(intlist_to_bytes(token_ints)).decode('utf-8')
2291
def _get_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction"""
    def iter_comments(section_contents):
        # Locate the item section holding the comments; None if absent
        sections = traverse_obj(section_contents, (..., 'itemSectionRenderer'), default={})
        comment_section = None
        for section in sections:
            if section.get('sectionIdentifier') == 'comment-item-section':
                comment_section = section
                break
        yield from self._comment_entries(comment_section, ytcfg, video_id)

    max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0])
    # Force English regardless of account setting to prevent parsing issues
    # See: https://github.com/yt-dlp/yt-dlp/issues/532
    ytcfg = copy.deepcopy(ytcfg)
    client_context = traverse_obj(
        ytcfg, ('INNERTUBE_CONTEXT', 'client'), expected_type=dict, default={})
    client_context['hl'] = 'en'
    return itertools.islice(iter_comments(contents), 0, max_comments)
a1c5d2ca 2307
109dd3b2 2308 @staticmethod
99e9e001 2309 def _get_checkok_params():
2310 return {'contentCheckOk': True, 'racyCheckOk': True}
2311
2312 @classmethod
2313 def _generate_player_context(cls, sts=None):
109dd3b2 2314 context = {
2315 'html5Preference': 'HTML5_PREF_WANTS',
2316 }
2317 if sts is not None:
2318 context['signatureTimestamp'] = sts
2319 return {
2320 'playbackContext': {
2321 'contentPlaybackContext': context
a1a7907b 2322 },
99e9e001 2323 **cls._get_checkok_params()
109dd3b2 2324 }
2325
e7e94f2a
D
@staticmethod
def _is_agegated(player_response):
    """
    Tell whether the playability status of a player response indicates an age gate

    True when 'desktopLegacyAgeGateReason' is set, or when the status or
    the reason contains one of the known age gate markers.
    """
    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )

    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    # NOTE(review): the substring test below is case-sensitive while the status
    # markers are lower-case - confirm which casing the API actually reports
    candidates = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
    for marker in AGE_GATE_REASONS:
        for text in candidates:
            if marker in text:
                return True
    return False
2337
@staticmethod
def _is_unplayable(player_response):
    """Tell whether the playability status of a player response is 'UNPLAYABLE'"""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'
9275f62c 2341
99e9e001 2342 def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr):
109dd3b2 2343
11f9be09 2344 session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
2345 syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
b6de707d 2346 sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False) if player_url else None
11f9be09 2347 headers = self.generate_api_headers(
99e9e001 2348 ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)
9297939e 2349
11f9be09 2350 yt_query = {'videoId': video_id}
2351 yt_query.update(self._generate_player_context(sts))
2352 return self._extract_response(
2353 item_id=video_id, ep='player', query=yt_query,
379e44ed 2354 ytcfg=player_ytcfg, headers=headers, fatal=True,
000c15a4 2355 default_client=client,
11f9be09 2356 note='Downloading %s player API JSON' % client.replace('_', ' ').strip()
2357 ) or None
2358
def _get_requested_clients(self, url, smuggled_data):
    """
    Work out which innertube clients should be queried

    The clients come from the 'player_client' configuration argument, where
    'default' expands to the default clients and 'all' to every allowed
    client (names not starting with '_', highest priority first).
    Unsupported names are skipped with a warning. If nothing usable was
    requested, the default clients are used. For music URLs, the '_music'
    variant of each requested client is added when such a client exists.

    @param url            The URL being extracted
    @param smuggled_data  Smuggled data dict; 'is_music_url' forces the music variants
    @returns              The requested clients with duplicates removed (via orderedSet)
    """
    default = ['android', 'web']
    allowed_clients = sorted(
        (client for client in INNERTUBE_CLIENTS if client[:1] != '_'),
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)

    requested_clients = []
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'default':
            requested_clients.extend(default)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        # Copy so that the defaults are never modified through an alias
        requested_clients = list(default)

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Build the variants from a snapshot first: extending a list from a
        # generator that iterates the very same list also visits the new items
        music_clients = [
            f'{client}_music' for client in requested_clients
            if f'{client}_music' in INNERTUBE_CLIENTS]
        requested_clients.extend(music_clients)

    return orderedSet(requested_clients)
cf7e015f 2382
c0bc527b
M
2383 def _extract_player_ytcfg(self, client, video_id):
2384 url = {
2385 'web_music': 'https://music.youtube.com',
2386 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
2387 }.get(client)
2388 if not url:
2389 return {}
2390 webpage = self._download_webpage(url, video_id, fatal=False, note=f'Downloading {client} config')
2391 return self.extract_ytcfg(video_id, webpage) or {}
2392
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
    """
    Collect the player responses of all the given clients

    Clients are tried in the given order. Additional clients ('_agegate' and
    '_creator' variants) may be queued while processing, depending on the
    playability status of the responses received.

    @param clients       Client names to try, in order
    @param video_id      The video id
    @param webpage       Watch page, or a false value if it is not available
    @param master_ytcfg  ytcfg used for the 'web' client and as fallback
    @returns             (list of player responses, player URL or None)
    @raises              The last ExtractorError if no player response was obtained
    """
    initial_pr = None
    if webpage:
        initial_pr = self._extract_yt_initial_variable(
            webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
            video_id, 'initial player response')

    # The reversed copy is used as a stack: pop() yields the clients in their
    # given order, and clients appended later are tried next
    original_clients = clients
    clients = clients[::-1]
    prs = []

    def append_client(client_name):
        # Queue an extra client only if it exists and was not already requested
        if client_name in INNERTUBE_CLIENTS and client_name not in original_clients:
            clients.append(client_name)

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr:
        pr = dict(initial_pr)
        pr['streamingData'] = None
        prs.append(pr)

    last_error = None
    tried_iframe_fallback = False
    player_url = None
    while clients:
        client = clients.pop()
        player_ytcfg = master_ytcfg if client == 'web' else {}
        if 'configs' not in self._configuration_arg('player_skip'):
            player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg

        # A player URL found for one client is reused for the following ones
        player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        # The fallback download of the player URL is attempted at most once
        if not player_url and not tried_iframe_fallback and require_js_player:
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            # The initial player response stands in for the 'web' client's API call
            pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
                client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr)
        except ExtractorError as e:
            # Only the most recent error is kept; an earlier one is reported as a warning
            if last_error:
                self.report_warning(last_error)
            last_error = e
            continue

        if pr:
            prs.append(pr)

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if client.endswith('_agegate') and self._is_unplayable(pr) and self.is_authenticated:
            append_client(client.replace('_agegate', '_creator'))
        elif self._is_agegated(pr):
            append_client(f'{client}_agegate')

    # The last error is fatal only when no player response was obtained at all
    if last_error:
        if not len(prs):
            raise last_error
        self.report_warning(last_error)
    return prs, player_url
11f9be09 2459
def _extract_formats(self, streaming_data, video_id, player_url, is_live):
    """
    Yield the format dicts found in the given streaming data

    Directly listed ('formats'/'adaptiveFormats') streams are yielded first,
    followed by the formats of the HLS and DASH manifests unless these are
    skipped through the configuration.

    @param streaming_data  Iterable of 'streamingData' dicts of the player responses
    @param video_id        The video id
    @param player_url      Player URL needed for signature decryption; formats
                           with an encrypted signature are skipped without it
    @param is_live         Whether the video is live; DASH manifests of live videos
                           are only read when 'include_live_dash' is configured
    """
    itags, stream_ids = {}, []
    itag_qualities, res_qualities = {}, {}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            # "or ''" also covers an explicit null value
            quality = (fmt.get('audioQuality') or '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            sc = compat_parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not (sc and fmt_url and encrypted_sig):
                continue
            if not player_url:
                continue
            signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
            sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
            fmt_url += '&' + sp + '=' + signature

        query = parse_qs(fmt_url)
        throttled = False
        if query.get('ratebypass') != ['yes'] and query.get('n'):
            try:
                fmt_url = update_url_query(fmt_url, {
                    'n': self._decrypt_nsig(query['n'][0], video_id, player_url)})
            except ExtractorError as e:
                self.report_warning(
                    f'nsig extraction failed: You may experience throttling for some formats\n'
                    f'n = {query["n"][0]} ; player = {player_url}\n{e}', only_once=True)
                throttled = True

        if itag:
            itags[itag] = 'https'
            stream_ids.append(stream_id)

        tbr = float_or_none(
            fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': join_nonempty(
                '%s%s' % (audio_track.get('displayName') or '',
                          ' (default)' if audio_track.get('audioIsDefault') else ''),
                # quality is None when the format has neither 'quality' nor 'audioQuality'
                fmt.get('qualityLabel') or (quality or '').replace('audio_quality_', ''),
                throttled and 'THROTTLED', delim=', '),
            'source_preference': -10 if throttled else -1,
            'fps': int_or_none(fmt.get('fps')) or None,
            'height': height,
            'quality': q(quality),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': audio_track.get('id', '').split('.')[0],
            'language_preference': 1 if audio_track.get('audioIsDefault') else -1,
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        yield dct

    skip_manifests = self._configuration_arg('skip')
    get_dash = (
        (not is_live or self._configuration_arg('include_live_dash'))
        and 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True))
    get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)

    def process_manifest_format(f, proto, itag):
        # Returns False for a format that must be dropped as a duplicate.
        # An itag already seen with another protocol gets a '-<proto>' suffix
        if itag in itags:
            if itags[itag] == proto or f'{itag}-{proto}' in itags:
                return False
            itag = f'{itag}-{proto}'
        if itag:
            f['format_id'] = itag
            itags[itag] = proto

        # Reuse the quality seen for the same itag, or else for the same height
        f['quality'] = next((
            q(qdict[val])
            for val, qdict in ((f.get('format_id', '').split('-')[0], itag_qualities), (f.get('height'), res_qualities))
            if val in qdict), -1)
        return True

    for sd in streaming_data:
        hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
                if process_manifest_format(f, 'hls', self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)):
                    yield f

        dash_manifest_url = get_dash and sd.get('dashManifestUrl')
        if dash_manifest_url:
            for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
                if process_manifest_format(f, 'dash', f['format_id']):
                    f['filesize'] = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                    yield f
11f9be09 2607
def _extract_storyboard(self, player_responses, duration):
    """
    Yield one 'mhtml' format dict per storyboard level

    The storyboard spec is a '|' separated string: the base URL followed by
    one '#' separated group of 8 fields per level. Malformed levels are
    skipped with a warning.

    @param player_responses  The player responses to take the first spec from
    @param duration          Video duration in seconds. Nothing is yielded when
                             it is unknown (None) or 0, since the fragment
                             durations are derived from it
    """
    spec = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
    # str.split never returns an empty list, so the reversed list always
    # has the base URL as its last item; a missing spec gives an empty one
    base_url = spec.pop()
    if not base_url or not duration:
        return
    L = len(spec) - 1
    for i, args in enumerate(spec):
        args = args.split('#')
        counts = list(map(int_or_none, args[:5]))
        if len(args) != 8 or not all(counts):
            self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
            continue
        width, height, frame_count, cols, rows = counts
        N, sigh = args[6:]

        url = base_url.replace('$L', str(L - i)).replace('$N', N) + f'&sigh={sigh}'
        # Each fragment is one image holding a grid of cols * rows frames
        fragment_count = frame_count / (cols * rows)
        fragment_duration = duration / fragment_count
        yield {
            'format_id': f'sb{i}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': url,
            'width': width,
            'height': height,
            'fragments': [{
                'path': url.replace('$M', str(j)),
                # The last fragment may cover less than a full fragment duration
                'duration': min(fragment_duration, duration - (j * fragment_duration)),
            } for j in range(math.ceil(fragment_count))],
        }
2642
11f9be09 2643 def _real_extract(self, url):
2644 url, smuggled_data = unsmuggle_url(url, {})
2645 video_id = self._match_id(url)
2646
2647 base_url = self.http_scheme() + '//www.youtube.com/'
2648 webpage_url = base_url + 'watch?v=' + video_id
b6de707d 2649 webpage = None
2650 if 'webpage' not in self._configuration_arg('player_skip'):
2651 webpage = self._download_webpage(
2652 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
11f9be09 2653
2654 master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
11f9be09 2655
b6de707d 2656 player_responses, player_url = self._extract_player_responses(
11f9be09 2657 self._get_requested_clients(url, smuggled_data),
99e9e001 2658 video_id, webpage, master_ytcfg)
11f9be09 2659
11f9be09 2660 playability_statuses = traverse_obj(
2661 player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])
2662
2663 trailer_video_id = get_first(
2664 playability_statuses,
2665 ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
2666 expected_type=str)
2667 if trailer_video_id:
2668 return self.url_result(
2669 trailer_video_id, self.ie_key(), trailer_video_id)
2670
2671 search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
2672 if webpage else (lambda x: None))
2673
2674 video_details = traverse_obj(
2675 player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
2676 microformats = traverse_obj(
2677 player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
2678 expected_type=dict, default=[])
2679 video_title = (
2680 get_first(video_details, 'title')
2681 or self._get_text(microformats, (..., 'title'))
2682 or search_meta(['og:title', 'twitter:title', 'title']))
2683 video_description = get_first(video_details, 'shortDescription')
2684
d89257f3 2685 multifeed_metadata_list = get_first(
2686 player_responses,
2687 ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
2688 expected_type=str)
2689 if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):
2690 if self.get_param('noplaylist'):
11f9be09 2691 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
d89257f3 2692 else:
2693 entries = []
2694 feed_ids = []
2695 for feed in multifeed_metadata_list.split(','):
2696 # Unquote should take place before split on comma (,) since textual
2697 # fields may contain comma as well (see
2698 # https://github.com/ytdl-org/youtube-dl/issues/8536)
2699 feed_data = compat_parse_qs(
2700 compat_urllib_parse_unquote_plus(feed))
2701
2702 def feed_entry(name):
2703 return try_get(
2704 feed_data, lambda x: x[name][0], compat_str)
2705
2706 feed_id = feed_entry('id')
2707 if not feed_id:
2708 continue
2709 feed_title = feed_entry('title')
2710 title = video_title
2711 if feed_title:
2712 title += ' (%s)' % feed_title
2713 entries.append({
2714 '_type': 'url_transparent',
2715 'ie_key': 'Youtube',
2716 'url': smuggle_url(
2717 '%swatch?v=%s' % (base_url, feed_data['id'][0]),
2718 {'force_singlefeed': True}),
2719 'title': title,
2720 })
2721 feed_ids.append(feed_id)
2722 self.to_screen(
2723 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2724 % (', '.join(feed_ids), video_id))
2725 return self.playlist_result(
2726 entries, video_id, video_title, video_description)
11f9be09 2727
7ea65411 2728 live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
11f9be09 2729 is_live = get_first(video_details, 'isLive')
7ea65411 2730 if is_live is None:
2731 is_live = get_first(live_broadcast_details, 'isLiveNow')
11f9be09 2732
2733 streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
2734 formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live))
bf1317d2 2735
545cc85d 2736 if not formats:
11f9be09 2737 if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
88acdbc2 2738 self.report_drm(video_id)
11f9be09 2739 pemr = get_first(
2740 playability_statuses,
2741 ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
2742 reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
2743 subreason = clean_html(self._get_text(pemr, 'subreason') or '')
545cc85d 2744 if subreason:
545cc85d 2745 if subreason == 'The uploader has not made this video available in your country.':
11f9be09 2746 countries = get_first(microformats, 'availableCountries')
545cc85d 2747 if not countries:
2748 regions_allowed = search_meta('regionsAllowed')
2749 countries = regions_allowed.split(',') if regions_allowed else None
b7da73eb 2750 self.raise_geo_restricted(subreason, countries, metadata_available=True)
11f9be09 2751 reason += f'. {subreason}'
545cc85d 2752 if reason:
b7da73eb 2753 self.raise_no_formats(reason, expected=True)
bf1317d2 2754
11f9be09 2755 keywords = get_first(video_details, 'keywords', expected_type=list) or []
545cc85d 2756 if not keywords and webpage:
2757 keywords = [
2758 unescapeHTML(m.group('content'))
2759 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2760 for keyword in keywords:
2761 if keyword.startswith('yt:stretch='):
201c1459 2762 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2763 if mobj:
2764 # NB: float is intentional for forcing float division
2765 w, h = (float(v) for v in mobj.groups())
2766 if w > 0 and h > 0:
2767 ratio = w / h
2768 for f in formats:
2769 if f.get('vcodec') != 'none':
2770 f['stretched_ratio'] = ratio
2771 break
6449cd80 2772
545cc85d 2773 thumbnails = []
11f9be09 2774 thumbnail_dicts = traverse_obj(
2775 (video_details, microformats), (..., ..., 'thumbnail', 'thumbnails', ...),
2776 expected_type=dict, default=[])
2777 for thumbnail in thumbnail_dicts:
2778 thumbnail_url = thumbnail.get('url')
2779 if not thumbnail_url:
2780 continue
2781 # Sometimes youtube gives a wrong thumbnail URL. See:
2782 # https://github.com/yt-dlp/yt-dlp/issues/233
2783 # https://github.com/ytdl-org/youtube-dl/issues/28023
2784 if 'maxresdefault' in thumbnail_url:
2785 thumbnail_url = thumbnail_url.split('?')[0]
2786 thumbnails.append({
2787 'url': thumbnail_url,
2788 'height': int_or_none(thumbnail.get('height')),
2789 'width': int_or_none(thumbnail.get('width')),
2790 })
ff2751ac 2791 thumbnail_url = search_meta(['og:image', 'twitter:image'])
2792 if thumbnail_url:
2793 thumbnails.append({
2794 'url': thumbnail_url,
ff2751ac 2795 })
fccf5021 2796 original_thumbnails = thumbnails.copy()
2797
0ba692ac 2798 # The best resolution thumbnails sometimes do not appear in the webpage
2799 # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
cca80fe6 2800 # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
e820fbaa 2801 thumbnail_names = [
2802 'maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3',
cca80fe6 2803 'hqdefault', 'hq1', 'hq2', 'hq3', '0',
2804 'mqdefault', 'mq1', 'mq2', 'mq3',
2805 'default', '1', '2', '3'
2806 ]
cca80fe6 2807 n_thumbnail_names = len(thumbnail_names)
0ba692ac 2808 thumbnails.extend({
2809 'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
2810 video_id=video_id, name=name, ext=ext,
2811 webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
cca80fe6 2812 } for name in thumbnail_names for ext in ('webp', 'jpg'))
0ba692ac 2813 for thumb in thumbnails:
cca80fe6 2814 i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
0ba692ac 2815 thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
ff2751ac 2816 self._remove_duplicate_formats(thumbnails)
fccf5021 2817 self._downloader._sort_thumbnails(original_thumbnails)
545cc85d 2818
7ea65411 2819 category = get_first(microformats, 'category') or search_meta('genre')
2820 channel_id = str_or_none(
2821 get_first(video_details, 'channelId')
2822 or get_first(microformats, 'externalChannelId')
2823 or search_meta('channelId'))
2824 duration = int_or_none(
2825 get_first(video_details, 'lengthSeconds')
2826 or get_first(microformats, 'lengthSeconds')
2827 or parse_duration(search_meta('duration'))) or None
2828 owner_profile_url = get_first(microformats, 'ownerProfileUrl')
2829
2830 live_content = get_first(video_details, 'isLiveContent')
2831 is_upcoming = get_first(video_details, 'isUpcoming')
2832 if is_live is None:
2833 if is_upcoming or live_content is False:
2834 is_live = False
2835 if is_upcoming is None and (live_content or is_live):
2836 is_upcoming = False
2837 live_starttime = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
2838 live_endtime = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
2839 if not duration and live_endtime and live_starttime:
2840 duration = live_endtime - live_starttime
2841
720c3099 2842 formats.extend(self._extract_storyboard(player_responses, duration))
2843
2844 # Source is given priority since formats that throttle are given lower source_preference
2845 # When throttling issue is fully fixed, remove this
2846 self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang', 'proto'))
2847
545cc85d 2848 info = {
2849 'id': video_id,
2850 'title': self._live_title(video_title) if is_live else video_title,
2851 'formats': formats,
2852 'thumbnails': thumbnails,
fccf5021 2853 # The best thumbnail that we are sure exists. Prevents unnecessary
2854 # URL checking if the user doesn't care about getting the best possible thumbnail
2855 'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
545cc85d 2856 'description': video_description,
2857 'upload_date': unified_strdate(
11f9be09 2858 get_first(microformats, 'uploadDate')
545cc85d 2859 or search_meta('uploadDate')),
11f9be09 2860 'uploader': get_first(video_details, 'author'),
545cc85d 2861 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2862 'uploader_url': owner_profile_url,
2863 'channel_id': channel_id,
11f9be09 2864 'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
545cc85d 2865 'duration': duration,
2866 'view_count': int_or_none(
11f9be09 2867 get_first((video_details, microformats), (..., 'viewCount'))
545cc85d 2868 or search_meta('interactionCount')),
11f9be09 2869 'average_rating': float_or_none(get_first(video_details, 'averageRating')),
545cc85d 2870 'age_limit': 18 if (
11f9be09 2871 get_first(microformats, 'isFamilySafe') is False
545cc85d 2872 or search_meta('isFamilyFriendly') == 'false'
2873 or search_meta('og:restrictions:age') == '18+') else 0,
2874 'webpage_url': webpage_url,
2875 'categories': [category] if category else None,
2876 'tags': keywords,
11f9be09 2877 'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
7ea65411 2878 'is_live': is_live,
2879 'was_live': (False if is_live or is_upcoming or live_content is False
2880 else None if is_live is None or is_upcoming is None
2881 else live_content),
2882 'live_status': 'is_upcoming' if is_upcoming else None, # rest will be set by YoutubeDL
2883 'release_timestamp': live_starttime,
545cc85d 2884 }
b477fc13 2885
3944e7af 2886 pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
545cc85d 2887 if pctr:
ecdc9049 2888 def get_lang_code(track):
2889 return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
2890 or track.get('languageCode'))
2891
2892 # Converted into dicts to remove duplicates
2893 captions = {
2894 get_lang_code(sub): sub
2895 for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
2896 translation_languages = {
2897 lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
2898 for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}
2899
774d79cc 2900 def process_language(container, base_url, lang_code, sub_name, query):
120916da 2901 lang_subs = container.setdefault(lang_code, [])
545cc85d 2902 for fmt in self._SUBTITLE_FORMATS:
2903 query.update({
2904 'fmt': fmt,
2905 })
2906 lang_subs.append({
2907 'ext': fmt,
2908 'url': update_url_query(base_url, query),
774d79cc 2909 'name': sub_name,
545cc85d 2910 })
7e72694b 2911
ecdc9049 2912 subtitles, automatic_captions = {}, {}
2913 for lang_code, caption_track in captions.items():
2914 base_url = caption_track.get('baseUrl')
545cc85d 2915 if not base_url:
2916 continue
ecdc9049 2917 lang_name = self._get_text(caption_track, 'name', max_runs=1)
545cc85d 2918 if caption_track.get('kind') != 'asr':
545cc85d 2919 if not lang_code:
2920 continue
2921 process_language(
ecdc9049 2922 subtitles, base_url, lang_code, lang_name, {})
2923 if not caption_track.get('isTranslatable'):
2924 continue
3944e7af 2925 for trans_code, trans_name in translation_languages.items():
2926 if not trans_code:
545cc85d 2927 continue
ecdc9049 2928 if caption_track.get('kind') != 'asr':
2929 trans_code += f'-{lang_code}'
2930 trans_name += format_field(lang_name, template=' from %s')
545cc85d 2931 process_language(
ecdc9049 2932 automatic_captions, base_url, trans_code, trans_name, {'tlang': trans_code})
2933 info['automatic_captions'] = automatic_captions
2934 info['subtitles'] = subtitles
7e72694b 2935
545cc85d 2936 parsed_url = compat_urllib_parse_urlparse(url)
2937 for component in [parsed_url.fragment, parsed_url.query]:
2938 query = compat_parse_qs(component)
2939 for k, v in query.items():
2940 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
2941 d_k += '_time'
2942 if d_k not in info and k in s_ks:
2943 info[d_k] = parse_duration(query[k][0])
822b9d9c
RA
2944
2945 # Youtube Music Auto-generated description
822b9d9c 2946 if video_description:
38d70284 2947 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
822b9d9c 2948 if mobj:
822b9d9c
RA
2949 release_year = mobj.group('release_year')
2950 release_date = mobj.group('release_date')
2951 if release_date:
2952 release_date = release_date.replace('-', '')
2953 if not release_year:
545cc85d 2954 release_year = release_date[:4]
2955 info.update({
2956 'album': mobj.group('album'.strip()),
2957 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
2958 'track': mobj.group('track').strip(),
2959 'release_date': release_date,
cc2db878 2960 'release_year': int_or_none(release_year),
545cc85d 2961 })
7e72694b 2962
545cc85d 2963 initial_data = None
2964 if webpage:
2965 initial_data = self._extract_yt_initial_variable(
2966 webpage, self._YT_INITIAL_DATA_RE, video_id,
2967 'yt initial data')
2968 if not initial_data:
99e9e001 2969 query = {'videoId': video_id}
2970 query.update(self._get_checkok_params())
109dd3b2 2971 initial_data = self._extract_response(
2972 item_id=video_id, ep='next', fatal=False,
99e9e001 2973 ytcfg=master_ytcfg, query=query,
2974 headers=self.generate_api_headers(ytcfg=master_ytcfg),
109dd3b2 2975 note='Downloading initial data API JSON')
545cc85d 2976
c60ee3a2 2977 try:
2978 # This will error if there is no livechat
2979 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
ecdc9049 2980 info.setdefault('subtitles', {})['live_chat'] = [{
c60ee3a2 2981 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
2982 'video_id': video_id,
2983 'ext': 'json',
f6745c49 2984 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
c60ee3a2 2985 }]
2986 except (KeyError, IndexError, TypeError):
2987 pass
545cc85d 2988
2989 if initial_data:
7c365c21 2990 info['chapters'] = (
2991 self._extract_chapters_from_json(initial_data, duration)
2992 or self._extract_chapters_from_engagement_panel(initial_data, duration)
2993 or None)
545cc85d 2994
2995 contents = try_get(
2996 initial_data,
2997 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
2998 list) or []
2999 for content in contents:
3000 vpir = content.get('videoPrimaryInfoRenderer')
3001 if vpir:
3002 stl = vpir.get('superTitleLink')
3003 if stl:
fe93e2c4 3004 stl = self._get_text(stl)
545cc85d 3005 if try_get(
3006 vpir,
3007 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
3008 info['location'] = stl
3009 else:
3010 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
3011 if mobj:
3012 info.update({
3013 'series': mobj.group(1),
3014 'season_number': int(mobj.group(2)),
3015 'episode_number': int(mobj.group(3)),
3016 })
3017 for tlb in (try_get(
3018 vpir,
3019 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
3020 list) or []):
3021 tbr = tlb.get('toggleButtonRenderer') or {}
3022 for getter, regex in [(
3023 lambda x: x['defaultText']['accessibility']['accessibilityData'],
3024 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
3025 lambda x: x['accessibility'],
3026 lambda x: x['accessibilityData']['accessibilityData'],
3027 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
3028 label = (try_get(tbr, getter, dict) or {}).get('label')
3029 if label:
3030 mobj = re.match(regex, label)
3031 if mobj:
3032 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
3033 break
3034 sbr_tooltip = try_get(
3035 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
3036 if sbr_tooltip:
3037 like_count, dislike_count = sbr_tooltip.split(' / ')
3038 info.update({
3039 'like_count': str_to_int(like_count),
3040 'dislike_count': str_to_int(dislike_count),
3041 })
3042 vsir = content.get('videoSecondaryInfoRenderer')
3043 if vsir:
052e1350 3044 info['channel'] = self._get_text(vsir, ('owner', 'videoOwnerRenderer', 'title'))
545cc85d 3045 rows = try_get(
3046 vsir,
3047 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
3048 list) or []
3049 multiple_songs = False
3050 for row in rows:
3051 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
3052 multiple_songs = True
3053 break
3054 for row in rows:
3055 mrr = row.get('metadataRowRenderer') or {}
3056 mrr_title = mrr.get('title')
3057 if not mrr_title:
3058 continue
052e1350 3059 mrr_title = self._get_text(mrr, 'title')
3060 mrr_contents_text = self._get_text(mrr, ('contents', 0))
545cc85d 3061 if mrr_title == 'License':
3062 info['license'] = mrr_contents_text
3063 elif not multiple_songs:
3064 if mrr_title == 'Album':
3065 info['album'] = mrr_contents_text
3066 elif mrr_title == 'Artist':
3067 info['artist'] = mrr_contents_text
3068 elif mrr_title == 'Song':
3069 info['track'] = mrr_contents_text
3070
3071 fallbacks = {
3072 'channel': 'uploader',
3073 'channel_id': 'uploader_id',
3074 'channel_url': 'uploader_url',
3075 }
3076 for to, frm in fallbacks.items():
3077 if not info.get(to):
3078 info[to] = info.get(frm)
3079
3080 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
3081 v = info.get(s_k)
3082 if v:
3083 info[d_k] = v
b84071c0 3084
11f9be09 3085 is_private = get_first(video_details, 'isPrivate', expected_type=bool)
3086 is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
c224251a 3087 is_membersonly = None
b28f8d24 3088 is_premium = None
c224251a
M
3089 if initial_data and is_private is not None:
3090 is_membersonly = False
b28f8d24 3091 is_premium = False
47193e02 3092 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
3093 badge_labels = set()
3094 for content in contents:
3095 if not isinstance(content, dict):
3096 continue
3097 badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
3098 for badge_label in badge_labels:
3099 if badge_label.lower() == 'members only':
3100 is_membersonly = True
3101 elif badge_label.lower() == 'premium':
3102 is_premium = True
3103 elif badge_label.lower() == 'unlisted':
3104 is_unlisted = True
c224251a 3105
c224251a
M
3106 info['availability'] = self._availability(
3107 is_private=is_private,
b28f8d24 3108 needs_premium=is_premium,
c224251a
M
3109 needs_subscription=is_membersonly,
3110 needs_auth=info['age_limit'] >= 18,
3111 is_unlisted=None if is_private is None else is_unlisted)
3112
a2160aa4 3113 info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)
4ea3be0a 3114
11f9be09 3115 self.mark_watched(video_id, player_responses)
d77ab8e2 3116
545cc85d 3117 return info
c5e8d7af 3118
5f6a1245 3119
8bdd16b4 3120class YoutubeTabIE(YoutubeBaseInfoExtractor):
96565c7e 3121 IE_DESC = 'YouTube Tabs'
70d5c17b 3122 _VALID_URL = r'''(?x)
3123 https?://
3124 (?:\w+\.)?
3125 (?:
3126 youtube(?:kids)?\.com|
d9190e44 3127 %(invidious)s
70d5c17b 3128 )/
3129 (?:
fe03a6cd 3130 (?P<channel_type>channel|c|user|browse)/|
70d5c17b 3131 (?P<not_channel>
9ba5705a 3132 feed/|hashtag/|
70d5c17b 3133 (?:playlist|watch)\?.*?\blist=
3134 )|
d9190e44 3135 (?!(?:%(reserved_names)s)\b) # Direct URLs
70d5c17b 3136 )
3137 (?P<id>[^/?\#&]+)
d9190e44
RH
3138 ''' % {
3139 'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,
3140 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
3141 }
8bdd16b4 3142 IE_NAME = 'youtube:tab'
3143
81127aa5 3144 _TESTS = [{
da692b79 3145 'note': 'playlists, multipage',
8bdd16b4 3146 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
3147 'playlist_mincount': 94,
3148 'info_dict': {
3149 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3150 'title': 'Игорь Клейнер - Playlists',
3151 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 3152 'uploader': 'Игорь Клейнер',
3153 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
8bdd16b4 3154 },
3155 }, {
da692b79 3156 'note': 'playlists, multipage, different order',
8bdd16b4 3157 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3158 'playlist_mincount': 94,
3159 'info_dict': {
3160 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3161 'title': 'Игорь Клейнер - Playlists',
3162 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 3163 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3164 'uploader': 'Игорь Клейнер',
8bdd16b4 3165 },
201c1459 3166 }, {
da692b79 3167 'note': 'playlists, series',
201c1459 3168 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
3169 'playlist_mincount': 5,
3170 'info_dict': {
3171 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3172 'title': '3Blue1Brown - Playlists',
3173 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
da692b79 3174 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3175 'uploader': '3Blue1Brown',
201c1459 3176 },
8bdd16b4 3177 }, {
da692b79 3178 'note': 'playlists, singlepage',
8bdd16b4 3179 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3180 'playlist_mincount': 4,
3181 'info_dict': {
3182 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3183 'title': 'ThirstForScience - Playlists',
3184 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
deaec5af 3185 'uploader': 'ThirstForScience',
3186 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
8bdd16b4 3187 }
3188 }, {
3189 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3190 'only_matching': True,
3191 }, {
da692b79 3192 'note': 'basic, single video playlist',
0e30a7b9 3193 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
81127aa5 3194 'info_dict': {
0e30a7b9 3195 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3196 'uploader': 'Sergey M.',
3197 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3867038a 3198 'title': 'youtube-dl public playlist',
81127aa5 3199 },
0e30a7b9 3200 'playlist_count': 1,
9291475f 3201 }, {
da692b79 3202 'note': 'empty playlist',
0e30a7b9 3203 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
9291475f 3204 'info_dict': {
0e30a7b9 3205 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3206 'uploader': 'Sergey M.',
3207 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3867038a 3208 'title': 'youtube-dl empty playlist',
9291475f
PH
3209 },
3210 'playlist_count': 0,
3211 }, {
da692b79 3212 'note': 'Home tab',
8bdd16b4 3213 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
9291475f 3214 'info_dict': {
8bdd16b4 3215 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3216 'title': 'lex will - Home',
3217 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3218 'uploader': 'lex will',
3219 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3220 },
8bdd16b4 3221 'playlist_mincount': 2,
9291475f 3222 }, {
da692b79 3223 'note': 'Videos tab',
8bdd16b4 3224 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
9291475f 3225 'info_dict': {
8bdd16b4 3226 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3227 'title': 'lex will - Videos',
3228 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3229 'uploader': 'lex will',
3230 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3231 },
8bdd16b4 3232 'playlist_mincount': 975,
9291475f 3233 }, {
da692b79 3234 'note': 'Videos tab, sorted by popular',
8bdd16b4 3235 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
9291475f 3236 'info_dict': {
8bdd16b4 3237 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3238 'title': 'lex will - Videos',
3239 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3240 'uploader': 'lex will',
3241 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3242 },
8bdd16b4 3243 'playlist_mincount': 199,
9291475f 3244 }, {
da692b79 3245 'note': 'Playlists tab',
8bdd16b4 3246 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
9291475f 3247 'info_dict': {
8bdd16b4 3248 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3249 'title': 'lex will - Playlists',
3250 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3251 'uploader': 'lex will',
3252 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3253 },
8bdd16b4 3254 'playlist_mincount': 17,
ac7553d0 3255 }, {
da692b79 3256 'note': 'Community tab',
8bdd16b4 3257 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
ac7553d0 3258 'info_dict': {
8bdd16b4 3259 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3260 'title': 'lex will - Community',
3261 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3262 'uploader': 'lex will',
3263 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 3264 },
3265 'playlist_mincount': 18,
87dadd45 3266 }, {
da692b79 3267 'note': 'Channels tab',
8bdd16b4 3268 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
87dadd45 3269 'info_dict': {
8bdd16b4 3270 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3271 'title': 'lex will - Channels',
3272 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3273 'uploader': 'lex will',
3274 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 3275 },
deaec5af 3276 'playlist_mincount': 12,
cd684175 3277 }, {
3278 'note': 'Search tab',
3279 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
3280 'playlist_mincount': 40,
3281 'info_dict': {
3282 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3283 'title': '3Blue1Brown - Search - linear algebra',
3284 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3285 'uploader': '3Blue1Brown',
3286 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3287 },
6b08cdf6 3288 }, {
a0566bbf 3289 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3290 'only_matching': True,
3291 }, {
a0566bbf 3292 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3293 'only_matching': True,
3294 }, {
a0566bbf 3295 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3296 'only_matching': True,
3297 }, {
3298 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
3299 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3300 'info_dict': {
3301 'title': '29C3: Not my department',
3302 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3303 'uploader': 'Christiaan008',
3304 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
deaec5af 3305 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
8bdd16b4 3306 },
3307 'playlist_count': 96,
3308 }, {
3309 'note': 'Large playlist',
3310 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
6b08cdf6 3311 'info_dict': {
8bdd16b4 3312 'title': 'Uploads from Cauchemar',
3313 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
3314 'uploader': 'Cauchemar',
3315 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
13a75688 3316 },
8bdd16b4 3317 'playlist_mincount': 1123,
3318 }, {
da692b79 3319 'note': 'even larger playlist, 8832 videos',
8bdd16b4 3320 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
3321 'only_matching': True,
4b7df0d3
JMF
3322 }, {
3323 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
3324 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
3325 'info_dict': {
acf757f4
PH
3326 'title': 'Uploads from Interstellar Movie',
3327 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
13a75688 3328 'uploader': 'Interstellar Movie',
8bdd16b4 3329 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4b7df0d3 3330 },
481cc733 3331 'playlist_mincount': 21,
358de58c 3332 }, {
3333 'note': 'Playlist with "show unavailable videos" button',
3334 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
3335 'info_dict': {
3336 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
3337 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
3338 'uploader': 'Phim Siêu Nhân Nhật Bản',
3339 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
3340 },
da692b79 3341 'playlist_mincount': 200,
5d342002 3342 }, {
da692b79 3343 'note': 'Playlist with unavailable videos in page 7',
5d342002 3344 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
3345 'info_dict': {
3346 'title': 'Uploads from BlankTV',
3347 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
3348 'uploader': 'BlankTV',
3349 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
3350 },
da692b79 3351 'playlist_mincount': 1000,
8bdd16b4 3352 }, {
da692b79 3353 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
8bdd16b4 3354 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3355 'info_dict': {
3356 'title': 'Data Analysis with Dr Mike Pound',
3357 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3358 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
3359 'uploader': 'Computerphile',
deaec5af 3360 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
8bdd16b4 3361 },
3362 'playlist_mincount': 11,
3363 }, {
a0566bbf 3364 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
8bdd16b4 3365 'only_matching': True,
dacb3a86 3366 }, {
da692b79 3367 'note': 'Playlist URL that does not actually serve a playlist',
dacb3a86
S
3368 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
3369 'info_dict': {
3370 'id': 'FqZTN594JQw',
3371 'ext': 'webm',
3372 'title': "Smiley's People 01 detective, Adventure Series, Action",
3373 'uploader': 'STREEM',
3374 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
ec85ded8 3375 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
dacb3a86
S
3376 'upload_date': '20150526',
3377 'license': 'Standard YouTube License',
3378 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
3379 'categories': ['People & Blogs'],
3380 'tags': list,
dbdaaa23 3381 'view_count': int,
dacb3a86
S
3382 'like_count': int,
3383 'dislike_count': int,
3384 },
3385 'params': {
3386 'skip_download': True,
3387 },
13a75688 3388 'skip': 'This video is not available.',
dacb3a86 3389 'add_ie': [YoutubeIE.ie_key()],
481cc733 3390 }, {
8bdd16b4 3391 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
c0345b82 3392 'only_matching': True,
66b48727 3393 }, {
8bdd16b4 3394 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
66b48727 3395 'only_matching': True,
a0566bbf 3396 }, {
3397 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
3398 'info_dict': {
57015a4a 3399 'id': '3yImotZU3tw', # This will keep changing
a0566bbf 3400 'ext': 'mp4',
deaec5af 3401 'title': compat_str,
a0566bbf 3402 'uploader': 'Sky News',
3403 'uploader_id': 'skynews',
3404 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
da692b79 3405 'upload_date': r're:\d{8}',
3406 'description': compat_str,
a0566bbf 3407 'categories': ['News & Politics'],
3408 'tags': list,
3409 'like_count': int,
3410 'dislike_count': int,
3411 },
3412 'params': {
3413 'skip_download': True,
3414 },
da692b79 3415 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
a0566bbf 3416 }, {
3417 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
3418 'info_dict': {
3419 'id': 'a48o2S1cPoo',
3420 'ext': 'mp4',
3421 'title': 'The Young Turks - Live Main Show',
3422 'uploader': 'The Young Turks',
3423 'uploader_id': 'TheYoungTurks',
3424 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
3425 'upload_date': '20150715',
3426 'license': 'Standard YouTube License',
3427 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
3428 'categories': ['News & Politics'],
3429 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
3430 'like_count': int,
3431 'dislike_count': int,
3432 },
3433 'params': {
3434 'skip_download': True,
3435 },
3436 'only_matching': True,
3437 }, {
3438 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
3439 'only_matching': True,
3440 }, {
3441 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
3442 'only_matching': True,
09f1580e 3443 }, {
3444 'note': 'A channel that is not live. Should raise error',
3445 'url': 'https://www.youtube.com/user/numberphile/live',
3446 'only_matching': True,
3d3dddc9 3447 }, {
3448 'url': 'https://www.youtube.com/feed/trending',
3449 'only_matching': True,
3450 }, {
3d3dddc9 3451 'url': 'https://www.youtube.com/feed/library',
3452 'only_matching': True,
3453 }, {
3d3dddc9 3454 'url': 'https://www.youtube.com/feed/history',
3455 'only_matching': True,
3456 }, {
3d3dddc9 3457 'url': 'https://www.youtube.com/feed/subscriptions',
3458 'only_matching': True,
3459 }, {
3d3dddc9 3460 'url': 'https://www.youtube.com/feed/watch_later',
3461 'only_matching': True,
3462 }, {
ac56cf38 3463 'note': 'Recommended - redirects to home page.',
3d3dddc9 3464 'url': 'https://www.youtube.com/feed/recommended',
3465 'only_matching': True,
29f7c58a 3466 }, {
da692b79 3467 'note': 'inline playlist with not always working continuations',
29f7c58a 3468 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
3469 'only_matching': True,
29f7c58a 3470 }, {
3471 'url': 'https://www.youtube.com/course',
3472 'only_matching': True,
3473 }, {
3474 'url': 'https://www.youtube.com/zsecurity',
3475 'only_matching': True,
3476 }, {
3477 'url': 'http://www.youtube.com/NASAgovVideo/videos',
3478 'only_matching': True,
3479 }, {
3480 'url': 'https://www.youtube.com/TheYoungTurks/live',
3481 'only_matching': True,
39ed931e 3482 }, {
3483 'url': 'https://www.youtube.com/hashtag/cctv9',
3484 'info_dict': {
3485 'id': 'cctv9',
3486 'title': '#cctv9',
3487 },
3488 'playlist_mincount': 350,
201c1459 3489 }, {
3490 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
3491 'only_matching': True,
9297939e 3492 }, {
da692b79 3493 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
9297939e 3494 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3495 'only_matching': True
fe03a6cd 3496 }, {
3497 'note': '/browse/ should redirect to /channel/',
3498 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
3499 'only_matching': True
3500 }, {
3501 'note': 'VLPL, should redirect to playlist?list=PL...',
3502 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3503 'info_dict': {
3504 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3505 'uploader': 'NoCopyrightSounds',
3506 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
3507 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
3508 'title': 'NCS Releases',
3509 },
3510 'playlist_mincount': 166,
18db7548 3511 }, {
3512 'note': 'Topic, should redirect to playlist?list=UU...',
3513 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
3514 'info_dict': {
3515 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
3516 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
3517 'title': 'Uploads from Royalty Free Music - Topic',
3518 'uploader': 'Royalty Free Music - Topic',
3519 },
3520 'expected_warnings': [
3521 'A channel/user page was given',
3522 'The URL does not have a videos tab',
3523 ],
3524 'playlist_mincount': 101,
3525 }, {
3526 'note': 'Topic without a UU playlist',
3527 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
3528 'info_dict': {
3529 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
3530 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
3531 },
3532 'expected_warnings': [
3533 'A channel/user page was given',
3534 'The URL does not have a videos tab',
3535 'Falling back to channel URL',
3536 ],
3537 'playlist_mincount': 9,
abcdd12b 3538 }, {
3539 'note': 'Youtube music Album',
3540 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
3541 'info_dict': {
3542 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
3543 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
3544 },
3545 'playlist_count': 50,
47193e02 3546 }, {
3547 'note': 'unlisted single video playlist',
3548 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3549 'info_dict': {
3550 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
3551 'uploader': 'colethedj',
3552 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3553 'title': 'yt-dlp unlisted playlist test',
3554 'availability': 'unlisted'
3555 },
3556 'playlist_count': 1,
ac56cf38 3557 }, {
3558 'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
3559 'url': 'https://www.youtube.com/feed/recommended',
3560 'info_dict': {
3561 'id': 'recommended',
3562 'title': 'recommended',
3563 },
3564 'playlist_mincount': 50,
3565 'params': {
3566 'skip_download': True,
3567 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
3568 },
3569 }, {
3570 'note': 'API Fallback: /videos tab, sorted by oldest first',
3571 'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',
3572 'info_dict': {
3573 'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
3574 'title': 'Cody\'sLab - Videos',
3575 'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',
3576 'uploader': 'Cody\'sLab',
3577 'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
3578 },
3579 'playlist_mincount': 650,
3580 'params': {
3581 'skip_download': True,
3582 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
3583 },
3584 }, {
3585 'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
3586 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
3587 'info_dict': {
3588 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
3589 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
3590 'title': 'Uploads from Royalty Free Music - Topic',
3591 'uploader': 'Royalty Free Music - Topic',
3592 },
3593 'expected_warnings': [
3594 'A channel/user page was given',
3595 'The URL does not have a videos tab',
3596 ],
3597 'playlist_mincount': 101,
3598 'params': {
3599 'skip_download': True,
3600 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
3601 },
29f7c58a 3602 }]
3603
3604 @classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    Any URL that YoutubeIE accepts is rejected here; every other URL is
    judged by the parent class's ``suitable``.
    """
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
8bdd16b4 3608
3609 def _extract_channel_id(self, webpage):
3610 channel_id = self._html_search_meta(
3611 'channelId', webpage, 'channel id', default=None)
3612 if channel_id:
3613 return channel_id
3614 channel_url = self._html_search_meta(
3615 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3616 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3617 'twitter:app:url:googleplay'), webpage, 'channel url')
3618 return self._search_regex(
3619 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
3620 channel_url, 'channel id')
15f6397c 3621
8bdd16b4 3622 @staticmethod
cd7c66cf 3623 def _extract_basic_item_renderer(item):
3624 # Modified from _extract_grid_item_renderer
201c1459 3625 known_basic_renderers = (
3626 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
cd7c66cf 3627 )
3628 for key, renderer in item.items():
201c1459 3629 if not isinstance(renderer, dict):
cd7c66cf 3630 continue
201c1459 3631 elif key in known_basic_renderers:
3632 return renderer
3633 elif key.startswith('grid') and key.endswith('Renderer'):
3634 return renderer
8bdd16b4 3635
def _grid_entries(self, grid_renderer):
    """Yield a result for each recognised item in ``grid_renderer['items']``.

    Each item is reduced to its renderer dict and dispatched on the first
    identifier it carries, checked in this order: ``playlistId``,
    ``videoId``, ``channelId``. Items without any of those fall back to
    their navigation endpoint URL, which is handed to the first extractor
    that declares itself suitable for it. Non-dict items and items with
    no usable renderer are skipped.
    """
    for grid_item in grid_renderer['items']:
        item_renderer = (
            self._extract_basic_item_renderer(grid_item)
            if isinstance(grid_item, dict) else None)
        if not isinstance(item_renderer, dict):
            continue
        title = self._get_text(item_renderer, 'title')

        playlist_id = item_renderer.get('playlistId')
        video_id = item_renderer.get('videoId')
        channel_id = item_renderer.get('channelId')

        if playlist_id:
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=title)
        elif video_id:
            yield self._extract_video(item_renderer)
        elif channel_id:
            yield self.url_result(
                'https://www.youtube.com/channel/%s' % channel_id,
                ie=YoutubeTabIE.ie_key(), video_title=title)
        else:
            # Generic endpoint URL support: resolve the endpoint against
            # the site root and let the first matching extractor take it.
            endpoint_path = try_get(
                item_renderer,
                lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
                compat_str)
            target_url = urljoin('https://www.youtube.com/', endpoint_path)
            if not target_url:
                continue
            for extractor in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
                if extractor.suitable(target_url):
                    yield self.url_result(
                        target_url, ie=extractor.ie_key(),
                        video_id=extractor._match_id(target_url), video_title=title)
                    break
8bdd16b4 3675
3d3dddc9 3676 def _shelf_entries_from_content(self, shelf_renderer):
3677 content = shelf_renderer.get('content')
3678 if not isinstance(content, dict):
8bdd16b4 3679 return
cd7c66cf 3680 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3d3dddc9 3681 if renderer:
3682 # TODO: add support for nested playlists so each shelf is processed
3683 # as separate playlist
3684 # TODO: this includes only first N items
3685 for entry in self._grid_entries(renderer):
3686 yield entry
3687 renderer = content.get('horizontalListRenderer')
3688 if renderer:
3689 # TODO
3690 pass
8bdd16b4 3691
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield entries for a shelf: its own URL first, then its content.

    When the shelf exposes an endpoint URL, a URL result titled with the
    shelf title is yielded for it. Entries extracted from the shelf's
    embedded content are yielded afterwards in every case, since a shelf
    may not carry a URL at all.

    With *skip_channels* set, a shelf whose URL contains ``/channels?``
    produces nothing.
    """
    endpoint_path = try_get(
        shelf_renderer,
        lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)

    # Skip links to other channels. Note that checking for
    # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
    # will not work.
    if shelf_url and skip_channels and '/channels?' in shelf_url:
        return

    if shelf_url:
        yield self.url_result(
            shelf_url, video_title=self._get_text(shelf_renderer, 'title'))

    # Shelf may not contain shelf URL, fallback to extraction from content
    for content_entry in self._shelf_entries_from_content(shelf_renderer):
        yield content_entry
c5e8d7af 3708
8bdd16b4 3709 def _playlist_entries(self, video_list_renderer):
3710 for content in video_list_renderer['contents']:
3711 if not isinstance(content, dict):
3712 continue
3713 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3714 if not isinstance(renderer, dict):
3715 continue
3716 video_id = renderer.get('videoId')
3717 if not video_id:
3718 continue
3719 yield self._extract_video(renderer)
07aeced6 3720
def _rich_entries(self, rich_grid_renderer):
    """Yield the video held at ``content.videoRenderer``, if it has an ID.

    Yields at most one entry; nothing is produced when the renderer is
    missing or carries no ``videoId``.
    """
    video = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video.get('videoId'):
        yield self._extract_video(video)
3728
8bdd16b4 3729 def _video_entry(self, video_renderer):
3730 video_id = video_renderer.get('videoId')
3731 if video_id:
3732 return self._extract_video(video_renderer)
dacb3a86 3733
def _post_thread_entries(self, post_thread_renderer):
    """Yield the media referenced by a single backstage post.

    In order, this yields: the attached video (if any), the attached
    playlist (if any), and then every link in the post text that
    YoutubeIE can handle, except a link to the already-attached video.
    Nothing is yielded when the post renderer is absent.
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    attached_video_id = attached_video.get('videoId')
    if attached_video_id:
        video_entry = self._extract_video(attached_video)
        if video_entry:
            yield video_entry

    # playlist attachment
    attached_playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
    if attached_playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % attached_playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=attached_playlist_id)

    # inline video links
    for text_run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(text_run, dict):
            continue
        link = try_get(
            text_run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not (link and YoutubeIE.suitable(link)):
            continue
        linked_video_id = YoutubeIE._match_id(link)
        # Do not repeat the video that was already yielded as the attachment
        if linked_video_id != attached_video_id:
            yield self.url_result(link, ie=YoutubeIE.ie_key(), video_id=linked_video_id)
dacb3a86 3769
8bdd16b4 3770 def _post_thread_continuation_entries(self, post_thread_continuation):
3771 contents = post_thread_continuation.get('contents')
3772 if not isinstance(contents, list):
3773 return
3774 for content in contents:
3775 renderer = content.get('backstagePostThreadRenderer')
3776 if not isinstance(renderer, dict):
3777 continue
3778 for entry in self._post_thread_entries(renderer):
3779 yield entry
07aeced6 3780
39ed931e 3781 r''' # unused
3782 def _rich_grid_entries(self, contents):
3783 for content in contents:
3784 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3785 if video_renderer:
3786 entry = self._video_entry(video_renderer)
3787 if entry:
3788 yield entry
3789 '''
ac56cf38 3790 def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
3462ffa8 3791
70d5c17b 3792 def extract_entries(parent_renderer): # this needs to called again for continuation to work with feeds
3793 contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
3794 for content in contents:
3795 if not isinstance(content, dict):
8bdd16b4 3796 continue
70d5c17b 3797 is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
3462ffa8 3798 if not is_renderer:
70d5c17b 3799 renderer = content.get('richItemRenderer')
3462ffa8 3800 if renderer:
3801 for entry in self._rich_entries(renderer):
3802 yield entry
3803 continuation_list[0] = self._extract_continuation(parent_renderer)
8bdd16b4 3804 continue
3462ffa8 3805 isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
3806 for isr_content in isr_contents:
3807 if not isinstance(isr_content, dict):
3808 continue
69184e41 3809
3810 known_renderers = {
3811 'playlistVideoListRenderer': self._playlist_entries,
3812 'gridRenderer': self._grid_entries,
3813 'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
3814 'backstagePostThreadRenderer': self._post_thread_entries,
3815 'videoRenderer': lambda x: [self._video_entry(x)],
3816 }
3817 for key, renderer in isr_content.items():
3818 if key not in known_renderers:
3819 continue
3820 for entry in known_renderers[key](renderer):
3821 if entry:
3822 yield entry
3462ffa8 3823 continuation_list[0] = self._extract_continuation(renderer)
69184e41 3824 break
70d5c17b 3825
3462ffa8 3826 if not continuation_list[0]:
3827 continuation_list[0] = self._extract_continuation(is_renderer)
70d5c17b 3828
3829 if not continuation_list[0]:
3830 continuation_list[0] = self._extract_continuation(parent_renderer)
3462ffa8 3831
ac56cf38 3832 continuation_list = [None] # Python 2 does not support nonlocal
29f7c58a 3833 tab_content = try_get(tab, lambda x: x['content'], dict)
3834 if not tab_content:
3835 return
3462ffa8 3836 parent_renderer = (
29f7c58a 3837 try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
3838 or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
70d5c17b 3839 for entry in extract_entries(parent_renderer):
3840 yield entry
3462ffa8 3841 continuation = continuation_list[0]
d069eca7 3842
8bdd16b4 3843 for page_num in itertools.count(1):
3844 if not continuation:
3845 break
99e9e001 3846 headers = self.generate_api_headers(
3847 ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
79360d99 3848 response = self._extract_response(
3849 item_id='%s page %s' % (item_id, page_num),
fe93e2c4 3850 query=continuation, headers=headers, ytcfg=ytcfg,
79360d99 3851 check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
a5c56234
M
3852
3853 if not response:
8bdd16b4 3854 break
ac56cf38 3855 # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
3856 # See: https://github.com/ytdl-org/youtube-dl/issues/28702
3857 visitor_data = self._extract_visitor_data(response) or visitor_data
ebf1b291 3858
69184e41 3859 known_continuation_renderers = {
3860 'playlistVideoListContinuation': self._playlist_entries,
3861 'gridContinuation': self._grid_entries,
3862 'itemSectionContinuation': self._post_thread_continuation_entries,
3863 'sectionListContinuation': extract_entries, # for feeds
3864 }
8bdd16b4 3865 continuation_contents = try_get(
69184e41 3866 response, lambda x: x['continuationContents'], dict) or {}
3867 continuation_renderer = None
3868 for key, value in continuation_contents.items():
3869 if key not in known_continuation_renderers:
3462ffa8 3870 continue
69184e41 3871 continuation_renderer = value
3872 continuation_list = [None]
3873 for entry in known_continuation_renderers[key](continuation_renderer):
3874 yield entry
3875 continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
3876 break
3877 if continuation_renderer:
3878 continue
c5e8d7af 3879
a1b535bd 3880 known_renderers = {
3881 'gridPlaylistRenderer': (self._grid_entries, 'items'),
3882 'gridVideoRenderer': (self._grid_entries, 'items'),
d61fc646 3883 'gridChannelRenderer': (self._grid_entries, 'items'),
a1b535bd 3884 'playlistVideoRenderer': (self._playlist_entries, 'contents'),
cd7c66cf 3885 'itemSectionRenderer': (extract_entries, 'contents'), # for feeds
9ba5705a 3886 'richItemRenderer': (extract_entries, 'contents'), # for hashtag
26fe8ffe 3887 'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
a1b535bd 3888 }
cce889b9 3889 on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
8bdd16b4 3890 continuation_items = try_get(
cce889b9 3891 on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
a1b535bd 3892 continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
3893 video_items_renderer = None
3894 for key, value in continuation_item.items():
3895 if key not in known_renderers:
8bdd16b4 3896 continue
a1b535bd 3897 video_items_renderer = {known_renderers[key][1]: continuation_items}
9ba5705a 3898 continuation_list = [None]
a1b535bd 3899 for entry in known_renderers[key][0](video_items_renderer):
3900 yield entry
9ba5705a 3901 continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
a1b535bd 3902 break
3903 if video_items_renderer:
3904 continue
8bdd16b4 3905 break
9558dcec 3906
@staticmethod
def _extract_selected_tab(tabs):
    """
    Return the renderer of the first tab whose 'selected' field is True.

    Each element of tabs is looked up under 'tabRenderer' or
    'expandableTabRenderer'.
    Raises ExtractorError when no tab is marked as selected.
    """
    candidates = (
        dict_get(entry, ('tabRenderer', 'expandableTabRenderer')) or {}
        for entry in tabs)
    for candidate in candidates:
        if candidate.get('selected') is True:
            return candidate
    raise ExtractorError('Unable to find selected tab')
b82f815f 3915
@classmethod
def _extract_uploader(cls, data):
    """
    Build the uploader fields from the playlist's secondary sidebar renderer.

    Returns a dict that may contain 'uploader', 'uploader_id' and
    'uploader_url'; fields whose value is None are left out. An empty dict
    is returned when no video owner entry is found.
    """
    sidebar = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner_run = try_get(
        sidebar, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if not owner_run:
        return {}

    def browse_field(name):
        # Both the id and the canonical URL live under the same browse endpoint
        return try_get(
            owner_run, lambda x: x['navigationEndpoint']['browseEndpoint'][name], compat_str)

    fields = (
        ('uploader', owner_run.get('text')),
        ('uploader_id', browse_field('browseId')),
        ('uploader_url', urljoin('https://www.youtube.com/', browse_field('canonicalBaseUrl'))),
    )
    return {key: value for key, value in fields if value is not None}
8bdd16b4 3930
def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """
    Build the playlist result for a tabbed page (channel or playlist).

    @param item_id: fallback playlist id when the metadata carries none
    @param ytcfg:   passed through to the entry extraction
    @param data:    response the metadata is read from
    @param tabs:    tab list; the selected tab supplies the entries
    Returns the value of self.playlist_result() over the selected tab's entries.
    """
    playlist_id = title = description = channel_url = channel_name = channel_id = None
    thumbnails_list = []
    tags = []

    selected_tab = self._extract_selected_tab(tabs)
    # Channel metadata is preferred; playlist metadata is only used when it is absent
    renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if renderer:
        channel_name = renderer.get('title')
        channel_url = renderer.get('channelUrl')
        channel_id = renderer.get('externalId')
    else:
        renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    if renderer:
        title = renderer.get('title')
        description = renderer.get('description', '')
        # channel_id is still None for playlist metadata, so playlist_id falls back to item_id below
        playlist_id = channel_id
        tags = renderer.get('keywords', '').split()
        # Avatar thumbnails first, then the thumbnail of the primary sidebar renderer
        thumbnails_list = (
            try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
            or try_get(
                self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
                lambda x: x['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                list)
            or [])

    # Keep only well-formed thumbnail entries that have a usable URL
    thumbnails = []
    for t in thumbnails_list:
        if not isinstance(t, dict):
            continue
        thumbnail_url = url_or_none(t.get('url'))
        if not thumbnail_url:
            continue
        thumbnails.append({
            'url': thumbnail_url,
            'width': int_or_none(t.get('width')),
            'height': int_or_none(t.get('height')),
        })
    if playlist_id is None:
        playlist_id = item_id
    if title is None:
        # Hashtag pages carry their title in the header; otherwise use the id
        title = (
            try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
            or playlist_id)
    # Append the selected tab's title and expanded text to the playlist title
    title += format_field(selected_tab, 'title', ' - %s')
    title += format_field(selected_tab, 'expandedText', ' - %s')
    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
    }
    availability = self._extract_availability(data)
    if availability:
        metadata['availability'] = availability
    if not channel_id:
        # No channel metadata: take the uploader from the sidebar instead
        metadata.update(self._extract_uploader(data))
    # The channel_* fields mirror whatever uploader_* values were settled on above
    metadata.update({
        'channel': metadata['uploader'],
        'channel_id': metadata['uploader_id'],
        'channel_url': metadata['uploader_url']})
    return self.playlist_result(
        self._entries(
            selected_tab, playlist_id, ytcfg,
            self._extract_account_syncid(ytcfg, data),
            self._extract_visitor_data(data, ytcfg)),
        **metadata)
73c4ac2c 4005
def _extract_mix_playlist(self, playlist, playlist_id, data, ytcfg):
    """
    Yield the videos of a mix playlist, requesting further pages as needed.

    @param playlist:    playlist renderer of the first page
    @param playlist_id: id sent with every follow-up request
    @param data:        initial response, used for account sync id and visitor data
    @param ytcfg:       passed to the header generation and the API requests

    Iteration ends when a page has no videos, when a page contributes
    nothing new, when the first video shows up again, or when a response
    carries no playlist panel.
    """
    first_id = last_id = response = None
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last video yielded from the previous page
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        for video in videos[start:]:
            if video['id'] == first_id:
                self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                return
            yield video
        first_id = first_id or videos[0]['id']
        last_id = videos[-1]['id']
        # The last panel item is not guaranteed to be a playlistPanelVideoRenderer;
        # fall back to an empty dict so the defaults below apply instead of
        # raising AttributeError on None
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if not playlist:
            # Nothing to continue with; do not hand None to _playlist_entries
            return
cd7c66cf 4040
def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """
    Handle a playlist panel found on a watch page.

    Everything except mix playlists is delegated to the regular tab-based
    playlist URL; mix playlists are extracted page by page.
    """
    title = playlist.get('title')
    if not title:
        title = try_get(data, lambda x: x['titleText']['simpleText'], compat_str)
    playlist_id = playlist.get('playlistId') or item_id

    web_url = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, web_url)
    if not playlist_url or playlist_url == url:
        # No distinct playlist page to delegate to: walk the mix ourselves
        mix_entries = self._extract_mix_playlist(playlist, playlist_id, data, ytcfg)
        return self.playlist_result(
            mix_entries, playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
c5e8d7af 4058
def _extract_availability(self, data):
    """
    Work out the availability of a playlist/tab from its sidebar.
    Note: nothing is assumed to be public unless YouTube says so explicitly
    @param data: response
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    labels = self._extract_badges(sidebar)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    dropdown_entries = try_get(
        sidebar, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
    for entry in dropdown_entries:
        if try_get(entry, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool):
            selected_label = self._get_text(entry, ('privacyDropdownItemRenderer', 'label'))
            if selected_label:
                labels.add(selected_label.lower())
                break

    is_private = None
    is_unlisted = None
    for label in labels:
        if label == 'public':
            is_unlisted = is_private = False
        elif label == 'private':
            is_private = True
        elif label == 'unlisted':
            is_unlisted = True
    return self._availability(is_private, False, False, False, is_unlisted)
4090
@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """
    Return the first truthy info_renderer value found among the playlist
    sidebar items, or None when there is none.

    @param data:          response holding the sidebar
    @param info_renderer: key to look up in each sidebar item
    @param expected_type: type the value must have to be accepted
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    found = (
        try_get(sidebar_item, lambda x: x[info_renderer], expected_type)
        for sidebar_item in sidebar_items)
    return next((match for match in found if match), None)
4099
def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Request the playlist again through the API, including unavailable videos.

    The endpoint of the 'show unavailable videos' menu button is used when
    the button exists; otherwise default request values are sent.
    Returns None when the page has no primary sidebar renderer.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not sidebar:
        return

    browse_endpoint = {}
    menu_items = try_get(
        sidebar, lambda x: x['menu']['menuRenderer']['items'], list) or []
    for entry in menu_items:
        if not isinstance(entry, dict):
            continue
        nav_renderer = entry.get('menuNavigationItemRenderer')
        label = try_get(
            nav_renderer, lambda x: x['text']['simpleText'], compat_str)
        if label and label.lower() == 'show unavailable videos':
            browse_endpoint = try_get(
                nav_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            break

    api_headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        visitor_data=self._extract_visitor_data(data, ytcfg))
    api_query = {
        'params': browse_endpoint.get('params') or 'wgYCCAA=',
        'browseId': browse_endpoint.get('browseId') or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=api_headers, query=api_query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')
358de58c 4135
def _extract_webpage(self, url, item_id, fatal=True):
    """
    Download the webpage and its initial data, retrying on incomplete data.

    @param url:     page to download
    @param item_id: id used for the download and its messages
    @param fatal:   raise on failure instead of only warning
    Returns a (webpage, data) tuple. The number of retries comes from the
    'extractor_retries' param (default 3).
    """
    retries = self.get_param('extractor_retries', 3)
    count = -1
    webpage = data = last_error = None
    while count < retries:
        count += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if last_error:
            self.report_warning('%s. Retrying ...' % last_error)
        try:
            webpage = self._download_webpage(
                url, item_id,
                note='Downloading webpage%s' % (' (retry #%d)' % count if count else '',))
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as e:
            if isinstance(e.cause, network_exceptions):
                # Network errors are retried, except HTTP 403 and 429 responses
                if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
                    last_error = error_to_compat_str(e.cause or e.msg)
                    if count < retries:
                        continue
            # Out of retries, or an error that is not retried
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            break
        else:
            try:
                self._extract_and_report_alerts(data)
            except ExtractorError as e:
                if fatal:
                    raise
                self.report_warning(error_to_compat_str(e))
                break

            # The data is considered complete once either of these keys is present
            if dict_get(data, ('contents', 'currentVideoEndpoint')):
                break

            last_error = 'Incomplete yt initial data received'
            if count >= retries:
                if fatal:
                    raise ExtractorError(last_error)
                self.report_warning(last_error)
                break

    return webpage, data
4181
def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """
    Get the page data from the webpage, falling back to the API.

    The webpage is downloaded unless 'webpage' is listed in the 'skip'
    configuration argument. When that yields no data, the tab endpoint is
    queried instead.
    Returns a (data, ytcfg) tuple.
    """
    data = None
    if 'webpage' not in self._configuration_arg('skip'):
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        if not ytcfg:
            ytcfg = self.extract_ytcfg(item_id, webpage)
    if data:
        return data, ytcfg

    if not ytcfg and self.is_authenticated:
        auth_note = 'Playlists that require authentication may not extract correctly without a successful webpage download.'
        authcheck_enabled = 'authcheck' not in self._configuration_arg('skip')
        if authcheck_enabled and fatal:
            raise ExtractorError(
                auth_note + ' If you are not downloading private content, or your cookies are only for the first account and channel,'
                ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
                expected=True)
        self.report_warning(auth_note, only_once=True)
    api_data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return api_data, ytcfg
4198
def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """
    Resolve url through the API and fetch the response of the endpoint it maps to.

    A browse endpoint is requested from 'browse', a watch endpoint from 'next'.
    When neither is present, ExtractorError is raised if fatal is set;
    otherwise a warning is reported and None is returned.
    """
    api_headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolved = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=api_headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)
    for endpoint_key, api_ep in (('browseEndpoint', 'browse'), ('watchEndpoint', 'next')):
        endpoint_params = try_get(resolved, lambda x: x['endpoint'][endpoint_key], dict)
        if not endpoint_params:
            continue
        return self._extract_response(
            item_id=item_id, query=endpoint_params, ep=api_ep, headers=api_headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint'))

    failure_note = 'Failed to resolve url (does the playlist exist?)'
    if not fatal:
        self.report_warning(failure_note, item_id)
        return
    raise ExtractorError(failure_note, expected=True)
4216
@staticmethod
def _smuggle_data(entries, data):
    """
    Yield every entry, with data smuggled into its 'url' when data is non-empty.

    Entries are modified in place before being yielded.
    """
    for item in entries:
        if not data:
            yield item
            continue
        item['url'] = smuggle_url(item['url'], data)
        yield item
4223
def _real_extract(self, url):
    """
    Extract url and pass the smuggled data on to every resulting entry.

    The 'is_music_url' flag is added to the smuggled data for music URLs
    before the actual extraction runs.
    """
    url, smuggled_data = unsmuggle_url(url, {})
    if self.is_music_url(url):
        smuggled_data['is_music_url'] = True
    result = self.__real_extract(url, smuggled_data)
    entries = result.get('entries')
    if entries:
        result['entries'] = self._smuggle_data(entries, smuggled_data)
    return result
4232
# Splits a URL into 'pre' (the part matched by _VALID_URL), an optional 'tab'
# and the remaining 'post'. The (?(channel_type)...) conditional only tries to
# match a '/tab' segment when the channel_type group took part in the match.
_url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)
4234
def __real_extract(self, url, smuggled_data):
    """
    Normalise the URL, download its data and dispatch on what the data holds.

    @param url:           URL to extract (without smuggled data)
    @param smuggled_data: dict of smuggled options; 'is_music_url' is read here
    Returns a tab playlist, a watch-page playlist or a single-video result.
    Raises ExtractorError when the page cannot be recognized.
    """
    item_id = self._match_id(url)
    # Always talk to www.youtube.com, whatever host the URL was given with
    url = compat_urlparse.urlunparse(
        compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    def get_mobj(url):
        # Match groups as a dict, with unmatched groups as '' instead of None
        mobj = self._url_re.match(url).groupdict()
        mobj.update((k, '') for k, v in mobj.items() if v is None)
        return mobj

    mobj = get_mobj(url)
    # Youtube returns incomplete data if tabname is not lower case
    pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']
    if is_channel:
        if smuggled_data.get('is_music_url'):
            if item_id[:2] == 'VL':
                # Youtube music VL channels have an equivalent playlist
                item_id = item_id[2:]
                pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
            elif item_id[:2] == 'MP':
                # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
                mdata = self._extract_tab_endpoint(
                    'https://music.youtube.com/channel/%s' % item_id, item_id, default_client='web_music')
                murl = traverse_obj(
                    mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'), get_all=False, expected_type=compat_str)
                if not murl:
                    raise ExtractorError('Failed to resolve album to playlist.')
                return self.url_result(murl, ie=YoutubeTabIE.ie_key())
            elif mobj['channel_type'] == 'browse':
                # Youtube music /browse/ should be changed to /channel/
                pre = 'https://www.youtube.com/channel/%s' % item_id
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # Home URLs should redirect to /videos/
        self.report_warning(
            'A channel/user page was given. All the channel\'s videos will be downloaded. '
            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    # Rebuild the URL from the possibly rewritten parts and re-parse it
    url = ''.join((pre, tab, post))
    mobj = get_mobj(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither a video nor a playlist id, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
        url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
        mobj = get_mobj(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return self.url_result(f'https://www.youtube.com/watch?v={video_id}', ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

    data, ytcfg = self._extract_data(url, item_id)

    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        tab_name = selected_tab.get('title', '')
        if 'no-youtube-channel-redirect' not in compat_opts:
            if mobj['tab'] == '/live':
                # Live tab should have redirected to the video
                raise ExtractorError('The channel is not currently live', expected=True)
            # The requested /videos tab is not the one that ended up selected
            if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
                if not mobj['not_channel'] and item_id[:2] == 'UC':
                    # Topic channels don't have /videos. Use the equivalent playlist instead
                    self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
                    pl_id = 'UU%s' % item_id[2:]
                    pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
                    try:
                        # All four names are only rebound when the playlist extraction succeeds
                        data, ytcfg, item_id, url = *self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True), pl_id, pl_url
                    except ExtractorError:
                        self.report_warning('The playlist gave error. Falling back to channel URL')
                else:
                    self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))

    self.write_debug('Final URL: %s' % url)

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)
    # data may have been replaced above, so the tabs are looked up again
    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        return self._extract_from_tabs(item_id, ytcfg, data, tabs)

    playlist = try_get(
        data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

    # Neither tabs nor a playlist: fall back to the single video, if any
    video_id = try_get(
        data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
        compat_str) or video_id
    if video_id:
        if mobj['tab'] != '/live':  # live tab is expected to redirect to video
            self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
        return self.url_result(f'https://www.youtube.com/watch?v={video_id}', ie=YoutubeIE.ie_key(), video_id=video_id)

    raise ExtractorError('Unable to recognize tab page')
c5e8d7af 4346
c5e8d7af 4347
8bdd16b4 4348class YoutubePlaylistIE(InfoExtractor):
96565c7e 4349 IE_DESC = 'YouTube playlists'
8bdd16b4 4350 _VALID_URL = r'''(?x)(?:
4351 (?:https?://)?
4352 (?:\w+\.)?
4353 (?:
4354 (?:
4355 youtube(?:kids)?\.com|
d9190e44 4356 %(invidious)s
8bdd16b4 4357 )
4358 /.*?\?.*?\blist=
4359 )?
4360 (?P<id>%(playlist_id)s)
d9190e44
RH
4361 )''' % {
4362 'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
4363 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
4364 }
8bdd16b4 4365 IE_NAME = 'youtube:playlist'
cdc628a4 4366 _TESTS = [{
8bdd16b4 4367 'note': 'issue #673',
4368 'url': 'PLBB231211A4F62143',
cdc628a4 4369 'info_dict': {
8bdd16b4 4370 'title': '[OLD]Team Fortress 2 (Class-based LP)',
4371 'id': 'PLBB231211A4F62143',
4372 'uploader': 'Wickydoo',
4373 'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
11f9be09 4374 'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
8bdd16b4 4375 },
4376 'playlist_mincount': 29,
4377 }, {
4378 'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
4379 'info_dict': {
4380 'title': 'YDL_safe_search',
4381 'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
4382 },
4383 'playlist_count': 2,
4384 'skip': 'This playlist is private',
9558dcec 4385 }, {
8bdd16b4 4386 'note': 'embedded',
4387 'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
4388 'playlist_count': 4,
9558dcec 4389 'info_dict': {
8bdd16b4 4390 'title': 'JODA15',
4391 'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
4392 'uploader': 'milan',
4393 'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
9558dcec 4394 }
cdc628a4 4395 }, {
8bdd16b4 4396 'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
11f9be09 4397 'playlist_mincount': 654,
8bdd16b4 4398 'info_dict': {
4399 'title': '2018 Chinese New Singles (11/6 updated)',
4400 'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
4401 'uploader': 'LBK',
4402 'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
11f9be09 4403 'description': 'md5:da521864744d60a198e3a88af4db0d9d',
8bdd16b4 4404 }
daa0df9e 4405 }, {
29f7c58a 4406 'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
4407 'only_matching': True,
4408 }, {
4409 # music album playlist
4410 'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
4411 'only_matching': True,
4412 }]
4413
4414 @classmethod
4415 def suitable(cls, url):
201c1459 4416 if YoutubeTabIE.suitable(url):
4417 return False
49a57e70 4418 from ..utils import parse_qs
201c1459 4419 qs = parse_qs(url)
4420 if qs.get('v', [None])[0]:
4421 return False
4422 return super(YoutubePlaylistIE, cls).suitable(url)
29f7c58a 4423
4424 def _real_extract(self, url):
4425 playlist_id = self._match_id(url)
46953e7e 4426 is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
9297939e 4427 url = update_url_query(
4428 'https://www.youtube.com/playlist',
4429 parse_qs(url) or {'list': playlist_id})
4430 if is_music_url:
4431 url = smuggle_url(url, {'is_music_url': True})
4432 return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
29f7c58a 4433
4434
# youtu.be short links that also carry a playlist id; these are rewritten
# to the equivalent watch URL and delegated to YoutubeTabIE.
class YoutubeYtBeIE(InfoExtractor):
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the watch URL holding both ids and hand it to YoutubeTabIE."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        watch_query = {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        }
        watch_url = update_url_query('https://www.youtube.com/watch', watch_query)
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 4473
4474
# "ytuser:<name>" shortcut for the videos tab of a user page.
class YoutubeYtUserIE(InfoExtractor):
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the user's /videos URL."""
        user_id = self._match_id(url)
        videos_url = 'https://www.youtube.com/user/%s/videos' % user_id
        return self.url_result(
            videos_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 4488
b05654f0 4489
3d3dddc9 4490class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
70d5c17b 4491 IE_NAME = 'youtube:favorites'
96565c7e 4492 IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
70d5c17b 4493 _VALID_URL = r':ytfav(?:ou?rite)?s?'
4494 _LOGIN_REQUIRED = True
4495 _TESTS = [{
4496 'url': ':ytfav',
4497 'only_matching': True,
4498 }, {
4499 'url': ':ytfavorites',
4500 'only_matching': True,
4501 }]
4502
4503 def _real_extract(self, url):
4504 return self.url_result(
4505 'https://www.youtube.com/playlist?list=LL',
4506 ie=YoutubeTabIE.ie_key())
4507
4508
79360d99 4509class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
96565c7e 4510 IE_DESC = 'YouTube searches'
78caa52a 4511 IE_NAME = 'youtube:search'
b05654f0 4512 _SEARCH_KEY = 'ytsearch'
6c894ea1 4513 _SEARCH_PARAMS = None
9dd8e46a 4514 _TESTS = []
b05654f0 4515
cc16383f 4516 def _search_results(self, query):
a5c56234 4517 data = {'query': query}
6c894ea1
U
4518 if self._SEARCH_PARAMS:
4519 data['params'] = self._SEARCH_PARAMS
fe93e2c4 4520 continuation = {}
6c894ea1 4521 for page_num in itertools.count(1):
fe93e2c4 4522 data.update(continuation)
79360d99 4523 search = self._extract_response(
4524 item_id='query "%s" page %s' % (query, page_num), ep='search', query=data,
4525 check_get_keys=('contents', 'onResponseReceivedCommands')
4526 )
6c894ea1 4527 if not search:
b4c08069 4528 break
6c894ea1
U
4529 slr_contents = try_get(
4530 search,
4531 (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
4532 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
4533 list)
4534 if not slr_contents:
a22b2fd1 4535 break
0366ae87 4536
0366ae87
M
4537 # Youtube sometimes adds promoted content to searches,
4538 # changing the index location of videos and token.
4539 # So we search through all entries till we find them.
fe93e2c4 4540 continuation = None
30a074c2 4541 for slr_content in slr_contents:
fe93e2c4 4542 if not continuation:
4543 continuation = self._extract_continuation({'contents': [slr_content]})
a96c6d15 4544
30a074c2 4545 isr_contents = try_get(
4546 slr_content,
4547 lambda x: x['itemSectionRenderer']['contents'],
4548 list)
9da76d30 4549 if not isr_contents:
30a074c2 4550 continue
4551 for content in isr_contents:
4552 if not isinstance(content, dict):
4553 continue
4554 video = content.get('videoRenderer')
4555 if not isinstance(video, dict):
4556 continue
4557 video_id = video.get('videoId')
4558 if not video_id:
4559 continue
4560
4561 yield self._extract_video(video)
0366ae87 4562
fe93e2c4 4563 if not continuation:
6c894ea1 4564 break
b05654f0 4565
c9ae7b95 4566
a3dd9248 4567class YoutubeSearchDateIE(YoutubeSearchIE):
cb7fb546 4568 IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
a3dd9248 4569 _SEARCH_KEY = 'ytsearchdate'
96565c7e 4570 IE_DESC = 'YouTube searches, newest videos first'
6c894ea1 4571 _SEARCH_PARAMS = 'CAI%3D'
75dff0ee 4572
c9ae7b95 4573
386e1dd9 4574class YoutubeSearchURLIE(YoutubeSearchIE):
96565c7e 4575 IE_DESC = 'YouTube search URLs with sorting and filter support'
386e1dd9 4576 IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
96565c7e 4577 _SEARCH_KEY = None
386e1dd9 4578 _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
ef2f3c7f 4579 # _MAX_RESULTS = 100
3462ffa8 4580 _TESTS = [{
4581 'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
4582 'playlist_mincount': 5,
4583 'info_dict': {
11f9be09 4584 'id': 'youtube-dl test video',
3462ffa8 4585 'title': 'youtube-dl test video',
4586 }
4587 }, {
4588 'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
4589 'only_matching': True,
4590 }]
4591
386e1dd9 4592 @classmethod
4593 def _make_valid_url(cls):
4594 return cls._VALID_URL
4595
3462ffa8 4596 def _real_extract(self, url):
4dfbf869 4597 qs = parse_qs(url)
386e1dd9 4598 query = (qs.get('search_query') or qs.get('q'))[0]
4599 self._SEARCH_PARAMS = qs.get('sp', ('',))[0]
4600 return self._get_n_results(query, self._MAX_RESULTS)
3462ffa8 4601
4602
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Common parent of the feed extractors.

    A subclass supplies _FEED_NAME; the extractor name and the feed URL
    are both derived from it.
    """
    _TESTS = []
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        """Extractor name in the form "youtube:<_FEED_NAME>"."""
        feed = self._FEED_NAME
        return 'youtube:%s' % feed

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the feed page URL for _FEED_NAME.

        The incoming url is not used.
        """
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        tab_ie_key = YoutubeTabIE.ie_key()
        return self.url_result(feed_url, ie=tab_ie_key)
25f14e9f
S
4619
4620
class YoutubeWatchLaterIE(InfoExtractor):
    """Map the ":ytwatchlater" keyword onto the "WL" playlist URL."""

    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [
        {
            'url': ':ytwatchlater',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed "list=WL" playlist URL."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 4633
4634
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the "recommended" feed.

    Matches the bare youtube.com front page as well as the ":ytrec" and
    ":ytrecommended" keywords.
    """
    _FEED_NAME = 'recommended'
    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    # Overrides the True value set on YoutubeFeedsInfoExtractor.
    _LOGIN_REQUIRED = False
    _TESTS = [
        {
            'url': ':ytrec',
            'only_matching': True,
        },
        {
            'url': ':ytrecommended',
            'only_matching': True,
        },
        {
            'url': 'https://youtube.com',
            'only_matching': True,
        },
    ]
1ed5b5c9 4650
1ed5b5c9 4651
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the "subscriptions" feed (":ytsubs" and variants)."""

    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [
        {
            'url': ':ytsubs',
            'only_matching': True,
        },
        {
            'url': ':ytsubscriptions',
            'only_matching': True,
        },
    ]
1ed5b5c9 4663
1ed5b5c9 4664
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the "history" feed (":ythis" / ":ythistory")."""

    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [
        {
            'url': ':ythistory',
            'only_matching': True,
        },
    ]
1ed5b5c9
JMF
4673
4674
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """Reject YouTube URLs that carry no video id.

    The pattern matches a watch URL that ends right after at most one of
    a few non-identifying query parameters (feature, annotation_id,
    x-yt-cl, hl, t), or an attribution_link URL with only its "a"
    parameter. Extraction always fails with an explanatory error, since
    such URLs usually result from an unquoted "&" on the command line.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError explaining the likely
        cause (an unquoted URL) and how to invoke the program correctly.

        Raises:
            ExtractorError: unconditionally, with expected=True.
        """
        # The suggested command must name this program (yt-dlp), not the
        # project it was forked from.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            'or simply yt-dlp BaW_jenozKc .',
            expected=True)
772fd5cc
PH
4722
4723
class YoutubeClipIE(InfoExtractor):
    """Placeholder for youtube.com/clip/ URLs.

    Warns that clips are unsupported and hands the URL to the extractor
    named 'Generic'.
    """
    IE_DESC = False  # Do not list
    IE_NAME = 'youtube:clip'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/'

    def _real_extract(self, url):
        """Emit the "not supported" warning, then delegate the same URL."""
        warning = 'YouTube clips are not currently supported. The entire video will be downloaded instead'
        self.report_warning(warning)
        return self.url_result(url, ie='Generic')
4732
4733
772fd5cc
PH
class YoutubeTruncatedIDIE(InfoExtractor):
    """Reject watch URLs whose "v" value is only 1 to 10 characters long."""

    IE_DESC = False  # Do not list
    IE_NAME = 'youtube:truncated_id'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [
        {
            'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the short id."""
        truncated_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (truncated_id, url)
        raise ExtractorError(message, expected=True)