]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
[youtube:comments] Add more options for limiting number of comments extracted (#1626)
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
d92f5d5a 5import calendar
109dd3b2 6import copy
fe93e2c4 7import datetime
a5c56234 8import hashlib
0ca96d48 9import itertools
c5e8d7af 10import json
720c3099 11import math
c4417ddb 12import os.path
d77ab8e2 13import random
c5e8d7af 14import re
46383212 15import sys
8a784c74 16import time
e0df6211 17import traceback
c5e8d7af 18
b05654f0 19from .common import InfoExtractor, SearchInfoExtractor
4bb4a188 20from ..compat import (
edf3e38e 21 compat_chr,
29f7c58a 22 compat_HTTPError,
c5e8d7af 23 compat_parse_qs,
545cc85d 24 compat_str,
7fd002c0 25 compat_urllib_parse_unquote_plus,
15707c7e 26 compat_urllib_parse_urlencode,
7c80519c 27 compat_urllib_parse_urlparse,
7c61bd36 28 compat_urlparse,
4bb4a188 29)
545cc85d 30from ..jsinterp import JSInterpreter
4bb4a188 31from ..utils import (
720c3099 32 bug_reports_message,
c5e8d7af 33 clean_html,
d92f5d5a 34 datetime_from_str,
11f9be09 35 dict_get,
358de58c 36 error_to_compat_str,
c5e8d7af 37 ExtractorError,
2d30521a 38 float_or_none,
11f9be09 39 format_field,
dd27fd17 40 int_or_none,
641ad5d8 41 is_html,
34921b43 42 join_nonempty,
94278f72 43 mimetype2ext,
9c0d7f49 44 network_exceptions,
a6213a49 45 NO_DEFAULT,
11f9be09 46 orderedSet,
6310acf5 47 parse_codecs,
49bd8c66 48 parse_count,
7c80519c 49 parse_duration,
7ea65411 50 parse_iso8601,
4dfbf869 51 parse_qs,
dca3ff4a 52 qualities,
c0ac49bc 53 remove_end,
3995d37d 54 remove_start,
cf7e015f 55 smuggle_url,
dbdaaa23 56 str_or_none,
c93d53f5 57 str_to_int,
7c365c21 58 traverse_obj,
556dbe7f 59 try_get,
c5e8d7af
PH
60 unescapeHTML,
61 unified_strdate,
cf7e015f 62 unsmuggle_url,
8bdd16b4 63 update_url_query,
21c340b8 64 url_or_none,
fe93e2c4 65 urljoin,
7c365c21 66 variadic,
c5e8d7af
PH
67)
68
5f6a1245 69
def get_first(obj, keys, **kwargs):
    """Look up `keys` under every member of `obj` and return a single result.

    The path handed to `traverse_obj` is `...` followed by `keys`, and
    `get_all=False` is always passed. Any other keyword arguments are
    forwarded to `traverse_obj` unchanged.
    """
    path = (..., *variadic(keys))
    return traverse_obj(obj, path, **kwargs, get_all=False)
72
73
000c15a4 74# any clients starting with _ cannot be explicitly requested by the user
75INNERTUBE_CLIENTS = {
76 'web': {
77 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
78 'INNERTUBE_CONTEXT': {
79 'client': {
80 'clientName': 'WEB',
81 'clientVersion': '2.20210622.10.00',
82 }
83 },
84 'INNERTUBE_CONTEXT_CLIENT_NAME': 1
85 },
86 'web_embedded': {
87 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
88 'INNERTUBE_CONTEXT': {
89 'client': {
90 'clientName': 'WEB_EMBEDDED_PLAYER',
91 'clientVersion': '1.20210620.0.1',
92 },
93 },
94 'INNERTUBE_CONTEXT_CLIENT_NAME': 56
95 },
96 'web_music': {
97 'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
98 'INNERTUBE_HOST': 'music.youtube.com',
99 'INNERTUBE_CONTEXT': {
100 'client': {
101 'clientName': 'WEB_REMIX',
102 'clientVersion': '1.20210621.00.00',
103 }
104 },
105 'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
106 },
e7e94f2a
D
107 'web_creator': {
108 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
109 'INNERTUBE_CONTEXT': {
110 'client': {
111 'clientName': 'WEB_CREATOR',
112 'clientVersion': '1.20210621.00.00',
113 }
114 },
115 'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
116 },
000c15a4 117 'android': {
118 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
119 'INNERTUBE_CONTEXT': {
120 'client': {
121 'clientName': 'ANDROID',
122 'clientVersion': '16.20',
123 }
124 },
125 'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
b6de707d 126 'REQUIRE_JS_PLAYER': False
000c15a4 127 },
128 'android_embedded': {
129 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
130 'INNERTUBE_CONTEXT': {
131 'client': {
132 'clientName': 'ANDROID_EMBEDDED_PLAYER',
133 'clientVersion': '16.20',
134 },
135 },
b6de707d 136 'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
137 'REQUIRE_JS_PLAYER': False
000c15a4 138 },
139 'android_music': {
140 'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
141 'INNERTUBE_HOST': 'music.youtube.com',
142 'INNERTUBE_CONTEXT': {
143 'client': {
144 'clientName': 'ANDROID_MUSIC',
145 'clientVersion': '4.32',
146 }
147 },
148 'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
b6de707d 149 'REQUIRE_JS_PLAYER': False
000c15a4 150 },
e7e94f2a
D
151 'android_creator': {
152 'INNERTUBE_CONTEXT': {
153 'client': {
154 'clientName': 'ANDROID_CREATOR',
155 'clientVersion': '21.24.100',
156 },
157 },
b6de707d 158 'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
159 'REQUIRE_JS_PLAYER': False
e7e94f2a 160 },
3619f78d 161 # ios has HLS live streams
162 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680
000c15a4 163 'ios': {
164 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
165 'INNERTUBE_CONTEXT': {
166 'client': {
167 'clientName': 'IOS',
168 'clientVersion': '16.20',
169 }
170 },
b6de707d 171 'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
172 'REQUIRE_JS_PLAYER': False
000c15a4 173 },
174 'ios_embedded': {
175 'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
176 'INNERTUBE_CONTEXT': {
177 'client': {
178 'clientName': 'IOS_MESSAGES_EXTENSION',
179 'clientVersion': '16.20',
180 },
181 },
b6de707d 182 'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
183 'REQUIRE_JS_PLAYER': False
000c15a4 184 },
185 'ios_music': {
186 'INNERTUBE_API_KEY': 'AIzaSyDK3iBpDP9nHVTk2qL73FLJICfOC3c51Og',
187 'INNERTUBE_HOST': 'music.youtube.com',
188 'INNERTUBE_CONTEXT': {
189 'client': {
190 'clientName': 'IOS_MUSIC',
191 'clientVersion': '4.32',
192 },
193 },
b6de707d 194 'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
195 'REQUIRE_JS_PLAYER': False
000c15a4 196 },
e7e94f2a
D
197 'ios_creator': {
198 'INNERTUBE_CONTEXT': {
199 'client': {
200 'clientName': 'IOS_CREATOR',
201 'clientVersion': '21.24.100',
202 },
203 },
b6de707d 204 'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
205 'REQUIRE_JS_PLAYER': False
e7e94f2a 206 },
3619f78d 207 # mweb has 'ultralow' formats
208 # See: https://github.com/yt-dlp/yt-dlp/pull/557
000c15a4 209 'mweb': {
210 'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
211 'INNERTUBE_CONTEXT': {
212 'client': {
213 'clientName': 'MWEB',
214 'clientVersion': '2.20210721.07.00',
215 }
216 },
217 'INNERTUBE_CONTEXT_CLIENT_NAME': 2
218 },
219}
220
221
def build_innertube_clients():
    """Fill in defaults for every INNERTUBE_CLIENTS entry and register `<client>_agegate` variants.

    Mutates the module-level INNERTUBE_CLIENTS dict in place: missing API
    key / host / JS-player settings get default values, each entry gets a
    numeric 'priority', and each base client gets an age-gate copy.
    """
    base_clients = ('android', 'web', 'ios', 'mweb')
    # presumably ranks clients listed earlier in base_clients higher - confirm against `qualities`
    rank = qualities(base_clients[::-1])
    third_party = {
        'embedUrl': 'https://google.com',  # Can be any valid URL
    }
    defaults = (
        ('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8'),
        ('INNERTUBE_HOST', 'www.youtube.com'),
        ('REQUIRE_JS_PLAYER', True),
    )

    # Iterate over a snapshot: agegate entries are inserted into the dict while looping
    for name, cfg in tuple(INNERTUBE_CLIENTS.items()):
        for option, value in defaults:
            cfg.setdefault(option, value)
        cfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
        cfg['priority'] = 10 * rank(name.split('_', 1)[0])

        if name in base_clients:
            # The base client keeps its priority; its agegate copy ranks just below it
            agegate_cfg = copy.deepcopy(cfg)
            INNERTUBE_CLIENTS[f'{name}_agegate'] = agegate_cfg
            agegate_cfg['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
            agegate_cfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            agegate_cfg['priority'] -= 1
            continue

        if name.endswith('_embedded'):
            cfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            cfg['priority'] -= 2
        else:
            cfg['priority'] -= 3
246
247
248build_innertube_clients()
249
250
de7f3446 251class YoutubeBaseInfoExtractor(InfoExtractor):
b2e8bc1b 252 """Provide base functions for Youtube extractors"""
e00eb564 253
3462ffa8 254 _RESERVED_NAMES = (
3cd786db 255 r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'
3619f78d 256 r'shorts|movies|results|shared|hashtag|trending|feed|feeds|'
257 r'browse|oembed|get_video_info|iframe_api|s/player|'
cd7c66cf 258 r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')
3462ffa8 259
3619f78d 260 _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
261
b2e8bc1b 262 _NETRC_MACHINE = 'youtube'
3619f78d 263
b2e8bc1b
JMF
264 # If True it will raise an error if no login info is provided
265 _LOGIN_REQUIRED = False
266
d9190e44
RH
267 _INVIDIOUS_SITES = (
268 # invidious-redirect websites
269 r'(?:www\.)?redirect\.invidious\.io',
270 r'(?:(?:www|dev)\.)?invidio\.us',
271 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
272 r'(?:www\.)?invidious\.pussthecat\.org',
273 r'(?:www\.)?invidious\.zee\.li',
274 r'(?:www\.)?invidious\.ethibox\.fr',
275 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
276 # youtube-dl invidious instances list
277 r'(?:(?:www|no)\.)?invidiou\.sh',
278 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
279 r'(?:www\.)?invidious\.kabi\.tk',
280 r'(?:www\.)?invidious\.mastodon\.host',
281 r'(?:www\.)?invidious\.zapashcanon\.fr',
282 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
283 r'(?:www\.)?invidious\.tinfoil-hat\.net',
284 r'(?:www\.)?invidious\.himiko\.cloud',
285 r'(?:www\.)?invidious\.reallyancient\.tech',
286 r'(?:www\.)?invidious\.tube',
287 r'(?:www\.)?invidiou\.site',
288 r'(?:www\.)?invidious\.site',
289 r'(?:www\.)?invidious\.xyz',
290 r'(?:www\.)?invidious\.nixnet\.xyz',
291 r'(?:www\.)?invidious\.048596\.xyz',
292 r'(?:www\.)?invidious\.drycat\.fr',
293 r'(?:www\.)?inv\.skyn3t\.in',
294 r'(?:www\.)?tube\.poal\.co',
295 r'(?:www\.)?tube\.connect\.cafe',
296 r'(?:www\.)?vid\.wxzm\.sx',
297 r'(?:www\.)?vid\.mint\.lgbt',
298 r'(?:www\.)?vid\.puffyan\.us',
299 r'(?:www\.)?yewtu\.be',
300 r'(?:www\.)?yt\.elukerio\.org',
301 r'(?:www\.)?yt\.lelux\.fi',
302 r'(?:www\.)?invidious\.ggc-project\.de',
303 r'(?:www\.)?yt\.maisputain\.ovh',
304 r'(?:www\.)?ytprivate\.com',
305 r'(?:www\.)?invidious\.13ad\.de',
306 r'(?:www\.)?invidious\.toot\.koeln',
307 r'(?:www\.)?invidious\.fdn\.fr',
308 r'(?:www\.)?watch\.nettohikari\.com',
309 r'(?:www\.)?invidious\.namazso\.eu',
310 r'(?:www\.)?invidious\.silkky\.cloud',
311 r'(?:www\.)?invidious\.exonip\.de',
312 r'(?:www\.)?invidious\.riverside\.rocks',
313 r'(?:www\.)?invidious\.blamefran\.net',
314 r'(?:www\.)?invidious\.moomoo\.de',
315 r'(?:www\.)?ytb\.trom\.tf',
316 r'(?:www\.)?yt\.cyberhost\.uk',
317 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
318 r'(?:www\.)?qklhadlycap4cnod\.onion',
319 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
320 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
321 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
322 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
323 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
324 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
325 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
326 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
327 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
328 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
329 )
330
b2e8bc1b 331 def _login(self):
83317f69 332 """
333 Attempt to log in to YouTube.
83317f69 334 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
335 """
9d5d4d64 336
982ee69a
MB
337 if (self._LOGIN_REQUIRED
338 and self.get_param('cookiefile') is None
339 and self.get_param('cookiesfrombrowser') is None):
9d5d4d64 340 self.raise_login_required(
341 'Login details are needed to download this content', method='cookies')
68217024 342 username, password = self._get_login_info()
9d5d4d64 343 if username:
24b0a72b 344 self.report_warning(f'Cannot login to YouTube using username and password. {self._LOGIN_HINTS["cookies"]}')
b2e8bc1b 345
cce889b9 346 def _initialize_consent(self):
347 cookies = self._get_cookies('https://www.youtube.com/')
348 if cookies.get('__Secure-3PSID'):
349 return
350 consent_id = None
351 consent = cookies.get('CONSENT')
352 if consent:
353 if 'YES' in consent.value:
354 return
355 consent_id = self._search_regex(
356 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
357 if not consent_id:
358 consent_id = random.randint(100, 999)
359 self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
8d81f3e3 360
b2e8bc1b 361 def _real_initialize(self):
cce889b9 362 self._initialize_consent()
24b0a72b 363 self._login()
c5e8d7af 364
a0566bbf 365 _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
29f7c58a 366 _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
367 _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
a0566bbf 368
def _get_default_ytcfg(self, client='web'):
    """Return a deep copy of the INNERTUBE_CLIENTS entry for `client`, so callers may mutate it freely."""
    builtin_cfg = INNERTUBE_CLIENTS[client]
    return copy.deepcopy(builtin_cfg)
109dd3b2 371
def _get_innertube_host(self, client='web'):
    """Return the INNERTUBE_HOST configured for `client` in INNERTUBE_CLIENTS."""
    client_cfg = INNERTUBE_CLIENTS[client]
    return client_cfg['INNERTUBE_HOST']
109dd3b2 374
def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
    """try_get on `ytcfg`, falling back to the default ytcfg of `default_client` when the result is falsy."""
    value = try_get(ytcfg, getter, expected_type)
    if value:
        return value
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)
379
def _extract_client_name(self, ytcfg, default_client='web'):
    """Return the client name from `ytcfg`, or from the default ytcfg of `default_client`."""
    getters = (
        lambda x: x['INNERTUBE_CLIENT_NAME'],
        lambda x: x['INNERTUBE_CONTEXT']['client']['clientName'],
    )
    return self._ytcfg_get_safe(ytcfg, getters, compat_str, default_client)
109dd3b2 384
def _extract_client_version(self, ytcfg, default_client='web'):
    """Return the client version from `ytcfg`, or from the default ytcfg of `default_client`."""
    getters = (
        lambda x: x['INNERTUBE_CLIENT_VERSION'],
        lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion'],
    )
    return self._ytcfg_get_safe(ytcfg, getters, compat_str, default_client)
109dd3b2 389
def _extract_api_key(self, ytcfg=None, default_client='web'):
    """Return INNERTUBE_API_KEY from `ytcfg`, or from the default ytcfg of `default_client`."""
    def key_getter(cfg):
        return cfg['INNERTUBE_API_KEY']

    return self._ytcfg_get_safe(ytcfg, key_getter, compat_str, default_client)
392
def _extract_context(self, ytcfg=None, default_client='web'):
    """Return the Innertube context dict to send with API requests.

    The INNERTUBE_CONTEXT of `ytcfg` is returned as-is when present.
    Otherwise the default context of `default_client` is used, patched
    (when `ytcfg` is non-empty) with the client name/version and the
    VISITOR_DATA found in `ytcfg`.
    """
    def context_of(cfg):
        return try_get(cfg, lambda x: x['INNERTUBE_CONTEXT'], dict)

    supplied = context_of(ytcfg)
    if supplied:
        return supplied

    context = context_of(self._get_default_ytcfg(default_client))
    if ytcfg:
        # Recreate the client context (required)
        context['client'].update({
            'clientVersion': self._extract_client_version(ytcfg, default_client),
            'clientName': self._extract_client_name(ytcfg, default_client),
        })
        visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
        if visitor_data:
            context['client']['visitorData'] = visitor_data
    return context
412
cf87314d 413 _SAPISID = None
414
109dd3b2 415 def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
a5c56234 416 time_now = round(time.time())
cf87314d 417 if self._SAPISID is None:
418 yt_cookies = self._get_cookies('https://www.youtube.com')
419 # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
420 # See: https://github.com/yt-dlp/yt-dlp/issues/393
421 sapisid_cookie = dict_get(
422 yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
423 if sapisid_cookie and sapisid_cookie.value:
424 self._SAPISID = sapisid_cookie.value
425 self.write_debug('Extracted SAPISID cookie')
426 # SAPISID cookie is required if not already present
427 if not yt_cookies.get('SAPISID'):
428 self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
429 self._set_cookie(
430 '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
431 else:
432 self._SAPISID = False
433 if not self._SAPISID:
434 return None
1974e99f 435 # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
436 sapisidhash = hashlib.sha1(
cf87314d 437 f'{time_now} {self._SAPISID} {origin}'.encode('utf-8')).hexdigest()
1974e99f 438 return f'SAPISIDHASH {time_now}_{sapisidhash}'
a5c56234
M
439
440 def _call_api(self, ep, query, video_id, fatal=True, headers=None,
f4f751af 441 note='Downloading API JSON', errnote='Unable to download API page',
000c15a4 442 context=None, api_key=None, api_hostname=None, default_client='web'):
f4f751af 443
109dd3b2 444 data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
8bdd16b4 445 data.update(query)
11f9be09 446 real_headers = self.generate_api_headers(default_client=default_client)
f4f751af 447 real_headers.update({'content-type': 'application/json'})
448 if headers:
449 real_headers.update(headers)
545cc85d 450 return self._download_json(
109dd3b2 451 'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
a5c56234 452 video_id=video_id, fatal=fatal, note=note, errnote=errnote,
f4f751af 453 data=json.dumps(data).encode('utf8'), headers=real_headers,
454 query={'key': api_key or self._extract_api_key()})
455
def extract_yt_initial_data(self, item_id, webpage, fatal=True):
    """Find the ytInitialData JSON in `webpage` and return it parsed, or None when it is not found."""
    patterns = (
        r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
        self._YT_INITIAL_DATA_RE)
    raw_data = self._search_regex(patterns, webpage, 'yt initial data', fatal=fatal)
    if not raw_data:
        return None
    return self._parse_json(raw_data, item_id, fatal=fatal)
0c148415 462
@staticmethod
def _extract_session_index(*data):
    """
    Index of current account in account list.
    Returns the first SESSION_INDEX among the given ytcfgs that converts to an int, else None.
    See: https://github.com/yt-dlp/yt-dlp/pull/519
    """
    indices = (
        int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
        for ytcfg in data)
    return next((index for index in indices if index is not None), None)
473
474 # Deprecated?
def _extract_identity_token(self, ytcfg=None, webpage=None):
    """Return the ID_TOKEN from `ytcfg` if it has one, else search `webpage` for it; None when neither yields a token."""
    token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str) if ytcfg else None
    if token:
        return token
    if not webpage:
        return None
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None, fatal=False)
a1c5d2ca
M
484
@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg
    """
    datasync_getters = (
        lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
        lambda x: x['DATASYNC_ID'],
    )
    for source in args:
        # ytcfg includes channel_syncid if on secondary channel
        delegated_sid = try_get(source, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
        if delegated_sid:
            return delegated_sid

        datasync_id = try_get(source, datasync_getters, compat_str) or ''
        parts = datasync_id.split('||')
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        if len(parts) >= 2 and parts[1]:
            return parts[0]
    return None
a1c5d2ca 503
@staticmethod
def _extract_visitor_data(*args):
    """
    Extracts visitorData from an API response or ytcfg
    Appears to be used to track session state
    @params response and/or ytcfg
    @returns the first string found among the candidate locations below
    """
    candidate_paths = (
        'VISITOR_DATA',
        ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
        ('responseContext', 'visitorData'),
    )
    # The tuple of alternatives has to be wrapped in a list so it stays ONE
    # branching key. get_first splats `variadic(keys)` into the path, so a bare
    # tuple would be spread into consecutive path steps and never match.
    return get_first(args, [candidate_paths], expected_type=str)
ac56cf38 513
@property
def is_authenticated(self):
    """True when a SAPISIDHASH authorization header can be generated for this session."""
    auth_header = self._generate_sapisidhash_header()
    return bool(auth_header)
517
def extract_ytcfg(self, video_id, webpage):
    """Return the dict passed to `ytcfg.set(...)` in `webpage`; an empty dict when absent or unparsable."""
    if not webpage:
        return {}
    raw_cfg = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
        default='{}')
    parsed = self._parse_json(raw_cfg, video_id, fatal=False)
    return parsed or {}
525
def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """Build the HTTP headers for an Innertube API request.

    Explicit arguments take precedence; anything not given is extracted from
    `ytcfg` or the defaults of `default_client`. Authorization headers are
    added when a SAPISIDHASH can be generated. Headers whose value is None
    are dropped from the result.
    """
    origin = 'https://' + (api_hostname or self._get_innertube_host(default_client))
    client_name = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)
    headers = {
        'X-YouTube-Client-Name': compat_str(client_name),
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
        'Origin': origin,
        'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
        'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
        'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg)
    }

    if session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if account_syncid or session_index is not None:
        # With an account syncid but no known session index, 0 is sent
        headers['X-Goog-AuthUser'] = 0 if session_index is None else session_index

    auth = self._generate_sapisidhash_header(origin)
    if auth is not None:
        headers['Authorization'] = auth
        headers['X-Origin'] = origin

    return dict(item for item in headers.items() if item[1] is not None)
29f7c58a 550
2d6659b9 551 @staticmethod
552 def _build_api_continuation_query(continuation, ctp=None):
553 query = {
554 'continuation': continuation
555 }
556 # TODO: Inconsistency with clickTrackingParams.
557 # Currently we have a fixed ctp contained within context (from ytcfg)
558 # and a ctp in root query for continuation.
559 if ctp:
560 query['clickTracking'] = {'clickTrackingParams': ctp}
561 return query
562
@classmethod
def _extract_next_continuation_data(cls, renderer):
    """Build a continuation query from `renderer`'s nextContinuationData / reloadContinuationData, or return None."""
    data = try_get(
        renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
                   lambda x: x['continuation']['reloadContinuationData']), dict)
    token = data.get('continuation') if data else None
    if not token:
        return None
    return cls._build_api_continuation_query(token, data.get('clickTrackingParams'))
2d6659b9 575
@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """Build a continuation query from a continuation endpoint dict; None when it is not a dict or has no token."""
    if not isinstance(continuation_ep, dict):
        return None
    token = try_get(
        continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
    if not token:
        return None
    return cls._build_api_continuation_query(token, continuation_ep.get('clickTrackingParams'))
2d6659b9 585
@classmethod
def _extract_continuation(cls, renderer):
    """Return the continuation query for `renderer`, or None when there is none.

    The renderer's own next/reload continuation data is preferred; otherwise
    the entries under 'contents' and 'items' are scanned for a
    continuationItemRenderer carrying an endpoint or button command.
    """
    direct = cls._extract_next_continuation_data(renderer)
    if direct:
        return direct

    entries = [
        entry
        for key in ('contents', 'items')
        for entry in try_get(renderer, lambda x: x[key], list) or []]

    endpoint_getters = (
        lambda x: x['continuationItemRenderer']['continuationEndpoint'],
        lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command'],
    )
    for entry in entries:
        if not isinstance(entry, dict):
            continue
        endpoint = try_get(entry, endpoint_getters, dict)
        found = cls._extract_continuation_ep_data(endpoint)
        if found:
            return found
    return None
606
@classmethod
def _extract_alerts(cls, data):
    """Yield (alert_type, message) for every alert in data['alerts'].

    Entries that are not dicts, alerts without a 'type' and alerts whose
    'text' yields no message are skipped.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # Skip malformed renderers instead of failing on `.get`
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = cls._get_text(alert, 'text')
            if message:
                yield alert_type, message
619
c0ac49bc 620 def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
109dd3b2 621 errors = []
622 warnings = []
623 for alert_type, alert_message in alerts:
641ad5d8 624 if alert_type.lower() == 'error' and fatal:
109dd3b2 625 errors.append([alert_type, alert_message])
626 else:
627 warnings.append([alert_type, alert_message])
628
629 for alert_type, alert_message in (warnings + errors[:-1]):
c0ac49bc 630 self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message), only_once=only_once)
109dd3b2 631 if errors:
632 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
633
634 def _extract_and_report_alerts(self, data, *args, **kwargs):
635 return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
636
def _extract_badges(self, renderer: dict):
    """Return the set of lower-cased metadataBadgeRenderer labels found in renderer['badges']."""
    labels = (
        try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
        for badge in try_get(renderer, lambda x: x['badges'], list) or [])
    return {label.lower() for label in labels if label}
644
@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """Return the first non-empty text found at any of the given paths in `data`.

    Each path is tried in order (with no paths, `data` itself is used).
    A candidate's text is its 'simpleText', or else the joined 'text' of its
    'runs' (a bare list is treated as the runs), limited to the first
    `max_runs` runs when given. Returns None when nothing yields text.
    """
    for path in path_list or [None]:
        if path is None:
            candidates = [data]
        else:
            candidates = traverse_obj(data, path, default=[])
            branching = any(
                key is ... or isinstance(key, (list, tuple)) for key in variadic(path))
            if not branching:
                # A non-branching path gives a single object; wrap it for uniform iteration
                candidates = [candidates]

        for candidate in candidates:
            simple_text = try_get(candidate, lambda x: x['simpleText'], compat_str)
            if simple_text:
                return simple_text

            runs = try_get(candidate, lambda x: x['runs'], list) or []
            if isinstance(candidate, list) and not runs:
                runs = candidate

            limit = min(len(runs), max_runs or len(runs))
            joined = ''.join(traverse_obj(runs[:limit], (..., 'text'), expected_type=str, default=[]))
            if joined:
                return joined
    return None
47193e02 666
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """Call the Innertube API with retries and return the response.

    Up to 'extractor_retries' (default 3) retries are made on network errors
    (except HTTP 403/429), on "unknown error" alerts, and when the response
    contains none of `check_get_keys`.

    @param check_get_keys  Keys of which at least one must be present (truthy)
                           in the response; no check when empty/None
    @returns  The response, or None if extraction failed and `fatal` is False
    @raises   ExtractorError if extraction failed and `fatal` is True
    Remaining parameters are forwarded to _call_api / _extract_context / _extract_api_key.
    """
    response = None
    last_error = None
    count = -1
    retries = self.get_param('extractor_retries', 3)
    if check_get_keys is None:
        check_get_keys = []
    while count < retries:
        count += 1
        if last_error:
            self.report_warning('%s. Retrying ...' % remove_end(last_error, '.'))
        try:
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client,
                note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
        except ExtractorError as e:
            if isinstance(e.cause, network_exceptions):
                if isinstance(e.cause, compat_HTTPError) and not is_html(e.cause.read(512)):
                    # A non-HTML error body may be JSON carrying YouTube's own error message
                    e.cause.seek(0)
                    yt_error = try_get(
                        self._parse_json(e.cause.read().decode(), item_id, fatal=False),
                        lambda x: x['error']['message'], compat_str)
                    if yt_error:
                        self._report_alerts([('ERROR', yt_error)], fatal=False)
                # Downloading page may result in intermittent 5xx HTTP error
                # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                # We also want to catch all other network exceptions since errors in later pages can be troublesome
                # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
                if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
                    last_error = error_to_compat_str(e.cause or e.msg)
                    if count < retries:
                        continue
            if fatal:
                raise
            else:
                self.report_warning(error_to_compat_str(e))
                return

        else:
            try:
                self._extract_and_report_alerts(response, only_once=True)
            except ExtractorError as e:
                # YouTube servers may return errors we want to retry on in a 200 OK response
                # See: https://github.com/yt-dlp/yt-dlp/issues/839
                # Only retry while attempts remain; once exhausted the error must be
                # raised/reported below instead of silently returning the bad response
                if 'unknown error' in e.msg.lower() and count < retries:
                    last_error = e.msg
                    continue
                if fatal:
                    raise
                self.report_warning(error_to_compat_str(e))
                return
            if not check_get_keys or dict_get(response, check_get_keys):
                break
            # Youtube sometimes sends incomplete data
            # See: https://github.com/ytdl-org/youtube-dl/issues/28194
            last_error = 'Incomplete data received'
            if count >= retries:
                if fatal:
                    raise ExtractorError(last_error)
                else:
                    self.report_warning(last_error)
                    return
    return response
736
9297939e 737 @staticmethod
738 def is_music_url(url):
739 return re.match(r'https?://music\.youtube\.com/', url) is not None
740
def _extract_video(self, renderer):
    """Build a 'url'-type result for YoutubeIE from a video renderer dict."""
    video_id = renderer.get('videoId')
    title = self._get_text(renderer, 'title')
    description = self._get_text(renderer, 'descriptionSnippet')

    length_text = self._get_text(
        renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text'))
    duration = parse_duration(length_text)

    # Strip whitespace, then read the leading digits-and-commas run as the view count
    compact_views = re.sub(r'\s', '', self._get_text(renderer, 'viewCountText') or '')
    view_count = str_to_int(self._search_regex(
        r'^([\d,]+)', compact_views, 'view count', default=None))

    uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': f'https://www.youtube.com/watch?v={video_id}',
        'title': title,
        'description': description,
        'duration': duration,
        'view_count': view_count,
        'uploader': uploader,
    }
765
0c148415 766
360e1ca5 767class YoutubeIE(YoutubeBaseInfoExtractor):
96565c7e 768 IE_DESC = 'YouTube'
cb7dfeea 769 _VALID_URL = r"""(?x)^
c5e8d7af 770 (
edb53e2d 771 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 772 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
773 (?:www\.)?deturl\.com/www\.youtube\.com|
774 (?:www\.)?pwnyoutube\.com|
775 (?:www\.)?hooktube\.com|
776 (?:www\.)?yourepeat\.com|
777 tube\.majestyc\.net|
778 %(invidious)s|
779 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
780 (?:.*?\#/)? # handle anchor (#/) redirect urls
781 (?: # the various things that can precede the ID:
8fc54b12 782 (?:(?:v|embed|e|shorts)/(?!videoseries)) # v/ or embed/ or e/ or shorts/
c5e8d7af 783 |(?: # or the v= param in all its forms
f7000f3a 784 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 785 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 786 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
787 v=
788 )
f4b05232 789 ))
cbaed4bb
S
790 |(?:
791 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
792 vid\.plus| # or vid.plus/xxxx
793 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 794 %(invidious)s
cbaed4bb 795 )/
edb53e2d 796 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 797 )
c5e8d7af 798 )? # all until now is optional -> you can pass the naked ID
201c1459 799 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
c5e8d7af 800 (?(1).+)? # if we found the ID, everything can follow
9297939e 801 (?:\#|$)""" % {
d9190e44 802 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
bc2ca1bb 803 }
e40c758c 804 _PLAYER_INFO_RE = (
cc2db878 805 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
806 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 807 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 808 )
# Static table of known attributes per format, keyed by the itag as a string
# (plus the special '_rtmp' key). Each value is a dict of whichever of
# 'ext', 'width', 'height', 'acodec', 'vcodec', 'abr', 'fps', 'container',
# 'format_note', 'preference' and 'protocol' are known for that itag; a key is
# simply omitted when the value is unknown or varies between videos.
_formats = {
    '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
    '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
    '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
    '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
    '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
    '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
    '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
    '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
    # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well,
    # which is why neither 'height' nor 'abr' is listed here
    '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
    '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
    '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
    '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
    '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
    '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
    '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
    '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
    '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},


    # 3D videos
    '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
    '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
    '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
    '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
    '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
    '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
    '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

    # Apple HTTP Live Streaming
    '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
    '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
    '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
    '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
    '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
    '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
    '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
    '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},

    # DASH mp4 video
    '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
    '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
    '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
    '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},

    # DASH mp4 audio
    '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
    '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
    '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
    '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
    '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
    '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
    '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},

    # DASH webm
    '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
    '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug);
    # the height recorded below is the smaller of the two
    '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
    '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
    '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
    '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

    # DASH webm audio
    '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
    '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},

    # DASH webm audio with opus inside
    '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
    '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
    '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},

    # RTMP (unnamed)
    '_rtmp': {'protocol': 'rtmp'},

    # av01 video only formats sometimes served with "unknown" codecs
    '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
    '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
    '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
    '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
    '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
    '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
    '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
    '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
}
# Subtitle format identifiers, as a fixed-order tuple.
# NOTE(review): presumably the formats requested for each caption track, in
# this order -- the consumer is outside this view; confirm.
_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')

# NOTE(review): looks like this switches off a geo-bypass feature inherited
# from the base extractor -- the attribute's semantics are defined elsewhere.
_GEO_BYPASS = False

# Name of this extractor.
IE_NAME = 'youtube'
2eb88d95
PH
923 _TESTS = [
924 {
2d3d2997 925 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
926 'info_dict': {
927 'id': 'BaW_jenozKc',
928 'ext': 'mp4',
3867038a 929 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
930 'uploader': 'Philipp Hagemeister',
931 'uploader_id': 'phihag',
ec85ded8 932 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
ff9f925b 933 'channel': 'Philipp Hagemeister',
dd4c4492
S
934 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
935 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 936 'upload_date': '20121002',
ff9f925b 937 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
4bc3a23e 938 'categories': ['Science & Technology'],
3867038a 939 'tags': ['youtube-dl'],
556dbe7f 940 'duration': 10,
dbdaaa23 941 'view_count': int,
3e7c1224 942 'like_count': int,
ff9f925b 943 # 'dislike_count': int,
944 'availability': 'public',
945 'playable_in_embed': True,
946 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
947 'live_status': 'not_live',
948 'age_limit': 0,
7c80519c 949 'start_time': 1,
297a564b 950 'end_time': 9,
2eb88d95 951 }
0e853ca4 952 },
fccd3771 953 {
4bc3a23e
PH
954 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
955 'note': 'Embed-only video (#1746)',
956 'info_dict': {
957 'id': 'yZIXLfi8CZQ',
958 'ext': 'mp4',
959 'upload_date': '20120608',
960 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
961 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
962 'uploader': 'SET India',
94bfcd23 963 'uploader_id': 'setindia',
ec85ded8 964 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 965 'age_limit': 18,
545cc85d 966 },
967 'skip': 'Private video',
fccd3771 968 },
11b56058 969 {
8bdd16b4 970 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
971 'note': 'Use the first video ID in the URL',
972 'info_dict': {
973 'id': 'BaW_jenozKc',
974 'ext': 'mp4',
3867038a 975 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
976 'uploader': 'Philipp Hagemeister',
977 'uploader_id': 'phihag',
ec85ded8 978 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 979 'upload_date': '20121002',
3867038a 980 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
11b56058 981 'categories': ['Science & Technology'],
3867038a 982 'tags': ['youtube-dl'],
556dbe7f 983 'duration': 10,
dbdaaa23 984 'view_count': int,
11b56058
PM
985 'like_count': int,
986 'dislike_count': int,
34a7de29
S
987 },
988 'params': {
989 'skip_download': True,
990 },
11b56058 991 },
dd27fd17 992 {
2d3d2997 993 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
994 'note': '256k DASH audio (format 141) via DASH manifest',
995 'info_dict': {
996 'id': 'a9LDPn-MO4I',
997 'ext': 'm4a',
998 'upload_date': '20121002',
999 'uploader_id': '8KVIDEO',
ec85ded8 1000 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
1001 'description': '',
1002 'uploader': '8KVIDEO',
1003 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 1004 },
4bc3a23e
PH
1005 'params': {
1006 'youtube_include_dash_manifest': True,
1007 'format': '141',
4919603f 1008 },
de3c7fe0 1009 'skip': 'format 141 not served anymore',
dd27fd17 1010 },
8bdd16b4 1011 # DASH manifest with encrypted signature
1012 {
1013 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1014 'info_dict': {
1015 'id': 'IB3lcPjvWLA',
1016 'ext': 'm4a',
1017 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1018 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1019 'duration': 244,
1020 'uploader': 'AfrojackVEVO',
1021 'uploader_id': 'AfrojackVEVO',
1022 'upload_date': '20131011',
cc2db878 1023 'abr': 129.495,
8bdd16b4 1024 },
1025 'params': {
1026 'youtube_include_dash_manifest': True,
1027 'format': '141/bestaudio[ext=m4a]',
1028 },
1029 },
65c2fde2 1030 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
c522adb1 1031 {
65c2fde2 1032 'note': 'Embed allowed age-gate video',
2d3d2997 1033 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
1034 'info_dict': {
1035 'id': 'HtVdAasjOgU',
1036 'ext': 'mp4',
1037 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 1038 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 1039 'duration': 142,
c522adb1
JMF
1040 'uploader': 'The Witcher',
1041 'uploader_id': 'WitcherGame',
ec85ded8 1042 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 1043 'upload_date': '20140605',
34952f09 1044 'age_limit': 18,
c522adb1
JMF
1045 },
1046 },
65c2fde2 1047 {
1048 'note': 'Age-gate video with embed allowed in public site',
1049 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1050 'info_dict': {
1051 'id': 'HsUATh_Nc2U',
1052 'ext': 'mp4',
1053 'title': 'Godzilla 2 (Official Video)',
1054 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1055 'upload_date': '20200408',
1056 'uploader_id': 'FlyingKitty900',
1057 'uploader': 'FlyingKitty',
1058 'age_limit': 18,
1059 },
1060 },
1061 {
1062 'note': 'Age-gate video embedable only with clientScreen=EMBED',
1063 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1064 'info_dict': {
1065 'id': 'Tq92D6wQ1mg',
1066 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
3619f78d 1067 'ext': 'mp4',
1068 'upload_date': '20191227',
65c2fde2 1069 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1070 'uploader': 'Projekt Melody',
1071 'description': 'md5:17eccca93a786d51bc67646756894066',
1072 'age_limit': 18,
1073 },
1074 },
1075 {
1076 'note': 'Non-Agegated non-embeddable video',
1077 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1078 'info_dict': {
1079 'id': 'MeJVWBSsPAY',
1080 'ext': 'mp4',
1081 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1082 'uploader': 'Herr Lurik',
1083 'uploader_id': 'st3in234',
1084 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1085 'upload_date': '20130730',
1086 },
1087 },
1088 {
1089 'note': 'Non-bypassable age-gated video',
1090 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1091 'only_matching': True,
1092 },
8bdd16b4 1093 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1094 # YouTube Red ad is not captured for creator
1095 {
1096 'url': '__2ABJjxzNo',
1097 'info_dict': {
1098 'id': '__2ABJjxzNo',
1099 'ext': 'mp4',
1100 'duration': 266,
1101 'upload_date': '20100430',
1102 'uploader_id': 'deadmau5',
1103 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 1104 'creator': 'deadmau5',
1105 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 1106 'uploader': 'deadmau5',
1107 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 1108 'alt_title': 'Some Chords',
8bdd16b4 1109 },
1110 'expected_warnings': [
1111 'DASH manifest missing',
1112 ]
1113 },
067aa17e 1114 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
1115 {
1116 'url': 'lqQg6PlCWgI',
1117 'info_dict': {
1118 'id': 'lqQg6PlCWgI',
1119 'ext': 'mp4',
556dbe7f 1120 'duration': 6085,
90227264 1121 'upload_date': '20150827',
cbe2bd91 1122 'uploader_id': 'olympic',
ec85ded8 1123 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 1124 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
11f9be09 1125 'uploader': 'Olympics',
cbe2bd91
PH
1126 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1127 },
1128 'params': {
1129 'skip_download': 'requires avconv',
e52a40ab 1130 }
cbe2bd91 1131 },
6271f1ca
PH
1132 # Non-square pixels
1133 {
1134 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1135 'info_dict': {
1136 'id': '_b-2C3KPAM0',
1137 'ext': 'mp4',
1138 'stretched_ratio': 16 / 9.,
556dbe7f 1139 'duration': 85,
6271f1ca
PH
1140 'upload_date': '20110310',
1141 'uploader_id': 'AllenMeow',
ec85ded8 1142 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 1143 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 1144 'uploader': '孫ᄋᄅ',
6271f1ca
PH
1145 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1146 },
06b491eb
S
1147 },
1148 # url_encoded_fmt_stream_map is empty string
1149 {
1150 'url': 'qEJwOuvDf7I',
1151 'info_dict': {
1152 'id': 'qEJwOuvDf7I',
f57b7835 1153 'ext': 'webm',
06b491eb
S
1154 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1155 'description': '',
1156 'upload_date': '20150404',
1157 'uploader_id': 'spbelect',
1158 'uploader': 'Наблюдатели Петербурга',
1159 },
1160 'params': {
1161 'skip_download': 'requires avconv',
e323cf3f
S
1162 },
1163 'skip': 'This live event has ended.',
06b491eb 1164 },
067aa17e 1165 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
1166 {
1167 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1168 'info_dict': {
1169 'id': 'FIl7x6_3R5Y',
eb6793ba 1170 'ext': 'webm',
da77d856
S
1171 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1172 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 1173 'duration': 220,
da77d856
S
1174 'upload_date': '20150625',
1175 'uploader_id': 'dorappi2000',
ec85ded8 1176 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 1177 'uploader': 'dorappi2000',
eb6793ba 1178 'formats': 'mincount:31',
da77d856 1179 },
eb6793ba 1180 'skip': 'not actual anymore',
2ee8f5d8 1181 },
8a1a26ce
YCH
1182 # DASH manifest with segment_list
1183 {
1184 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1185 'md5': '8ce563a1d667b599d21064e982ab9e31',
1186 'info_dict': {
1187 'id': 'CsmdDsKjzN8',
1188 'ext': 'mp4',
17ee98e1 1189 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
1190 'uploader': 'Airtek',
1191 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1192 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1193 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1194 },
1195 'params': {
1196 'youtube_include_dash_manifest': True,
1197 'format': '135', # bestvideo
be49068d
S
1198 },
1199 'skip': 'This live event has ended.',
2ee8f5d8 1200 },
cf7e015f
S
1201 {
1202 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 1203 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 1204 'info_dict': {
545cc85d 1205 'id': 'jvGDaLqkpTg',
1206 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1207 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
1208 },
1209 'playlist': [{
1210 'info_dict': {
545cc85d 1211 'id': 'jvGDaLqkpTg',
cf7e015f 1212 'ext': 'mp4',
545cc85d 1213 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1214 'description': 'md5:e03b909557865076822aa169218d6a5d',
1215 'duration': 10643,
1216 'upload_date': '20161111',
1217 'uploader': 'Team PGP',
1218 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1219 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1220 },
1221 }, {
1222 'info_dict': {
545cc85d 1223 'id': '3AKt1R1aDnw',
cf7e015f 1224 'ext': 'mp4',
545cc85d 1225 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1226 'description': 'md5:e03b909557865076822aa169218d6a5d',
1227 'duration': 10991,
1228 'upload_date': '20161111',
1229 'uploader': 'Team PGP',
1230 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1231 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1232 },
1233 }, {
1234 'info_dict': {
545cc85d 1235 'id': 'RtAMM00gpVc',
cf7e015f 1236 'ext': 'mp4',
545cc85d 1237 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1238 'description': 'md5:e03b909557865076822aa169218d6a5d',
1239 'duration': 10995,
1240 'upload_date': '20161111',
1241 'uploader': 'Team PGP',
1242 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1243 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1244 },
1245 }, {
1246 'info_dict': {
545cc85d 1247 'id': '6N2fdlP3C5U',
cf7e015f 1248 'ext': 'mp4',
545cc85d 1249 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1250 'description': 'md5:e03b909557865076822aa169218d6a5d',
1251 'duration': 10990,
1252 'upload_date': '20161111',
1253 'uploader': 'Team PGP',
1254 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1255 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1256 },
1257 }],
1258 'params': {
1259 'skip_download': True,
1260 },
65c2fde2 1261 'skip': 'Not multifeed anymore',
cbaed4bb 1262 },
f9f49d87 1263 {
067aa17e 1264 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
1265 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1266 'info_dict': {
1267 'id': 'gVfLd0zydlo',
1268 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1269 },
1270 'playlist_count': 2,
be49068d 1271 'skip': 'Not multifeed anymore',
f9f49d87 1272 },
cbaed4bb 1273 {
2d3d2997 1274 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 1275 'only_matching': True,
0e49d9a6 1276 },
6d4fc66b 1277 {
2d3d2997 1278 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
1279 'only_matching': True,
1280 },
0e49d9a6 1281 {
067aa17e 1282 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 1283 # Also tests cut-off URL expansion in video description (see
067aa17e
S
1284 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1285 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
1286 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1287 'info_dict': {
1288 'id': 'lsguqyKfVQg',
1289 'ext': 'mp4',
1290 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
11f9be09 1291 'alt_title': 'Dark Walk',
0e49d9a6 1292 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 1293 'duration': 133,
0e49d9a6
LL
1294 'upload_date': '20151119',
1295 'uploader_id': 'IronSoulElf',
ec85ded8 1296 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 1297 'uploader': 'IronSoulElf',
11f9be09 1298 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1299 'track': 'Dark Walk',
1300 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
92bc97d3 1301 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
1302 },
1303 'params': {
1304 'skip_download': True,
1305 },
1306 },
61f92af1 1307 {
067aa17e 1308 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
1309 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1310 'only_matching': True,
1311 },
313dfc45
LL
1312 {
1313 # Video with yt:stretch=17:0
1314 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1315 'info_dict': {
1316 'id': 'Q39EVAstoRM',
1317 'ext': 'mp4',
1318 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1319 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1320 'upload_date': '20151107',
1321 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1322 'uploader': 'CH GAMER DROID',
1323 },
1324 'params': {
1325 'skip_download': True,
1326 },
be49068d 1327 'skip': 'This video does not exist.',
313dfc45 1328 },
201c1459 1329 {
1330 # Video with incomplete 'yt:stretch=16:'
1331 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1332 'only_matching': True,
1333 },
7caf9830
S
1334 {
1335 # Video licensed under Creative Commons
1336 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1337 'info_dict': {
1338 'id': 'M4gD1WSo5mA',
1339 'ext': 'mp4',
1340 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1341 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1342 'duration': 721,
7caf9830
S
1343 'upload_date': '20150127',
1344 'uploader_id': 'BerkmanCenter',
ec85ded8 1345 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 1346 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
1347 'license': 'Creative Commons Attribution license (reuse allowed)',
1348 },
1349 'params': {
1350 'skip_download': True,
1351 },
1352 },
fd050249
S
1353 {
1354 # Channel-like uploader_url
1355 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1356 'info_dict': {
1357 'id': 'eQcmzGIKrzg',
1358 'ext': 'mp4',
1359 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 1360 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1361 'duration': 4060,
fd050249 1362 'upload_date': '20151119',
eb6793ba 1363 'uploader': 'Bernie Sanders',
fd050249 1364 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1365 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
1366 'license': 'Creative Commons Attribution license (reuse allowed)',
1367 },
1368 'params': {
1369 'skip_download': True,
1370 },
1371 },
040ac686
S
1372 {
1373 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1374 'only_matching': True,
7f29cf54
S
1375 },
1376 {
067aa17e 1377 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1378 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1379 'only_matching': True,
6496ccb4
S
1380 },
1381 {
1382 # Rental video preview
1383 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1384 'info_dict': {
1385 'id': 'uGpuVWrhIzE',
1386 'ext': 'mp4',
1387 'title': 'Piku - Trailer',
1388 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1389 'upload_date': '20150811',
1390 'uploader': 'FlixMatrix',
1391 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1392 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1393 'license': 'Standard YouTube License',
1394 },
1395 'params': {
1396 'skip_download': True,
1397 },
eb6793ba 1398 'skip': 'This video is not available.',
022a5d66 1399 },
12afdc2a
S
1400 {
1401 # YouTube Red video with episode data
1402 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1403 'info_dict': {
1404 'id': 'iqKdEhx-dD4',
1405 'ext': 'mp4',
1406 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1407 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1408 'duration': 2085,
12afdc2a
S
1409 'upload_date': '20170118',
1410 'uploader': 'Vsauce',
1411 'uploader_id': 'Vsauce',
1412 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1413 'series': 'Mind Field',
1414 'season_number': 1,
1415 'episode_number': 1,
1416 },
1417 'params': {
1418 'skip_download': True,
1419 },
1420 'expected_warnings': [
1421 'Skipping DASH manifest',
1422 ],
1423 },
c7121fa7
S
1424 {
1425 # The following content has been identified by the YouTube community
1426 # as inappropriate or offensive to some audiences.
1427 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1428 'info_dict': {
1429 'id': '6SJNVb0GnPI',
1430 'ext': 'mp4',
1431 'title': 'Race Differences in Intelligence',
1432 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1433 'duration': 965,
1434 'upload_date': '20140124',
1435 'uploader': 'New Century Foundation',
1436 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1437 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1438 },
1439 'params': {
1440 'skip_download': True,
1441 },
545cc85d 1442 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1443 },
022a5d66
S
1444 {
1445 # itag 212
1446 'url': '1t24XAntNCY',
1447 'only_matching': True,
fd5c4aab
S
1448 },
1449 {
1450 # geo restricted to JP
1451 'url': 'sJL6WA-aGkQ',
1452 'only_matching': True,
1453 },
cd5a74a2
S
1454 {
1455 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1456 'only_matching': True,
1457 },
bc2ca1bb 1458 {
1459 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1460 'only_matching': True,
1461 },
1462 {
1463 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1464 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1465 'only_matching': True,
1466 },
825cd268
RA
1467 {
1468 # DRM protected
1469 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1470 'only_matching': True,
4fe54c12
S
1471 },
1472 {
1473 # Video with unsupported adaptive stream type formats
1474 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1475 'info_dict': {
1476 'id': 'Z4Vy8R84T1U',
1477 'ext': 'mp4',
1478 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1479 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1480 'duration': 433,
1481 'upload_date': '20130923',
1482 'uploader': 'Amelia Putri Harwita',
1483 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1484 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1485 'formats': 'maxcount:10',
1486 },
1487 'params': {
1488 'skip_download': True,
1489 'youtube_include_dash_manifest': False,
1490 },
5429d6a9 1491 'skip': 'not actual anymore',
5caabd3c 1492 },
1493 {
822b9d9c 1494 # Youtube Music Auto-generated description
5caabd3c 1495 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1496 'info_dict': {
1497 'id': 'MgNrAu2pzNs',
1498 'ext': 'mp4',
1499 'title': 'Voyeur Girl',
1500 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1501 'upload_date': '20190312',
5429d6a9
S
1502 'uploader': 'Stephen - Topic',
1503 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1504 'artist': 'Stephen',
1505 'track': 'Voyeur Girl',
1506 'album': 'it\'s too much love to know my dear',
1507 'release_date': '20190313',
1508 'release_year': 2019,
1509 },
1510 'params': {
1511 'skip_download': True,
1512 },
1513 },
66b48727
RA
1514 {
1515 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1516 'only_matching': True,
1517 },
011e75e6
S
1518 {
1519 # invalid -> valid video id redirection
1520 'url': 'DJztXj2GPfl',
1521 'info_dict': {
1522 'id': 'DJztXj2GPfk',
1523 'ext': 'mp4',
1524 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1525 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1526 'upload_date': '20090125',
1527 'uploader': 'Prochorowka',
1528 'uploader_id': 'Prochorowka',
1529 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1530 'artist': 'Panjabi MC',
1531 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1532 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1533 },
1534 'params': {
1535 'skip_download': True,
1536 },
545cc85d 1537 'skip': 'Video unavailable',
ea74e00b
DP
1538 },
1539 {
1540 # empty description results in an empty string
1541 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1542 'info_dict': {
1543 'id': 'x41yOUIvK2k',
1544 'ext': 'mp4',
1545 'title': 'IMG 3456',
1546 'description': '',
1547 'upload_date': '20170613',
1548 'uploader_id': 'ElevageOrVert',
1549 'uploader': 'ElevageOrVert',
1550 },
1551 'params': {
1552 'skip_download': True,
1553 },
1554 },
a0566bbf 1555 {
29f7c58a 1556 # with '};' inside yt initial data (see [1])
1557 # see [2] for an example with '};' inside ytInitialPlayerResponse
1558 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1559 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1560 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1561 'info_dict': {
1562 'id': 'CHqg6qOn4no',
1563 'ext': 'mp4',
1564 'title': 'Part 77 Sort a list of simple types in c#',
1565 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1566 'upload_date': '20130831',
1567 'uploader_id': 'kudvenkat',
1568 'uploader': 'kudvenkat',
1569 },
1570 'params': {
1571 'skip_download': True,
1572 },
1573 },
29f7c58a 1574 {
1575 # another example of '};' in ytInitialData
1576 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1577 'only_matching': True,
1578 },
1579 {
1580 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1581 'only_matching': True,
1582 },
545cc85d 1583 {
cc2db878 1584 # https://github.com/ytdl-org/youtube-dl/pull/28094
1585 'url': 'OtqTfy26tG0',
1586 'info_dict': {
1587 'id': 'OtqTfy26tG0',
1588 'ext': 'mp4',
1589 'title': 'Burn Out',
1590 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1591 'upload_date': '20141120',
1592 'uploader': 'The Cinematic Orchestra - Topic',
1593 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1594 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1595 'artist': 'The Cinematic Orchestra',
1596 'track': 'Burn Out',
1597 'album': 'Every Day',
1598 'release_data': None,
1599 'release_year': None,
1600 },
1601 'params': {
1602 'skip_download': True,
1603 },
545cc85d 1604 },
bc2ca1bb 1605 {
1606 # controversial video, only works with bpctr when authenticated with cookies
1607 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1608 'only_matching': True,
1609 },
a1a7907b 1610 {
1611 # controversial video, requires bpctr/contentCheckOk
1612 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
1613 'info_dict': {
1614 'id': 'SZJvDhaSDnc',
1615 'ext': 'mp4',
1616 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
1617 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
1618 'uploader': 'CBS This Morning',
11f9be09 1619 'uploader_id': 'CBSThisMorning',
a1a7907b 1620 'upload_date': '20140716',
1621 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7'
1622 }
1623 },
f7ad7160 1624 {
1625 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1626 'url': 'cBvYw8_A0vQ',
1627 'info_dict': {
1628 'id': 'cBvYw8_A0vQ',
1629 'ext': 'mp4',
1630 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1631 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1632 'upload_date': '20201120',
1633 'uploader': 'Walk around Japan',
1634 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1635 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1636 },
1637 'params': {
1638 'skip_download': True,
1639 },
0fb983f6 1640 }, {
1641 # Has multiple audio streams
1642 'url': 'WaOKSUlf4TM',
1643 'only_matching': True
9297939e 1644 }, {
1645 # Requires Premium: has format 141 when requested using YTM url
1646 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1647 'only_matching': True
1648 }, {
120916da 1649 # multiple subtitles with same lang_code
1650 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1651 'only_matching': True,
109dd3b2 1652 }, {
1653 # Force use android client fallback
1654 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1655 'info_dict': {
1656 'id': 'YOelRv7fMxY',
11f9be09 1657 'title': 'DIGGING A SECRET TUNNEL Part 1',
109dd3b2 1658 'ext': '3gp',
1659 'upload_date': '20210624',
1660 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1661 'uploader': 'colinfurze',
11f9be09 1662 'uploader_id': 'colinfurze',
109dd3b2 1663 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
11f9be09 1664 'description': 'md5:b5096f56af7ccd7a555c84db81738b22'
109dd3b2 1665 },
1666 'params': {
1667 'format': '17', # 3gp format available on android
1668 'extractor_args': {'youtube': {'player_client': ['android']}},
1669 },
120916da 1670 },
109dd3b2 1671 {
1672 # Skip download of additional client configs (remix client config in this case)
1673 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1674 'only_matching': True,
1675 'params': {
1676 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1677 },
8fc54b12 1678 }, {
1679 # shorts
1680 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
1681 'only_matching': True,
9222c381 1682 }, {
1683 'note': 'Storyboards',
1684 'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',
1685 'info_dict': {
1686 'id': '5KLPxDtMqe8',
1687 'ext': 'mhtml',
1688 'format_id': 'sb0',
1689 'title': 'Your Brain is Plastic',
1690 'uploader_id': 'scishow',
1691 'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',
1692 'upload_date': '20140324',
1693 'uploader': 'SciShow',
1694 }, 'params': {'format': 'mhtml', 'skip_download': True}
1695 }
2eb88d95
PH
1696 ]
1697
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    URLs carrying a non-empty ``list`` query parameter are rejected;
    everything else is decided by the parent class.
    """
    from ..utils import parse_qs

    list_values = parse_qs(url).get('list', [None])
    if list_values[0]:
        return False
    return super(YoutubeIE, cls).suitable(url)
1706
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Initialise the parent extractor and create the two per-instance caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # _code_cache: player id -> downloaded player JS
    # _player_cache: cache-key tuple -> extracted function / decrypted value
    self._code_cache, self._player_cache = {}, {}
e0df6211 1711
def _extract_player_url(self, *ytcfgs, webpage=None):
    """Return the absolute player JS URL found in any of *ytcfgs*, or None.

    Looks at ``PLAYER_JS_URL`` first, then at the ``jsUrl`` entries under
    ``WEB_PLAYER_CONTEXT_CONFIGS``. Protocol-relative and site-relative
    values are turned into full ``https`` URLs.
    """
    found = traverse_obj(
        ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
        get_all=False, expected_type=compat_str)
    if not found:
        return None
    if found.startswith('//'):
        return 'https:' + found
    if re.match(r'https?://', found):
        return found
    return compat_urlparse.urljoin('https://www.youtube.com', found)
1724
def _download_player_url(self, video_id, fatal=False):
    """Build the player ``base.js`` URL from the version found in the iframe API JS.

    Returns None when the download or the version lookup yields nothing.
    """
    iframe_js = self._download_webpage(
        'https://www.youtube.com/iframe_api',
        note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
    if not iframe_js:
        return None
    version = self._search_regex(
        r'player\\?/([0-9a-fA-F]{8})\\?/', iframe_js, 'player version', fatal=fatal)
    if not version:
        return None
    return f'https://www.youtube.com/s/player/{version}/player_ias.vflset/en_US/base.js'
1734
60064c53
PH
def _signature_cache_id(self, example_sig):
    """ Return a string representation of a signature """
    # The id is the dot-joined lengths of the dot-separated parts of the signature
    segment_lengths = [compat_str(len(segment)) for segment in example_sig.split('.')]
    return '.'.join(segment_lengths)
60064c53 1738
e40c758c
S
@classmethod
def _extract_player_info(cls, player_url):
    """Return the ``id`` group of the first ``_PLAYER_INFO_RE`` pattern matching *player_url*.

    Raises ExtractorError when none of the patterns match.
    """
    # Lazy generator: patterns after the first hit are never evaluated
    attempts = (re.search(pattern, player_url) for pattern in cls._PLAYER_INFO_RE)
    id_m = next(filter(None, attempts), None)
    if id_m is None:
        raise ExtractorError('Cannot identify player %r' % player_url)
    return id_m.group('id')
e40c758c 1748
def _load_player(self, video_id, player_url, fatal=True):
    """Return the player JS for *player_url*, downloading it at most once per player id.

    A successful download is remembered in ``self._code_cache``; when the
    download yields nothing, nothing is cached and None is returned.
    """
    player_id = self._extract_player_info(player_url)
    if player_id in self._code_cache:
        return self._code_cache[player_id]

    downloaded = self._download_webpage(
        player_url, video_id, fatal=fatal,
        note='Downloading player ' + player_id,
        errnote='Download of %s failed' % player_url)
    if downloaded:
        self._code_cache[player_id] = downloaded
    return self._code_cache.get(player_id)
109dd3b2 1759
def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable that transforms signatures shaped like *example_sig*.

    The 'youtube-sigfuncs' cache is consulted first; a cached entry is a list
    of source indices that is replayed directly. Otherwise the function is
    parsed out of the player JS, and the index list it produces for a probe
    string is written to the cache.
    """
    player_id = self._extract_player_info(player_url)

    # Read from filesystem cache
    func_id = 'js_%s_%s' % (
        player_id, self._signature_cache_id(example_sig))
    assert os.path.basename(func_id) == func_id

    cached_indices = self._downloader.cache.load('youtube-sigfuncs', func_id)
    if cached_indices is not None:
        def replay_cached(s):
            return ''.join(s[idx] for idx in cached_indices)
        return replay_cached

    player_code = self._load_player(video_id, player_url)
    if not player_code:
        return None

    sig_func = self._parse_sig_js(player_code)

    # Probe with the characters 0..len-1 so each output character reveals its source index
    probe = ''.join(map(compat_chr, range(len(example_sig))))
    index_spec = [ord(ch) for ch in sig_func(probe)]

    self._downloader.cache.store('youtube-sigfuncs', func_id, index_spec)
    return sig_func
83799698 1782
def _print_sig_code(self, func, example_sig):
    """Print Python code equivalent to *func* when 'youtube_print_sig_code' is set.

    *func* is probed with a string of the same length as *example_sig*; the
    resulting index sequence is rendered as a sum of slices/indexes of ``s``.
    """
    if not self.get_param('youtube_print_sig_code'):
        return

    def gen_sig_code(idxs):
        def _genslice(start, end, step):
            first = '' if start == 0 else str(start)
            stop = end + step
            last = ':' if stop < 0 else ':%d' % stop
            stride = '' if step == 1 else ':%d' % step
            return 's[%s%s%s]' % (first, last, stride)

        step = None
        # Placeholder to quell pyflakes warnings - start is always set together with step
        start = '(Never used)'
        for prev, i in zip(idxs[:-1], idxs[1:]):
            delta = i - prev
            if step is not None:
                # Inside a run: keep going while the stride holds, else flush it
                if delta != step:
                    yield _genslice(start, prev, step)
                    step = None
                continue
            if delta in (-1, 1):
                step, start = delta, prev
            else:
                yield 's[%d]' % prev
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    probe = ''.join(map(compat_chr, range(len(example_sig))))
    index_spec = [ord(ch) for ch in func(probe)]
    expr_code = ' + '.join(gen_sig_code(index_spec))
    part_lengths = ', '.join(compat_str(len(p)) for p in example_sig.split('.'))
    signature_id_tuple = '(%s)' % part_lengths
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1824
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Locate the signature function in the player JS and wrap it as a one-argument callable.

    The patterns are tried in the order listed; the ``sig`` group of the
    first match is the function name handed to the JS interpreter.
    """
    name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        # NOTE(review): the next entry is identical to the previous one
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        name_patterns, jscode, 'Initial JS player signature function name', group='sig')

    initial_function = JSInterpreter(jscode).extract_function(funcname)

    def run_signature(s):
        # The extracted function takes its arguments as a list
        return initial_function([s])

    return run_signature
1848
def _decrypt_signature(self, s, video_id, player_url):
    """Turn the encrypted s field into a working signature"""
    if player_url is None:
        raise ExtractorError('Cannot decrypt signature without player_url')

    try:
        # One extracted function per (player, signature shape) pair
        cache_key = (player_url, self._signature_cache_id(s))
        if cache_key not in self._player_cache:
            self._player_cache[cache_key] = self._extract_signature_function(
                video_id, player_url, s)
        sig_func = self._player_cache[cache_key]
        self._print_sig_code(sig_func, s)
        return sig_func(s)
    except Exception as e:
        raise ExtractorError('Signature extraction failed: ' + traceback.format_exc(), cause=e)
1867
def _decrypt_nsig(self, s, video_id, player_url):
    """Turn the encrypted n field into a working signature"""
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')

    # Normalise protocol-relative and site-relative player URLs
    if player_url.startswith('//'):
        player_url = 'https:' + player_url
    elif not re.match(r'https?://', player_url):
        player_url = compat_urlparse.urljoin(
            'https://www.youtube.com', player_url)

    # Decrypted values are memoised per input value
    value_key = ('nsig_value', s)
    if value_key in self._player_cache:
        return self._player_cache[value_key]

    try:
        func_key = ('nsig', player_url)
        if func_key not in self._player_cache:
            self._player_cache[func_key] = self._extract_n_function(video_id, player_url)
        n_func = self._player_cache[func_key]
        self._player_cache[value_key] = n_func(s)
        self.write_debug(f'Decrypted nsig {s} => {self._player_cache[value_key]}')
        return self._player_cache[value_key]
    except Exception as e:
        raise ExtractorError(traceback.format_exc(), cause=e, video_id=video_id)
404f611f 1892
def _extract_n_function_name(self, jscode):
    """Return the name of the n-parameter function found in the player JS."""
    name_patterns = (
        r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]{3})\([a-zA-Z0-9]\)',
    )
    return self._search_regex(
        name_patterns, jscode, 'Initial JS player n function name', group='nfunc')
1897
def _extract_n_function(self, video_id, player_url):
    """Return a callable that runs the player's n function on a single value.

    The function code is loaded from the 'youtube-nsig' cache when present;
    otherwise it is extracted from the player JS and stored there.
    """
    player_id = self._extract_player_info(player_url)
    func_code = self._downloader.cache.load('youtube-nsig', player_id)

    if not func_code:
        jscode = self._load_player(video_id, player_url)
        funcname = self._extract_n_function_name(jscode)
        jsi = JSInterpreter(jscode)
        func_code = jsi.extract_function_code(funcname)
        self._downloader.cache.store('youtube-nsig', player_id, func_code)
    else:
        jsi = JSInterpreter(func_code)

    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    def run_n_function(s):
        # The function is rebuilt from its code on every call, as in the original lambda
        return jsi.extract_function_from_code(*func_code)([s])

    return run_n_function
e0df6211 1915
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    The ``STS`` entry of *ytcfg* is preferred; when it is missing or falsy
    the value is searched for in the player JS instead.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return None

    player_code = self._load_player(video_id, player_url, fatal=fatal)
    if not player_code:
        # Hand back whatever ytcfg produced (None or a falsy number)
        return sts
    return int_or_none(self._search_regex(
        r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', player_code,
        'JS player signature timestamp', group='sts', fatal=fatal))
1939
def _mark_watched(self, video_id, player_responses):
    """Request the playback-tracking URL from the player responses to mark the video watched.

    Warns and returns when no tracking URL is present; the request itself is non-fatal.
    """
    tracking_url = get_first(
        player_responses, ('playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
        expected_type=url_or_none)
    if not tracking_url:
        self.report_warning('Unable to mark watched')
        return

    url_parts = compat_urlparse.urlparse(tracking_url)
    query = compat_urlparse.parse_qs(url_parts.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn_chars = [CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16)]

    query['ver'] = ['2']
    query['cpn'] = [''.join(cpn_chars)]
    encoded_query = compat_urllib_parse_urlencode(query, True)
    tracking_url = compat_urlparse.urlunparse(url_parts._replace(query=encoded_query))

    self._download_webpage(
        tracking_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1965
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """Collect YouTube embed URLs / video ids found in *webpage*.

    Three sources are scanned, in this order: generic embed markup
    (iframe, embed, object, data-video-url, embedSWF, SWFObject), lazyYT
    containers, and the Wordpress "YouTube Video Importer" plugin markup.
    Returns a list of strings; the last two sources contribute bare ids.
    """
    # Embedded YouTube player
    # The embedSWF alternative used to read `embedSWF\(?:\s*`, i.e. an optional
    # "(" followed by a literal ":", so it could never match `embedSWF("…")`.
    entries = [
        unescapeHTML(mobj.group('url'))
        for mobj in re.finditer(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
        (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''', webpage)]

    # lazyYT YouTube embed
    entries.extend(map(
        unescapeHTML,
        re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)))

    # Wordpress "YouTube Video Importer" plugin
    matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # findall yields one tuple per match; the video id is the last group
    entries.extend(m[-1] for m in matches)

    return entries
1997
@staticmethod
def _extract_url(webpage):
    """Return the first entry found by ``_extract_urls`` for *webpage*, or None."""
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]
2002
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the ``id`` group of ``_VALID_URL`` matched against *url*.

    Raises ExtractorError when the URL does not match.
    """
    matched = re.match(cls._VALID_URL, url, re.VERBOSE)
    if matched is not None:
        return matched.group('id')
    raise ExtractorError('Invalid URL: %s' % url)
c5e8d7af 2009
def _extract_chapters_from_json(self, data, duration):
    """Build chapters from the chaptered player bar entries in *data*.

    Start times are read from ``timeRangeStartMillis`` (scaled by 1000) and
    titles from ``title.simpleText`` of each ``chapterRenderer``.
    """
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
    )
    chapter_list = traverse_obj(data, chapters_path, expected_type=list)

    def start_seconds(chapter):
        return float_or_none(
            traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000)

    def title_of(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    return self._extract_chapters(
        chapter_list,
        chapter_time=start_seconds,
        chapter_title=title_of,
        duration=duration)
2024
def _extract_chapters_from_engagement_panel(self, data, duration):
    """Build chapters from the macro-marker lists in the engagement panels of *data*.

    Returns the chapters of the first list that yields any, else an empty list.
    """
    content_list = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    for contents in content_list:
        chapters = self._extract_chapters(
            traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
            chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []
2040
2041 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
84213ea8 2042 chapters = []
7c365c21 2043 last_chapter = {'start_time': 0}
2044 for idx, chapter in enumerate(chapter_list or []):
2045 title = chapter_title(chapter)
84213ea8
S
2046 start_time = chapter_time(chapter)
2047 if start_time is None:
2048 continue
7c365c21 2049 last_chapter['end_time'] = start_time
2050 if start_time < last_chapter['start_time']:
2051 if idx == 1:
2052 chapters.pop()
2053 self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])
2054 else:
2055 self.report_warning(f'Invalid start time for chapter "{title}"')
2056 continue
2057 last_chapter = {'start_time': start_time, 'title': title}
2058 chapters.append(last_chapter)
2059 last_chapter['end_time'] = duration
84213ea8
S
2060 return chapters
2061
def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
    """Find the JSON matched by *regex* in *webpage* and parse it non-fatally.

    The boundary-anchored variant of the pattern is tried before the bare
    one; when neither matches, the text '{}' is parsed instead.
    """
    patterns = (
        r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
        regex,
    )
    raw_json = self._search_regex(patterns, webpage, name, default='{}')
    return self._parse_json(raw_json, video_id, fatal=False)
84213ea8 2066
@staticmethod
def parse_time_text(time_text):
    """
    Parse the comment time text
    time_text is in the format 'X units ago (edited)'

    Returns None when the text has fewer than three space-separated parts
    or when the date helper raises ValueError.
    """
    words = time_text.split(' ')
    if len(words) < 3:
        return None
    amount, unit = words[0], words[1]
    try:
        return datetime_from_str('now-%s%s' % (amount, unit), precision='auto')
    except ValueError:
        return None
d92f5d5a 2079
a1c5d2ca
M
def _extract_comment(self, comment_renderer, parent=None):
    """Convert a comment renderer dict into a comment info dict.

    Returns None when the renderer has no ``commentId``. ``timestamp`` is
    None when the published-time text cannot be parsed; ``parent`` is
    'root' for top-level comments.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return

    text = self._get_text(comment_renderer, 'contentText')

    # note: timestamp is an estimate calculated from the current time and time_text
    time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
    time_text_dt = self.parse_time_text(time_text)
    # Default to None so an unparsable time text no longer leaves the name
    # unbound when the result dict is built below
    timestamp = None
    if isinstance(time_text_dt, datetime.datetime):
        timestamp = calendar.timegm(time_text_dt.timetuple())
    author = self._get_text(comment_renderer, 'authorText')
    author_id = try_get(comment_renderer,
                        lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)

    votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
                                                   lambda x: x['likeCount']), compat_str)) or 0
    author_thumbnail = try_get(comment_renderer,
                               lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)

    author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
    # Favorited means a 'creatorHeart' entry exists among the action buttons
    is_favorited = 'creatorHeart' in (try_get(
        comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
    return {
        'id': comment_id,
        'text': text,
        'timestamp': timestamp,
        'time_text': time_text,
        'like_count': votes,
        'is_favorited': is_favorited,
        'author': author,
        'author_id': author_id,
        'author_thumbnail': author_thumbnail,
        'author_is_uploader': author_is_uploader,
        'parent': parent or 'root'
    }
2117
def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """Generate comment dicts by following the continuations in *root_continuation_data*.

    Called with ``parent=None`` for the top-level comment section and
    recursively, with the parent comment id, for each reply thread.
    *tracker* is a dict of running counters shared across those calls.
    The 'max_comments' extractor argument supplies up to four limits:
    total, parents, replies, replies per thread.
    """

    def get_single_config_arg(name):
        return self._configuration_arg(name, [''])[0]

    def extract_header(contents):
        # Returns the continuation of the requested sort order, if any header has one
        header_continuation = None
        for content in contents:
            header = traverse_obj(content, 'commentsHeaderRenderer')
            expected_total = parse_count(self._get_text(
                header, 'countText', 'commentsCount', max_runs=1))

            if expected_total:
                tracker['est_total'] = expected_total
                self.to_screen(f'Downloading ~{expected_total} comments')
            sort_index = int(get_single_config_arg('comment_sort') != 'top')  # 1 = new, 0 = top

            sort_item = try_get(
                header,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][sort_index], dict) or {}
            sort_endpoint = sort_item.get('serviceEndpoint') or {}

            header_continuation = (
                self._extract_continuation_ep_data(sort_endpoint)
                or self._extract_continuation(sort_item))
            if not header_continuation:
                continue

            sort_text = str_or_none(sort_item.get('title'))
            if not sort_text:
                sort_text = 'top comments' if sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text.lower())
            break
        return header_continuation

    def extract_thread(contents):
        if not parent:
            tracker['current_page_thread'] = 0
        for content in contents:
            if not parent and tracker['total_parent_comments'] >= max_parents:
                # A bare yield (None) tells the consumer below to stop
                yield
            thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            comment_renderer = get_first(
                (thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                expected_type=dict, default={})

            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue

            tracker['running_total'] += 1
            counter_key = 'total_reply_comments' if parent else 'total_parent_comments'
            tracker[counter_key] += 1
            yield comment

            # Attempt to get the replies
            replies_renderer = try_get(
                thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)
            if not replies_renderer:
                continue

            tracker['current_page_thread'] += 1
            replies = self._comment_entries(
                replies_renderer, ytcfg, video_id,
                parent=comment.get('id'), tracker=tracker)
            reply_budget = min(
                max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments']))
            yield from itertools.islice(replies, reply_budget)

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = {
            'running_total': 0,
            'est_total': 0,
            'current_page_thread': 0,
            'total_parent_comments': 0,
            'total_reply_comments': 0,
        }

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
    if max_depth:
        self._downloader.deprecation_warning(
            '[youtube] max_comment_depth extractor argument is deprecated. Set max replies in the max-comments extractor argument instead.')
    if max_depth == 1 and parent:
        return

    # Missing or non-numeric limits fall back to sys.maxsize.
    # max_comments is not used in this function; _get_comments slices by it.
    limit_args = self._configuration_arg('max_comments', ) + [''] * 4
    max_comments, max_parents, max_replies, max_replies_per_thread, *_ = (
        int_or_none(arg, default=sys.maxsize) for arg in limit_args)

    continuation = self._extract_continuation(root_continuation_data)
    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent:
        self.report_warning(message, video_id=video_id)

    response = None
    is_first_continuation = parent is None

    page_num = -1
    while continuation:
        page_num += 1
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        comment_prog_str = f"({tracker['running_total']}/{tracker['est_total']})"
        if page_num != 0:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)
        elif is_first_continuation:
            note_prefix = 'Downloading comment section API JSON'
        else:
            note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                tracker['current_page_thread'], comment_prog_str)

        response = self._extract_response(
            item_id=None, query=continuation,
            ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
            check_get_keys='onResponseReceivedEndpoints')

        sections = traverse_obj(
            response, 'onResponseReceivedEndpoints', expected_type=list, default=[])

        continuation = None
        for section in sections:
            items = traverse_obj(
                section,
                (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
                get_all=False, expected_type=list) or []
            if is_first_continuation:
                # The first response of the top-level call only carries the header
                continuation = extract_header(items)
                is_first_continuation = False
                if continuation:
                    break
                continue

            for entry in extract_thread(items):
                if not entry:
                    return
                yield entry
            continuation = self._extract_continuation({'contents': items})
            if continuation:
                break
def _get_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction"""
    total_limit = int_or_none(self._configuration_arg('max_comments', [''])[0])

    # Force English regardless of account setting to prevent parsing issues
    # See: https://github.com/yt-dlp/yt-dlp/issues/532
    english_ytcfg = copy.deepcopy(ytcfg)
    client_context = traverse_obj(
        english_ytcfg, ('INNERTUBE_CONTEXT', 'client'), expected_type=dict, default={})
    client_context['hl'] = 'en'

    def _real_comment_extract(section_contents):
        # Pick the first item section identified as the comment section
        comment_section = None
        for candidate in traverse_obj(section_contents, (..., 'itemSectionRenderer'), default={}):
            if candidate.get('sectionIdentifier') == 'comment-item-section':
                comment_section = candidate
                break
        yield from self._comment_entries(comment_section, english_ytcfg, video_id)

    return itertools.islice(_real_comment_extract(contents), 0, total_limit)
a1c5d2ca 2270
109dd3b2 2271 @staticmethod
99e9e001 2272 def _get_checkok_params():
2273 return {'contentCheckOk': True, 'racyCheckOk': True}
2274
@classmethod
def _generate_player_context(cls, sts=None):
    """Build the playback-context part of a player query.

    ``signatureTimestamp`` is included only when *sts* is not None; the
    check-ok parameters are merged into the top level of the result.
    """
    playback_context = {'html5Preference': 'HTML5_PREF_WANTS'}
    if sts is not None:
        playback_context['signatureTimestamp'] = sts

    player_context = {
        'playbackContext': {
            'contentPlaybackContext': playback_context
        },
    }
    player_context.update(cls._get_checkok_params())
    return player_context
2288
e7e94f2a
D
@staticmethod
def _is_agegated(player_response):
    """Tell whether the playability status of *player_response* indicates an age gate.

    True when ``desktopLegacyAgeGateReason`` is set, or when the status or
    reason text contains one of the known age-gate markers.
    """
    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )

    legacy_reason = traverse_obj(
        player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason'))
    if legacy_reason:
        return True

    status_texts = traverse_obj(
        player_response, ('playabilityStatus', ('status', 'reason')), default=[])
    return any(
        marker in status_text
        for marker in AGE_GATE_REASONS
        for status_text in status_texts)
2300
@staticmethod
def _is_unplayable(player_response):
    """Tell whether the playability status of *player_response* is 'UNPLAYABLE'."""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'
9275f62c 2304
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr):
    """Request the 'player' endpoint for *video_id* as *client*.

    Returns the response, or None when the response is falsy. The signature
    timestamp is only looked up when *player_url* is given.
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
    if player_url:
        sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)
    else:
        sts = None
    headers = self.generate_api_headers(
        ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)

    yt_query = {'videoId': video_id}
    yt_query.update(self._generate_player_context(sts))

    client_label = client.replace('_', ' ').strip()
    player_response = self._extract_response(
        item_id=video_id, ep='player', query=yt_query,
        ytcfg=player_ytcfg, headers=headers, fatal=True,
        default_client=client,
        note='Downloading %s player API JSON' % client_label)
    return player_response or None
2321
def _get_requested_clients(self, url, smuggled_data):
    """Resolve the `player_client` extractor argument into an ordered, de-duplicated client list.

    Accepts explicit client names (those in INNERTUBE_CLIENTS not starting with
    an underscore) as well as the aliases `default` and `all`; unknown names are
    skipped with a warning. Falls back to the defaults when nothing valid was
    requested. For music URLs, the `<client>_music` variant of every requested
    client is added when such a client exists.
    """
    default = ['android', 'web']
    # Highest priority first
    allowed_clients = sorted(
        (client for client in INNERTUBE_CLIENTS if not client.startswith('_')),
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)

    requested_clients = []
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'default':
            requested_clients.extend(default)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        # Copy so that the extension below never mutates the defaults
        requested_clients = list(default)

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Build the additions from a snapshot: extending a list from a generator
        # over that same list would also iterate the entries being added
        requested_clients.extend([
            f'{client}_music' for client in requested_clients
            if f'{client}_music' in INNERTUBE_CLIENTS])

    return orderedSet(requested_clients)
cf7e015f 2345
c0bc527b
M
def _extract_player_ytcfg(self, client, video_id):
    """Download the page that carries the ytcfg of the given client.

    Only `web_music` and `web_embedded` have such a page; any other client
    yields an empty dict, as does a failed download/extraction.
    """
    if client == 'web_music':
        config_url = 'https://music.youtube.com'
    elif client == 'web_embedded':
        config_url = f'https://www.youtube.com/embed/{video_id}?html5=1'
    else:
        return {}

    webpage = self._download_webpage(
        config_url, video_id, fatal=False, note=f'Downloading {client} config')
    ytcfg = self.extract_ytcfg(video_id, webpage)
    return ytcfg or {}
2355
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
    """Collect the player responses of all requested clients.

    Clients are processed in the given order; age-gate/creator variants may be
    queued while processing. Errors of individual clients are downgraded to
    warnings as long as at least one response was obtained; otherwise the last
    error is raised.

    @returns  (list of player responses, player URL or None)
    """
    initial_pr = None
    if webpage:
        initial_pr = self._extract_yt_initial_variable(
            webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
            video_id, 'initial player response')

    original_clients = clients
    # Reversed so that pop() takes clients in the requested order
    pending = clients[::-1]
    prs = []

    def append_client(client_name):
        if client_name not in INNERTUBE_CLIENTS:
            return
        if client_name not in original_clients:
            pending.append(client_name)

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr:
        stripped_pr = dict(initial_pr)
        stripped_pr['streamingData'] = None
        prs.append(stripped_pr)

    last_error = None
    tried_iframe_fallback = False
    player_url = None
    while pending:
        client = pending.pop()

        player_ytcfg = {}
        if client == 'web':
            player_ytcfg = master_ytcfg
        if 'configs' not in self._configuration_arg('player_skip'):
            player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg

        player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        if require_js_player and not player_url and not tried_iframe_fallback:
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            if client == 'web' and initial_pr:
                pr = initial_pr
            else:
                pr = self._extract_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg,
                    player_url if require_js_player else None, initial_pr)
        except ExtractorError as e:
            # Only the most recent error is kept; earlier ones become warnings
            if last_error:
                self.report_warning(last_error)
            last_error = e
            continue

        if pr:
            prs.append(pr)

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if client.endswith('_agegate') and self._is_unplayable(pr) and self.is_authenticated:
            append_client(client.replace('_agegate', '_creator'))
        elif self._is_agegated(pr):
            append_client(f'{client}_agegate')

    if last_error:
        if not prs:
            raise last_error
        self.report_warning(last_error)
    return prs, player_url
11f9be09 2422
def _extract_formats(self, streaming_data, video_id, player_url, is_live):
    """Yield format dicts built from the streaming data of all player responses.

    Direct (https) formats are yielded first, followed by the formats found in
    the HLS and DASH manifests unless those are skipped via the `skip`
    extractor argument or the corresponding params. Formats that need a
    signature are dropped when no player URL is available.
    """
    itags, stream_ids = {}, []
    itag_qualities, res_qualities = {}, {}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            quality = fmt.get('audioQuality', '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragments that would subsequently be requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            sc = compat_parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not (sc and fmt_url and encrypted_sig):
                continue
            if not player_url:
                continue
            signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
            sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
            fmt_url += '&' + sp + '=' + signature

        query = parse_qs(fmt_url)
        throttled = False
        if query.get('n'):
            try:
                fmt_url = update_url_query(fmt_url, {
                    'n': self._decrypt_nsig(query['n'][0], video_id, player_url)})
            except ExtractorError as e:
                self.report_warning(
                    f'nsig extraction failed: You may experience throttling for some formats\n'
                    f'n = {query["n"][0]} ; player = {player_url}\n{e}', only_once=True)
                throttled = True

        if itag:
            itags[itag] = 'https'
            stream_ids.append(stream_id)

        tbr = float_or_none(
            fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': join_nonempty(
                '%s%s' % (audio_track.get('displayName') or '',
                          ' (default)' if audio_track.get('audioIsDefault') else ''),
                # `quality` is None when the format declares neither a quality
                # nor an audio quality; do not let that abort the extraction
                fmt.get('qualityLabel') or (quality or '').replace('audio_quality_', ''),
                throttled and 'THROTTLED', delim=', '),
            'source_preference': -10 if throttled else -1,
            'fps': int_or_none(fmt.get('fps')) or None,
            'height': height,
            'quality': q(quality),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': audio_track.get('id', '').split('.')[0],
            'language_preference': 1 if audio_track.get('audioIsDefault') else -1,
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        yield dct

    skip_manifests = self._configuration_arg('skip')
    get_dash = (
        (not is_live or self._configuration_arg('include_live_dash'))
        and 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True))
    get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)

    def process_manifest_format(f, proto, itag):
        """Assign format_id and quality to a manifest format; False means it is a duplicate"""
        if itag in itags:
            if itags[itag] == proto or f'{itag}-{proto}' in itags:
                return False
            # Same itag already seen over another protocol: disambiguate the id
            itag = f'{itag}-{proto}'
        if itag:
            f['format_id'] = itag
            itags[itag] = proto

        # Reuse the quality recorded for the same itag, else for the same height
        f['quality'] = next((
            q(qdict[val])
            for val, qdict in ((f.get('format_id', '').split('-')[0], itag_qualities), (f.get('height'), res_qualities))
            if val in qdict), -1)
        return True

    for sd in streaming_data:
        hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
                if process_manifest_format(f, 'hls', self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)):
                    yield f

        dash_manifest_url = get_dash and sd.get('dashManifestUrl')
        if dash_manifest_url:
            for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
                if process_manifest_format(f, 'dash', f['format_id']):
                    f['filesize'] = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                    yield f
11f9be09 2570
def _extract_storyboard(self, player_responses, duration):
    """Yield one `mhtml` storyboard format per level of the storyboard spec.

    The spec is a `|`-separated string whose first field is the base URL and
    whose remaining fields describe one storyboard level each (8 `#`-separated
    values). Malformed levels are reported and skipped.

    @param duration  Video duration; needed to compute the fragment durations.
                     Nothing is yielded when it is unknown.
    """
    spec = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
    # str.split() always returns at least one element, so the pop is safe;
    # the base URL is simply empty when there is no spec
    base_url = spec.pop()
    if not base_url or not duration:
        return
    L = len(spec) - 1
    for i, args in enumerate(spec):
        args = args.split('#')
        counts = list(map(int_or_none, args[:5]))
        if len(args) != 8 or not all(counts):
            self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
            continue
        width, height, frame_count, cols, rows = counts
        N, sigh = args[6:]

        url = base_url.replace('$L', str(L - i)).replace('$N', N) + f'&sigh={sigh}'
        # Each fragment is a sheet of cols x rows frames; the last one may be partial
        fragment_count = frame_count / (cols * rows)
        fragment_duration = duration / fragment_count
        yield {
            'format_id': f'sb{i}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': url,
            'width': width,
            'height': height,
            'fragments': [{
                'path': url.replace('$M', str(j)),
                'duration': min(fragment_duration, duration - (j * fragment_duration)),
            } for j in range(math.ceil(fragment_count))],
        }
2605
def _real_extract(self, url):
    """Extract the info dict of a single video.

    Downloads the watch page (unless `webpage` is listed in the `player_skip`
    extractor argument) and the player responses of the requested clients,
    then assembles formats, thumbnails, subtitles and metadata from them.
    May instead return a url result for a trailer, or a playlist result for
    multifeed videos.
    """
    url, smuggled_data = unsmuggle_url(url, {})
    video_id = self._match_id(url)

    base_url = self.http_scheme() + '//www.youtube.com/'
    webpage_url = base_url + 'watch?v=' + video_id
    webpage = None
    if 'webpage' not in self._configuration_arg('player_skip'):
        webpage = self._download_webpage(
            webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)

    master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()

    player_responses, player_url = self._extract_player_responses(
        self._get_requested_clients(url, smuggled_data),
        video_id, webpage, master_ytcfg)

    playability_statuses = traverse_obj(
        player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])

    trailer_video_id = get_first(
        playability_statuses,
        ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
        expected_type=str)
    if trailer_video_id:
        return self.url_result(
            trailer_video_id, self.ie_key(), trailer_video_id)

    search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
                   if webpage else (lambda x: None))

    video_details = traverse_obj(
        player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
    microformats = traverse_obj(
        player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
        expected_type=dict, default=[])
    video_title = (
        get_first(video_details, 'title')
        or self._get_text(microformats, (..., 'title'))
        or search_meta(['og:title', 'twitter:title', 'title']))
    video_description = get_first(video_details, 'shortDescription')

    multifeed_metadata_list = get_first(
        player_responses,
        ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
        expected_type=str)
    if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):
        if self.get_param('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
        else:
            entries = []
            feed_ids = []
            for feed in multifeed_metadata_list.split(','):
                # Unquote should take place before split on comma (,) since textual
                # fields may contain comma as well (see
                # https://github.com/ytdl-org/youtube-dl/issues/8536)
                feed_data = compat_parse_qs(
                    compat_urllib_parse_unquote_plus(feed))

                def feed_entry(name):
                    return try_get(
                        feed_data, lambda x: x[name][0], compat_str)

                feed_id = feed_entry('id')
                if not feed_id:
                    continue
                feed_title = feed_entry('title')
                title = video_title
                if feed_title:
                    title += ' (%s)' % feed_title
                entries.append({
                    '_type': 'url_transparent',
                    'ie_key': 'Youtube',
                    'url': smuggle_url(
                        '%swatch?v=%s' % (base_url, feed_data['id'][0]),
                        {'force_singlefeed': True}),
                    'title': title,
                })
                feed_ids.append(feed_id)
            self.to_screen(
                'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
                % (', '.join(feed_ids), video_id))
            return self.playlist_result(
                entries, video_id, video_title, video_description)

    live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
    is_live = get_first(video_details, 'isLive')
    if is_live is None:
        is_live = get_first(live_broadcast_details, 'isLiveNow')

    streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
    formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live))

    if not formats:
        if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
            self.report_drm(video_id)
        pemr = get_first(
            playability_statuses,
            ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
        reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
        subreason = clean_html(self._get_text(pemr, 'subreason') or '')
        if subreason:
            if subreason == 'The uploader has not made this video available in your country.':
                countries = get_first(microformats, 'availableCountries')
                if not countries:
                    regions_allowed = search_meta('regionsAllowed')
                    countries = regions_allowed.split(',') if regions_allowed else None
                self.raise_geo_restricted(subreason, countries, metadata_available=True)
            # A subreason may be present without any reason
            reason = f'{reason}. {subreason}' if reason else subreason
        if reason:
            self.raise_no_formats(reason, expected=True)

    keywords = get_first(video_details, 'keywords', expected_type=list) or []
    if not keywords and webpage:
        keywords = [
            unescapeHTML(m.group('content'))
            for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
    for keyword in keywords:
        if keyword.startswith('yt:stretch='):
            mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
            if mobj:
                # NB: float is intentional for forcing float division
                w, h = (float(v) for v in mobj.groups())
                if w > 0 and h > 0:
                    ratio = w / h
                    for f in formats:
                        if f.get('vcodec') != 'none':
                            f['stretched_ratio'] = ratio
                    break

    thumbnails = []
    thumbnail_dicts = traverse_obj(
        (video_details, microformats), (..., ..., 'thumbnail', 'thumbnails', ...),
        expected_type=dict, default=[])
    for thumbnail in thumbnail_dicts:
        thumbnail_url = thumbnail.get('url')
        if not thumbnail_url:
            continue
        # Sometimes youtube gives a wrong thumbnail URL. See:
        # https://github.com/yt-dlp/yt-dlp/issues/233
        # https://github.com/ytdl-org/youtube-dl/issues/28023
        if 'maxresdefault' in thumbnail_url:
            thumbnail_url = thumbnail_url.split('?')[0]
        thumbnails.append({
            'url': thumbnail_url,
            'height': int_or_none(thumbnail.get('height')),
            'width': int_or_none(thumbnail.get('width')),
        })
    thumbnail_url = search_meta(['og:image', 'twitter:image'])
    if thumbnail_url:
        thumbnails.append({
            'url': thumbnail_url,
        })
    original_thumbnails = thumbnails.copy()

    # The best resolution thumbnails sometimes do not appear in the webpage
    # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
    # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
    thumbnail_names = [
        'maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3',
        'hqdefault', 'hq1', 'hq2', 'hq3', '0',
        'mqdefault', 'mq1', 'mq2', 'mq3',
        'default', '1', '2', '3'
    ]
    n_thumbnail_names = len(thumbnail_names)
    thumbnails.extend({
        'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
            video_id=video_id, name=name, ext=ext,
            webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
    } for name in thumbnail_names for ext in ('webp', 'jpg'))
    for thumb in thumbnails:
        # Earlier names in the list are preferred; webp is preferred over jpg
        i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
        thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
    self._remove_duplicate_formats(thumbnails)
    self._downloader._sort_thumbnails(original_thumbnails)

    category = get_first(microformats, 'category') or search_meta('genre')
    channel_id = str_or_none(
        get_first(video_details, 'channelId')
        or get_first(microformats, 'externalChannelId')
        or search_meta('channelId'))
    duration = int_or_none(
        get_first(video_details, 'lengthSeconds')
        or get_first(microformats, 'lengthSeconds')
        or parse_duration(search_meta('duration'))) or None
    owner_profile_url = get_first(microformats, 'ownerProfileUrl')

    live_content = get_first(video_details, 'isLiveContent')
    is_upcoming = get_first(video_details, 'isUpcoming')
    if is_live is None:
        if is_upcoming or live_content is False:
            is_live = False
    if is_upcoming is None and (live_content or is_live):
        is_upcoming = False
    live_starttime = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
    live_endtime = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
    if not duration and live_endtime and live_starttime:
        duration = live_endtime - live_starttime

    formats.extend(self._extract_storyboard(player_responses, duration))

    # Source is given priority since formats that throttle are given lower source_preference
    # When throttling issue is fully fixed, remove this
    self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang', 'proto'))

    info = {
        'id': video_id,
        'title': self._live_title(video_title) if is_live else video_title,
        'formats': formats,
        'thumbnails': thumbnails,
        # The best thumbnail that we are sure exists. Prevents unnecessary
        # URL checking if the user doesn't care about getting the best possible thumbnail
        'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
        'description': video_description,
        'upload_date': unified_strdate(
            get_first(microformats, 'uploadDate')
            or search_meta('uploadDate')),
        'uploader': get_first(video_details, 'author'),
        'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
        'uploader_url': owner_profile_url,
        'channel_id': channel_id,
        'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
        'duration': duration,
        'view_count': int_or_none(
            get_first((video_details, microformats), (..., 'viewCount'))
            or search_meta('interactionCount')),
        'average_rating': float_or_none(get_first(video_details, 'averageRating')),
        'age_limit': 18 if (
            get_first(microformats, 'isFamilySafe') is False
            or search_meta('isFamilyFriendly') == 'false'
            or search_meta('og:restrictions:age') == '18+') else 0,
        'webpage_url': webpage_url,
        'categories': [category] if category else None,
        'tags': keywords,
        'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
        'is_live': is_live,
        'was_live': (False if is_live or is_upcoming or live_content is False
                     else None if is_live is None or is_upcoming is None
                     else live_content),
        'live_status': 'is_upcoming' if is_upcoming else None,  # rest will be set by YoutubeDL
        'release_timestamp': live_starttime,
    }

    pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
    if pctr:
        def get_lang_code(track):
            return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
                    or track.get('languageCode'))

        # Converted into dicts to remove duplicates
        captions = {
            get_lang_code(sub): sub
            for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
        translation_languages = {
            lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
            for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}

        def process_language(container, base_url, lang_code, sub_name, query):
            lang_subs = container.setdefault(lang_code, [])
            for fmt in self._SUBTITLE_FORMATS:
                query.update({
                    'fmt': fmt,
                })
                lang_subs.append({
                    'ext': fmt,
                    'url': update_url_query(base_url, query),
                    'name': sub_name,
                })

        subtitles, automatic_captions = {}, {}
        for lang_code, caption_track in captions.items():
            base_url = caption_track.get('baseUrl')
            if not base_url:
                continue
            lang_name = self._get_text(caption_track, 'name', max_runs=1)
            if caption_track.get('kind') != 'asr':
                if not lang_code:
                    continue
                process_language(
                    subtitles, base_url, lang_code, lang_name, {})
                if not caption_track.get('isTranslatable'):
                    continue
            for trans_code, trans_name in translation_languages.items():
                if not trans_code:
                    continue
                # `tlang` must stay the bare target language, even though the
                # key below gets suffixed with the source language
                translation_query = {'tlang': trans_code}
                if caption_track.get('kind') != 'asr':
                    trans_code += f'-{lang_code}'
                    trans_name += format_field(lang_name, template=' from %s')
                process_language(
                    automatic_captions, base_url, trans_code, trans_name, translation_query)
        info['automatic_captions'] = automatic_captions
        info['subtitles'] = subtitles

    # Start/end time given in the URL; the fragment takes precedence over the query
    parsed_url = compat_urllib_parse_urlparse(url)
    for component in [parsed_url.fragment, parsed_url.query]:
        query = compat_parse_qs(component)
        for k, v in query.items():
            for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
                d_k += '_time'
                if d_k not in info and k in s_ks:
                    info[d_k] = parse_duration(v[0])

    # Youtube Music Auto-generated description
    if video_description:
        mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
        if mobj:
            release_year = mobj.group('release_year')
            release_date = mobj.group('release_date')
            if release_date:
                release_date = release_date.replace('-', '')
                if not release_year:
                    release_year = release_date[:4]
            info.update({
                # Strip the matched text, not the group name
                'album': mobj.group('album').strip(),
                'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
                'track': mobj.group('track').strip(),
                'release_date': release_date,
                'release_year': int_or_none(release_year),
            })

    initial_data = None
    if webpage:
        initial_data = self._extract_yt_initial_variable(
            webpage, self._YT_INITIAL_DATA_RE, video_id,
            'yt initial data')
    if not initial_data:
        query = {'videoId': video_id}
        query.update(self._get_checkok_params())
        initial_data = self._extract_response(
            item_id=video_id, ep='next', fatal=False,
            ytcfg=master_ytcfg, query=query,
            headers=self.generate_api_headers(ytcfg=master_ytcfg),
            note='Downloading initial data API JSON')

    try:
        # This will error if there is no livechat
        initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
        info.setdefault('subtitles', {})['live_chat'] = [{
            'url': 'https://www.youtube.com/watch?v=%s' % video_id,  # url is needed to set cookies
            'video_id': video_id,
            'ext': 'json',
            'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
        }]
    except (KeyError, IndexError, TypeError):
        pass

    # Computed unconditionally since it is also needed for the availability
    # badges and the comment extraction below, even without initial data
    contents = try_get(
        initial_data,
        lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
        list) or []

    if initial_data:
        info['chapters'] = (
            self._extract_chapters_from_json(initial_data, duration)
            or self._extract_chapters_from_engagement_panel(initial_data, duration)
            or None)

        for content in contents:
            vpir = content.get('videoPrimaryInfoRenderer')
            if vpir:
                stl = vpir.get('superTitleLink')
                if stl:
                    stl = self._get_text(stl)
                    if try_get(
                            vpir,
                            lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
                        info['location'] = stl
                    else:
                        mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
                        if mobj:
                            info.update({
                                'series': mobj.group(1),
                                'season_number': int(mobj.group(2)),
                                'episode_number': int(mobj.group(3)),
                            })
                for tlb in (try_get(
                        vpir,
                        lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
                        list) or []):
                    tbr = tlb.get('toggleButtonRenderer') or {}
                    for getter, regex in [(
                            lambda x: x['defaultText']['accessibility']['accessibilityData'],
                            r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
                                lambda x: x['accessibility'],
                                lambda x: x['accessibilityData']['accessibilityData'],
                            ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
                        label = (try_get(tbr, getter, dict) or {}).get('label')
                        if label:
                            mobj = re.match(regex, label)
                            if mobj:
                                info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
                                break
                sbr_tooltip = try_get(
                    vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
                if sbr_tooltip:
                    # Expected to look like "<likes> / <dislikes>"; ignore anything else
                    sentiment_counts = sbr_tooltip.split(' / ')
                    if len(sentiment_counts) == 2:
                        like_count, dislike_count = sentiment_counts
                        info.update({
                            'like_count': str_to_int(like_count),
                            'dislike_count': str_to_int(dislike_count),
                        })
            vsir = content.get('videoSecondaryInfoRenderer')
            if vsir:
                info['channel'] = self._get_text(vsir, ('owner', 'videoOwnerRenderer', 'title'))
                rows = try_get(
                    vsir,
                    lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
                    list) or []
                # A divider line separates the metadata of several songs, in
                # which case album/artist/track are ambiguous and not set
                multiple_songs = False
                for row in rows:
                    if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
                        multiple_songs = True
                        break
                for row in rows:
                    mrr = row.get('metadataRowRenderer') or {}
                    mrr_title = mrr.get('title')
                    if not mrr_title:
                        continue
                    mrr_title = self._get_text(mrr, 'title')
                    mrr_contents_text = self._get_text(mrr, ('contents', 0))
                    if mrr_title == 'License':
                        info['license'] = mrr_contents_text
                    elif not multiple_songs:
                        if mrr_title == 'Album':
                            info['album'] = mrr_contents_text
                        elif mrr_title == 'Artist':
                            info['artist'] = mrr_contents_text
                        elif mrr_title == 'Song':
                            info['track'] = mrr_contents_text

    fallbacks = {
        'channel': 'uploader',
        'channel_id': 'uploader_id',
        'channel_url': 'uploader_url',
    }
    for to, frm in fallbacks.items():
        if not info.get(to):
            info[to] = info.get(frm)

    for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
        v = info.get(s_k)
        if v:
            info[d_k] = v

    is_private = get_first(video_details, 'isPrivate', expected_type=bool)
    is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
    is_membersonly = None
    is_premium = None
    if initial_data and is_private is not None:
        is_membersonly = False
        is_premium = False
        badge_labels = set()
        for content in contents:
            if not isinstance(content, dict):
                continue
            badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
        for badge_label in badge_labels:
            if badge_label.lower() == 'members only':
                is_membersonly = True
            elif badge_label.lower() == 'premium':
                is_premium = True
            elif badge_label.lower() == 'unlisted':
                is_unlisted = True

    info['availability'] = self._availability(
        is_private=is_private,
        needs_premium=is_premium,
        needs_subscription=is_membersonly,
        needs_auth=info['age_limit'] >= 18,
        is_unlisted=None if is_private is None else is_unlisted)

    info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)

    self.mark_watched(video_id, player_responses)

    return info
c5e8d7af 3081
a61fd4cf 3082
a6213a49 3083class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
5f6a1245 3084
a6213a49 3085 def _extract_channel_id(self, webpage):
3086 channel_id = self._html_search_meta(
3087 'channelId', webpage, 'channel id', default=None)
3088 if channel_id:
3089 return channel_id
3090 channel_url = self._html_search_meta(
3091 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3092 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3093 'twitter:app:url:googleplay'), webpage, 'channel url')
3094 return self._search_regex(
3095 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
3096 channel_url, 'channel id')
15f6397c 3097
8bdd16b4 3098 @staticmethod
cd7c66cf 3099 def _extract_basic_item_renderer(item):
3100 # Modified from _extract_grid_item_renderer
201c1459 3101 known_basic_renderers = (
3102 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
cd7c66cf 3103 )
3104 for key, renderer in item.items():
201c1459 3105 if not isinstance(renderer, dict):
cd7c66cf 3106 continue
201c1459 3107 elif key in known_basic_renderers:
3108 return renderer
3109 elif key.startswith('grid') and key.endswith('Renderer'):
3110 return renderer
8bdd16b4 3111
8bdd16b4 3112 def _grid_entries(self, grid_renderer):
3113 for item in grid_renderer['items']:
3114 if not isinstance(item, dict):
39b62db1 3115 continue
cd7c66cf 3116 renderer = self._extract_basic_item_renderer(item)
8bdd16b4 3117 if not isinstance(renderer, dict):
3118 continue
052e1350 3119 title = self._get_text(renderer, 'title')
fe93e2c4 3120
8bdd16b4 3121 # playlist
3122 playlist_id = renderer.get('playlistId')
3123 if playlist_id:
3124 yield self.url_result(
3125 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3126 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3127 video_title=title)
201c1459 3128 continue
8bdd16b4 3129 # video
3130 video_id = renderer.get('videoId')
3131 if video_id:
3132 yield self._extract_video(renderer)
201c1459 3133 continue
8bdd16b4 3134 # channel
3135 channel_id = renderer.get('channelId')
3136 if channel_id:
8bdd16b4 3137 yield self.url_result(
3138 'https://www.youtube.com/channel/%s' % channel_id,
3139 ie=YoutubeTabIE.ie_key(), video_title=title)
201c1459 3140 continue
3141 # generic endpoint URL support
3142 ep_url = urljoin('https://www.youtube.com/', try_get(
3143 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
3144 compat_str))
3145 if ep_url:
3146 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
3147 if ie.suitable(ep_url):
3148 yield self.url_result(
3149 ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
3150 break
8bdd16b4 3151
3d3dddc9 3152 def _shelf_entries_from_content(self, shelf_renderer):
3153 content = shelf_renderer.get('content')
3154 if not isinstance(content, dict):
8bdd16b4 3155 return
cd7c66cf 3156 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3d3dddc9 3157 if renderer:
3158 # TODO: add support for nested playlists so each shelf is processed
3159 # as separate playlist
3160 # TODO: this includes only first N items
3161 for entry in self._grid_entries(renderer):
3162 yield entry
3163 renderer = content.get('horizontalListRenderer')
3164 if renderer:
3165 # TODO
3166 pass
8bdd16b4 3167
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield entries for a shelf.

    When the shelf carries an endpoint URL, a URL result for it is yielded
    first. The entries embedded in the shelf content are yielded afterwards
    in either case, unless the shelf was skipped.

    @param skip_channels: when true, a shelf whose URL contains '/channels?'
                          yields nothing at all
    """
    shelf_url = urljoin('https://www.youtube.com', try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str))
    if shelf_url:
        # Skipping links to another channels, note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        yield self.url_result(
            shelf_url, video_title=self._get_text(shelf_renderer, 'title'))
    # Shelf may not contain shelf URL, fallback to extraction from content
    yield from self._shelf_entries_from_content(shelf_renderer)
c5e8d7af 3184
8bdd16b4 3185 def _playlist_entries(self, video_list_renderer):
3186 for content in video_list_renderer['contents']:
3187 if not isinstance(content, dict):
3188 continue
3189 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3190 if not isinstance(renderer, dict):
3191 continue
3192 video_id = renderer.get('videoId')
3193 if not video_id:
3194 continue
3195 yield self._extract_video(renderer)
07aeced6 3196
def _rich_entries(self, rich_grid_renderer):
    """Yield the video held in `content.videoRenderer`, if it has a `videoId`."""
    video = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video.get('videoId'):
        yield self._extract_video(video)
3204
8bdd16b4 3205 def _video_entry(self, video_renderer):
3206 video_id = video_renderer.get('videoId')
3207 if video_id:
3208 return self._extract_video(video_renderer)
dacb3a86 3209
def _post_thread_entries(self, post_thread_renderer):
    """Yield the entries referenced by a single backstage post.

    In order: the attached video, the attached playlist, and every link in
    the post text that YoutubeIE accepts (a link to the attached video
    itself is not repeated).
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        video_entry = self._extract_video(attached_video)
        if video_entry:
            yield video_entry

    # playlist attachment
    attached_playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
    if attached_playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % attached_playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=attached_playlist_id)

    # inline video links
    for run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(run, dict):
            continue
        link = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not link or not YoutubeIE.suitable(link):
            continue
        linked_id = YoutubeIE._match_id(link)
        if linked_id != video_id:
            yield self.url_result(link, ie=YoutubeIE.ie_key(), video_id=linked_id)
dacb3a86 3245
8bdd16b4 3246 def _post_thread_continuation_entries(self, post_thread_continuation):
3247 contents = post_thread_continuation.get('contents')
3248 if not isinstance(contents, list):
3249 return
3250 for content in contents:
3251 renderer = content.get('backstagePostThreadRenderer')
3252 if not isinstance(renderer, dict):
3253 continue
3254 for entry in self._post_thread_entries(renderer):
3255 yield entry
07aeced6 3256
39ed931e 3257 r''' # unused
3258 def _rich_grid_entries(self, contents):
3259 for content in contents:
3260 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3261 if video_renderer:
3262 entry = self._video_entry(video_renderer)
3263 if entry:
3264 yield entry
3265 '''
def _extract_entries(self, parent_renderer, continuation_list):
    """Yield the entries found in `parent_renderer['contents']`.

    `continuation_list` is an output parameter: it is reset in place to
    `[None]` and its single slot receives the continuation extracted while
    walking the contents, so the caller can read it once iteration is done.
    """
    # continuation_list is modified in-place with continuation_list = [continuation_token]
    continuation_list[:] = [None]
    handlers = {
        'playlistVideoListRenderer': self._playlist_entries,
        'gridRenderer': self._grid_entries,
        'shelfRenderer': lambda x: self._shelf_entries(x),
        'backstagePostThreadRenderer': self._post_thread_entries,
        'videoRenderer': lambda x: [self._video_entry(x)],
        'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
        'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
    }
    for content in try_get(parent_renderer, lambda x: x['contents'], list) or []:
        if not isinstance(content, dict):
            continue
        section = try_get(content, lambda x: x['itemSectionRenderer'], dict)
        if not section:
            rich_item = content.get('richItemRenderer')
            if rich_item:
                yield from self._rich_entries(rich_item)
                continuation_list[0] = self._extract_continuation(parent_renderer)
            continue

        for section_item in try_get(section, lambda x: x['contents'], list) or []:
            if not isinstance(section_item, dict):
                continue
            # Only the first recognised renderer of each item is processed
            for key, renderer in section_item.items():
                if key in handlers:
                    for entry in handlers[key](renderer):
                        if entry:
                            yield entry
                    continuation_list[0] = self._extract_continuation(renderer)
                    break

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(section)

    if not continuation_list[0]:
        continuation_list[0] = self._extract_continuation(parent_renderer)
3309
def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
    """Yield all entries of a tab, following continuations page by page.

    The entries embedded in the tab content come first. After that, each
    continuation response is parsed either from `continuationContents` or
    from the `appendContinuationItemsAction` items, until no continuation
    is left, a request returns nothing, or a response has no known shape.
    """
    continuation_list = [None]

    def extract_entries(renderer):
        # `continuation_list` is resolved at call time, so this keeps using
        # whichever list the loop below has most recently bound to the name
        return self._extract_entries(renderer, continuation_list)

    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    yield from extract_entries(parent_renderer)
    continuation = continuation_list[0]

    continuation_handlers = {
        'playlistVideoListContinuation': self._playlist_entries,
        'gridContinuation': self._grid_entries,
        'itemSectionContinuation': self._post_thread_continuation_entries,
        'sectionListContinuation': extract_entries,  # for feeds
    }
    # renderer key -> (handler, name of the list key the handler reads)
    item_handlers = {
        'gridPlaylistRenderer': (self._grid_entries, 'items'),
        'gridVideoRenderer': (self._grid_entries, 'items'),
        'gridChannelRenderer': (self._grid_entries, 'items'),
        'playlistVideoRenderer': (self._playlist_entries, 'contents'),
        'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
        'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
        'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
    }

    page_num = 0
    while continuation:
        page_num += 1
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
        response = self._extract_response(
            item_id='%s page %s' % (item_id, page_num),
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

        if not response:
            break
        # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
        # See: https://github.com/ytdl-org/youtube-dl/issues/28702
        visitor_data = self._extract_visitor_data(response) or visitor_data

        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key in continuation_handlers:
                continuation_renderer = value
                continuation_list = [None]
                yield from continuation_handlers[key](continuation_renderer)
                continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
                break
        if continuation_renderer:
            continue

        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        first_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key in first_item:
            if key in item_handlers:
                handler, list_key = item_handlers[key]
                video_items_renderer = {list_key: continuation_items}
                continuation_list = [None]
                yield from handler(video_items_renderer)
                continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
                break
        if not video_items_renderer:
            break
9558dcec 3386
@staticmethod
def _extract_selected_tab(tabs):
    """Return the renderer of the first tab whose `selected` flag is True.

    Raises ExtractorError when no tab is selected.
    """
    for tab in tabs:
        candidate = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
        if candidate.get('selected') is True:
            return candidate
    raise ExtractorError('Unable to find selected tab')
b82f815f 3395
@classmethod
def _extract_uploader(cls, data):
    """Return uploader fields read from the playlist sidebar.

    The result may hold `uploader`, `uploader_id` and `uploader_url`; a
    field whose value is None is left out, and the dict is empty when the
    sidebar names no video owner.
    """
    sidebar = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner = try_get(
        sidebar, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if not owner:
        return {}

    def browse_endpoint_field(field):
        return try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint'][field], compat_str)

    candidates = (
        ('uploader', owner.get('text')),
        ('uploader_id', browse_endpoint_field('browseId')),
        ('uploader_url', urljoin(
            'https://www.youtube.com/', browse_endpoint_field('canonicalBaseUrl'))),
    )
    return {field: value for field, value in candidates if value is not None}
8bdd16b4 3410
def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """Build the playlist result for the selected tab of a channel/playlist page.

    Metadata comes from `channelMetadataRenderer` when present and from
    `playlistMetadataRenderer` otherwise. Without channel metadata the
    uploader fields are taken from the sidebar instead.
    """
    playlist_id = title = description = channel_url = channel_name = channel_id = None
    thumbnails_list = []
    tags = []

    selected_tab = self._extract_selected_tab(tabs)
    renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if not renderer:
        renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)
    else:
        channel_name = renderer.get('title')
        channel_url = renderer.get('channelUrl')
        channel_id = renderer.get('externalId')

    if renderer:
        title = renderer.get('title')
        description = renderer.get('description', '')
        playlist_id = channel_id
        tags = renderer.get('keywords', '').split()
        thumbnails_list = (
            try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
            or try_get(
                self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
                lambda x: x['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                list)
            or [])

    thumbnails = []
    for thumb in thumbnails_list:
        thumb_url = url_or_none(thumb.get('url')) if isinstance(thumb, dict) else None
        if thumb_url:
            thumbnails.append({
                'url': thumb_url,
                'width': int_or_none(thumb.get('width')),
                'height': int_or_none(thumb.get('height')),
            })

    if playlist_id is None:
        playlist_id = item_id
    if title is None:
        hashtag = try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
        title = hashtag or playlist_id
    title += format_field(selected_tab, 'title', ' - %s')
    title += format_field(selected_tab, 'expandedText', ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
    }
    availability = self._extract_availability(data)
    if availability:
        metadata['availability'] = availability
    if not channel_id:
        metadata.update(self._extract_uploader(data))
    # The channel fields mirror whatever the uploader fields ended up being
    metadata.update({
        'channel': metadata['uploader'],
        'channel_id': metadata['uploader_id'],
        'channel_url': metadata['uploader_url']})

    entries = self._entries(
        selected_tab, playlist_id, ytcfg,
        self._extract_account_syncid(ytcfg, data),
        self._extract_visitor_data(data, ytcfg))
    return self.playlist_result(entries, **metadata)
73c4ac2c 3485
def _extract_mix_playlist(self, playlist, playlist_id, data, ytcfg):
    """Yield the videos of a Mix playlist, requesting further pages as needed.

    Each page is fetched from the `next` endpoint, starting after the last
    video already seen. Iteration stops when a page has no videos, adds
    nothing new, repeats the very first video, or when the response holds
    no playlist.

    @param playlist: playlist renderer of the first page
    @param data:     initial page data, used for the request headers
    """
    first_id = last_id = response = None
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last video yielded from the previous page
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        for video in videos[start:]:
            if video['id'] == first_id:
                self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                return
            yield video
        first_id = first_id or videos[0]['id']
        last_id = videos[-1]['id']
        # The last item is not guaranteed to carry a watch endpoint; fall back
        # to an empty dict so that the defaults below are used
        watch_endpoint = try_get(
            playlist,
            lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if not playlist:
            # Without a playlist in the response there is nothing left to
            # iterate; stop instead of indexing into a missing renderer
            return
cd7c66cf 3520
def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """Return the result for a playlist shown next to a video.

    When the playlist endpoint resolves to a URL different from `url`, the
    extraction is delegated to that URL. Otherwise the playlist is
    extracted as a Mix.
    """
    title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], compat_str)
    playlist_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    endpoint_url = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, endpoint_url)
    if not playlist_url or playlist_url == url:
        return self.playlist_result(
            self._extract_mix_playlist(playlist, playlist_id, data, ytcfg),
            playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
c5e8d7af 3538
def _extract_availability(self, data):
    """
    Gets the availability of a given playlist/tab.
    Note: Unless YouTube tells us explicitly, we do not assume it is public
    @param data: response
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    badge_labels = self._extract_badges(sidebar)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    dropdown_entries = try_get(
        sidebar, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
    for dropdown_entry in dropdown_entries:
        selected = try_get(
            dropdown_entry, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool) or False
        if selected:
            label = self._get_text(dropdown_entry, ('privacyDropdownItemRenderer', 'label'))
            if label:
                # The selected entry's label is treated like one more badge
                badge_labels.add(label.lower())
                break

    is_private = is_unlisted = None
    for badge_label in badge_labels:
        if badge_label == 'unlisted':
            is_unlisted = True
        elif badge_label == 'private':
            is_private = True
        elif badge_label == 'public':
            is_unlisted = is_private = False
    return self._availability(is_private, False, False, False, is_unlisted)
3570
@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """Return the first non-empty `info_renderer` among the playlist sidebar items.

    Returns None when the sidebar has no such item.
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    candidates = (
        try_get(item, lambda x: x[info_renderer], expected_type)
        for item in sidebar_items)
    return next((candidate for candidate in candidates if candidate), None)
3579
def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Re-request the playlist so that unavailable videos are included.

    The browse ID and params are taken from the 'show unavailable videos'
    menu item when it exists; built-in defaults are used otherwise.
    Returns None when the page has no primary sidebar info renderer.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not sidebar:
        return

    browse_endpoint = {}
    for menu_item in try_get(sidebar, lambda x: x['menu']['menuRenderer']['items'], list) or []:
        if not isinstance(menu_item, dict):
            continue
        nav_item = menu_item.get('menuNavigationItemRenderer')
        text = try_get(
            nav_item, lambda x: x['text']['simpleText'], compat_str)
        if text and text.lower() == 'show unavailable videos':
            browse_endpoint = try_get(
                nav_item, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            break
    browse_id = browse_endpoint.get('browseId')
    params = browse_endpoint.get('params')

    headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        visitor_data=self._extract_visitor_data(data, ytcfg))
    query = {
        'params': params or 'wgYCCAA=',
        'browseId': browse_id or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')
358de58c 3615
def _extract_webpage(self, url, item_id, fatal=True):
    """Download the webpage and its initial data, retrying on incomplete data.

    A download is retried, up to the `extractor_retries` param, after a
    network error other than HTTP 403/429 and whenever the initial data has
    neither `contents` nor `currentVideoEndpoint`. With `fatal` false,
    errors are reported as warnings instead of being raised.

    Returns a `(webpage, data)` tuple.
    """
    retries = self.get_param('extractor_retries', 3)
    attempt = -1
    webpage = data = last_error = None
    while attempt < retries:
        attempt += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if last_error:
            self.report_warning('%s. Retrying ...' % last_error)
        try:
            webpage = self._download_webpage(
                url, item_id,
                note='Downloading webpage%s' % (' (retry #%d)' % attempt if attempt else '',))
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as e:
            cause = e.cause
            retryable = isinstance(cause, network_exceptions) and (
                not isinstance(cause, compat_HTTPError) or cause.code not in (403, 429))
            if retryable:
                last_error = error_to_compat_str(cause or e.msg)
                if attempt < retries:
                    continue
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            break

        # Reached only after a successful download: every path through the
        # handler above leaves this iteration
        try:
            self._extract_and_report_alerts(data)
        except ExtractorError as e:
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            break

        if dict_get(data, ('contents', 'currentVideoEndpoint')):
            break

        last_error = 'Incomplete yt initial data received'
        if attempt >= retries:
            if fatal:
                raise ExtractorError(last_error)
            self.report_warning(last_error)
            break

    return webpage, data
3661
def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """Return `(data, ytcfg)` for `url`.

    The webpage is tried first unless 'webpage' is listed in the `skip`
    extractor argument. When that yields no data, the API tab endpoint is
    used instead.
    """
    data = None
    if 'webpage' not in self._configuration_arg('skip'):
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)
    if data:
        return data, ytcfg

    if not ytcfg and self.is_authenticated:
        msg = 'Playlists that require authentication may not extract correctly without a successful webpage download.'
        authcheck_enabled = 'authcheck' not in self._configuration_arg('skip')
        if authcheck_enabled and fatal:
            raise ExtractorError(
                msg + ' If you are not downloading private content, or your cookies are only for the first account and channel,'
                      ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
                expected=True)
        self.report_warning(msg, only_once=True)
    data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return data, ytcfg
3678
def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """Resolve `url` through the API and fetch the data of the resolved endpoint.

    A `browseEndpoint` is queried via `browse`, a `watchEndpoint` via
    `next`. When the URL resolves to neither, ExtractorError is raised if
    `fatal`, otherwise a warning is reported and None is returned.
    """
    headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolved = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)
    for endpoint_key, api_ep in (('browseEndpoint', 'browse'), ('watchEndpoint', 'next')):
        endpoint_params = try_get(resolved, lambda x: x['endpoint'][endpoint_key], dict)
        if not endpoint_params:
            continue
        return self._extract_response(
            item_id=item_id, query=endpoint_params, ep=api_ep, headers=headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint'))

    err_note = 'Failed to resolve url (does the playlist exist?)'
    if fatal:
        raise ExtractorError(err_note, expected=True)
    self.report_warning(err_note, item_id)
3696
a6213a49 3697 @staticmethod
3698 def _smuggle_data(entries, data):
3699 for entry in entries:
3700 if data:
3701 entry['url'] = smuggle_url(entry['url'], data)
3702 yield entry
3703
# Search params used by `_search_results` when the caller passes none
_SEARCH_PARAMS = None

def _search_results(self, query, params=NO_DEFAULT):
    """Yield the entries of a search for `query`, following continuations.

    @param params: search params sent with every request; defaults to the
                   class-level `_SEARCH_PARAMS`
    """
    if params is NO_DEFAULT:
        params = self._SEARCH_PARAMS
    data = {'query': query}
    if params:
        data['params'] = params

    continuation_list = [None]
    page_num = 0
    while True:
        page_num += 1
        # Merge the continuation left behind by the previous page, if any
        data.update(continuation_list[0] or {})
        search = self._extract_response(
            item_id='query "%s" page %s' % (query, page_num), ep='search', query=data,
            check_get_keys=('contents', 'onResponseReceivedCommands'))
        slr_contents = try_get(
            search,
            (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
             lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
            list)
        yield from self._extract_entries({'contents': slr_contents}, continuation_list)
        if not continuation_list[0]:
            return
3726
3727
3728class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
3729 IE_DESC = 'YouTube Tabs'
3730 _VALID_URL = r'''(?x:
3731 https?://
3732 (?:\w+\.)?
3733 (?:
3734 youtube(?:kids)?\.com|
3735 %(invidious)s
3736 )/
3737 (?:
3738 (?P<channel_type>channel|c|user|browse)/|
3739 (?P<not_channel>
3740 feed/|hashtag/|
3741 (?:playlist|watch)\?.*?\blist=
3742 )|
3743 (?!(?:%(reserved_names)s)\b) # Direct URLs
3744 )
3745 (?P<id>[^/?\#&]+)
3746 )''' % {
3747 'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,
3748 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
3749 }
3750 IE_NAME = 'youtube:tab'
3751
3752 _TESTS = [{
3753 'note': 'playlists, multipage',
3754 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
3755 'playlist_mincount': 94,
3756 'info_dict': {
3757 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3758 'title': 'Игорь Клейнер - Playlists',
3759 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3760 'uploader': 'Игорь Клейнер',
3761 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3762 },
3763 }, {
3764 'note': 'playlists, multipage, different order',
3765 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3766 'playlist_mincount': 94,
3767 'info_dict': {
3768 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3769 'title': 'Игорь Клейнер - Playlists',
3770 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3771 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3772 'uploader': 'Игорь Клейнер',
3773 },
3774 }, {
3775 'note': 'playlists, series',
3776 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
3777 'playlist_mincount': 5,
3778 'info_dict': {
3779 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3780 'title': '3Blue1Brown - Playlists',
3781 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3782 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3783 'uploader': '3Blue1Brown',
3784 },
3785 }, {
3786 'note': 'playlists, singlepage',
3787 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3788 'playlist_mincount': 4,
3789 'info_dict': {
3790 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3791 'title': 'ThirstForScience - Playlists',
3792 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
3793 'uploader': 'ThirstForScience',
3794 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3795 }
3796 }, {
3797 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3798 'only_matching': True,
3799 }, {
3800 'note': 'basic, single video playlist',
3801 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3802 'info_dict': {
3803 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3804 'uploader': 'Sergey M.',
3805 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3806 'title': 'youtube-dl public playlist',
3807 },
3808 'playlist_count': 1,
3809 }, {
3810 'note': 'empty playlist',
3811 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3812 'info_dict': {
3813 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3814 'uploader': 'Sergey M.',
3815 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3816 'title': 'youtube-dl empty playlist',
3817 },
3818 'playlist_count': 0,
3819 }, {
3820 'note': 'Home tab',
3821 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
3822 'info_dict': {
3823 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3824 'title': 'lex will - Home',
3825 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3826 'uploader': 'lex will',
3827 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3828 },
3829 'playlist_mincount': 2,
3830 }, {
3831 'note': 'Videos tab',
3832 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
3833 'info_dict': {
3834 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3835 'title': 'lex will - Videos',
3836 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3837 'uploader': 'lex will',
3838 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3839 },
3840 'playlist_mincount': 975,
3841 }, {
3842 'note': 'Videos tab, sorted by popular',
3843 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
3844 'info_dict': {
3845 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3846 'title': 'lex will - Videos',
3847 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3848 'uploader': 'lex will',
3849 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3850 },
3851 'playlist_mincount': 199,
3852 }, {
3853 'note': 'Playlists tab',
3854 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
3855 'info_dict': {
3856 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3857 'title': 'lex will - Playlists',
3858 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3859 'uploader': 'lex will',
3860 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3861 },
3862 'playlist_mincount': 17,
3863 }, {
3864 'note': 'Community tab',
3865 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
3866 'info_dict': {
3867 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3868 'title': 'lex will - Community',
3869 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3870 'uploader': 'lex will',
3871 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3872 },
3873 'playlist_mincount': 18,
3874 }, {
3875 'note': 'Channels tab',
3876 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
3877 'info_dict': {
3878 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3879 'title': 'lex will - Channels',
3880 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3881 'uploader': 'lex will',
3882 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3883 },
3884 'playlist_mincount': 12,
3885 }, {
3886 'note': 'Search tab',
3887 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
3888 'playlist_mincount': 40,
3889 'info_dict': {
3890 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3891 'title': '3Blue1Brown - Search - linear algebra',
3892 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3893 'uploader': '3Blue1Brown',
3894 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3895 },
3896 }, {
3897 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3898 'only_matching': True,
3899 }, {
3900 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3901 'only_matching': True,
3902 }, {
3903 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3904 'only_matching': True,
3905 }, {
3906 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
3907 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3908 'info_dict': {
3909 'title': '29C3: Not my department',
3910 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3911 'uploader': 'Christiaan008',
3912 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
3913 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
3914 },
3915 'playlist_count': 96,
3916 }, {
3917 'note': 'Large playlist',
3918 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
3919 'info_dict': {
3920 'title': 'Uploads from Cauchemar',
3921 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
3922 'uploader': 'Cauchemar',
3923 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
3924 },
3925 'playlist_mincount': 1123,
3926 }, {
3927 'note': 'even larger playlist, 8832 videos',
3928 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
3929 'only_matching': True,
3930 }, {
3931 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
3932 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
3933 'info_dict': {
3934 'title': 'Uploads from Interstellar Movie',
3935 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
3936 'uploader': 'Interstellar Movie',
3937 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
3938 },
3939 'playlist_mincount': 21,
3940 }, {
3941 'note': 'Playlist with "show unavailable videos" button',
3942 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
3943 'info_dict': {
3944 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
3945 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
3946 'uploader': 'Phim Siêu Nhân Nhật Bản',
3947 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
3948 },
3949 'playlist_mincount': 200,
3950 }, {
3951 'note': 'Playlist with unavailable videos in page 7',
3952 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
3953 'info_dict': {
3954 'title': 'Uploads from BlankTV',
3955 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
3956 'uploader': 'BlankTV',
3957 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
3958 },
3959 'playlist_mincount': 1000,
3960 }, {
3961 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
3962 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3963 'info_dict': {
3964 'title': 'Data Analysis with Dr Mike Pound',
3965 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3966 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
3967 'uploader': 'Computerphile',
3968 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
3969 },
3970 'playlist_mincount': 11,
3971 }, {
3972 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3973 'only_matching': True,
3974 }, {
3975 'note': 'Playlist URL that does not actually serve a playlist',
3976 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
3977 'info_dict': {
3978 'id': 'FqZTN594JQw',
3979 'ext': 'webm',
3980 'title': "Smiley's People 01 detective, Adventure Series, Action",
3981 'uploader': 'STREEM',
3982 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
3983 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
3984 'upload_date': '20150526',
3985 'license': 'Standard YouTube License',
3986 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
3987 'categories': ['People & Blogs'],
3988 'tags': list,
3989 'view_count': int,
3990 'like_count': int,
3991 'dislike_count': int,
3992 },
3993 'params': {
3994 'skip_download': True,
3995 },
3996 'skip': 'This video is not available.',
3997 'add_ie': [YoutubeIE.ie_key()],
3998 }, {
3999 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
4000 'only_matching': True,
4001 }, {
4002 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
4003 'only_matching': True,
4004 }, {
4005 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
4006 'info_dict': {
4007 'id': '3yImotZU3tw', # This will keep changing
4008 'ext': 'mp4',
4009 'title': compat_str,
4010 'uploader': 'Sky News',
4011 'uploader_id': 'skynews',
4012 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
4013 'upload_date': r're:\d{8}',
4014 'description': compat_str,
4015 'categories': ['News & Politics'],
4016 'tags': list,
4017 'like_count': int,
4018 'dislike_count': int,
4019 },
4020 'params': {
4021 'skip_download': True,
4022 },
4023 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
4024 }, {
4025 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
4026 'info_dict': {
4027 'id': 'a48o2S1cPoo',
4028 'ext': 'mp4',
4029 'title': 'The Young Turks - Live Main Show',
4030 'uploader': 'The Young Turks',
4031 'uploader_id': 'TheYoungTurks',
4032 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
4033 'upload_date': '20150715',
4034 'license': 'Standard YouTube License',
4035 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
4036 'categories': ['News & Politics'],
4037 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
4038 'like_count': int,
4039 'dislike_count': int,
4040 },
4041 'params': {
4042 'skip_download': True,
4043 },
4044 'only_matching': True,
4045 }, {
4046 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
4047 'only_matching': True,
4048 }, {
4049 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
4050 'only_matching': True,
4051 }, {
4052 'note': 'A channel that is not live. Should raise error',
4053 'url': 'https://www.youtube.com/user/numberphile/live',
4054 'only_matching': True,
4055 }, {
4056 'url': 'https://www.youtube.com/feed/trending',
4057 'only_matching': True,
4058 }, {
4059 'url': 'https://www.youtube.com/feed/library',
4060 'only_matching': True,
4061 }, {
4062 'url': 'https://www.youtube.com/feed/history',
4063 'only_matching': True,
4064 }, {
4065 'url': 'https://www.youtube.com/feed/subscriptions',
4066 'only_matching': True,
4067 }, {
4068 'url': 'https://www.youtube.com/feed/watch_later',
4069 'only_matching': True,
4070 }, {
4071 'note': 'Recommended - redirects to home page.',
4072 'url': 'https://www.youtube.com/feed/recommended',
4073 'only_matching': True,
4074 }, {
4075 'note': 'inline playlist with not always working continuations',
4076 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
4077 'only_matching': True,
4078 }, {
4079 'url': 'https://www.youtube.com/course',
4080 'only_matching': True,
4081 }, {
4082 'url': 'https://www.youtube.com/zsecurity',
4083 'only_matching': True,
4084 }, {
4085 'url': 'http://www.youtube.com/NASAgovVideo/videos',
4086 'only_matching': True,
4087 }, {
4088 'url': 'https://www.youtube.com/TheYoungTurks/live',
4089 'only_matching': True,
4090 }, {
4091 'url': 'https://www.youtube.com/hashtag/cctv9',
4092 'info_dict': {
4093 'id': 'cctv9',
4094 'title': '#cctv9',
4095 },
4096 'playlist_mincount': 350,
4097 }, {
4098 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
4099 'only_matching': True,
4100 }, {
4101 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
4102 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
4103 'only_matching': True
4104 }, {
4105 'note': '/browse/ should redirect to /channel/',
4106 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
4107 'only_matching': True
4108 }, {
4109 'note': 'VLPL, should redirect to playlist?list=PL...',
4110 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
4111 'info_dict': {
4112 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
4113 'uploader': 'NoCopyrightSounds',
4114 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
4115 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
4116 'title': 'NCS Releases',
4117 },
4118 'playlist_mincount': 166,
4119 }, {
4120 'note': 'Topic, should redirect to playlist?list=UU...',
4121 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
4122 'info_dict': {
4123 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
4124 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
4125 'title': 'Uploads from Royalty Free Music - Topic',
4126 'uploader': 'Royalty Free Music - Topic',
4127 },
4128 'expected_warnings': [
4129 'A channel/user page was given',
4130 'The URL does not have a videos tab',
4131 ],
4132 'playlist_mincount': 101,
4133 }, {
4134 'note': 'Topic without a UU playlist',
4135 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
4136 'info_dict': {
4137 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
4138 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
4139 },
4140 'expected_warnings': [
4141 'A channel/user page was given',
4142 'The URL does not have a videos tab',
4143 'Falling back to channel URL',
4144 ],
4145 'playlist_mincount': 9,
4146 }, {
4147 'note': 'Youtube music Album',
4148 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
4149 'info_dict': {
4150 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
4151 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
4152 },
4153 'playlist_count': 50,
4154 }, {
4155 'note': 'unlisted single video playlist',
4156 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
4157 'info_dict': {
4158 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
4159 'uploader': 'colethedj',
4160 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
4161 'title': 'yt-dlp unlisted playlist test',
4162 'availability': 'unlisted'
4163 },
4164 'playlist_count': 1,
4165 }, {
4166 'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
4167 'url': 'https://www.youtube.com/feed/recommended',
4168 'info_dict': {
4169 'id': 'recommended',
4170 'title': 'recommended',
4171 },
4172 'playlist_mincount': 50,
4173 'params': {
4174 'skip_download': True,
4175 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
4176 },
4177 }, {
4178 'note': 'API Fallback: /videos tab, sorted by oldest first',
4179 'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',
4180 'info_dict': {
4181 'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
4182 'title': 'Cody\'sLab - Videos',
4183 'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',
4184 'uploader': 'Cody\'sLab',
4185 'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
4186 },
4187 'playlist_mincount': 650,
4188 'params': {
4189 'skip_download': True,
4190 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
4191 },
4192 }, {
4193 'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
4194 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
4195 'info_dict': {
4196 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
4197 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
4198 'title': 'Uploads from Royalty Free Music - Topic',
4199 'uploader': 'Royalty Free Music - Topic',
4200 },
4201 'expected_warnings': [
4202 'A channel/user page was given',
4203 'The URL does not have a videos tab',
4204 ],
4205 'playlist_mincount': 101,
4206 'params': {
4207 'skip_download': True,
4208 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
4209 },
4210 }]
4211
@classmethod
def suitable(cls, url):
    """Decline any URL that YoutubeIE accepts; otherwise defer to the inherited matcher."""
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
9297939e 4216
def _real_extract(self, url):
    """
    Strip smuggled data from the URL, run the actual extraction and
    re-attach the smuggled data to the resulting entries (if any).
    """
    url, smuggled_data = unsmuggle_url(url, {})
    if self.is_music_url(url):
        # Remember the origin so that it survives the URL rewriting done later
        smuggled_data['is_music_url'] = True

    result = self.__real_extract(url, smuggled_data)
    entries = result.get('entries')
    if entries:
        result['entries'] = self._smuggle_data(entries, smuggled_data)
    return result
4225
# Splits a URL into three named parts:
#   pre  - whatever _VALID_URL matches
#   tab  - an optional '/<word>' component; the conditional group only tries
#          to match it when the 'channel_type' group of _VALID_URL took part
#   post - everything that follows
_url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)
4227
def __real_extract(self, url, smuggled_data):
    """
    Resolve a channel/tab/playlist/watch URL into an extraction result.

    The URL is first normalised (host forced to www.youtube.com, YouTube
    Music ids rewritten, bare channel URLs pointed at /videos, watch URLs
    without a video id turned into playlist URLs). The page data is then
    fetched and handed to the tab, playlist or single-video handling,
    whichever the response contains.

    url:            the URL to extract, with smuggled data already removed
    smuggled_data:  dict of smuggled data; only 'is_music_url' is read here

    Returns the value of _extract_from_tabs / _extract_from_playlist, or a
    url_result pointing at a single video or at a resolved album playlist.

    Raises ExtractorError when an album cannot be resolved, when a watch URL
    carries neither a video nor a playlist id, when a /live tab did not
    redirect to a video, or when nothing recognisable is found in the data.
    """
    item_id = self._match_id(url)
    # Whatever host was given, continue with www.youtube.com
    url = compat_urlparse.urlunparse(
        compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    def get_mobj(url):
        # Named groups of _url_re for this URL, with '' in place of None
        # for groups that did not take part in the match
        mobj = self._url_re.match(url).groupdict()
        mobj.update((k, '') for k, v in mobj.items() if v is None)
        return mobj

    mobj = get_mobj(url)
    # Youtube returns incomplete data if tabname is not lower case
    pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']
    if is_channel:
        if smuggled_data.get('is_music_url'):
            if item_id[:2] == 'VL':
                # Youtube music VL channels have an equivalent playlist
                item_id = item_id[2:]
                pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
            elif item_id[:2] == 'MP':
                # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
                mdata = self._extract_tab_endpoint(
                    'https://music.youtube.com/channel/%s' % item_id, item_id, default_client='web_music')
                murl = traverse_obj(
                    mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'), get_all=False, expected_type=compat_str)
                if not murl:
                    raise ExtractorError('Failed to resolve album to playlist.')
                # Extraction restarts from the canonical URL reported for the album
                return self.url_result(murl, ie=YoutubeTabIE.ie_key())
            elif mobj['channel_type'] == 'browse':
                # Youtube music /browse/ should be changed to /channel/
                pre = 'https://www.youtube.com/channel/%s' % item_id
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # Home URLs should redirect to /videos/
        self.report_warning(
            'A channel/user page was given. All the channel\'s videos will be downloaded. '
            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    # Rebuild the URL from the (possibly rewritten) parts and re-parse it
    url = ''.join((pre, tab, post))
    mobj = get_mobj(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
        url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
        mobj = get_mobj(url)

    if video_id and playlist_id:
        # The URL names both a video and a playlist; the 'noplaylist' param decides
        if self.get_param('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return self.url_result(f'https://www.youtube.com/watch?v={video_id}', ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

    data, ytcfg = self._extract_data(url, item_id)

    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        tab_name = selected_tab.get('title', '')
        if 'no-youtube-channel-redirect' not in compat_opts:
            if mobj['tab'] == '/live':
                # Live tab should have redirected to the video
                raise ExtractorError('The channel is not currently live', expected=True)
            # /videos was requested but the selected tab has a different title
            if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
                if not mobj['not_channel'] and item_id[:2] == 'UC':
                    # Topic channels don't have /videos. Use the equivalent playlist instead
                    self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
                    pl_id = 'UU%s' % item_id[2:]
                    pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
                    try:
                        # data, ytcfg, item_id and url are only rebound if the request succeeds
                        data, ytcfg, item_id, url = *self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True), pl_id, pl_url
                    except ExtractorError:
                        # Keep using the channel data that was already fetched
                        self.report_warning('The playlist gave error. Falling back to channel URL')
                else:
                    self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))

    self.write_debug('Final URL: %s' % url)

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)
    # data may have been replaced above, so look the tabs up again
    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        return self._extract_from_tabs(item_id, ytcfg, data, tabs)

    playlist = try_get(
        data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

    # Neither tabs nor a playlist: fall back to a single video, preferring
    # the id found in the response over the one parsed from the URL
    video_id = try_get(
        data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
        compat_str) or video_id
    if video_id:
        if mobj['tab'] != '/live':  # live tab is expected to redirect to video
            self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
        return self.url_result(f'https://www.youtube.com/watch?v={video_id}', ie=YoutubeIE.ie_key(), video_id=video_id)

    raise ExtractorError('Unable to recognize tab page')
c5e8d7af 4339
c5e8d7af 4340
class YoutubePlaylistIE(InfoExtractor):
    """
    Matches bare playlist ids and playlist URLs that YoutubeTabIE does not
    claim, and hands them over to YoutubeTabIE as a /playlist URL.
    """
    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                %(invidious)s
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        },
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 654,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
        },
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs claimed by YoutubeTabIE and URLs that carry a video id ('v')."""
        if YoutubeTabIE.suitable(url):
            return False
        # NOTE(review): parse_qs is imported inside the method instead of relying
        # on a module-level name - presumably so that this method stays
        # self-contained; confirm before hoisting the import.
        from ..utils import parse_qs
        has_video_id = bool(parse_qs(url).get('v', [None])[0])
        return False if has_video_id else super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Rewrite the input as a www.youtube.com/playlist URL and delegate to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        from_music = YoutubeBaseInfoExtractor.is_music_url(url)
        # Keep the original query if there is one; a bare id has none
        query = parse_qs(url) or {'list': playlist_id}
        tab_url = update_url_query('https://www.youtube.com/playlist', query)
        if from_music:
            tab_url = smuggle_url(tab_url, {'is_music_url': True})
        return self.url_result(tab_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
29f7c58a 4426
4427
class YoutubeYtBeIE(InfoExtractor):
    """
    Matches youtu.be short links that also carry a playlist id and forwards
    them to YoutubeTabIE as a regular watch URL with 'v' and 'list' set.
    """
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the equivalent watch URL and delegate to YoutubeTabIE."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 4466
4467
class YoutubeYtUserIE(InfoExtractor):
    """Expands the "ytuser:<name>" shorthand to the user's /videos URL."""
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the videos tab of the named user."""
        user_id = self._match_id(url)
        videos_url = 'https://www.youtube.com/user/%s/videos' % user_id
        return self.url_result(videos_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 4481
b05654f0 4482
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Maps the ":ytfav" keyword (and its spelling variants) to the "LL" playlist."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE; the matched keyword itself carries no data."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
4500
4501
class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor registered under the "ytsearch" key."""
    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube search'
    _SEARCH_KEY = 'ytsearch'
    # Search filter: videos only
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'
    _TESTS = []
b05654f0 4508
a61fd4cf 4509
class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor registered under the "ytsearchdate" key."""
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    # Search filter: videos only, sorted by date
    _SEARCH_PARAMS = 'CAISAhAB'
75dff0ee 4515
c9ae7b95 4516
class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    """
    Handles /results URLs: the search text comes from the 'search_query'
    (or 'q') parameter, the optional 'sp' parameter is passed on as-is.
    """
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        },
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a playlist of the search results; the query doubles as id and title."""
        params = parse_qs(url)
        query = (params.get('search_query') or params.get('q'))[0]
        search_params = params.get('sp', (None,))[0]
        entries = self._search_results(query, search_params)
        return self.playlist_result(entries, query, query)
3462ffa8 4545
4546
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Common base of the feed extractors.

    A subclass must define _FEED_NAME. It is used both to build IE_NAME and
    to build the https://www.youtube.com/feed/<name> URL that extraction is
    delegated to.
    """
    _TESTS = []
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        feed_name = self._FEED_NAME
        return 'youtube:%s' % feed_name

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the feed URL; the input URL is not used."""
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
4563
4564
class YoutubeWatchLaterIE(InfoExtractor):
    """Maps the ":ytwatchlater" keyword to the "WL" playlist."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE; the matched keyword itself carries no data."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 4577
4578
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for 'recommended'; also matches the bare youtube.com home URL."""
    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _FEED_NAME = 'recommended'
    # Overrides the True set on the feeds base class
    _LOGIN_REQUIRED = False
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
1ed5b5c9 4594
1ed5b5c9 4595
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for 'subscriptions', selected with the ":ytsubs" keyword family."""
    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _FEED_NAME = 'subscriptions'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
1ed5b5c9 4607
1ed5b5c9 4608
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for 'history', selected with ":ythis" or ":ythistory"."""
    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _FEED_NAME = 'history'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
1ed5b5c9
JMF
4617
4618
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """
    Catches watch/attribution_link URLs that end right after a single
    non-video query parameter (or after the bare "watch?") and reports
    them to the user instead of attempting an extraction.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError explaining how to quote the URL."""
        # The example names this project's own command (yt-dlp); the message
        # previously told users to run youtube-dl.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            'or simply yt-dlp BaW_jenozKc .',
            expected=True)
772fd5cc
PH
4666
4667
class YoutubeClipIE(InfoExtractor):
    """Matches /clip/ URLs, warns that clips are unsupported and defers to 'Generic'."""
    IE_NAME = 'youtube:clip'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/'

    def _real_extract(self, url):
        """Emit the warning, then pass the unchanged URL on to the 'Generic' extractor."""
        self.report_warning('YouTube clips are not currently supported. The entire video will be downloaded instead')
        return self.url_result(url, ie='Generic')
4676
4677
772fd5cc
PH
class YoutubeTruncatedIDIE(InfoExtractor):
    """
    Catches watch URLs whose 'v' value is only 1 to 10 characters long and
    reports them as truncated instead of attempting an extraction.
    """
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete id and the URL."""
        video_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url)
        raise ExtractorError(message, expected=True)