]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
Add field `webpage_url_domain`
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
2d6659b9 5import base64
d92f5d5a 6import calendar
109dd3b2 7import copy
fe93e2c4 8import datetime
a5c56234 9import hashlib
0ca96d48 10import itertools
c5e8d7af 11import json
720c3099 12import math
c4417ddb 13import os.path
d77ab8e2 14import random
c5e8d7af 15import re
8a784c74 16import time
e0df6211 17import traceback
c5e8d7af 18
b05654f0 19from .common import InfoExtractor, SearchInfoExtractor
4bb4a188 20from ..compat import (
edf3e38e 21 compat_chr,
29f7c58a 22 compat_HTTPError,
c5e8d7af 23 compat_parse_qs,
545cc85d 24 compat_str,
7fd002c0 25 compat_urllib_parse_unquote_plus,
15707c7e 26 compat_urllib_parse_urlencode,
7c80519c 27 compat_urllib_parse_urlparse,
7c61bd36 28 compat_urlparse,
4bb4a188 29)
545cc85d 30from ..jsinterp import JSInterpreter
4bb4a188 31from ..utils import (
720c3099 32 bug_reports_message,
2d6659b9 33 bytes_to_intlist,
c5e8d7af 34 clean_html,
d92f5d5a 35 datetime_from_str,
11f9be09 36 dict_get,
358de58c 37 error_to_compat_str,
c5e8d7af 38 ExtractorError,
2d30521a 39 float_or_none,
11f9be09 40 format_field,
dd27fd17 41 int_or_none,
2d6659b9 42 intlist_to_bytes,
641ad5d8 43 is_html,
34921b43 44 join_nonempty,
94278f72 45 mimetype2ext,
9c0d7f49 46 network_exceptions,
a6213a49 47 NO_DEFAULT,
11f9be09 48 orderedSet,
6310acf5 49 parse_codecs,
49bd8c66 50 parse_count,
7c80519c 51 parse_duration,
7ea65411 52 parse_iso8601,
4dfbf869 53 parse_qs,
dca3ff4a 54 qualities,
c0ac49bc 55 remove_end,
3995d37d 56 remove_start,
cf7e015f 57 smuggle_url,
dbdaaa23 58 str_or_none,
c93d53f5 59 str_to_int,
7c365c21 60 traverse_obj,
556dbe7f 61 try_get,
c5e8d7af
PH
62 unescapeHTML,
63 unified_strdate,
cf7e015f 64 unsmuggle_url,
8bdd16b4 65 update_url_query,
21c340b8 66 url_or_none,
fe93e2c4 67 urljoin,
7c365c21 68 variadic,
c5e8d7af
PH
69)
70
5f6a1245 71
def get_first(obj, keys, **kwargs):
    """
    Look up `keys` below every element of `obj` and return a single result.

    The path handed to traverse_obj is `...` (branch over all elements of `obj`)
    followed by `keys`; get_all=False is always passed, extra keyword arguments
    are forwarded unchanged.
    """
    branching_path = (..., *variadic(keys))
    return traverse_obj(obj, branching_path, **kwargs, get_all=False)
74
75
# any clients starting with _ cannot be explicitly requested by the user
#
# Each entry maps a client identifier to its Innertube configuration:
#   INNERTUBE_API_KEY              API key sent as the `key` query parameter
#   INNERTUBE_HOST                 hostname the API requests are sent to
#   INNERTUBE_CONTEXT              context object included in every API request body
#   INNERTUBE_CONTEXT_CLIENT_NAME  numeric id sent in the X-YouTube-Client-Name header
#   REQUIRE_JS_PLAYER              flag, explicitly False for the android/ios clients
# Entries that omit INNERTUBE_API_KEY, INNERTUBE_HOST or REQUIRE_JS_PLAYER
# receive defaults in build_innertube_clients().
INNERTUBE_CLIENTS = {
    'web': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20210622.10.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1
    },
    'web_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_EMBEDDED_PLAYER',
                'clientVersion': '1.20210620.0.1',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 56
    },
    'web_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_REMIX',
                'clientVersion': '1.20210621.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
    },
    'web_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_CREATOR',
                'clientVersion': '1.20210621.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
    },
    'android': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID',
                'clientVersion': '16.20',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
        'REQUIRE_JS_PLAYER': False
    },
    'android_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_EMBEDDED_PLAYER',
                'clientVersion': '16.20',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
        'REQUIRE_JS_PLAYER': False
    },
    'android_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_MUSIC',
                'clientVersion': '4.32',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
        'REQUIRE_JS_PLAYER': False
    },
    # No API key listed here: the default key is filled in by build_innertube_clients()
    'android_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_CREATOR',
                'clientVersion': '21.24.100',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
        'REQUIRE_JS_PLAYER': False
    },
    # ios has HLS live streams
    # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680
    'ios': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS',
                'clientVersion': '16.20',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
        'REQUIRE_JS_PLAYER': False
    },
    'ios_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MESSAGES_EXTENSION',
                'clientVersion': '16.20',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
        'REQUIRE_JS_PLAYER': False
    },
    'ios_music': {
        'INNERTUBE_API_KEY': 'AIzaSyDK3iBpDP9nHVTk2qL73FLJICfOC3c51Og',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MUSIC',
                'clientVersion': '4.32',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
        'REQUIRE_JS_PLAYER': False
    },
    # No API key listed here: the default key is filled in by build_innertube_clients()
    'ios_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_CREATOR',
                'clientVersion': '21.24.100',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
        'REQUIRE_JS_PLAYER': False
    },
    # mweb has 'ultralow' formats
    # See: https://github.com/yt-dlp/yt-dlp/pull/557
    'mweb': {
        'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'MWEB',
                'clientVersion': '2.20210721.07.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 2
    },
}
222
223
def build_innertube_clients():
    """
    Complete the INNERTUBE_CLIENTS table in place.

    Every client present when the function starts gets default values for the
    API key, host, REQUIRE_JS_PLAYER and `hl`, plus a numeric `priority`.
    For each base client an additional `<client>_agegate` entry is registered
    as a deep copy using the EMBED client screen and a thirdParty context.
    """
    embed_context = {
        'embedUrl': 'https://google.com',  # Can be any valid URL
    }
    base_clients = ('android', 'web', 'ios', 'mweb')
    # Rank by position in the reversed tuple, so earlier base clients rank higher
    rank_of = qualities(tuple(reversed(base_clients)))

    # Iterate over a snapshot: agegate variants are added to the table inside the loop
    for name, config in list(INNERTUBE_CLIENTS.items()):
        config.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
        config.setdefault('INNERTUBE_HOST', 'www.youtube.com')
        config.setdefault('REQUIRE_JS_PLAYER', True)
        config['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')

        family = name.split('_', 1)[0]
        config['priority'] = 10 * rank_of(family)

        if name in base_clients:
            agegate_config = copy.deepcopy(config)
            INNERTUBE_CLIENTS[f'{name}_agegate'] = agegate_config
            agegate_config['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
            agegate_config['INNERTUBE_CONTEXT']['thirdParty'] = embed_context
            agegate_config['priority'] -= 1
            continue

        if name.endswith('_embedded'):
            config['INNERTUBE_CONTEXT']['thirdParty'] = embed_context
            config['priority'] -= 2
        else:
            config['priority'] -= 3
248
249
250build_innertube_clients()
251
252
de7f3446 253class YoutubeBaseInfoExtractor(InfoExtractor):
b2e8bc1b 254 """Provide base functions for Youtube extractors"""
e00eb564 255
3462ffa8 256 _RESERVED_NAMES = (
3cd786db 257 r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'
3619f78d 258 r'shorts|movies|results|shared|hashtag|trending|feed|feeds|'
259 r'browse|oembed|get_video_info|iframe_api|s/player|'
cd7c66cf 260 r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')
3462ffa8 261
3619f78d 262 _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
263
b2e8bc1b 264 _NETRC_MACHINE = 'youtube'
3619f78d 265
b2e8bc1b
JMF
266 # If True it will raise an error if no login info is provided
267 _LOGIN_REQUIRED = False
268
d9190e44
RH
269 _INVIDIOUS_SITES = (
270 # invidious-redirect websites
271 r'(?:www\.)?redirect\.invidious\.io',
272 r'(?:(?:www|dev)\.)?invidio\.us',
273 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
274 r'(?:www\.)?invidious\.pussthecat\.org',
275 r'(?:www\.)?invidious\.zee\.li',
276 r'(?:www\.)?invidious\.ethibox\.fr',
277 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
278 # youtube-dl invidious instances list
279 r'(?:(?:www|no)\.)?invidiou\.sh',
280 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
281 r'(?:www\.)?invidious\.kabi\.tk',
282 r'(?:www\.)?invidious\.mastodon\.host',
283 r'(?:www\.)?invidious\.zapashcanon\.fr',
284 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
285 r'(?:www\.)?invidious\.tinfoil-hat\.net',
286 r'(?:www\.)?invidious\.himiko\.cloud',
287 r'(?:www\.)?invidious\.reallyancient\.tech',
288 r'(?:www\.)?invidious\.tube',
289 r'(?:www\.)?invidiou\.site',
290 r'(?:www\.)?invidious\.site',
291 r'(?:www\.)?invidious\.xyz',
292 r'(?:www\.)?invidious\.nixnet\.xyz',
293 r'(?:www\.)?invidious\.048596\.xyz',
294 r'(?:www\.)?invidious\.drycat\.fr',
295 r'(?:www\.)?inv\.skyn3t\.in',
296 r'(?:www\.)?tube\.poal\.co',
297 r'(?:www\.)?tube\.connect\.cafe',
298 r'(?:www\.)?vid\.wxzm\.sx',
299 r'(?:www\.)?vid\.mint\.lgbt',
300 r'(?:www\.)?vid\.puffyan\.us',
301 r'(?:www\.)?yewtu\.be',
302 r'(?:www\.)?yt\.elukerio\.org',
303 r'(?:www\.)?yt\.lelux\.fi',
304 r'(?:www\.)?invidious\.ggc-project\.de',
305 r'(?:www\.)?yt\.maisputain\.ovh',
306 r'(?:www\.)?ytprivate\.com',
307 r'(?:www\.)?invidious\.13ad\.de',
308 r'(?:www\.)?invidious\.toot\.koeln',
309 r'(?:www\.)?invidious\.fdn\.fr',
310 r'(?:www\.)?watch\.nettohikari\.com',
311 r'(?:www\.)?invidious\.namazso\.eu',
312 r'(?:www\.)?invidious\.silkky\.cloud',
313 r'(?:www\.)?invidious\.exonip\.de',
314 r'(?:www\.)?invidious\.riverside\.rocks',
315 r'(?:www\.)?invidious\.blamefran\.net',
316 r'(?:www\.)?invidious\.moomoo\.de',
317 r'(?:www\.)?ytb\.trom\.tf',
318 r'(?:www\.)?yt\.cyberhost\.uk',
319 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
320 r'(?:www\.)?qklhadlycap4cnod\.onion',
321 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
322 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
323 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
324 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
325 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
326 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
327 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
328 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
329 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
330 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
331 )
332
b2e8bc1b 333 def _login(self):
83317f69 334 """
335 Attempt to log in to YouTube.
83317f69 336 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
337 """
9d5d4d64 338
982ee69a
MB
339 if (self._LOGIN_REQUIRED
340 and self.get_param('cookiefile') is None
341 and self.get_param('cookiesfrombrowser') is None):
9d5d4d64 342 self.raise_login_required(
343 'Login details are needed to download this content', method='cookies')
68217024 344 username, password = self._get_login_info()
9d5d4d64 345 if username:
24b0a72b 346 self.report_warning(f'Cannot login to YouTube using username and password. {self._LOGIN_HINTS["cookies"]}')
b2e8bc1b 347
cce889b9 348 def _initialize_consent(self):
349 cookies = self._get_cookies('https://www.youtube.com/')
350 if cookies.get('__Secure-3PSID'):
351 return
352 consent_id = None
353 consent = cookies.get('CONSENT')
354 if consent:
355 if 'YES' in consent.value:
356 return
357 consent_id = self._search_regex(
358 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
359 if not consent_id:
360 consent_id = random.randint(100, 999)
361 self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
8d81f3e3 362
def _real_initialize(self):
    """Prepare the extractor: first the consent cookie, then the login check."""
    for step in (self._initialize_consent, self._login):
        step()
c5e8d7af 366
a0566bbf 367 _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
29f7c58a 368 _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
369 _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
a0566bbf 370
def _get_default_ytcfg(self, client='web'):
    """Return an independent deep copy of the built-in ytcfg for `client`."""
    builtin_config = INNERTUBE_CLIENTS[client]
    return copy.deepcopy(builtin_config)
109dd3b2 373
def _get_innertube_host(self, client='web'):
    """Return the INNERTUBE_HOST configured for `client` in INNERTUBE_CLIENTS."""
    client_config = INNERTUBE_CLIENTS[client]
    return client_config['INNERTUBE_HOST']
109dd3b2 376
def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
    """
    try_get on `ytcfg`, falling back to the built-in config of `default_client`
    when the value from `ytcfg` is missing or falsy.
    """
    value = try_get(ytcfg, getter, expected_type)
    if value:
        return value
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)
381
def _extract_client_name(self, ytcfg, default_client='web'):
    """
    Return the client name as a string.
    INNERTUBE_CLIENT_NAME is tried first, then the clientName stored in
    INNERTUBE_CONTEXT; the config of `default_client` is the fallback.
    """
    name_getters = (
        lambda x: x['INNERTUBE_CLIENT_NAME'],
        lambda x: x['INNERTUBE_CONTEXT']['client']['clientName'],
    )
    return self._ytcfg_get_safe(ytcfg, name_getters, compat_str, default_client)
109dd3b2 386
def _extract_client_version(self, ytcfg, default_client='web'):
    """
    Return the client version as a string.
    INNERTUBE_CLIENT_VERSION is tried first, then the clientVersion stored in
    INNERTUBE_CONTEXT; the config of `default_client` is the fallback.
    """
    version_getters = (
        lambda x: x['INNERTUBE_CLIENT_VERSION'],
        lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion'],
    )
    return self._ytcfg_get_safe(ytcfg, version_getters, compat_str, default_client)
109dd3b2 391
def _extract_api_key(self, ytcfg=None, default_client='web'):
    """Return INNERTUBE_API_KEY from `ytcfg`, or the key of `default_client` as fallback."""
    def key_getter(cfg):
        return cfg['INNERTUBE_API_KEY']

    return self._ytcfg_get_safe(ytcfg, key_getter, compat_str, default_client)
394
def _extract_context(self, ytcfg=None, default_client='web'):
    """
    Return the Innertube context to send with API requests.

    A context dict found in `ytcfg` is returned as is. Otherwise the context of
    `default_client` is used; if a `ytcfg` was given, its client name, client
    version and (when present) visitor data are written into that context.
    """
    def context_of(config):
        return try_get(config, lambda x: x['INNERTUBE_CONTEXT'], dict)

    page_context = context_of(ytcfg)
    if page_context:
        return page_context

    fallback_context = context_of(self._get_default_ytcfg(default_client))
    if ytcfg:
        # Recreate the client context (required)
        client_context = fallback_context['client']
        client_context.update({
            'clientVersion': self._extract_client_version(ytcfg, default_client),
            'clientName': self._extract_client_name(ytcfg, default_client),
        })
        visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
        if visitor_data:
            client_context['visitorData'] = visitor_data
    return fallback_context
414
cf87314d 415 _SAPISID = None
416
109dd3b2 417 def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
a5c56234 418 time_now = round(time.time())
cf87314d 419 if self._SAPISID is None:
420 yt_cookies = self._get_cookies('https://www.youtube.com')
421 # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
422 # See: https://github.com/yt-dlp/yt-dlp/issues/393
423 sapisid_cookie = dict_get(
424 yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
425 if sapisid_cookie and sapisid_cookie.value:
426 self._SAPISID = sapisid_cookie.value
427 self.write_debug('Extracted SAPISID cookie')
428 # SAPISID cookie is required if not already present
429 if not yt_cookies.get('SAPISID'):
430 self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
431 self._set_cookie(
432 '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
433 else:
434 self._SAPISID = False
435 if not self._SAPISID:
436 return None
1974e99f 437 # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
438 sapisidhash = hashlib.sha1(
cf87314d 439 f'{time_now} {self._SAPISID} {origin}'.encode('utf-8')).hexdigest()
1974e99f 440 return f'SAPISIDHASH {time_now}_{sapisidhash}'
a5c56234
M
441
442 def _call_api(self, ep, query, video_id, fatal=True, headers=None,
f4f751af 443 note='Downloading API JSON', errnote='Unable to download API page',
000c15a4 444 context=None, api_key=None, api_hostname=None, default_client='web'):
f4f751af 445
109dd3b2 446 data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
8bdd16b4 447 data.update(query)
11f9be09 448 real_headers = self.generate_api_headers(default_client=default_client)
f4f751af 449 real_headers.update({'content-type': 'application/json'})
450 if headers:
451 real_headers.update(headers)
545cc85d 452 return self._download_json(
109dd3b2 453 'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
a5c56234 454 video_id=video_id, fatal=fatal, note=note, errnote=errnote,
f4f751af 455 data=json.dumps(data).encode('utf8'), headers=real_headers,
456 query={'key': api_key or self._extract_api_key()})
457
def extract_yt_initial_data(self, item_id, webpage, fatal=True):
    """
    Locate the ytInitialData JSON object in `webpage` and parse it.
    The pattern anchored by _YT_INITIAL_BOUNDARY_RE is tried before the bare one.
    Returns None when nothing was found.
    """
    patterns = (
        r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
        self._YT_INITIAL_DATA_RE,
    )
    raw_json = self._search_regex(patterns, webpage, 'yt initial data', fatal=fatal)
    if not raw_json:
        return None
    return self._parse_json(raw_json, item_id, fatal=fatal)
0c148415 464
@staticmethod
def _extract_session_index(*data):
    """
    Index of current account in account list.
    Returns the first SESSION_INDEX among the given ytcfgs that converts to an
    integer, or None.
    See: https://github.com/yt-dlp/yt-dlp/pull/519
    """
    indices = (
        int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
        for ytcfg in data)
    return next((index for index in indices if index is not None), None)
475
476 # Deprecated?
def _extract_identity_token(self, ytcfg=None, webpage=None):
    """
    Return the identity token: ID_TOKEN from `ytcfg` when available,
    otherwise the ID_TOKEN value found in `webpage`, otherwise None.
    """
    token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str) if ytcfg else None
    if token:
        return token
    if not webpage:
        return None
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None, fatal=False)
a1c5d2ca
M
486
@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg
    """
    datasync_getters = (
        lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
        lambda x: x['DATASYNC_ID'],
    )
    for source in args:
        # ytcfg includes channel_syncid if on secondary channel
        delegated_session_id = try_get(source, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
        if delegated_session_id:
            return delegated_session_id

        datasync_id = try_get(source, datasync_getters, compat_str) or ''
        id_parts = datasync_id.split('||')
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        if len(id_parts) >= 2 and id_parts[1]:
            return id_parts[0]
a1c5d2ca 505
@staticmethod
def _extract_visitor_data(*args):
    """
    Extracts visitorData from an API response or ytcfg
    Appears to be used to track session state
    """
    visitor_data_paths = (
        'VISITOR_DATA',
        ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
        ('responseContext', 'visitorData'),
    )
    return get_first(args, visitor_data_paths, expected_type=str)
ac56cf38 515
@property
def is_authenticated(self):
    """True when a SAPISIDHASH authorization header can be generated."""
    auth_header = self._generate_sapisidhash_header()
    return bool(auth_header)
519
def extract_ytcfg(self, video_id, webpage):
    """
    Parse the argument of the first `ytcfg.set({...});` call in `webpage`.
    Returns an empty dict when there is no webpage, no match or invalid JSON.
    """
    if not webpage:
        return {}
    raw_ytcfg = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
        default='{}')
    parsed = self._parse_json(raw_ytcfg, video_id, fatal=False)
    return parsed or {}
527
def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """
    Build the HTTP headers for an Innertube API request.

    Explicit arguments take precedence; missing values are extracted from
    `ytcfg`, with the built-in config of `default_client` as fallback for the
    client name, client version and host. Headers whose value is None are
    omitted from the result.
    """
    origin = 'https://' + (api_hostname or self._get_innertube_host(default_client))
    client_name = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)

    headers = {}
    headers['X-YouTube-Client-Name'] = compat_str(client_name)
    headers['X-YouTube-Client-Version'] = self._extract_client_version(ytcfg, default_client)
    headers['Origin'] = origin
    headers['X-Youtube-Identity-Token'] = identity_token or self._extract_identity_token(ytcfg)
    headers['X-Goog-PageId'] = account_syncid or self._extract_account_syncid(ytcfg)
    headers['X-Goog-Visitor-Id'] = visitor_data or self._extract_visitor_data(ytcfg)

    if session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if session_index is not None:
        headers['X-Goog-AuthUser'] = session_index
    elif account_syncid:
        # A sync id without a known session index falls back to index 0
        headers['X-Goog-AuthUser'] = 0

    authorization = self._generate_sapisidhash_header(origin)
    if authorization is not None:
        headers['Authorization'] = authorization
        headers['X-Origin'] = origin

    return {name: value for name, value in headers.items() if value is not None}
29f7c58a 552
2d6659b9 553 @staticmethod
554 def _build_api_continuation_query(continuation, ctp=None):
555 query = {
556 'continuation': continuation
557 }
558 # TODO: Inconsistency with clickTrackingParams.
559 # Currently we have a fixed ctp contained within context (from ytcfg)
560 # and a ctp in root query for continuation.
561 if ctp:
562 query['clickTracking'] = {'clickTrackingParams': ctp}
563 return query
564
@classmethod
def _extract_next_continuation_data(cls, renderer):
    """
    Build a continuation query from the nextContinuationData or
    reloadContinuationData of `renderer`. Returns None when there is no such
    data or it carries no continuation token.
    """
    continuation_data = try_get(
        renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
                   lambda x: x['continuation']['reloadContinuationData']), dict)
    token = continuation_data.get('continuation') if continuation_data else None
    if not token:
        return None
    return cls._build_api_continuation_query(
        token, continuation_data.get('clickTrackingParams'))
2d6659b9 577
@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """
    Build a continuation query from a continuation endpoint dict.
    Returns None for non-dict input or when the endpoint has no
    continuationCommand token.
    """
    if not isinstance(continuation_ep, dict):
        return None
    token = try_get(
        continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
    if not token:
        return None
    return cls._build_api_continuation_query(
        token, continuation_ep.get('clickTrackingParams'))
2d6659b9 587
@classmethod
def _extract_continuation(cls, renderer):
    """
    Return the continuation query for `renderer`, or None.

    The renderer's own continuation data is preferred; otherwise the entries of
    its 'contents' and 'items' lists are searched for a continuationItemRenderer.
    """
    direct = cls._extract_next_continuation_data(renderer)
    if direct:
        return direct

    entries = [
        entry
        for key in ('contents', 'items')
        for entry in try_get(renderer, lambda x: x[key], list) or []]

    endpoint_getters = (
        lambda x: x['continuationItemRenderer']['continuationEndpoint'],
        lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command'],
    )
    for entry in entries:
        if isinstance(entry, dict):
            endpoint = try_get(entry, endpoint_getters, dict)
            query = cls._extract_continuation_ep_data(endpoint)
            if query:
                return query
608
@classmethod
def _extract_alerts(cls, data):
    """
    Yield (alert_type, message) for every alert in data['alerts'] that has
    both a 'type' and a non-empty text.

    Entries of the alerts list, and the alert values inside them, that are not
    dicts are skipped instead of raising AttributeError.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # Malformed alerts (e.g. a string or None) cannot carry a type
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = cls._get_text(alert, 'text')
            if message:
                yield alert_type, message
621
c0ac49bc 622 def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
109dd3b2 623 errors = []
624 warnings = []
625 for alert_type, alert_message in alerts:
641ad5d8 626 if alert_type.lower() == 'error' and fatal:
109dd3b2 627 errors.append([alert_type, alert_message])
628 else:
629 warnings.append([alert_type, alert_message])
630
631 for alert_type, alert_message in (warnings + errors[:-1]):
c0ac49bc 632 self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message), only_once=only_once)
109dd3b2 633 if errors:
634 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
635
def _extract_and_report_alerts(self, data, *args, **kwargs):
    """Extract the alerts from `data` and pass them, with the remaining arguments, to _report_alerts."""
    alerts = self._extract_alerts(data)
    return self._report_alerts(alerts, *args, **kwargs)
638
def _extract_badges(self, renderer: dict):
    """Return the set of lowercased metadataBadgeRenderer labels listed in renderer['badges']."""
    badge_list = try_get(renderer, lambda x: x['badges'], list) or []
    labels = (
        try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
        for badge in badge_list)
    return {label.lower() for label in labels if label}
646
@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """
    Return the first non-empty text found at any of the given paths in `data`.

    Each candidate object is read as its 'simpleText', or else as the joined
    'text' values of its 'runs' (a plain list is treated as the runs
    themselves), limited to the first `max_runs` runs when given.
    With no paths, `data` itself is the candidate. Returns None when no text
    was found.
    """
    for path in path_list or [None]:
        if path is None:
            candidates = [data]
        else:
            candidates = traverse_obj(data, path, default=[])
            # Without `...` or an alternative (list/tuple) key in the path
            # the result is wrapped so that it is handled as one candidate
            is_branching = any(
                key is ... or isinstance(key, (list, tuple)) for key in variadic(path))
            if not is_branching:
                candidates = [candidates]

        for candidate in candidates:
            simple_text = try_get(candidate, lambda x: x['simpleText'], compat_str)
            if simple_text:
                return simple_text

            runs = try_get(candidate, lambda x: x['runs'], list) or []
            if not runs and isinstance(candidate, list):
                runs = candidate

            selected_runs = runs[:max_runs or len(runs)]
            joined = ''.join(traverse_obj(selected_runs, (..., 'text'), expected_type=str, default=[]))
            if joined:
                return joined
47193e02 668
109dd3b2 669 def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
670 ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
000c15a4 671 default_client='web'):
109dd3b2 672 response = None
673 last_error = None
674 count = -1
675 retries = self.get_param('extractor_retries', 3)
676 if check_get_keys is None:
677 check_get_keys = []
678 while count < retries:
679 count += 1
680 if last_error:
c0ac49bc 681 self.report_warning('%s. Retrying ...' % remove_end(last_error, '.'))
109dd3b2 682 try:
683 response = self._call_api(
684 ep=ep, fatal=True, headers=headers,
685 video_id=item_id, query=query,
686 context=self._extract_context(ytcfg, default_client),
687 api_key=self._extract_api_key(ytcfg, default_client),
688 api_hostname=api_hostname, default_client=default_client,
689 note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
690 except ExtractorError as e:
9c0d7f49 691 if isinstance(e.cause, network_exceptions):
641ad5d8 692 if isinstance(e.cause, compat_HTTPError) and not is_html(e.cause.read(512)):
693 e.cause.seek(0)
694 yt_error = try_get(
695 self._parse_json(e.cause.read().decode(), item_id, fatal=False),
696 lambda x: x['error']['message'], compat_str)
697 if yt_error:
698 self._report_alerts([('ERROR', yt_error)], fatal=False)
109dd3b2 699 # Downloading page may result in intermittent 5xx HTTP error
700 # Sometimes a 404 is also recieved. See: https://github.com/ytdl-org/youtube-dl/issues/28289
9c0d7f49 701 # We also want to catch all other network exceptions since errors in later pages can be troublesome
702 # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
703 if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
526d74ec 704 last_error = error_to_compat_str(e.cause or e.msg)
9c0d7f49 705 if count < retries:
706 continue
109dd3b2 707 if fatal:
708 raise
709 else:
710 self.report_warning(error_to_compat_str(e))
711 return
712
713 else:
109dd3b2 714 try:
ac56cf38 715 self._extract_and_report_alerts(response, only_once=True)
109dd3b2 716 except ExtractorError as e:
c0ac49bc 717 # YouTube servers may return errors we want to retry on in a 200 OK response
718 # See: https://github.com/yt-dlp/yt-dlp/issues/839
719 if 'unknown error' in e.msg.lower():
720 last_error = e.msg
721 continue
109dd3b2 722 if fatal:
723 raise
724 self.report_warning(error_to_compat_str(e))
725 return
726 if not check_get_keys or dict_get(response, check_get_keys):
727 break
728 # Youtube sometimes sends incomplete data
729 # See: https://github.com/ytdl-org/youtube-dl/issues/28194
730 last_error = 'Incomplete data received'
731 if count >= retries:
732 if fatal:
733 raise ExtractorError(last_error)
734 else:
735 self.report_warning(last_error)
736 return
737 return response
738
9297939e 739 @staticmethod
740 def is_music_url(url):
741 return re.match(r'https?://music\.youtube\.com/', url) is not None
742
def _extract_video(self, renderer):
    """
    Convert a video renderer into a 'url' result for YoutubeIE, carrying the
    title, description, duration, view count and uploader found in the renderer.
    """
    video_id = renderer.get('videoId')
    title = self._get_text(renderer, 'title')
    description = self._get_text(renderer, 'descriptionSnippet')

    # The length is taken from lengthText or, failing that, a thumbnail time overlay
    length_text = self._get_text(
        renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text'))
    duration = parse_duration(length_text)

    # Only a leading run of digits and commas (whitespace removed) counts as the view count
    view_count_text = self._get_text(renderer, 'viewCountText') or ''
    compact_view_count = re.sub(r'\s', '', view_count_text)
    view_count = str_to_int(self._search_regex(
        r'^([\d,]+)', compact_view_count,
        'view count', default=None))

    uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')

    result = {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': f'https://www.youtube.com/watch?v={video_id}',
    }
    result.update({
        'title': title,
        'description': description,
        'duration': duration,
        'view_count': view_count,
        'uploader': uploader,
    })
    return result
767
0c148415 768
360e1ca5 769class YoutubeIE(YoutubeBaseInfoExtractor):
96565c7e 770 IE_DESC = 'YouTube'
cb7dfeea 771 _VALID_URL = r"""(?x)^
c5e8d7af 772 (
edb53e2d 773 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 774 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
775 (?:www\.)?deturl\.com/www\.youtube\.com|
776 (?:www\.)?pwnyoutube\.com|
777 (?:www\.)?hooktube\.com|
778 (?:www\.)?yourepeat\.com|
779 tube\.majestyc\.net|
780 %(invidious)s|
781 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
782 (?:.*?\#/)? # handle anchor (#/) redirect urls
783 (?: # the various things that can precede the ID:
8fc54b12 784 (?:(?:v|embed|e|shorts)/(?!videoseries)) # v/ or embed/ or e/ or shorts/
c5e8d7af 785 |(?: # or the v= param in all its forms
f7000f3a 786 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 787 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 788 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
789 v=
790 )
f4b05232 791 ))
cbaed4bb
S
792 |(?:
793 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
794 vid\.plus| # or vid.plus/xxxx
795 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 796 %(invidious)s
cbaed4bb 797 )/
edb53e2d 798 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 799 )
c5e8d7af 800 )? # all until now is optional -> you can pass the naked ID
201c1459 801 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
c5e8d7af 802 (?(1).+)? # if we found the ID, everything can follow
9297939e 803 (?:\#|$)""" % {
d9190e44 804 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
bc2ca1bb 805 }
e40c758c 806 _PLAYER_INFO_RE = (
cc2db878 807 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
808 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 809 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 810 )
# Static per-format metadata, keyed by itag written as a string (plus the
# special key '_rtmp'). Each value is a dict holding whichever of these fields
# are fixed for that itag: 'ext', 'width', 'height', 'acodec', 'vcodec', 'abr',
# 'fps', 'container', 'format_note', 'preference', 'protocol'.
# Dimensions are left out where the in-line comments say they vary (height for
# itags 36 and 138). The 3D entries carry 'preference': -20 and the HLS entries
# 'preference': -10.
# NOTE(review): the comment for itag 272 says it is served as either 3840x2160
# or 7680x4320, yet the entry still pins 'height': 2160 -- confirm whether the
# height should be omitted as it is for 138.
# NOTE(review): how this table is combined with data extracted at runtime is
# not visible in this chunk -- confirm against the code that reads it.
2c62dc26 811 _formats = {
c2d3cb4c 812 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
813 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
814 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
815 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
816 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
817 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
818 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
819 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 820 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 821 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
822 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
823 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
824 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
825 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
826 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 827 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 828 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
829 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 830
831
832 # 3D videos
c2d3cb4c 833 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
834 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
835 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
836 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 837 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
838 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
839 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 840
96fb5605 841 # Apple HTTP Live Streaming
11f12195 842 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 843 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
844 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
845 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
846 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
847 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 848 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
849 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
850
851 # DASH mp4 video
d23028a8
S
852 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
853 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
854 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
855 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
856 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 857 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
858 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
859 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
860 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
861 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
862 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
863 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 864
f6f1fc92 865 # Dash mp4 audio
d23028a8
S
866 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
867 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
868 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
869 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
870 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
871 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
872 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
873
874 # Dash webm
d23028a8
S
875 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
876 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
877 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
878 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
879 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
880 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
881 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
882 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
883 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
884 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
885 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
886 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
887 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
888 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
889 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 890 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
891 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
892 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
893 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
894 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
895 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
896 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
897
898 # Dash webm audio
d23028a8
S
899 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
900 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 901
0857baad 902 # Dash webm audio with opus inside
d23028a8
S
903 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
904 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
905 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 906
ce6b9a2d
PH
907 # RTMP (unnamed)
908 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
909
910 # av01 video only formats sometimes served with "unknown" codecs
9b5fa9ee
TOH
911 '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
912 '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
913 '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
914 '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
915 '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
916 '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
917 '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
918 '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
c5e8d7af 919 }
# Tuple of subtitle format identifiers.
# NOTE(review): presumably the formats requested for each subtitle track, in
# this order -- the code that reads the tuple is outside this chunk; confirm.
29f7c58a 920 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 921
# Geo-bypass flag, set to False for this extractor.
# NOTE(review): presumably overrides a default inherited from the base
# extractor class (InfoExtractor is imported from .common) -- confirm there.
fd5c4aab
S
922 _GEO_BYPASS = False
923
# Name of this extractor.
78caa52a 924 IE_NAME = 'youtube'
2eb88d95
PH
925 _TESTS = [
926 {
2d3d2997 927 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
928 'info_dict': {
929 'id': 'BaW_jenozKc',
930 'ext': 'mp4',
3867038a 931 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
932 'uploader': 'Philipp Hagemeister',
933 'uploader_id': 'phihag',
ec85ded8 934 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
ff9f925b 935 'channel': 'Philipp Hagemeister',
dd4c4492
S
936 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
937 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 938 'upload_date': '20121002',
ff9f925b 939 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
4bc3a23e 940 'categories': ['Science & Technology'],
3867038a 941 'tags': ['youtube-dl'],
556dbe7f 942 'duration': 10,
dbdaaa23 943 'view_count': int,
3e7c1224 944 'like_count': int,
ff9f925b 945 # 'dislike_count': int,
946 'availability': 'public',
947 'playable_in_embed': True,
948 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
949 'live_status': 'not_live',
950 'age_limit': 0,
7c80519c 951 'start_time': 1,
297a564b 952 'end_time': 9,
2eb88d95 953 }
0e853ca4 954 },
fccd3771 955 {
4bc3a23e
PH
956 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
957 'note': 'Embed-only video (#1746)',
958 'info_dict': {
959 'id': 'yZIXLfi8CZQ',
960 'ext': 'mp4',
961 'upload_date': '20120608',
962 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
963 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
964 'uploader': 'SET India',
94bfcd23 965 'uploader_id': 'setindia',
ec85ded8 966 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 967 'age_limit': 18,
545cc85d 968 },
969 'skip': 'Private video',
fccd3771 970 },
11b56058 971 {
8bdd16b4 972 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
973 'note': 'Use the first video ID in the URL',
974 'info_dict': {
975 'id': 'BaW_jenozKc',
976 'ext': 'mp4',
3867038a 977 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
978 'uploader': 'Philipp Hagemeister',
979 'uploader_id': 'phihag',
ec85ded8 980 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 981 'upload_date': '20121002',
3867038a 982 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
11b56058 983 'categories': ['Science & Technology'],
3867038a 984 'tags': ['youtube-dl'],
556dbe7f 985 'duration': 10,
dbdaaa23 986 'view_count': int,
11b56058
PM
987 'like_count': int,
988 'dislike_count': int,
34a7de29
S
989 },
990 'params': {
991 'skip_download': True,
992 },
11b56058 993 },
dd27fd17 994 {
2d3d2997 995 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
996 'note': '256k DASH audio (format 141) via DASH manifest',
997 'info_dict': {
998 'id': 'a9LDPn-MO4I',
999 'ext': 'm4a',
1000 'upload_date': '20121002',
1001 'uploader_id': '8KVIDEO',
ec85ded8 1002 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
1003 'description': '',
1004 'uploader': '8KVIDEO',
1005 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 1006 },
4bc3a23e
PH
1007 'params': {
1008 'youtube_include_dash_manifest': True,
1009 'format': '141',
4919603f 1010 },
de3c7fe0 1011 'skip': 'format 141 not served anymore',
dd27fd17 1012 },
8bdd16b4 1013 # DASH manifest with encrypted signature
1014 {
1015 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1016 'info_dict': {
1017 'id': 'IB3lcPjvWLA',
1018 'ext': 'm4a',
1019 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1020 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1021 'duration': 244,
1022 'uploader': 'AfrojackVEVO',
1023 'uploader_id': 'AfrojackVEVO',
1024 'upload_date': '20131011',
cc2db878 1025 'abr': 129.495,
8bdd16b4 1026 },
1027 'params': {
1028 'youtube_include_dash_manifest': True,
1029 'format': '141/bestaudio[ext=m4a]',
1030 },
1031 },
65c2fde2 1032 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
c522adb1 1033 {
65c2fde2 1034 'note': 'Embed allowed age-gate video',
2d3d2997 1035 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
1036 'info_dict': {
1037 'id': 'HtVdAasjOgU',
1038 'ext': 'mp4',
1039 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 1040 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 1041 'duration': 142,
c522adb1
JMF
1042 'uploader': 'The Witcher',
1043 'uploader_id': 'WitcherGame',
ec85ded8 1044 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 1045 'upload_date': '20140605',
34952f09 1046 'age_limit': 18,
c522adb1
JMF
1047 },
1048 },
65c2fde2 1049 {
1050 'note': 'Age-gate video with embed allowed in public site',
1051 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1052 'info_dict': {
1053 'id': 'HsUATh_Nc2U',
1054 'ext': 'mp4',
1055 'title': 'Godzilla 2 (Official Video)',
1056 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1057 'upload_date': '20200408',
1058 'uploader_id': 'FlyingKitty900',
1059 'uploader': 'FlyingKitty',
1060 'age_limit': 18,
1061 },
1062 },
1063 {
1064 'note': 'Age-gate video embedable only with clientScreen=EMBED',
1065 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1066 'info_dict': {
1067 'id': 'Tq92D6wQ1mg',
1068 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
3619f78d 1069 'ext': 'mp4',
1070 'upload_date': '20191227',
65c2fde2 1071 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1072 'uploader': 'Projekt Melody',
1073 'description': 'md5:17eccca93a786d51bc67646756894066',
1074 'age_limit': 18,
1075 },
1076 },
1077 {
1078 'note': 'Non-Agegated non-embeddable video',
1079 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1080 'info_dict': {
1081 'id': 'MeJVWBSsPAY',
1082 'ext': 'mp4',
1083 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1084 'uploader': 'Herr Lurik',
1085 'uploader_id': 'st3in234',
1086 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1087 'upload_date': '20130730',
1088 },
1089 },
1090 {
1091 'note': 'Non-bypassable age-gated video',
1092 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1093 'only_matching': True,
1094 },
8bdd16b4 1095 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1096 # YouTube Red ad is not captured for creator
1097 {
1098 'url': '__2ABJjxzNo',
1099 'info_dict': {
1100 'id': '__2ABJjxzNo',
1101 'ext': 'mp4',
1102 'duration': 266,
1103 'upload_date': '20100430',
1104 'uploader_id': 'deadmau5',
1105 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 1106 'creator': 'deadmau5',
1107 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 1108 'uploader': 'deadmau5',
1109 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 1110 'alt_title': 'Some Chords',
8bdd16b4 1111 },
1112 'expected_warnings': [
1113 'DASH manifest missing',
1114 ]
1115 },
067aa17e 1116 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
1117 {
1118 'url': 'lqQg6PlCWgI',
1119 'info_dict': {
1120 'id': 'lqQg6PlCWgI',
1121 'ext': 'mp4',
556dbe7f 1122 'duration': 6085,
90227264 1123 'upload_date': '20150827',
cbe2bd91 1124 'uploader_id': 'olympic',
ec85ded8 1125 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 1126 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
11f9be09 1127 'uploader': 'Olympics',
cbe2bd91
PH
1128 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1129 },
1130 'params': {
1131 'skip_download': 'requires avconv',
e52a40ab 1132 }
cbe2bd91 1133 },
6271f1ca
PH
1134 # Non-square pixels
1135 {
1136 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1137 'info_dict': {
1138 'id': '_b-2C3KPAM0',
1139 'ext': 'mp4',
1140 'stretched_ratio': 16 / 9.,
556dbe7f 1141 'duration': 85,
6271f1ca
PH
1142 'upload_date': '20110310',
1143 'uploader_id': 'AllenMeow',
ec85ded8 1144 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 1145 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 1146 'uploader': '孫ᄋᄅ',
6271f1ca
PH
1147 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1148 },
06b491eb
S
1149 },
1150 # url_encoded_fmt_stream_map is empty string
1151 {
1152 'url': 'qEJwOuvDf7I',
1153 'info_dict': {
1154 'id': 'qEJwOuvDf7I',
f57b7835 1155 'ext': 'webm',
06b491eb
S
1156 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1157 'description': '',
1158 'upload_date': '20150404',
1159 'uploader_id': 'spbelect',
1160 'uploader': 'Наблюдатели Петербурга',
1161 },
1162 'params': {
1163 'skip_download': 'requires avconv',
e323cf3f
S
1164 },
1165 'skip': 'This live event has ended.',
06b491eb 1166 },
067aa17e 1167 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
1168 {
1169 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1170 'info_dict': {
1171 'id': 'FIl7x6_3R5Y',
eb6793ba 1172 'ext': 'webm',
da77d856
S
1173 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1174 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 1175 'duration': 220,
da77d856
S
1176 'upload_date': '20150625',
1177 'uploader_id': 'dorappi2000',
ec85ded8 1178 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 1179 'uploader': 'dorappi2000',
eb6793ba 1180 'formats': 'mincount:31',
da77d856 1181 },
eb6793ba 1182 'skip': 'not actual anymore',
2ee8f5d8 1183 },
8a1a26ce
YCH
1184 # DASH manifest with segment_list
1185 {
1186 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1187 'md5': '8ce563a1d667b599d21064e982ab9e31',
1188 'info_dict': {
1189 'id': 'CsmdDsKjzN8',
1190 'ext': 'mp4',
17ee98e1 1191 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
1192 'uploader': 'Airtek',
1193 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1194 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1195 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1196 },
1197 'params': {
1198 'youtube_include_dash_manifest': True,
1199 'format': '135', # bestvideo
be49068d
S
1200 },
1201 'skip': 'This live event has ended.',
2ee8f5d8 1202 },
cf7e015f
S
1203 {
1204 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 1205 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 1206 'info_dict': {
545cc85d 1207 'id': 'jvGDaLqkpTg',
1208 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1209 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
1210 },
1211 'playlist': [{
1212 'info_dict': {
545cc85d 1213 'id': 'jvGDaLqkpTg',
cf7e015f 1214 'ext': 'mp4',
545cc85d 1215 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1216 'description': 'md5:e03b909557865076822aa169218d6a5d',
1217 'duration': 10643,
1218 'upload_date': '20161111',
1219 'uploader': 'Team PGP',
1220 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1221 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1222 },
1223 }, {
1224 'info_dict': {
545cc85d 1225 'id': '3AKt1R1aDnw',
cf7e015f 1226 'ext': 'mp4',
545cc85d 1227 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1228 'description': 'md5:e03b909557865076822aa169218d6a5d',
1229 'duration': 10991,
1230 'upload_date': '20161111',
1231 'uploader': 'Team PGP',
1232 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1233 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1234 },
1235 }, {
1236 'info_dict': {
545cc85d 1237 'id': 'RtAMM00gpVc',
cf7e015f 1238 'ext': 'mp4',
545cc85d 1239 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1240 'description': 'md5:e03b909557865076822aa169218d6a5d',
1241 'duration': 10995,
1242 'upload_date': '20161111',
1243 'uploader': 'Team PGP',
1244 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1245 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1246 },
1247 }, {
1248 'info_dict': {
545cc85d 1249 'id': '6N2fdlP3C5U',
cf7e015f 1250 'ext': 'mp4',
545cc85d 1251 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1252 'description': 'md5:e03b909557865076822aa169218d6a5d',
1253 'duration': 10990,
1254 'upload_date': '20161111',
1255 'uploader': 'Team PGP',
1256 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1257 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1258 },
1259 }],
1260 'params': {
1261 'skip_download': True,
1262 },
65c2fde2 1263 'skip': 'Not multifeed anymore',
cbaed4bb 1264 },
f9f49d87 1265 {
067aa17e 1266 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
1267 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1268 'info_dict': {
1269 'id': 'gVfLd0zydlo',
1270 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1271 },
1272 'playlist_count': 2,
be49068d 1273 'skip': 'Not multifeed anymore',
f9f49d87 1274 },
cbaed4bb 1275 {
2d3d2997 1276 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 1277 'only_matching': True,
0e49d9a6 1278 },
6d4fc66b 1279 {
2d3d2997 1280 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
1281 'only_matching': True,
1282 },
0e49d9a6 1283 {
067aa17e 1284 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 1285 # Also tests cut-off URL expansion in video description (see
067aa17e
S
1286 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1287 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
1288 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1289 'info_dict': {
1290 'id': 'lsguqyKfVQg',
1291 'ext': 'mp4',
1292 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
11f9be09 1293 'alt_title': 'Dark Walk',
0e49d9a6 1294 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 1295 'duration': 133,
0e49d9a6
LL
1296 'upload_date': '20151119',
1297 'uploader_id': 'IronSoulElf',
ec85ded8 1298 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 1299 'uploader': 'IronSoulElf',
11f9be09 1300 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1301 'track': 'Dark Walk',
1302 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
92bc97d3 1303 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
1304 },
1305 'params': {
1306 'skip_download': True,
1307 },
1308 },
61f92af1 1309 {
067aa17e 1310 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
1311 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1312 'only_matching': True,
1313 },
313dfc45
LL
1314 {
1315 # Video with yt:stretch=17:0
1316 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1317 'info_dict': {
1318 'id': 'Q39EVAstoRM',
1319 'ext': 'mp4',
1320 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1321 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1322 'upload_date': '20151107',
1323 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1324 'uploader': 'CH GAMER DROID',
1325 },
1326 'params': {
1327 'skip_download': True,
1328 },
be49068d 1329 'skip': 'This video does not exist.',
313dfc45 1330 },
201c1459 1331 {
1332 # Video with incomplete 'yt:stretch=16:'
1333 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1334 'only_matching': True,
1335 },
7caf9830
S
1336 {
1337 # Video licensed under Creative Commons
1338 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1339 'info_dict': {
1340 'id': 'M4gD1WSo5mA',
1341 'ext': 'mp4',
1342 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1343 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1344 'duration': 721,
7caf9830
S
1345 'upload_date': '20150127',
1346 'uploader_id': 'BerkmanCenter',
ec85ded8 1347 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 1348 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
1349 'license': 'Creative Commons Attribution license (reuse allowed)',
1350 },
1351 'params': {
1352 'skip_download': True,
1353 },
1354 },
fd050249
S
1355 {
1356 # Channel-like uploader_url
1357 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1358 'info_dict': {
1359 'id': 'eQcmzGIKrzg',
1360 'ext': 'mp4',
1361 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 1362 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1363 'duration': 4060,
fd050249 1364 'upload_date': '20151119',
eb6793ba 1365 'uploader': 'Bernie Sanders',
fd050249 1366 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1367 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
1368 'license': 'Creative Commons Attribution license (reuse allowed)',
1369 },
1370 'params': {
1371 'skip_download': True,
1372 },
1373 },
040ac686
S
1374 {
1375 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1376 'only_matching': True,
7f29cf54
S
1377 },
1378 {
067aa17e 1379 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1380 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1381 'only_matching': True,
6496ccb4
S
1382 },
1383 {
1384 # Rental video preview
1385 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1386 'info_dict': {
1387 'id': 'uGpuVWrhIzE',
1388 'ext': 'mp4',
1389 'title': 'Piku - Trailer',
1390 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1391 'upload_date': '20150811',
1392 'uploader': 'FlixMatrix',
1393 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1394 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1395 'license': 'Standard YouTube License',
1396 },
1397 'params': {
1398 'skip_download': True,
1399 },
eb6793ba 1400 'skip': 'This video is not available.',
022a5d66 1401 },
12afdc2a
S
1402 {
1403 # YouTube Red video with episode data
1404 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1405 'info_dict': {
1406 'id': 'iqKdEhx-dD4',
1407 'ext': 'mp4',
1408 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1409 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1410 'duration': 2085,
12afdc2a
S
1411 'upload_date': '20170118',
1412 'uploader': 'Vsauce',
1413 'uploader_id': 'Vsauce',
1414 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1415 'series': 'Mind Field',
1416 'season_number': 1,
1417 'episode_number': 1,
1418 },
1419 'params': {
1420 'skip_download': True,
1421 },
1422 'expected_warnings': [
1423 'Skipping DASH manifest',
1424 ],
1425 },
c7121fa7
S
1426 {
1427 # The following content has been identified by the YouTube community
1428 # as inappropriate or offensive to some audiences.
1429 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1430 'info_dict': {
1431 'id': '6SJNVb0GnPI',
1432 'ext': 'mp4',
1433 'title': 'Race Differences in Intelligence',
1434 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1435 'duration': 965,
1436 'upload_date': '20140124',
1437 'uploader': 'New Century Foundation',
1438 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1439 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1440 },
1441 'params': {
1442 'skip_download': True,
1443 },
545cc85d 1444 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1445 },
022a5d66
S
1446 {
1447 # itag 212
1448 'url': '1t24XAntNCY',
1449 'only_matching': True,
fd5c4aab
S
1450 },
1451 {
1452 # geo restricted to JP
1453 'url': 'sJL6WA-aGkQ',
1454 'only_matching': True,
1455 },
cd5a74a2
S
1456 {
1457 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1458 'only_matching': True,
1459 },
bc2ca1bb 1460 {
1461 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1462 'only_matching': True,
1463 },
1464 {
1465 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1466 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1467 'only_matching': True,
1468 },
825cd268
RA
1469 {
1470 # DRM protected
1471 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1472 'only_matching': True,
4fe54c12
S
1473 },
1474 {
1475 # Video with unsupported adaptive stream type formats
1476 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1477 'info_dict': {
1478 'id': 'Z4Vy8R84T1U',
1479 'ext': 'mp4',
1480 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1481 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1482 'duration': 433,
1483 'upload_date': '20130923',
1484 'uploader': 'Amelia Putri Harwita',
1485 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1486 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1487 'formats': 'maxcount:10',
1488 },
1489 'params': {
1490 'skip_download': True,
1491 'youtube_include_dash_manifest': False,
1492 },
5429d6a9 1493 'skip': 'not actual anymore',
5caabd3c 1494 },
1495 {
822b9d9c 1496 # Youtube Music Auto-generated description
5caabd3c 1497 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1498 'info_dict': {
1499 'id': 'MgNrAu2pzNs',
1500 'ext': 'mp4',
1501 'title': 'Voyeur Girl',
1502 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1503 'upload_date': '20190312',
5429d6a9
S
1504 'uploader': 'Stephen - Topic',
1505 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1506 'artist': 'Stephen',
1507 'track': 'Voyeur Girl',
1508 'album': 'it\'s too much love to know my dear',
1509 'release_date': '20190313',
1510 'release_year': 2019,
1511 },
1512 'params': {
1513 'skip_download': True,
1514 },
1515 },
66b48727
RA
1516 {
1517 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1518 'only_matching': True,
1519 },
011e75e6
S
1520 {
1521 # invalid -> valid video id redirection
1522 'url': 'DJztXj2GPfl',
1523 'info_dict': {
1524 'id': 'DJztXj2GPfk',
1525 'ext': 'mp4',
1526 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1527 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1528 'upload_date': '20090125',
1529 'uploader': 'Prochorowka',
1530 'uploader_id': 'Prochorowka',
1531 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1532 'artist': 'Panjabi MC',
1533 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1534 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1535 },
1536 'params': {
1537 'skip_download': True,
1538 },
545cc85d 1539 'skip': 'Video unavailable',
ea74e00b
DP
1540 },
1541 {
1542 # empty description results in an empty string
1543 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1544 'info_dict': {
1545 'id': 'x41yOUIvK2k',
1546 'ext': 'mp4',
1547 'title': 'IMG 3456',
1548 'description': '',
1549 'upload_date': '20170613',
1550 'uploader_id': 'ElevageOrVert',
1551 'uploader': 'ElevageOrVert',
1552 },
1553 'params': {
1554 'skip_download': True,
1555 },
1556 },
a0566bbf 1557 {
29f7c58a 1558 # with '};' inside yt initial data (see [1])
1559 # see [2] for an example with '};' inside ytInitialPlayerResponse
1560 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1561 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1562 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1563 'info_dict': {
1564 'id': 'CHqg6qOn4no',
1565 'ext': 'mp4',
1566 'title': 'Part 77 Sort a list of simple types in c#',
1567 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1568 'upload_date': '20130831',
1569 'uploader_id': 'kudvenkat',
1570 'uploader': 'kudvenkat',
1571 },
1572 'params': {
1573 'skip_download': True,
1574 },
1575 },
29f7c58a 1576 {
1577 # another example of '};' in ytInitialData
1578 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1579 'only_matching': True,
1580 },
1581 {
1582 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1583 'only_matching': True,
1584 },
545cc85d 1585 {
cc2db878 1586 # https://github.com/ytdl-org/youtube-dl/pull/28094
1587 'url': 'OtqTfy26tG0',
1588 'info_dict': {
1589 'id': 'OtqTfy26tG0',
1590 'ext': 'mp4',
1591 'title': 'Burn Out',
1592 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1593 'upload_date': '20141120',
1594 'uploader': 'The Cinematic Orchestra - Topic',
1595 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1596 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1597 'artist': 'The Cinematic Orchestra',
1598 'track': 'Burn Out',
1599 'album': 'Every Day',
1600 'release_data': None,
1601 'release_year': None,
1602 },
1603 'params': {
1604 'skip_download': True,
1605 },
545cc85d 1606 },
bc2ca1bb 1607 {
1608 # controversial video, only works with bpctr when authenticated with cookies
1609 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1610 'only_matching': True,
1611 },
a1a7907b 1612 {
1613 # controversial video, requires bpctr/contentCheckOk
1614 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
1615 'info_dict': {
1616 'id': 'SZJvDhaSDnc',
1617 'ext': 'mp4',
1618 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
1619 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
1620 'uploader': 'CBS This Morning',
11f9be09 1621 'uploader_id': 'CBSThisMorning',
a1a7907b 1622 'upload_date': '20140716',
1623 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7'
1624 }
1625 },
f7ad7160 1626 {
1627 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1628 'url': 'cBvYw8_A0vQ',
1629 'info_dict': {
1630 'id': 'cBvYw8_A0vQ',
1631 'ext': 'mp4',
1632 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1633 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1634 'upload_date': '20201120',
1635 'uploader': 'Walk around Japan',
1636 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1637 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1638 },
1639 'params': {
1640 'skip_download': True,
1641 },
0fb983f6 1642 }, {
1643 # Has multiple audio streams
1644 'url': 'WaOKSUlf4TM',
1645 'only_matching': True
9297939e 1646 }, {
1647 # Requires Premium: has format 141 when requested using YTM url
1648 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1649 'only_matching': True
1650 }, {
120916da 1651 # multiple subtitles with same lang_code
1652 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1653 'only_matching': True,
109dd3b2 1654 }, {
1655 # Force use android client fallback
1656 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1657 'info_dict': {
1658 'id': 'YOelRv7fMxY',
11f9be09 1659 'title': 'DIGGING A SECRET TUNNEL Part 1',
109dd3b2 1660 'ext': '3gp',
1661 'upload_date': '20210624',
1662 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1663 'uploader': 'colinfurze',
11f9be09 1664 'uploader_id': 'colinfurze',
109dd3b2 1665 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
11f9be09 1666 'description': 'md5:b5096f56af7ccd7a555c84db81738b22'
109dd3b2 1667 },
1668 'params': {
1669 'format': '17', # 3gp format available on android
1670 'extractor_args': {'youtube': {'player_client': ['android']}},
1671 },
120916da 1672 },
109dd3b2 1673 {
1674 # Skip download of additional client configs (remix client config in this case)
1675 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1676 'only_matching': True,
1677 'params': {
1678 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1679 },
8fc54b12 1680 }, {
1681 # shorts
1682 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
1683 'only_matching': True,
9222c381 1684 }, {
1685 'note': 'Storyboards',
1686 'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',
1687 'info_dict': {
1688 'id': '5KLPxDtMqe8',
1689 'ext': 'mhtml',
1690 'format_id': 'sb0',
1691 'title': 'Your Brain is Plastic',
1692 'uploader_id': 'scishow',
1693 'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',
1694 'upload_date': '20140324',
1695 'uploader': 'SciShow',
1696 }, 'params': {'format': 'mhtml', 'skip_download': True}
1697 }
2eb88d95
PH
1698 ]
1699
@classmethod
def suitable(cls, url):
    """Return False for URLs whose query has a non-empty `list` value.

    Every other URL is decided by the parent class' `suitable`.
    """
    from ..utils import parse_qs

    list_values = parse_qs(url).get('list', [None])
    if list_values[0]:
        return False
    return super(YoutubeIE, cls).suitable(url)
1708
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Set up the extractor with empty per-instance player caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # _player_cache holds extracted sig/nsig functions and decrypted values,
    # _code_cache holds downloaded player JS keyed by player id.
    self._player_cache, self._code_cache = {}, {}
e0df6211 1713
def _extract_player_url(self, *ytcfgs, webpage=None):
    """Return an absolute player JS URL found in any of the given ytcfgs.

    Looks at `PLAYER_JS_URL` first and then at the `jsUrl` entries of
    `WEB_PLAYER_CONTEXT_CONFIGS`. Returns None when neither is present.
    Protocol-relative and site-relative URLs are made absolute.
    """
    candidate = traverse_obj(
        ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
        get_all=False, expected_type=compat_str)
    if not candidate:
        return None
    if candidate.startswith('//'):
        return 'https:' + candidate
    if re.match(r'https?://', candidate):
        return candidate
    return compat_urlparse.urljoin('https://www.youtube.com', candidate)
1726
def _download_player_url(self, video_id, fatal=False):
    """Build the player JS URL from the version named in the iframe API JS.

    Returns None if the iframe API could not be downloaded or no player
    version could be found in it (both only possible when `fatal` is False).
    """
    iframe_js = self._download_webpage(
        'https://www.youtube.com/iframe_api',
        note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
    if not iframe_js:
        return None
    player_version = self._search_regex(
        r'player\\?/([0-9a-fA-F]{8})\\?/', iframe_js, 'player version', fatal=fatal)
    if not player_version:
        return None
    return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'
1736
60064c53
PH
1737 def _signature_cache_id(self, example_sig):
1738 """ Return a string representation of a signature """
78caa52a 1739 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
60064c53 1740
e40c758c
S
@classmethod
def _extract_player_info(cls, player_url):
    """Return the `id` group of the first `_PLAYER_INFO_RE` pattern matching player_url.

    Raises ExtractorError when none of the patterns matches.
    """
    # Patterns are tried lazily and in order; the first hit wins
    hit = next(
        filter(None, (re.search(pattern, player_url) for pattern in cls._PLAYER_INFO_RE)),
        None)
    if hit is None:
        raise ExtractorError('Cannot identify player %r' % player_url)
    return hit.group('id')
e40c758c 1750
def _load_player(self, video_id, player_url, fatal=True):
    """Return the player JS for player_url, downloading it at most once per player id.

    Successfully downloaded code is kept in `self._code_cache`. Returns None
    when the download yielded nothing (only possible with `fatal=False`).
    """
    player_id = self._extract_player_info(player_url)
    already_loaded = player_id in self._code_cache
    if not already_loaded:
        downloaded = self._download_webpage(
            player_url, video_id, fatal=fatal,
            note='Downloading player ' + player_id,
            errnote='Download of %s failed' % player_url)
        # Only a non-empty download is remembered
        if downloaded:
            self._code_cache[player_id] = downloaded
    return self._code_cache.get(player_id)
109dd3b2 1761
def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable that transforms a signature shaped like example_sig.

    A previously stored index list from the 'youtube-sigfuncs' cache is used
    when available. Otherwise the function is parsed out of the player JS
    and the index list it produces is stored for later runs.
    """
    player_id = self._extract_player_info(player_url)

    # Read from filesystem cache
    func_id = 'js_%s_%s' % (
        player_id, self._signature_cache_id(example_sig))
    assert os.path.basename(func_id) == func_id

    stored_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
    if stored_spec is not None:
        return lambda s: ''.join(s[i] for i in stored_spec)

    player_js = self._load_player(video_id, player_url)
    if not player_js:
        return None
    sig_func = self._parse_sig_js(player_js)

    # Run the function on a string of distinct characters so that the result
    # reveals which input index ends up at each output position
    probe = ''.join(compat_chr(code_point) for code_point in range(len(example_sig)))
    index_spec = [ord(ch) for ch in sig_func(probe)]

    self._downloader.cache.store('youtube-sigfuncs', func_id, index_spec)
    return sig_func
83799698 1784
def _print_sig_code(self, func, example_sig):
    """Print Python code equivalent to the signature function `func`.

    Does nothing unless the 'youtube_print_sig_code' param is set.
    `func` is run on a probe string as long as `example_sig`; the resulting
    index list is turned into an expression of indexes/slices of `s`.
    """
    if not self.get_param('youtube_print_sig_code'):
        return

    def gen_sig_code(idxs):
        # Yields 's[...]' terms for the index list, collapsing runs that move
        # by a constant step of +1 or -1 into a single slice
        def _genslice(start, end, step):
            starts = '' if start == 0 else str(start)
            # `end` is inclusive here, so the slice bound is one step further;
            # a negative bound is written as an open-ended slice
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return 's[%s%s%s]' % (starts, ends, steps)

        step = None
        # Squelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                # Inside a run: keep going while the step stays the same
                if i - prev == step:
                    continue
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                # A new run starts at prev
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        # Emit whatever is still pending after the last pair
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    signature_id_tuple = '(%s)' % (
        ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1826
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Return a callable running the signature function found in jscode.

    The function name is located with the regexes below (passed to
    `_search_regex` as one tuple, `sig` being the captured group), then the
    function is extracted with JSInterpreter. The returned callable passes
    the signature as the single element of the argument list.
    """
    funcname = self._search_regex(
        (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
         r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         # Obsolete patterns
         r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
         r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
        jscode, 'Initial JS player signature function name', group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    # The extracted function takes its arguments as a list
    return lambda s: initial_function([s])
1850
def _decrypt_signature(self, s, video_id, player_url):
    """Turn the encrypted s field into a working signature.

    The signature function is extracted once per (player_url, signature
    shape) pair and reused from `self._player_cache` afterwards.
    Raises ExtractorError if player_url is None or anything fails while
    extracting or running the function.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt signature without player_url')

    try:
        cache_key = (player_url, self._signature_cache_id(s))
        if cache_key not in self._player_cache:
            self._player_cache[cache_key] = self._extract_signature_function(
                video_id, player_url, s)
        sig_func = self._player_cache[cache_key]
        self._print_sig_code(sig_func, s)
        return sig_func(s)
    except Exception as e:
        raise ExtractorError('Signature extraction failed: ' + traceback.format_exc(), cause=e)
1869
def _decrypt_nsig(self, s, video_id, player_url):
    """Turn the encrypted n field into a working signature.

    Both the extracted n function (per player URL) and every decrypted
    value (per input) are memoised in `self._player_cache`.
    Raises ExtractorError if player_url is None or decryption fails.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')
    # Make the player URL absolute before it is used as a cache key
    if player_url.startswith('//'):
        player_url = 'https:' + player_url
    elif not re.match(r'https?://', player_url):
        player_url = compat_urlparse.urljoin(
            'https://www.youtube.com', player_url)

    value_key = ('nsig_value', s)
    if value_key in self._player_cache:
        return self._player_cache[value_key]

    try:
        func_key = ('nsig', player_url)
        if func_key not in self._player_cache:
            self._player_cache[func_key] = self._extract_n_function(video_id, player_url)
        n_func = self._player_cache[func_key]
        self._player_cache[value_key] = n_func(s)
        self.write_debug(f'Decrypted nsig {s} => {self._player_cache[value_key]}')
        return self._player_cache[value_key]
    except Exception as e:
        raise ExtractorError(traceback.format_exc(), cause=e, video_id=video_id)
404f611f 1894
def _extract_n_function_name(self, jscode):
    """Return the name of the function the player JS applies to the `n` parameter."""
    name_patterns = (
        r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]{3})\([a-zA-Z0-9]\)',
    )
    return self._search_regex(
        name_patterns, jscode, 'Initial JS player n function name', group='nfunc')
1899
def _extract_n_function(self, video_id, player_url):
    """Return a callable that runs the player's n function on a string.

    The function code is read from the 'youtube-nsig' cache when present.
    Otherwise it is extracted from the player JS and stored there. With
    the 'youtube_print_sig_code' param set, the code is also printed.
    """
    player_id = self._extract_player_info(player_url)
    func_code = self._downloader.cache.load('youtube-nsig', player_id)

    if not func_code:
        player_js = self._load_player(video_id, player_url)
        n_func_name = self._extract_n_function_name(player_js)
        jsi = JSInterpreter(player_js)
        func_code = jsi.extract_function_code(n_func_name)
        self._downloader.cache.store('youtube-nsig', player_id, func_code)
    else:
        jsi = JSInterpreter(func_code)

    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    def run_n_function(s):
        # The function is rebuilt from its code on every call and takes
        # its arguments as a list
        return jsi.extract_function_from_code(*func_code)([s])

    return run_n_function
e0df6211 1917
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    The `STS` value of ytcfg is preferred; the player JS is only searched
    when that is missing. Without a player_url this raises ExtractorError
    if `fatal` is set and otherwise warns and returns None.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return
    player_js = self._load_player(video_id, player_url, fatal=fatal)
    if player_js:
        sts = int_or_none(self._search_regex(
            r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', player_js,
            'JS player signature timestamp', group='sts', fatal=fatal))
    return sts
1941
def _mark_watched(self, video_id, player_responses):
    """Request the playback tracking URL of the player responses.

    `ver` and a random `cpn` are set in the URL's query first. Only warns
    when no tracking URL is available; the request itself is non-fatal.
    """
    tracking_url = get_first(
        player_responses, ('playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
        expected_type=url_or_none)
    if not tracking_url:
        self.report_warning('Unable to mark watched')
        return
    url_parts = compat_urlparse.urlparse(tracking_url)
    query = compat_urlparse.parse_qs(url_parts.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn_chars = []
    for _ in range(0, 16):
        cpn_chars.append(CPN_ALPHABET[random.randint(0, 256) & 63])

    query['ver'] = ['2']
    query['cpn'] = [''.join(cpn_chars)]
    new_query = compat_urllib_parse_urlencode(query, True)
    tracking_url = compat_urlparse.urlunparse(url_parts._replace(query=new_query))

    self._download_webpage(
        tracking_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1967
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """Return the YouTube embeds referenced in webpage, in order of discovery.

    Three sources are scanned: player embed URLs (iframe/embed/object tags,
    `data-video-url` attributes, `embedSWF(` and `new SWFObject(` calls),
    lazyYT `data-youtube-id` attributes and the `data-video_id` attribute of
    the Wordpress "YouTube Video Importer" plugin. The latter two
    contribute the attribute value rather than a full URL.
    """
    # Embedded YouTube player
    # NOTE: the embedSWF alternative used to read `embedSWF\(?:\s*`, which
    # demands a literal ":" and so never matched an `embedSWF("...")` call
    entries = [
        unescapeHTML(mobj.group('url'))
        for mobj in re.finditer(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
        (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''', webpage)]

    # lazyYT YouTube embed
    entries.extend(map(
        unescapeHTML,
        re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)))

    # Wordpress "YouTube Video Importer" plugin
    matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # findall returns one tuple per match; the id is the last group
    entries.extend(m[-1] for m in matches)

    return entries
1999
@staticmethod
def _extract_url(webpage):
    """Return the first embed found by `_extract_urls`, or None if there is none."""
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]
2004
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the `id` group of `_VALID_URL` matched against url.

    Raises ExtractorError when the URL does not match.
    """
    matched = re.match(cls._VALID_URL, url, re.VERBOSE)
    if matched is not None:
        return matched.group('id')
    raise ExtractorError('Invalid URL: %s' % url)
c5e8d7af 2011
def _extract_chapters_from_json(self, data, duration):
    """Build chapters from the chaptered player bar found in data.

    Start times come from `timeRangeStartMillis` (scaled to seconds) and
    titles from the `simpleText` of each `chapterRenderer`.
    """
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
    )
    chapter_list = traverse_obj(data, chapters_path, expected_type=list)

    def start_seconds(chapter):
        millis = traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis'))
        return float_or_none(millis, scale=1000)

    def title_of(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    return self._extract_chapters(
        chapter_list,
        chapter_time=start_seconds,
        chapter_title=title_of,
        duration=duration)
2026
def _extract_chapters_from_engagement_panel(self, data, duration):
    """Build chapters from the macro marker lists of the engagement panels.

    The panels are tried in order and the first one that yields any
    chapters wins. Returns an empty list if none does.
    """
    marker_lists = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    def marker_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def marker_title(chapter):
        return self._get_text(chapter, 'title')

    for contents in marker_lists:
        chapters = self._extract_chapters(
            traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
            marker_time, marker_title, duration)
        if chapters:
            return chapters
    return []
2042
2043 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
84213ea8 2044 chapters = []
7c365c21 2045 last_chapter = {'start_time': 0}
2046 for idx, chapter in enumerate(chapter_list or []):
2047 title = chapter_title(chapter)
84213ea8
S
2048 start_time = chapter_time(chapter)
2049 if start_time is None:
2050 continue
7c365c21 2051 last_chapter['end_time'] = start_time
2052 if start_time < last_chapter['start_time']:
2053 if idx == 1:
2054 chapters.pop()
2055 self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])
2056 else:
2057 self.report_warning(f'Invalid start time for chapter "{title}"')
2058 continue
2059 last_chapter = {'start_time': start_time, 'title': title}
2060 chapters.append(last_chapter)
2061 last_chapter['end_time'] = duration
84213ea8
S
2062 return chapters
2063
def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
    """Find a JSON value in webpage with regex and parse it non-fatally.

    The regex is first tried followed by `_YT_INITIAL_BOUNDARY_RE` and then
    on its own. '{}' is parsed when neither matches.
    """
    patterns = (
        r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
        regex)
    raw_json = self._search_regex(patterns, webpage, name, default='{}')
    return self._parse_json(raw_json, video_id, fatal=False)
84213ea8 2068
d92f5d5a 2069 @staticmethod
2070 def parse_time_text(time_text):
2071 """
2072 Parse the comment time text
2073 time_text is in the format 'X units ago (edited)'
2074 """
2075 time_text_split = time_text.split(' ')
2076 if len(time_text_split) >= 3:
da503b7a 2077 try:
2078 return datetime_from_str('now-%s%s' % (time_text_split[0], time_text_split[1]), precision='auto')
2079 except ValueError:
2080 return None
d92f5d5a 2081
a1c5d2ca
M
def _extract_comment(self, comment_renderer, parent=None):
    """Convert a comment renderer dict into a comment info dict.

    Returns None when the renderer has no `commentId`. `parent` is the id
    of the comment being replied to; 'root' is reported when it is unset.
    `timestamp` is None whenever the published time text cannot be parsed.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return

    text = self._get_text(comment_renderer, 'contentText')

    # note: timestamp is an estimate calculated from the current time and time_text
    time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
    time_text_dt = self.parse_time_text(time_text)
    # Must be bound even if the time text is unparsable, since it is
    # referenced unconditionally in the result below
    timestamp = None
    if isinstance(time_text_dt, datetime.datetime):
        timestamp = calendar.timegm(time_text_dt.timetuple())
    author = self._get_text(comment_renderer, 'authorText')
    author_id = try_get(comment_renderer,
                        lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)

    votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
                                                   lambda x: x['likeCount']), compat_str)) or 0
    author_thumbnail = try_get(comment_renderer,
                               lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)

    author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
    is_favorited = 'creatorHeart' in (try_get(
        comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
    return {
        'id': comment_id,
        'text': text,
        'timestamp': timestamp,
        'time_text': time_text,
        'like_count': votes,
        'is_favorited': is_favorited,
        'author': author,
        'author_id': author_id,
        'author_thumbnail': author_thumbnail,
        'author_is_uploader': author_is_uploader,
        'parent': parent or 'root'
    }
2119
def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, comment_counts=None):
    """Yield comment dicts by following the continuations of a comment section.

    Called with `parent=None` for the top-level section and recursively,
    with the parent comment id, for each reply thread. `comment_counts` is
    a list shared across the recursion:
    [comments so far, estimated total, current thread number].
    """

    def extract_header(contents):
        # Reads the comments header: records the expected comment count and
        # returns the continuation of the requested sort order (if any)
        _continuation = None
        for content in contents:
            comments_header_renderer = try_get(content, lambda x: x['commentsHeaderRenderer'])
            expected_comment_count = parse_count(self._get_text(
                comments_header_renderer, 'countText', 'commentsCount', max_runs=1))

            if expected_comment_count:
                comment_counts[1] = expected_comment_count
                self.to_screen('Downloading ~%d comments' % expected_comment_count)
            sort_mode_str = self._configuration_arg('comment_sort', [''])[0]
            comment_sort_index = int(sort_mode_str != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = sort_menu_item.get('title')
            if isinstance(sort_text, compat_str):
                sort_text = sort_text.lower()
            else:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text)
            break
        return _continuation

    def extract_thread(contents):
        # Yields every comment in contents, each followed by its replies
        if not parent:
            comment_counts[2] = 0
        for content in contents:
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            # `dict` is the expected type. It used to be passed inside the
            # getter tuple, where try_get would call it as a second getter
            # and return a copy of the container instead of None.
            comment_renderer = try_get(
                comment_thread_renderer, lambda x: x['comment']['commentRenderer'], dict) or try_get(
                content, lambda x: x['commentRenderer'], dict)

            if not comment_renderer:
                continue
            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue
            comment_counts[0] += 1
            yield comment
            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                comment_counts[2] += 1
                yield from self._comment_entries(
                    comment_replies_renderer, ytcfg, video_id,
                    parent=comment.get('id'), comment_counts=comment_counts)

    # YouTube comments have a max depth of 2
    max_depth = int_or_none(self._configuration_arg('max_comment_depth', [''])[0]) or float('inf')
    if max_depth == 1 and parent:
        return
    if not comment_counts:
        # comment so far, est. total comments, current comment thread #
        comment_counts = [0, 0, 0]

    continuation = self._extract_continuation(root_continuation_data)
    if continuation and len(continuation['continuation']) < 27:
        self.write_debug('Detected old API continuation token. Generating new API compatible token.')
        continuation_token = self._generate_comment_continuation(video_id)
        continuation = self._build_api_continuation_query(continuation_token, None)

    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent:
        self.report_warning(message, video_id=video_id)

    visitor_data = None
    is_first_continuation = parent is None

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=visitor_data)
        comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                    comment_counts[2], comment_prog_str)
        else:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)

        response = self._extract_response(
            item_id=None, query=continuation,
            ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
            check_get_keys=('onResponseReceivedEndpoints', 'continuationContents'))
        if not response:
            break
        # Keep using the last visitor data the API handed out
        visitor_data = try_get(
            response,
            lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
            compat_str) or visitor_data

        continuation_contents = dict_get(response, ('onResponseReceivedEndpoints', 'continuationContents'))

        continuation = None
        if isinstance(continuation_contents, list):
            for continuation_section in continuation_contents:
                if not isinstance(continuation_section, dict):
                    continue
                continuation_items = try_get(
                    continuation_section,
                    (lambda x: x['reloadContinuationItemsCommand']['continuationItems'],
                     lambda x: x['appendContinuationItemsAction']['continuationItems']),
                    list) or []
                if is_first_continuation:
                    continuation = extract_header(continuation_items)
                    is_first_continuation = False
                    if continuation:
                        break
                    continue
                # Counted from 1 so that 0 really means "nothing received";
                # a zero-based index cannot tell one entry from none
                count = 0
                for count, entry in enumerate(extract_thread(continuation_items), 1):
                    yield entry
                continuation = self._extract_continuation({'contents': continuation_items})
                if continuation:
                    # Sometimes YouTube provides a continuation without any comments
                    # In most cases we end up just downloading these with very little comments to come.
                    if count == 0:
                        if not parent:
                            self.report_warning('No comments received - assuming end of comments')
                        continuation = None
                    break

        # Deprecated response structure
        elif isinstance(continuation_contents, dict):
            known_continuation_renderers = ('itemSectionContinuation', 'commentRepliesContinuation')
            for key, continuation_renderer in continuation_contents.items():
                if key not in known_continuation_renderers:
                    continue
                if not isinstance(continuation_renderer, dict):
                    continue
                if is_first_continuation:
                    header_continuation_items = [continuation_renderer.get('header') or {}]
                    continuation = extract_header(header_continuation_items)
                    is_first_continuation = False
                    if continuation:
                        break

                # Sometimes YouTube provides a continuation without any comments
                # In most cases we end up just downloading these with very little comments to come.
                # (counted from 1, see the note in the list branch above)
                count = 0
                for count, entry in enumerate(extract_thread(continuation_renderer.get('contents') or {}), 1):
                    yield entry
                continuation = self._extract_continuation(continuation_renderer)
                if count == 0:
                    if not parent:
                        self.report_warning('No comments received - assuming end of comments')
                    continuation = None
                break
a1c5d2ca 2287
2d6659b9 2288 @staticmethod
2289 def _generate_comment_continuation(video_id):
2290 """
2291 Generates initial comment section continuation token from given video id
2292 """
2293 b64_vid_id = base64.b64encode(bytes(video_id.encode('utf-8')))
2294 parts = ('Eg0SCw==', b64_vid_id, 'GAYyJyIRIgs=', b64_vid_id, 'MAB4AjAAQhBjb21tZW50cy1zZWN0aW9u')
2295 new_continuation_intlist = list(itertools.chain.from_iterable(
2296 [bytes_to_intlist(base64.b64decode(part)) for part in parts]))
2297 return base64.b64encode(intlist_to_bytes(new_continuation_intlist)).decode('utf-8')
2298
def _get_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction

    Returns a lazy iterator over the comments of the 'comment-item-section'
    found in contents, cut off after the `max_comments` extractor argument
    when that is set.
    """
    max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0])

    # Force English regardless of account setting to prevent parsing issues
    # See: https://github.com/yt-dlp/yt-dlp/issues/532
    english_ytcfg = copy.deepcopy(ytcfg)
    client_context = traverse_obj(
        english_ytcfg, ('INNERTUBE_CONTEXT', 'client'), expected_type=dict, default={})
    client_context['hl'] = 'en'

    def _real_comment_extract():
        section = None
        for item in traverse_obj(contents, (..., 'itemSectionRenderer'), default={}):
            if item.get('sectionIdentifier') == 'comment-item-section':
                section = item
                break
        yield from self._comment_entries(section, english_ytcfg, video_id)

    return itertools.islice(_real_comment_extract(), 0, max_comments)
a1c5d2ca 2314
109dd3b2 2315 @staticmethod
99e9e001 2316 def _get_checkok_params():
2317 return {'contentCheckOk': True, 'racyCheckOk': True}
2318
2319 @classmethod
2320 def _generate_player_context(cls, sts=None):
109dd3b2 2321 context = {
2322 'html5Preference': 'HTML5_PREF_WANTS',
2323 }
2324 if sts is not None:
2325 context['signatureTimestamp'] = sts
2326 return {
2327 'playbackContext': {
2328 'contentPlaybackContext': context
a1a7907b 2329 },
99e9e001 2330 **cls._get_checkok_params()
109dd3b2 2331 }
2332
e7e94f2a
D
2333 @staticmethod
2334 def _is_agegated(player_response):
2335 if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
9275f62c 2336 return True
e7e94f2a
D
2337
2338 reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
2339 AGE_GATE_REASONS = (
2340 'confirm your age', 'age-restricted', 'inappropriate', # reason
2341 'age_verification_required', 'age_check_required', # status
2342 )
2343 return any(expected in reason for expected in AGE_GATE_REASONS for reason in reasons)
2344
@staticmethod
def _is_unplayable(player_response):
    """Return True when the `status` under `playabilityStatus` is exactly 'UNPLAYABLE'"""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'
9275f62c 2348
99e9e001 2349 def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr):
109dd3b2 2350
11f9be09 2351 session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
2352 syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
b6de707d 2353 sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False) if player_url else None
11f9be09 2354 headers = self.generate_api_headers(
99e9e001 2355 ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)
9297939e 2356
11f9be09 2357 yt_query = {'videoId': video_id}
2358 yt_query.update(self._generate_player_context(sts))
2359 return self._extract_response(
2360 item_id=video_id, ep='player', query=yt_query,
379e44ed 2361 ytcfg=player_ytcfg, headers=headers, fatal=True,
000c15a4 2362 default_client=client,
11f9be09 2363 note='Downloading %s player API JSON' % client.replace('_', ' ').strip()
2364 ) or None
2365
def _get_requested_clients(self, url, smuggled_data):
    """
    Determine which player clients to query

    Reads the `player_client` configuration argument: known client names are
    taken as-is, 'default' expands to the default clients and 'all' to every
    client in INNERTUBE_CLIENTS whose name does not start with '_' (ordered
    by descending 'priority'). Unknown names are skipped with a warning.
    For music URLs, the `<client>_music` variants that exist are added too.

    @returns  The requested client names, passed through orderedSet
    """
    default = ['android', 'web']
    allowed_clients = sorted(
        (client for client in INNERTUBE_CLIENTS if client[:1] != '_'),
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)

    requested_clients = []
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'default':
            requested_clients.extend(default)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        requested_clients = default

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Build the additions first so that the list is not extended
        # while it is being iterated over
        music_clients = [
            f'{client}_music' for client in requested_clients
            if f'{client}_music' in INNERTUBE_CLIENTS]
        requested_clients.extend(music_clients)

    return orderedSet(requested_clients)
cf7e015f 2389
c0bc527b
M
2390 def _extract_player_ytcfg(self, client, video_id):
2391 url = {
2392 'web_music': 'https://music.youtube.com',
2393 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
2394 }.get(client)
2395 if not url:
2396 return {}
2397 webpage = self._download_webpage(url, video_id, fatal=False, note=f'Downloading {client} config')
2398 return self.extract_ytcfg(video_id, webpage) or {}
2399
99e9e001 2400 def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
11f9be09 2401 initial_pr = None
2402 if webpage:
2403 initial_pr = self._extract_yt_initial_variable(
2404 webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
2405 video_id, 'initial player response')
6b09401b 2406
c0bc527b
M
2407 original_clients = clients
2408 clients = clients[::-1]
b6de707d 2409 prs = []
e7e94f2a
D
2410
2411 def append_client(client_name):
2412 if client_name in INNERTUBE_CLIENTS and client_name not in original_clients:
2413 clients.append(client_name)
2414
379e44ed 2415 # Android player_response does not have microFormats which are needed for
2416 # extraction of some data. So we return the initial_pr with formats
2417 # stripped out even if not requested by the user
2418 # See: https://github.com/yt-dlp/yt-dlp/issues/501
379e44ed 2419 if initial_pr:
2420 pr = dict(initial_pr)
2421 pr['streamingData'] = None
b6de707d 2422 prs.append(pr)
379e44ed 2423
2424 last_error = None
b6de707d 2425 tried_iframe_fallback = False
2426 player_url = None
c0bc527b
M
2427 while clients:
2428 client = clients.pop()
11f9be09 2429 player_ytcfg = master_ytcfg if client == 'web' else {}
c0bc527b
M
2430 if 'configs' not in self._configuration_arg('player_skip'):
2431 player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg
c0bc527b 2432
b6de707d 2433 player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
2434 require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
2435 if 'js' in self._configuration_arg('player_skip'):
2436 require_js_player = False
2437 player_url = None
2438
2439 if not player_url and not tried_iframe_fallback and require_js_player:
2440 player_url = self._download_player_url(video_id)
2441 tried_iframe_fallback = True
2442
379e44ed 2443 try:
2444 pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
99e9e001 2445 client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr)
379e44ed 2446 except ExtractorError as e:
2447 if last_error:
2448 self.report_warning(last_error)
2449 last_error = e
2450 continue
2451
11f9be09 2452 if pr:
b6de707d 2453 prs.append(pr)
c0bc527b 2454
e7e94f2a 2455 # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
99e9e001 2456 if client.endswith('_agegate') and self._is_unplayable(pr) and self.is_authenticated:
e7e94f2a
D
2457 append_client(client.replace('_agegate', '_creator'))
2458 elif self._is_agegated(pr):
2459 append_client(f'{client}_agegate')
c0bc527b 2460
379e44ed 2461 if last_error:
b6de707d 2462 if not len(prs):
379e44ed 2463 raise last_error
2464 self.report_warning(last_error)
b6de707d 2465 return prs, player_url
11f9be09 2466
def _extract_formats(self, streaming_data, video_id, player_url, is_live):
    """
    Generate the format dicts of a video

    First yields the formats listed under `formats`/`adaptiveFormats` of
    each streaming data dict, then the formats of the HLS and DASH manifests
    unless these are skipped by configuration.

    @param streaming_data  List of `streamingData` dicts
    @param video_id        Video id, passed on to helpers
    @param player_url      Player URL used for decrypting signatures/nsig;
                           formats needing a signature are skipped without it
    @param is_live         Whether the video is live; DASH manifests of live
                           videos are only fetched with `include_live_dash`
    """
    itags, stream_ids = {}, []
    itag_qualities, res_qualities = {}, {}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            # `audioQuality` may be missing or null
            quality = (fmt.get('audioQuality') or '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            sc = compat_parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not (sc and fmt_url and encrypted_sig):
                continue
            if not player_url:
                continue
            signature = self._decrypt_signature(encrypted_sig, video_id, player_url)
            sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
            fmt_url += '&' + sp + '=' + signature

        query = parse_qs(fmt_url)
        throttled = False
        if query.get('n'):
            try:
                fmt_url = update_url_query(fmt_url, {
                    'n': self._decrypt_nsig(query['n'][0], video_id, player_url)})
            except ExtractorError as e:
                self.report_warning(
                    f'nsig extraction failed: You may experience throttling for some formats\n'
                    f'n = {query["n"][0]} ; player = {player_url}\n{e}', only_once=True)
                throttled = True

        if itag:
            itags[itag] = 'https'
            stream_ids.append(stream_id)

        tbr = float_or_none(
            fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': join_nonempty(
                '%s%s' % (audio_track.get('displayName') or '',
                          ' (default)' if audio_track.get('audioIsDefault') else ''),
                # `quality` is None when neither `quality` nor `audioQuality` was given
                fmt.get('qualityLabel') or (quality or '').replace('audio_quality_', ''),
                throttled and 'THROTTLED', delim=', '),
            'source_preference': -10 if throttled else -1,
            'fps': int_or_none(fmt.get('fps')) or None,
            'height': height,
            'quality': q(quality),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': audio_track.get('id', '').split('.')[0],
            'language_preference': 1 if audio_track.get('audioIsDefault') else -1,
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        yield dct

    skip_manifests = self._configuration_arg('skip')
    get_dash = (
        (not is_live or self._configuration_arg('include_live_dash'))
        and 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True))
    get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)

    def process_manifest_format(f, proto, itag):
        """Set format_id and quality of a manifest format; False if it duplicates a known itag"""
        if itag in itags:
            if itags[itag] == proto or f'{itag}-{proto}' in itags:
                return False
            # Same itag already seen via another protocol: disambiguate the id
            itag = f'{itag}-{proto}'
        if itag:
            f['format_id'] = itag
            itags[itag] = proto

        # Reuse the quality recorded for the same itag, else for the same height
        f['quality'] = next((
            q(qdict[val])
            for val, qdict in ((f.get('format_id', '').split('-')[0], itag_qualities), (f.get('height'), res_qualities))
            if val in qdict), -1)
        return True

    for sd in streaming_data:
        hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
                if process_manifest_format(f, 'hls', self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)):
                    yield f

        dash_manifest_url = get_dash and sd.get('dashManifestUrl')
        if dash_manifest_url:
            for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
                if process_manifest_format(f, 'dash', f['format_id']):
                    f['filesize'] = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                    yield f
11f9be09 2614
def _extract_storyboard(self, player_responses, duration):
    """
    Generate the storyboard formats described by the storyboard spec

    The spec is a '|' separated string: the first field is the base URL and
    every further field is a '#' separated description of one storyboard.

    @param player_responses  List of player responses
    @param duration          Video duration; nothing is yielded when it is
                             unknown, since the fragment durations are
                             derived from it
    """
    spec = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
    # The list is reversed, so pop() takes the first field of the spec.
    # str.split never returns an empty list; a missing spec gives an empty base URL
    base_url = spec.pop()
    if not base_url or not duration:
        return
    L = len(spec) - 1
    for i, args in enumerate(spec):
        args = args.split('#')
        counts = list(map(int_or_none, args[:5]))
        if len(args) != 8 or not all(counts):
            self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
            continue
        width, height, frame_count, cols, rows = counts
        N, sigh = args[6:]

        url = base_url.replace('$L', str(L - i)).replace('$N', N) + f'&sigh={sigh}'
        # One fragment is a sheet of cols x rows frames
        fragment_count = frame_count / (cols * rows)
        fragment_duration = duration / fragment_count
        yield {
            'format_id': f'sb{i}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': url,
            'width': width,
            'height': height,
            'fragments': [{
                'path': url.replace('$M', str(j)),
                # The last fragment may be only partially filled
                'duration': min(fragment_duration, duration - (j * fragment_duration)),
            } for j in range(math.ceil(fragment_count))],
        }
2649
11f9be09 2650 def _real_extract(self, url):
2651 url, smuggled_data = unsmuggle_url(url, {})
2652 video_id = self._match_id(url)
2653
2654 base_url = self.http_scheme() + '//www.youtube.com/'
2655 webpage_url = base_url + 'watch?v=' + video_id
b6de707d 2656 webpage = None
2657 if 'webpage' not in self._configuration_arg('player_skip'):
2658 webpage = self._download_webpage(
2659 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
11f9be09 2660
2661 master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
11f9be09 2662
b6de707d 2663 player_responses, player_url = self._extract_player_responses(
11f9be09 2664 self._get_requested_clients(url, smuggled_data),
99e9e001 2665 video_id, webpage, master_ytcfg)
11f9be09 2666
11f9be09 2667 playability_statuses = traverse_obj(
2668 player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])
2669
2670 trailer_video_id = get_first(
2671 playability_statuses,
2672 ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
2673 expected_type=str)
2674 if trailer_video_id:
2675 return self.url_result(
2676 trailer_video_id, self.ie_key(), trailer_video_id)
2677
2678 search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
2679 if webpage else (lambda x: None))
2680
2681 video_details = traverse_obj(
2682 player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
2683 microformats = traverse_obj(
2684 player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
2685 expected_type=dict, default=[])
2686 video_title = (
2687 get_first(video_details, 'title')
2688 or self._get_text(microformats, (..., 'title'))
2689 or search_meta(['og:title', 'twitter:title', 'title']))
2690 video_description = get_first(video_details, 'shortDescription')
2691
d89257f3 2692 multifeed_metadata_list = get_first(
2693 player_responses,
2694 ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
2695 expected_type=str)
2696 if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):
2697 if self.get_param('noplaylist'):
11f9be09 2698 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
d89257f3 2699 else:
2700 entries = []
2701 feed_ids = []
2702 for feed in multifeed_metadata_list.split(','):
2703 # Unquote should take place before split on comma (,) since textual
2704 # fields may contain comma as well (see
2705 # https://github.com/ytdl-org/youtube-dl/issues/8536)
2706 feed_data = compat_parse_qs(
2707 compat_urllib_parse_unquote_plus(feed))
2708
2709 def feed_entry(name):
2710 return try_get(
2711 feed_data, lambda x: x[name][0], compat_str)
2712
2713 feed_id = feed_entry('id')
2714 if not feed_id:
2715 continue
2716 feed_title = feed_entry('title')
2717 title = video_title
2718 if feed_title:
2719 title += ' (%s)' % feed_title
2720 entries.append({
2721 '_type': 'url_transparent',
2722 'ie_key': 'Youtube',
2723 'url': smuggle_url(
2724 '%swatch?v=%s' % (base_url, feed_data['id'][0]),
2725 {'force_singlefeed': True}),
2726 'title': title,
2727 })
2728 feed_ids.append(feed_id)
2729 self.to_screen(
2730 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2731 % (', '.join(feed_ids), video_id))
2732 return self.playlist_result(
2733 entries, video_id, video_title, video_description)
11f9be09 2734
7ea65411 2735 live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
11f9be09 2736 is_live = get_first(video_details, 'isLive')
7ea65411 2737 if is_live is None:
2738 is_live = get_first(live_broadcast_details, 'isLiveNow')
11f9be09 2739
2740 streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
2741 formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live))
bf1317d2 2742
545cc85d 2743 if not formats:
11f9be09 2744 if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
88acdbc2 2745 self.report_drm(video_id)
11f9be09 2746 pemr = get_first(
2747 playability_statuses,
2748 ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
2749 reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
2750 subreason = clean_html(self._get_text(pemr, 'subreason') or '')
545cc85d 2751 if subreason:
545cc85d 2752 if subreason == 'The uploader has not made this video available in your country.':
11f9be09 2753 countries = get_first(microformats, 'availableCountries')
545cc85d 2754 if not countries:
2755 regions_allowed = search_meta('regionsAllowed')
2756 countries = regions_allowed.split(',') if regions_allowed else None
b7da73eb 2757 self.raise_geo_restricted(subreason, countries, metadata_available=True)
11f9be09 2758 reason += f'. {subreason}'
545cc85d 2759 if reason:
b7da73eb 2760 self.raise_no_formats(reason, expected=True)
bf1317d2 2761
11f9be09 2762 keywords = get_first(video_details, 'keywords', expected_type=list) or []
545cc85d 2763 if not keywords and webpage:
2764 keywords = [
2765 unescapeHTML(m.group('content'))
2766 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2767 for keyword in keywords:
2768 if keyword.startswith('yt:stretch='):
201c1459 2769 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2770 if mobj:
2771 # NB: float is intentional for forcing float division
2772 w, h = (float(v) for v in mobj.groups())
2773 if w > 0 and h > 0:
2774 ratio = w / h
2775 for f in formats:
2776 if f.get('vcodec') != 'none':
2777 f['stretched_ratio'] = ratio
2778 break
6449cd80 2779
545cc85d 2780 thumbnails = []
11f9be09 2781 thumbnail_dicts = traverse_obj(
2782 (video_details, microformats), (..., ..., 'thumbnail', 'thumbnails', ...),
2783 expected_type=dict, default=[])
2784 for thumbnail in thumbnail_dicts:
2785 thumbnail_url = thumbnail.get('url')
2786 if not thumbnail_url:
2787 continue
2788 # Sometimes youtube gives a wrong thumbnail URL. See:
2789 # https://github.com/yt-dlp/yt-dlp/issues/233
2790 # https://github.com/ytdl-org/youtube-dl/issues/28023
2791 if 'maxresdefault' in thumbnail_url:
2792 thumbnail_url = thumbnail_url.split('?')[0]
2793 thumbnails.append({
2794 'url': thumbnail_url,
2795 'height': int_or_none(thumbnail.get('height')),
2796 'width': int_or_none(thumbnail.get('width')),
2797 })
ff2751ac 2798 thumbnail_url = search_meta(['og:image', 'twitter:image'])
2799 if thumbnail_url:
2800 thumbnails.append({
2801 'url': thumbnail_url,
ff2751ac 2802 })
fccf5021 2803 original_thumbnails = thumbnails.copy()
2804
0ba692ac 2805 # The best resolution thumbnails sometimes does not appear in the webpage
2806 # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
cca80fe6 2807 # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
e820fbaa 2808 thumbnail_names = [
2809 'maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3',
cca80fe6 2810 'hqdefault', 'hq1', 'hq2', 'hq3', '0',
2811 'mqdefault', 'mq1', 'mq2', 'mq3',
2812 'default', '1', '2', '3'
2813 ]
cca80fe6 2814 n_thumbnail_names = len(thumbnail_names)
0ba692ac 2815 thumbnails.extend({
2816 'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
2817 video_id=video_id, name=name, ext=ext,
2818 webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
cca80fe6 2819 } for name in thumbnail_names for ext in ('webp', 'jpg'))
0ba692ac 2820 for thumb in thumbnails:
cca80fe6 2821 i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
0ba692ac 2822 thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
ff2751ac 2823 self._remove_duplicate_formats(thumbnails)
fccf5021 2824 self._downloader._sort_thumbnails(original_thumbnails)
545cc85d 2825
7ea65411 2826 category = get_first(microformats, 'category') or search_meta('genre')
2827 channel_id = str_or_none(
2828 get_first(video_details, 'channelId')
2829 or get_first(microformats, 'externalChannelId')
2830 or search_meta('channelId'))
2831 duration = int_or_none(
2832 get_first(video_details, 'lengthSeconds')
2833 or get_first(microformats, 'lengthSeconds')
2834 or parse_duration(search_meta('duration'))) or None
2835 owner_profile_url = get_first(microformats, 'ownerProfileUrl')
2836
2837 live_content = get_first(video_details, 'isLiveContent')
2838 is_upcoming = get_first(video_details, 'isUpcoming')
2839 if is_live is None:
2840 if is_upcoming or live_content is False:
2841 is_live = False
2842 if is_upcoming is None and (live_content or is_live):
2843 is_upcoming = False
2844 live_starttime = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
2845 live_endtime = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
2846 if not duration and live_endtime and live_starttime:
2847 duration = live_endtime - live_starttime
2848
720c3099 2849 formats.extend(self._extract_storyboard(player_responses, duration))
2850
2851 # Source is given priority since formats that throttle are given lower source_preference
2852 # When throttling issue is fully fixed, remove this
2853 self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang', 'proto'))
2854
545cc85d 2855 info = {
2856 'id': video_id,
2857 'title': self._live_title(video_title) if is_live else video_title,
2858 'formats': formats,
2859 'thumbnails': thumbnails,
fccf5021 2860 # The best thumbnail that we are sure exists. Prevents unnecessary
2861 # URL checking if user don't care about getting the best possible thumbnail
2862 'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
545cc85d 2863 'description': video_description,
2864 'upload_date': unified_strdate(
11f9be09 2865 get_first(microformats, 'uploadDate')
545cc85d 2866 or search_meta('uploadDate')),
11f9be09 2867 'uploader': get_first(video_details, 'author'),
545cc85d 2868 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2869 'uploader_url': owner_profile_url,
2870 'channel_id': channel_id,
11f9be09 2871 'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
545cc85d 2872 'duration': duration,
2873 'view_count': int_or_none(
11f9be09 2874 get_first((video_details, microformats), (..., 'viewCount'))
545cc85d 2875 or search_meta('interactionCount')),
11f9be09 2876 'average_rating': float_or_none(get_first(video_details, 'averageRating')),
545cc85d 2877 'age_limit': 18 if (
11f9be09 2878 get_first(microformats, 'isFamilySafe') is False
545cc85d 2879 or search_meta('isFamilyFriendly') == 'false'
2880 or search_meta('og:restrictions:age') == '18+') else 0,
2881 'webpage_url': webpage_url,
2882 'categories': [category] if category else None,
2883 'tags': keywords,
11f9be09 2884 'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
7ea65411 2885 'is_live': is_live,
2886 'was_live': (False if is_live or is_upcoming or live_content is False
2887 else None if is_live is None or is_upcoming is None
2888 else live_content),
2889 'live_status': 'is_upcoming' if is_upcoming else None, # rest will be set by YoutubeDL
2890 'release_timestamp': live_starttime,
545cc85d 2891 }
b477fc13 2892
3944e7af 2893 pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
545cc85d 2894 if pctr:
ecdc9049 2895 def get_lang_code(track):
2896 return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
2897 or track.get('languageCode'))
2898
2899 # Converted into dicts to remove duplicates
2900 captions = {
2901 get_lang_code(sub): sub
2902 for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
2903 translation_languages = {
2904 lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
2905 for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}
2906
774d79cc 2907 def process_language(container, base_url, lang_code, sub_name, query):
120916da 2908 lang_subs = container.setdefault(lang_code, [])
545cc85d 2909 for fmt in self._SUBTITLE_FORMATS:
2910 query.update({
2911 'fmt': fmt,
2912 })
2913 lang_subs.append({
2914 'ext': fmt,
2915 'url': update_url_query(base_url, query),
774d79cc 2916 'name': sub_name,
545cc85d 2917 })
7e72694b 2918
ecdc9049 2919 subtitles, automatic_captions = {}, {}
2920 for lang_code, caption_track in captions.items():
2921 base_url = caption_track.get('baseUrl')
545cc85d 2922 if not base_url:
2923 continue
ecdc9049 2924 lang_name = self._get_text(caption_track, 'name', max_runs=1)
545cc85d 2925 if caption_track.get('kind') != 'asr':
545cc85d 2926 if not lang_code:
2927 continue
2928 process_language(
ecdc9049 2929 subtitles, base_url, lang_code, lang_name, {})
2930 if not caption_track.get('isTranslatable'):
2931 continue
3944e7af 2932 for trans_code, trans_name in translation_languages.items():
2933 if not trans_code:
545cc85d 2934 continue
ecdc9049 2935 if caption_track.get('kind') != 'asr':
2936 trans_code += f'-{lang_code}'
2937 trans_name += format_field(lang_name, template=' from %s')
545cc85d 2938 process_language(
ecdc9049 2939 automatic_captions, base_url, trans_code, trans_name, {'tlang': trans_code})
2940 info['automatic_captions'] = automatic_captions
2941 info['subtitles'] = subtitles
7e72694b 2942
545cc85d 2943 parsed_url = compat_urllib_parse_urlparse(url)
2944 for component in [parsed_url.fragment, parsed_url.query]:
2945 query = compat_parse_qs(component)
2946 for k, v in query.items():
2947 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
2948 d_k += '_time'
2949 if d_k not in info and k in s_ks:
2950 info[d_k] = parse_duration(query[k][0])
822b9d9c
RA
2951
2952 # Youtube Music Auto-generated description
822b9d9c 2953 if video_description:
38d70284 2954 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
822b9d9c 2955 if mobj:
822b9d9c
RA
2956 release_year = mobj.group('release_year')
2957 release_date = mobj.group('release_date')
2958 if release_date:
2959 release_date = release_date.replace('-', '')
2960 if not release_year:
545cc85d 2961 release_year = release_date[:4]
2962 info.update({
2963 'album': mobj.group('album'.strip()),
2964 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
2965 'track': mobj.group('track').strip(),
2966 'release_date': release_date,
cc2db878 2967 'release_year': int_or_none(release_year),
545cc85d 2968 })
7e72694b 2969
545cc85d 2970 initial_data = None
2971 if webpage:
2972 initial_data = self._extract_yt_initial_variable(
2973 webpage, self._YT_INITIAL_DATA_RE, video_id,
2974 'yt initial data')
2975 if not initial_data:
99e9e001 2976 query = {'videoId': video_id}
2977 query.update(self._get_checkok_params())
109dd3b2 2978 initial_data = self._extract_response(
2979 item_id=video_id, ep='next', fatal=False,
99e9e001 2980 ytcfg=master_ytcfg, query=query,
2981 headers=self.generate_api_headers(ytcfg=master_ytcfg),
109dd3b2 2982 note='Downloading initial data API JSON')
545cc85d 2983
c60ee3a2 2984 try:
2985 # This will error if there is no livechat
2986 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
ecdc9049 2987 info.setdefault('subtitles', {})['live_chat'] = [{
c60ee3a2 2988 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
2989 'video_id': video_id,
2990 'ext': 'json',
f6745c49 2991 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
c60ee3a2 2992 }]
2993 except (KeyError, IndexError, TypeError):
2994 pass
545cc85d 2995
2996 if initial_data:
7c365c21 2997 info['chapters'] = (
2998 self._extract_chapters_from_json(initial_data, duration)
2999 or self._extract_chapters_from_engagement_panel(initial_data, duration)
3000 or None)
545cc85d 3001
3002 contents = try_get(
3003 initial_data,
3004 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
3005 list) or []
3006 for content in contents:
3007 vpir = content.get('videoPrimaryInfoRenderer')
3008 if vpir:
3009 stl = vpir.get('superTitleLink')
3010 if stl:
fe93e2c4 3011 stl = self._get_text(stl)
545cc85d 3012 if try_get(
3013 vpir,
3014 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
3015 info['location'] = stl
3016 else:
3017 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
3018 if mobj:
3019 info.update({
3020 'series': mobj.group(1),
3021 'season_number': int(mobj.group(2)),
3022 'episode_number': int(mobj.group(3)),
3023 })
3024 for tlb in (try_get(
3025 vpir,
3026 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
3027 list) or []):
3028 tbr = tlb.get('toggleButtonRenderer') or {}
3029 for getter, regex in [(
3030 lambda x: x['defaultText']['accessibility']['accessibilityData'],
3031 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
3032 lambda x: x['accessibility'],
3033 lambda x: x['accessibilityData']['accessibilityData'],
3034 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
3035 label = (try_get(tbr, getter, dict) or {}).get('label')
3036 if label:
3037 mobj = re.match(regex, label)
3038 if mobj:
3039 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
3040 break
3041 sbr_tooltip = try_get(
3042 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
3043 if sbr_tooltip:
3044 like_count, dislike_count = sbr_tooltip.split(' / ')
3045 info.update({
3046 'like_count': str_to_int(like_count),
3047 'dislike_count': str_to_int(dislike_count),
3048 })
3049 vsir = content.get('videoSecondaryInfoRenderer')
3050 if vsir:
052e1350 3051 info['channel'] = self._get_text(vsir, ('owner', 'videoOwnerRenderer', 'title'))
545cc85d 3052 rows = try_get(
3053 vsir,
3054 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
3055 list) or []
3056 multiple_songs = False
3057 for row in rows:
3058 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
3059 multiple_songs = True
3060 break
3061 for row in rows:
3062 mrr = row.get('metadataRowRenderer') or {}
3063 mrr_title = mrr.get('title')
3064 if not mrr_title:
3065 continue
052e1350 3066 mrr_title = self._get_text(mrr, 'title')
3067 mrr_contents_text = self._get_text(mrr, ('contents', 0))
545cc85d 3068 if mrr_title == 'License':
3069 info['license'] = mrr_contents_text
3070 elif not multiple_songs:
3071 if mrr_title == 'Album':
3072 info['album'] = mrr_contents_text
3073 elif mrr_title == 'Artist':
3074 info['artist'] = mrr_contents_text
3075 elif mrr_title == 'Song':
3076 info['track'] = mrr_contents_text
3077
3078 fallbacks = {
3079 'channel': 'uploader',
3080 'channel_id': 'uploader_id',
3081 'channel_url': 'uploader_url',
3082 }
3083 for to, frm in fallbacks.items():
3084 if not info.get(to):
3085 info[to] = info.get(frm)
3086
3087 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
3088 v = info.get(s_k)
3089 if v:
3090 info[d_k] = v
b84071c0 3091
11f9be09 3092 is_private = get_first(video_details, 'isPrivate', expected_type=bool)
3093 is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
c224251a 3094 is_membersonly = None
b28f8d24 3095 is_premium = None
c224251a
M
3096 if initial_data and is_private is not None:
3097 is_membersonly = False
b28f8d24 3098 is_premium = False
47193e02 3099 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
3100 badge_labels = set()
3101 for content in contents:
3102 if not isinstance(content, dict):
3103 continue
3104 badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
3105 for badge_label in badge_labels:
3106 if badge_label.lower() == 'members only':
3107 is_membersonly = True
3108 elif badge_label.lower() == 'premium':
3109 is_premium = True
3110 elif badge_label.lower() == 'unlisted':
3111 is_unlisted = True
c224251a 3112
c224251a
M
3113 info['availability'] = self._availability(
3114 is_private=is_private,
b28f8d24 3115 needs_premium=is_premium,
c224251a
M
3116 needs_subscription=is_membersonly,
3117 needs_auth=info['age_limit'] >= 18,
3118 is_unlisted=None if is_private is None else is_unlisted)
3119
a2160aa4 3120 info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)
4ea3be0a 3121
11f9be09 3122 self.mark_watched(video_id, player_responses)
d77ab8e2 3123
545cc85d 3124 return info
c5e8d7af 3125
a61fd4cf 3126
a6213a49 3127class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
5f6a1245 3128
a6213a49 3129 def _extract_channel_id(self, webpage):
3130 channel_id = self._html_search_meta(
3131 'channelId', webpage, 'channel id', default=None)
3132 if channel_id:
3133 return channel_id
3134 channel_url = self._html_search_meta(
3135 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3136 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3137 'twitter:app:url:googleplay'), webpage, 'channel url')
3138 return self._search_regex(
3139 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
3140 channel_url, 'channel id')
15f6397c 3141
8bdd16b4 3142 @staticmethod
cd7c66cf 3143 def _extract_basic_item_renderer(item):
3144 # Modified from _extract_grid_item_renderer
201c1459 3145 known_basic_renderers = (
3146 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
cd7c66cf 3147 )
3148 for key, renderer in item.items():
201c1459 3149 if not isinstance(renderer, dict):
cd7c66cf 3150 continue
201c1459 3151 elif key in known_basic_renderers:
3152 return renderer
3153 elif key.startswith('grid') and key.endswith('Renderer'):
3154 return renderer
8bdd16b4 3155
def _grid_entries(self, grid_renderer):
    """
    Yield a result for every recognised item of a grid-like renderer.

    Each item is resolved, in this order of precedence, as a playlist, a
    video, a channel, or a generic navigation endpoint URL that one of the
    YouTube extractors accepts. Items matching none of these are dropped.
    @param grid_renderer: dict with an 'items' list
    """
    for grid_item in grid_renderer['items']:
        if not isinstance(grid_item, dict):
            continue
        renderer = self._extract_basic_item_renderer(grid_item)
        if not isinstance(renderer, dict):
            continue
        title = self._get_text(renderer, 'title')

        playlist_id = renderer.get('playlistId')
        video_id = renderer.get('videoId')
        channel_id = renderer.get('channelId')

        if playlist_id:
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=title)
        elif video_id:
            yield self._extract_video(renderer)
        elif channel_id:
            yield self.url_result(
                'https://www.youtube.com/channel/%s' % channel_id,
                ie=YoutubeTabIE.ie_key(), video_title=title)
        else:
            # Generic endpoint URL support: hand the URL to the first
            # extractor that declares itself suitable for it
            ep_url = urljoin('https://www.youtube.com/', try_get(
                renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
                compat_str))
            if not ep_url:
                continue
            ie = next(
                (candidate for candidate in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE)
                 if candidate.suitable(ep_url)),
                None)
            if ie is not None:
                yield self.url_result(
                    ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
8bdd16b4 3195
3d3dddc9 3196 def _shelf_entries_from_content(self, shelf_renderer):
3197 content = shelf_renderer.get('content')
3198 if not isinstance(content, dict):
8bdd16b4 3199 return
cd7c66cf 3200 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3d3dddc9 3201 if renderer:
3202 # TODO: add support for nested playlists so each shelf is processed
3203 # as separate playlist
3204 # TODO: this includes only first N items
3205 for entry in self._grid_entries(renderer):
3206 yield entry
3207 renderer = content.get('horizontalListRenderer')
3208 if renderer:
3209 # TODO
3210 pass
8bdd16b4 3211
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """
    Yield results for a shelf: its own URL (when it has one) followed by
    whatever entries are embedded in its content.

    @param shelf_renderer: the shelf renderer dict
    @param skip_channels: when true, a shelf whose URL contains '/channels?'
                          yields nothing at all
    """
    shelf_url = urljoin('https://www.youtube.com', try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str))
    if shelf_url:
        # Skip links to other channels. Note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        yield self.url_result(
            shelf_url, video_title=self._get_text(shelf_renderer, 'title'))
    # A shelf may come without a URL, so the embedded content is extracted too
    yield from self._shelf_entries_from_content(shelf_renderer)
c5e8d7af 3228
8bdd16b4 3229 def _playlist_entries(self, video_list_renderer):
3230 for content in video_list_renderer['contents']:
3231 if not isinstance(content, dict):
3232 continue
3233 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3234 if not isinstance(renderer, dict):
3235 continue
3236 video_id = renderer.get('videoId')
3237 if not video_id:
3238 continue
3239 yield self._extract_video(renderer)
07aeced6 3240
def _rich_entries(self, rich_grid_renderer):
    """
    Yield the video wrapped by a rich item, if it has one with a video ID.

    @param rich_grid_renderer: renderer whose content.videoRenderer is read
    """
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
3248
8bdd16b4 3249 def _video_entry(self, video_renderer):
3250 video_id = video_renderer.get('videoId')
3251 if video_id:
3252 return self._extract_video(video_renderer)
dacb3a86 3253
def _post_thread_entries(self, post_thread_renderer):
    """
    Yield everything playable that a backstage post refers to.

    In order: the attached video, the attached playlist, then every inline
    link in the post text that YoutubeIE accepts (a link pointing at the
    attached video is not repeated).
    @param post_thread_renderer: renderer holding post.backstagePostRenderer
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    attached_video_id = attached_video.get('videoId')
    if attached_video_id:
        attached_entry = self._extract_video(attached_video)
        if attached_entry:
            yield attached_entry

    # playlist attachment
    attached_playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
    if attached_playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % attached_playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=attached_playlist_id)

    # inline video links
    for run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(run, dict):
            continue
        link = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not link or not YoutubeIE.suitable(link):
            continue
        linked_video_id = YoutubeIE._match_id(link)
        if linked_video_id != attached_video_id:
            yield self.url_result(link, ie=YoutubeIE.ie_key(), video_id=linked_video_id)
dacb3a86 3289
8bdd16b4 3290 def _post_thread_continuation_entries(self, post_thread_continuation):
3291 contents = post_thread_continuation.get('contents')
3292 if not isinstance(contents, list):
3293 return
3294 for content in contents:
3295 renderer = content.get('backstagePostThreadRenderer')
3296 if not isinstance(renderer, dict):
3297 continue
3298 for entry in self._post_thread_entries(renderer):
3299 yield entry
07aeced6 3300
39ed931e 3301 r''' # unused
3302 def _rich_grid_entries(self, contents):
3303 for content in contents:
3304 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3305 if video_renderer:
3306 entry = self._video_entry(video_renderer)
3307 if entry:
3308 yield entry
3309 '''
def _extract_entries(self, parent_renderer, continuation_list):
    """
    Yield the entries found under `parent_renderer['contents']`.

    @param parent_renderer: dict with a 'contents' list of rich items and/or
                            item sections
    @param continuation_list: one-element list used as an out-parameter; it
                              is reset to [None] and then filled in place
                              with the continuation that was found
    """
    # continuation_list is modified in-place with continuation_list = [continuation_token]
    continuation_list[:] = [None]

    # Renderer key -> callable producing the entries of that renderer
    handlers = {
        'playlistVideoListRenderer': self._playlist_entries,
        'gridRenderer': self._grid_entries,
        'shelfRenderer': lambda x: self._shelf_entries(x),
        'backstagePostThreadRenderer': self._post_thread_entries,
        'videoRenderer': lambda x: [self._video_entry(x)],
        'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
        'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
    }

    for content in try_get(parent_renderer, lambda x: x['contents'], list) or []:
        if not isinstance(content, dict):
            continue
        section = try_get(content, lambda x: x['itemSectionRenderer'], dict)
        if not section:
            # Not an item section: only rich items are understood here
            rich_item = content.get('richItemRenderer')
            if rich_item:
                yield from self._rich_entries(rich_item)
                continuation_list[0] = self._extract_continuation(parent_renderer)
            continue

        for section_item in try_get(section, lambda x: x['contents'], list) or []:
            if not isinstance(section_item, dict):
                continue
            # Only the first known renderer of each section item is used
            for key, renderer in section_item.items():
                handler = handlers.get(key)
                if handler is None:
                    continue
                for entry in handler(renderer):
                    if entry:
                        yield entry
                continuation_list[0] = self._extract_continuation(renderer)
                break

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(section)

    if not continuation_list[0]:
        continuation_list[0] = self._extract_continuation(parent_renderer)
3353
def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
    """
    Yield all entries of a tab, following continuations page by page.

    The entries of the tab's initial content are yielded first. After that,
    further pages are requested with `_extract_response` for as long as a
    continuation is available and the response holds a renderer known here.
    @param tab: tab renderer; its 'content' dict is read
    @param item_id: ID used to label the page requests
    @param ytcfg: passed on to the header generation and page requests
    @param account_syncid: passed on to the header generation
    @param visitor_data: initial visitor data; replaced by the value found
                         in each response, when there is one
    """
    # One-element list that `_extract_entries` fills in place with the
    # continuation it found
    continuation_list = [None]
    # The lambda looks `continuation_list` up when it is called, so it also
    # sees the fresh lists this name is rebound to further down
    extract_entries = lambda x: self._extract_entries(x, continuation_list)
    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    for entry in extract_entries(parent_renderer):
        yield entry
    continuation = continuation_list[0]

    for page_num in itertools.count(1):
        if not continuation:
            break
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
        response = self._extract_response(
            item_id='%s page %s' % (item_id, page_num),
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

        if not response:
            break
        # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
        # See: https://github.com/ytdl-org/youtube-dl/issues/28702
        visitor_data = self._extract_visitor_data(response) or visitor_data

        # First response shape: a renderer directly under 'continuationContents'
        known_continuation_renderers = {
            'playlistVideoListContinuation': self._playlist_entries,
            'gridContinuation': self._grid_entries,
            'itemSectionContinuation': self._post_thread_continuation_entries,
            'sectionListContinuation': extract_entries,  # for feeds
        }
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value
            continuation_list = [None]
            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry
            # Prefer the continuation reported through continuation_list (only
            # `extract_entries` fills it); otherwise read it off the renderer
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            break
        if continuation_renderer:
            continue

        # Second response shape: a list of appended continuation items. The
        # key of the first item selects the handler and the key under which
        # the whole list is wrapped before being handed to it
        known_renderers = {
            'gridPlaylistRenderer': (self._grid_entries, 'items'),
            'gridVideoRenderer': (self._grid_entries, 'items'),
            'gridChannelRenderer': (self._grid_entries, 'items'),
            'playlistVideoRenderer': (self._playlist_entries, 'contents'),
            'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
            'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
            'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
        }
        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key, value in continuation_item.items():
            if key not in known_renderers:
                continue
            video_items_renderer = {known_renderers[key][1]: continuation_items}
            continuation_list = [None]
            for entry in known_renderers[key][0](video_items_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
            break
        if video_items_renderer:
            continue
        # Neither shape held a known renderer: stop paging
        break
9558dcec 3430
@staticmethod
def _extract_selected_tab(tabs):
    """
    Return the renderer of the tab flagged as selected.

    @param tabs: iterable of tabs, each holding a 'tabRenderer' or an
                 'expandableTabRenderer'
    @returns: the first renderer whose 'selected' value is True
    Raises ExtractorError when no tab is selected.
    """
    for tab in tabs:
        tab_renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
        if tab_renderer.get('selected') is True:
            return tab_renderer
    raise ExtractorError('Unable to find selected tab')
b82f815f 3439
@classmethod
def _extract_uploader(cls, data):
    """
    Read the playlist owner from the secondary sidebar info renderer.

    @param data: response
    @returns: dict with whichever of 'uploader', 'uploader_id' and
              'uploader_url' could be found; empty when there is no owner
    """
    sidebar = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner = try_get(
        sidebar, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if not owner:
        return {}
    found = {
        'uploader': owner.get('text'),
        'uploader_id': try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str),
        'uploader_url': urljoin(
            'https://www.youtube.com/',
            try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str)),
    }
    # Fields that could not be determined are left out entirely
    return {field: value for field, value in found.items() if value is not None}
8bdd16b4 3454
def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """
    Build the playlist result for the selected tab of a tabbed page.

    Metadata comes from the channel metadata renderer or, failing that,
    from the playlist metadata renderer; the entries come from `_entries`.
    @param item_id: fallback playlist ID
    @param ytcfg: passed on to the entry extraction
    @param data: response
    @param tabs: tabs of the page; exactly the selected one is extracted
    """
    selected_tab = self._extract_selected_tab(tabs)

    channel_name = channel_url = channel_id = None
    meta_renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if meta_renderer:
        channel_name = meta_renderer.get('title')
        channel_url = meta_renderer.get('channelUrl')
        channel_id = meta_renderer.get('externalId')
    else:
        meta_renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    playlist_id = title = description = None
    tags, raw_thumbnails = [], []
    if meta_renderer:
        title = meta_renderer.get('title')
        description = meta_renderer.get('description', '')
        playlist_id = channel_id
        tags = meta_renderer.get('keywords', '').split()
        raw_thumbnails = (
            try_get(meta_renderer, lambda x: x['avatar']['thumbnails'], list)
            or try_get(
                self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
                lambda x: x['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                list)
            or [])

    # Keep only thumbnails that carry a valid URL
    thumbnails = []
    for raw_thumbnail in raw_thumbnails:
        thumbnail_url = url_or_none(raw_thumbnail.get('url')) if isinstance(raw_thumbnail, dict) else None
        if thumbnail_url:
            thumbnails.append({
                'url': thumbnail_url,
                'width': int_or_none(raw_thumbnail.get('width')),
                'height': int_or_none(raw_thumbnail.get('height')),
            })

    if playlist_id is None:
        playlist_id = item_id
    if title is None:
        title = (
            try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
            or playlist_id)
    title = (
        title
        + format_field(selected_tab, 'title', ' - %s')
        + format_field(selected_tab, 'expandedText', ' - %s'))

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
    }
    availability = self._extract_availability(data)
    if availability:
        metadata['availability'] = availability
    if not channel_id:
        metadata.update(self._extract_uploader(data))
    # The channel fields mirror the (possibly just updated) uploader fields
    for channel_key, uploader_key in (
            ('channel', 'uploader'),
            ('channel_id', 'uploader_id'),
            ('channel_url', 'uploader_url')):
        metadata[channel_key] = metadata[uploader_key]

    entries = self._entries(
        selected_tab, playlist_id, ytcfg,
        self._extract_account_syncid(ytcfg, data),
        self._extract_visitor_data(data, ytcfg))
    return self.playlist_result(entries, **metadata)
73c4ac2c 3529
def _extract_mix_playlist(self, playlist, playlist_id, data, ytcfg):
    """
    Yield the videos of a mix playlist, requesting further pages as needed.

    Extraction stops when a page holds no videos, when a page adds nothing
    after the last video already yielded, when the first video shows up
    again, or when the response holds no further playlist page.
    @param playlist: playlist renderer dict of the first page
    @param playlist_id: ID sent with every page request
    @param data: response of the initial page
    @param ytcfg: passed on to the header generation and page requests
    """
    first_id = last_id = response = None
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last video of the previous page, or from
        # the start when that video is not part of this page
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        for video in videos[start:]:
            if video['id'] == first_id:
                self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                return
            yield video
        first_id = first_id or videos[0]['id']
        last_id = videos[-1]['id']
        # The last item is not guaranteed to carry a watch endpoint; fall
        # back to an empty dict so the defaults in the query below apply
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if not playlist:
            # No further page: end here instead of passing None on to
            # _playlist_entries, which subscripts its argument
            return
cd7c66cf 3564
def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """
    Build the result for a playlist renderer.

    A playlist that links to a URL other than `url` is delegated to
    YoutubeTabIE; anything else is extracted as a mix playlist.
    @param item_id: fallback playlist ID
    @param url: URL currently being extracted
    @param data: response
    @param playlist: playlist renderer dict
    @param ytcfg: passed on to the mix extraction
    """
    title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], compat_str)
    playlist_id = playlist.get('playlistId') or item_id

    endpoint_url = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, endpoint_url)

    if not playlist_url or playlist_url == url:
        # Mix playlists are the only ones handled right here
        return self.playlist_result(
            self._extract_mix_playlist(playlist, playlist_id, data, ytcfg),
            playlist_id=playlist_id, playlist_title=title)

    # Delegating everything except mix playlists to regular tab-based playlist URL
    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
c5e8d7af 3582
def _extract_availability(self, data):
    """
    Work out the availability of a playlist/tab from its primary sidebar.
    Note: nothing is taken to be public unless YouTube says so explicitly
    @param data: response
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    labels = self._extract_badges(sidebar)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    dropdown_entries = try_get(
        sidebar, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
    for dropdown_entry in dropdown_entries:
        if not try_get(
                dropdown_entry, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool):
            continue
        selected_label = self._get_text(dropdown_entry, ('privacyDropdownItemRenderer', 'label'))
        if selected_label:
            labels.add(selected_label.lower())
            break

    is_private = is_unlisted = None
    for label in labels:
        if label == 'unlisted':
            is_unlisted = True
        elif label == 'private':
            is_private = True
        elif label == 'public':
            is_unlisted = is_private = False
    return self._availability(is_private, False, False, False, is_unlisted)
3614
@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """
    Find a renderer among the playlist sidebar items.

    @param data: response
    @param info_renderer: key of the wanted renderer
    @param expected_type: type the renderer has to have
    @returns: the first truthy match, or None when no item has one
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    matches = (
        try_get(sidebar_item, lambda x: x[info_renderer], expected_type)
        for sidebar_item in sidebar_items)
    return next((match for match in matches if match), None)
3623
def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Request the playlist again with its unavailable videos included.

    The browse ID and params are taken from the 'show unavailable videos'
    menu item of the primary sidebar when it exists; otherwise defaults
    derived from `item_id` are sent.
    @returns: the API response, or None when there is no primary sidebar
              info renderer
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not sidebar:
        return

    browse_endpoint = {}
    for menu_item in try_get(sidebar, lambda x: x['menu']['menuRenderer']['items'], list) or []:
        if not isinstance(menu_item, dict):
            continue
        nav_item = menu_item.get('menuNavigationItemRenderer')
        label = try_get(nav_item, lambda x: x['text']['simpleText'], compat_str)
        if label and label.lower() == 'show unavailable videos':
            browse_endpoint = try_get(
                nav_item, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            break

    headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        visitor_data=self._extract_visitor_data(data, ytcfg))
    query = {
        'params': browse_endpoint.get('params') or 'wgYCCAA=',
        'browseId': browse_endpoint.get('browseId') or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')
358de58c 3659
def _extract_webpage(self, url, item_id, fatal=True):
    """
    Download a webpage and its yt initial data, retrying when needed.

    One initial attempt is made, followed by up to `extractor_retries`
    (default 3) further ones. An attempt is repeated after a network error
    other than HTTP 403/429, or when the initial data holds neither
    'contents' nor 'currentVideoEndpoint'.
    @param url: URL to download
    @param item_id: ID used for the download
    @param fatal: when true, errors are raised; otherwise they are reported
                  as warnings and whatever was obtained is returned
    @returns: (webpage, data)
    """
    retries = self.get_param('extractor_retries', 3)
    # Starts at -1 so that the first attempt is number 0
    count = -1
    webpage = data = last_error = None
    while count < retries:
        count += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if last_error:
            self.report_warning('%s. Retrying ...' % last_error)
        try:
            webpage = self._download_webpage(
                url, item_id,
                note='Downloading webpage%s' % (' (retry #%d)' % count if count else '',))
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as e:
            if isinstance(e.cause, network_exceptions):
                # HTTP 403 and 429 are not retried
                if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
                    last_error = error_to_compat_str(e.cause or e.msg)
                    if count < retries:
                        continue
            # Not retryable, or out of retries
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            break
        else:
            try:
                self._extract_and_report_alerts(data)
            except ExtractorError as e:
                if fatal:
                    raise
                self.report_warning(error_to_compat_str(e))
                break

            # The data is complete as soon as one of these keys is found
            if dict_get(data, ('contents', 'currentVideoEndpoint')):
                break

            last_error = 'Incomplete yt initial data received'
            if count >= retries:
                if fatal:
                    raise ExtractorError(last_error)
                self.report_warning(last_error)
                break

    return webpage, data
3705
ac56cf38 3706 def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
3707 data = None
3708 if 'webpage' not in self._configuration_arg('skip'):
3709 webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
3710 ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)
3711 if not data:
3712 if not ytcfg and self.is_authenticated:
3713 msg = 'Playlists that require authentication may not extract correctly without a successful webpage download.'
3714 if 'authcheck' not in self._configuration_arg('skip') and fatal:
3715 raise ExtractorError(
3716 msg + ' If you are not downloading private content, or your cookies are only for the first account and channel,'
3717 ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
3718 expected=True)
3719 self.report_warning(msg, only_once=True)
3720 data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
3721 return data, ytcfg
3722
def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """
    Resolve `url` through the API and fetch the data of its endpoint.

    The URL is resolved with 'navigation/resolve_url'; a resulting browse
    endpoint is requested from 'browse', a watch endpoint from 'next'.
    @returns: the endpoint response; None when nothing could be resolved
              and `fatal` is false
    Raises ExtractorError when nothing could be resolved and `fatal` is true.
    """
    headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolved = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)

    for endpoint_key, api_ep in (('browseEndpoint', 'browse'), ('watchEndpoint', 'next')):
        endpoint_params = try_get(resolved, lambda x: x['endpoint'][endpoint_key], dict)
        if not endpoint_params:
            continue
        return self._extract_response(
            item_id=item_id, query=endpoint_params, ep=api_ep, headers=headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint'))

    err_note = 'Failed to resolve url (does the playlist exist?)'
    if not fatal:
        self.report_warning(err_note, item_id)
        return None
    raise ExtractorError(err_note, expected=True)
3740
a6213a49 3741 @staticmethod
3742 def _smuggle_data(entries, data):
3743 for entry in entries:
3744 if data:
3745 entry['url'] = smuggle_url(entry['url'], data)
3746 yield entry
3747
# Default `params` value that `_search_results` sends when the caller does
# not pass one; while it is falsy no 'params' key is added to the query.
_SEARCH_PARAMS = None
3749
def _search_results(self, query, params=NO_DEFAULT):
    """
    Yield the entries of a search, one result page after another.

    @param query: the search query
    @param params: value for the 'params' key of the request; defaults to
                   `_SEARCH_PARAMS`, and is left out when falsy
    """
    request = {'query': query}
    effective_params = self._SEARCH_PARAMS if params is NO_DEFAULT else params
    if effective_params:
        request['params'] = effective_params

    # Filled in place by _extract_entries with the continuation of the page
    continuation_list = [None]
    page_num = 0
    while True:
        page_num += 1
        request.update(continuation_list[0] or {})
        response = self._extract_response(
            item_id='query "%s" page %s' % (query, page_num), ep='search', query=request,
            check_get_keys=('contents', 'onResponseReceivedCommands'))
        # Either the first page or an appended continuation page
        section_contents = try_get(
            response,
            (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
             lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
            list)
        yield from self._extract_entries({'contents': section_contents}, continuation_list)
        if not continuation_list[0]:
            return
3770
3771
3772class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
3773 IE_DESC = 'YouTube Tabs'
3774 _VALID_URL = r'''(?x:
3775 https?://
3776 (?:\w+\.)?
3777 (?:
3778 youtube(?:kids)?\.com|
3779 %(invidious)s
3780 )/
3781 (?:
3782 (?P<channel_type>channel|c|user|browse)/|
3783 (?P<not_channel>
3784 feed/|hashtag/|
3785 (?:playlist|watch)\?.*?\blist=
3786 )|
3787 (?!(?:%(reserved_names)s)\b) # Direct URLs
3788 )
3789 (?P<id>[^/?\#&]+)
3790 )''' % {
3791 'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,
3792 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
3793 }
3794 IE_NAME = 'youtube:tab'
3795
3796 _TESTS = [{
3797 'note': 'playlists, multipage',
3798 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
3799 'playlist_mincount': 94,
3800 'info_dict': {
3801 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3802 'title': 'Игорь Клейнер - Playlists',
3803 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3804 'uploader': 'Игорь Клейнер',
3805 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3806 },
3807 }, {
3808 'note': 'playlists, multipage, different order',
3809 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3810 'playlist_mincount': 94,
3811 'info_dict': {
3812 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3813 'title': 'Игорь Клейнер - Playlists',
3814 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3815 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3816 'uploader': 'Игорь Клейнер',
3817 },
3818 }, {
3819 'note': 'playlists, series',
3820 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
3821 'playlist_mincount': 5,
3822 'info_dict': {
3823 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3824 'title': '3Blue1Brown - Playlists',
3825 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3826 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3827 'uploader': '3Blue1Brown',
3828 },
3829 }, {
3830 'note': 'playlists, singlepage',
3831 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3832 'playlist_mincount': 4,
3833 'info_dict': {
3834 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3835 'title': 'ThirstForScience - Playlists',
3836 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
3837 'uploader': 'ThirstForScience',
3838 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3839 }
3840 }, {
3841 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3842 'only_matching': True,
3843 }, {
3844 'note': 'basic, single video playlist',
3845 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3846 'info_dict': {
3847 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3848 'uploader': 'Sergey M.',
3849 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3850 'title': 'youtube-dl public playlist',
3851 },
3852 'playlist_count': 1,
3853 }, {
3854 'note': 'empty playlist',
3855 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3856 'info_dict': {
3857 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3858 'uploader': 'Sergey M.',
3859 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3860 'title': 'youtube-dl empty playlist',
3861 },
3862 'playlist_count': 0,
3863 }, {
3864 'note': 'Home tab',
3865 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
3866 'info_dict': {
3867 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3868 'title': 'lex will - Home',
3869 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3870 'uploader': 'lex will',
3871 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3872 },
3873 'playlist_mincount': 2,
3874 }, {
3875 'note': 'Videos tab',
3876 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
3877 'info_dict': {
3878 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3879 'title': 'lex will - Videos',
3880 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3881 'uploader': 'lex will',
3882 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3883 },
3884 'playlist_mincount': 975,
3885 }, {
3886 'note': 'Videos tab, sorted by popular',
3887 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
3888 'info_dict': {
3889 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3890 'title': 'lex will - Videos',
3891 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3892 'uploader': 'lex will',
3893 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3894 },
3895 'playlist_mincount': 199,
3896 }, {
3897 'note': 'Playlists tab',
3898 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
3899 'info_dict': {
3900 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3901 'title': 'lex will - Playlists',
3902 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3903 'uploader': 'lex will',
3904 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3905 },
3906 'playlist_mincount': 17,
3907 }, {
3908 'note': 'Community tab',
3909 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
3910 'info_dict': {
3911 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3912 'title': 'lex will - Community',
3913 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3914 'uploader': 'lex will',
3915 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3916 },
3917 'playlist_mincount': 18,
3918 }, {
3919 'note': 'Channels tab',
3920 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
3921 'info_dict': {
3922 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3923 'title': 'lex will - Channels',
3924 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3925 'uploader': 'lex will',
3926 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3927 },
3928 'playlist_mincount': 12,
3929 }, {
3930 'note': 'Search tab',
3931 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
3932 'playlist_mincount': 40,
3933 'info_dict': {
3934 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3935 'title': '3Blue1Brown - Search - linear algebra',
3936 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3937 'uploader': '3Blue1Brown',
3938 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3939 },
3940 }, {
3941 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3942 'only_matching': True,
3943 }, {
3944 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3945 'only_matching': True,
3946 }, {
3947 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3948 'only_matching': True,
3949 }, {
3950 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
3951 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3952 'info_dict': {
3953 'title': '29C3: Not my department',
3954 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3955 'uploader': 'Christiaan008',
3956 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
3957 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
3958 },
3959 'playlist_count': 96,
3960 }, {
3961 'note': 'Large playlist',
3962 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
3963 'info_dict': {
3964 'title': 'Uploads from Cauchemar',
3965 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
3966 'uploader': 'Cauchemar',
3967 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
3968 },
3969 'playlist_mincount': 1123,
3970 }, {
3971 'note': 'even larger playlist, 8832 videos',
3972 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
3973 'only_matching': True,
3974 }, {
3975 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
3976 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
3977 'info_dict': {
3978 'title': 'Uploads from Interstellar Movie',
3979 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
3980 'uploader': 'Interstellar Movie',
3981 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
3982 },
3983 'playlist_mincount': 21,
3984 }, {
3985 'note': 'Playlist with "show unavailable videos" button',
3986 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
3987 'info_dict': {
3988 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
3989 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
3990 'uploader': 'Phim Siêu Nhân Nhật Bản',
3991 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
3992 },
3993 'playlist_mincount': 200,
3994 }, {
3995 'note': 'Playlist with unavailable videos in page 7',
3996 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
3997 'info_dict': {
3998 'title': 'Uploads from BlankTV',
3999 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
4000 'uploader': 'BlankTV',
4001 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
4002 },
4003 'playlist_mincount': 1000,
4004 }, {
4005 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
4006 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
4007 'info_dict': {
4008 'title': 'Data Analysis with Dr Mike Pound',
4009 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
4010 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
4011 'uploader': 'Computerphile',
4012 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
4013 },
4014 'playlist_mincount': 11,
4015 }, {
4016 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
4017 'only_matching': True,
4018 }, {
4019 'note': 'Playlist URL that does not actually serve a playlist',
4020 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
4021 'info_dict': {
4022 'id': 'FqZTN594JQw',
4023 'ext': 'webm',
4024 'title': "Smiley's People 01 detective, Adventure Series, Action",
4025 'uploader': 'STREEM',
4026 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
4027 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
4028 'upload_date': '20150526',
4029 'license': 'Standard YouTube License',
4030 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
4031 'categories': ['People & Blogs'],
4032 'tags': list,
4033 'view_count': int,
4034 'like_count': int,
4035 'dislike_count': int,
4036 },
4037 'params': {
4038 'skip_download': True,
4039 },
4040 'skip': 'This video is not available.',
4041 'add_ie': [YoutubeIE.ie_key()],
4042 }, {
4043 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
4044 'only_matching': True,
4045 }, {
4046 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
4047 'only_matching': True,
4048 }, {
4049 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
4050 'info_dict': {
4051 'id': '3yImotZU3tw', # This will keep changing
4052 'ext': 'mp4',
4053 'title': compat_str,
4054 'uploader': 'Sky News',
4055 'uploader_id': 'skynews',
4056 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
4057 'upload_date': r're:\d{8}',
4058 'description': compat_str,
4059 'categories': ['News & Politics'],
4060 'tags': list,
4061 'like_count': int,
4062 'dislike_count': int,
4063 },
4064 'params': {
4065 'skip_download': True,
4066 },
4067 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
4068 }, {
4069 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
4070 'info_dict': {
4071 'id': 'a48o2S1cPoo',
4072 'ext': 'mp4',
4073 'title': 'The Young Turks - Live Main Show',
4074 'uploader': 'The Young Turks',
4075 'uploader_id': 'TheYoungTurks',
4076 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
4077 'upload_date': '20150715',
4078 'license': 'Standard YouTube License',
4079 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
4080 'categories': ['News & Politics'],
4081 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
4082 'like_count': int,
4083 'dislike_count': int,
4084 },
4085 'params': {
4086 'skip_download': True,
4087 },
4088 'only_matching': True,
4089 }, {
4090 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
4091 'only_matching': True,
4092 }, {
4093 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
4094 'only_matching': True,
4095 }, {
4096 'note': 'A channel that is not live. Should raise error',
4097 'url': 'https://www.youtube.com/user/numberphile/live',
4098 'only_matching': True,
4099 }, {
4100 'url': 'https://www.youtube.com/feed/trending',
4101 'only_matching': True,
4102 }, {
4103 'url': 'https://www.youtube.com/feed/library',
4104 'only_matching': True,
4105 }, {
4106 'url': 'https://www.youtube.com/feed/history',
4107 'only_matching': True,
4108 }, {
4109 'url': 'https://www.youtube.com/feed/subscriptions',
4110 'only_matching': True,
4111 }, {
4112 'url': 'https://www.youtube.com/feed/watch_later',
4113 'only_matching': True,
4114 }, {
4115 'note': 'Recommended - redirects to home page.',
4116 'url': 'https://www.youtube.com/feed/recommended',
4117 'only_matching': True,
4118 }, {
4119 'note': 'inline playlist with not always working continuations',
4120 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
4121 'only_matching': True,
4122 }, {
4123 'url': 'https://www.youtube.com/course',
4124 'only_matching': True,
4125 }, {
4126 'url': 'https://www.youtube.com/zsecurity',
4127 'only_matching': True,
4128 }, {
4129 'url': 'http://www.youtube.com/NASAgovVideo/videos',
4130 'only_matching': True,
4131 }, {
4132 'url': 'https://www.youtube.com/TheYoungTurks/live',
4133 'only_matching': True,
4134 }, {
4135 'url': 'https://www.youtube.com/hashtag/cctv9',
4136 'info_dict': {
4137 'id': 'cctv9',
4138 'title': '#cctv9',
4139 },
4140 'playlist_mincount': 350,
4141 }, {
4142 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
4143 'only_matching': True,
4144 }, {
4145 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
4146 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
4147 'only_matching': True
4148 }, {
4149 'note': '/browse/ should redirect to /channel/',
4150 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
4151 'only_matching': True
4152 }, {
4153 'note': 'VLPL, should redirect to playlist?list=PL...',
4154 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
4155 'info_dict': {
4156 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
4157 'uploader': 'NoCopyrightSounds',
4158 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
4159 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
4160 'title': 'NCS Releases',
4161 },
4162 'playlist_mincount': 166,
4163 }, {
4164 'note': 'Topic, should redirect to playlist?list=UU...',
4165 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
4166 'info_dict': {
4167 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
4168 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
4169 'title': 'Uploads from Royalty Free Music - Topic',
4170 'uploader': 'Royalty Free Music - Topic',
4171 },
4172 'expected_warnings': [
4173 'A channel/user page was given',
4174 'The URL does not have a videos tab',
4175 ],
4176 'playlist_mincount': 101,
4177 }, {
4178 'note': 'Topic without a UU playlist',
4179 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
4180 'info_dict': {
4181 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
4182 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
4183 },
4184 'expected_warnings': [
4185 'A channel/user page was given',
4186 'The URL does not have a videos tab',
4187 'Falling back to channel URL',
4188 ],
4189 'playlist_mincount': 9,
4190 }, {
4191 'note': 'Youtube music Album',
4192 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
4193 'info_dict': {
4194 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
4195 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
4196 },
4197 'playlist_count': 50,
4198 }, {
4199 'note': 'unlisted single video playlist',
4200 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
4201 'info_dict': {
4202 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
4203 'uploader': 'colethedj',
4204 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
4205 'title': 'yt-dlp unlisted playlist test',
4206 'availability': 'unlisted'
4207 },
4208 'playlist_count': 1,
4209 }, {
4210 'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
4211 'url': 'https://www.youtube.com/feed/recommended',
4212 'info_dict': {
4213 'id': 'recommended',
4214 'title': 'recommended',
4215 },
4216 'playlist_mincount': 50,
4217 'params': {
4218 'skip_download': True,
4219 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
4220 },
4221 }, {
4222 'note': 'API Fallback: /videos tab, sorted by oldest first',
4223 'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',
4224 'info_dict': {
4225 'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
4226 'title': 'Cody\'sLab - Videos',
4227 'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',
4228 'uploader': 'Cody\'sLab',
4229 'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
4230 },
4231 'playlist_mincount': 650,
4232 'params': {
4233 'skip_download': True,
4234 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
4235 },
4236 }, {
4237 'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
4238 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
4239 'info_dict': {
4240 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
4241 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
4242 'title': 'Uploads from Royalty Free Music - Topic',
4243 'uploader': 'Royalty Free Music - Topic',
4244 },
4245 'expected_warnings': [
4246 'A channel/user page was given',
4247 'The URL does not have a videos tab',
4248 ],
4249 'playlist_mincount': 101,
4250 'params': {
4251 'skip_download': True,
4252 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
4253 },
4254 }]
4255
@classmethod
def suitable(cls, url):
    """Return False for any URL YoutubeIE accepts; otherwise defer to the parent check."""
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
9297939e 4260
def _real_extract(self, url):
    """Extract the tab/playlist and re-attach the smuggled data to its entries.

    The smuggled data is unpacked from the URL, flagged with 'is_music_url'
    when is_music_url() says so, passed to __real_extract, and finally
    applied to the result's 'entries' (when there are any) via _smuggle_data.
    """
    url, smuggled_data = unsmuggle_url(url, {})
    if self.is_music_url(url):
        smuggled_data['is_music_url'] = True

    result = self.__real_extract(url, smuggled_data)
    entries = result.get('entries')
    if entries:
        result['entries'] = self._smuggle_data(entries, smuggled_data)
    return result
4269
fe03a6cd 4270 _url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)
4271
def __real_extract(self, url, smuggled_data):
    """Resolve a tab URL to a playlist result or to a single-video redirect.

    Steps, in order:
      1. Force the host to www.youtube.com and split the URL with _url_re.
      2. Rewrite music URLs (VL... -> playlist, MP... -> canonical album URL,
         /browse/ -> /channel/) and send bare channel URLs to /videos unless
         the 'no-youtube-channel-redirect' compat option is set.
      3. Handle watch URLs that carry a playlist and/or video id.
      4. Fetch the page data and dispatch on its shape (tabs, watch-page
         playlist, or a lone video).

    @param url            URL accepted by _VALID_URL
    @param smuggled_data  dict; only the 'is_music_url' key is read here
    @raises ExtractorError  when the page cannot be recognised, an album
                            cannot be resolved, or a /live tab is not live
    """
    item_id = self._match_id(url)
    parsed_url = compat_urlparse.urlparse(url)
    url = compat_urlparse.urlunparse(parsed_url._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])
    channel_redirect = 'no-youtube-channel-redirect' not in compat_opts

    def split_url(target):
        # Named groups of _url_re, with unmatched groups mapped to ''
        groups = self._url_re.match(target).groupdict()
        return {name: '' if value is None else value for name, value in groups.items()}

    def find_tabs(response):
        return try_get(
            response, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)

    mobj = split_url(url)
    pre = mobj['pre']
    # Youtube returns incomplete data if tabname is not lower case
    tab = mobj['tab'].lower()
    post = mobj['post']
    is_channel = not mobj['not_channel']

    if is_channel and smuggled_data.get('is_music_url'):
        id_prefix = item_id[:2]
        if id_prefix == 'VL':
            # Youtube music VL channels have an equivalent playlist
            item_id = item_id[2:]
            pre = 'https://www.youtube.com/playlist?list=%s' % item_id
            tab = post = ''
            is_channel = False
        elif id_prefix == 'MP':
            # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
            mdata = self._extract_tab_endpoint(
                'https://music.youtube.com/channel/%s' % item_id, item_id, default_client='web_music')
            murl = traverse_obj(
                mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                get_all=False, expected_type=compat_str)
            if not murl:
                raise ExtractorError('Failed to resolve album to playlist.')
            return self.url_result(murl, ie=YoutubeTabIE.ie_key())
        elif mobj['channel_type'] == 'browse':
            # Youtube music /browse/ should be changed to /channel/
            pre = 'https://www.youtube.com/channel/%s' % item_id

    if is_channel and not tab and channel_redirect:
        # Home URLs should redirect to /videos/
        self.report_warning(
            'A channel/user page was given. All the channel\'s videos will be downloaded. '
            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    url = pre + tab + post
    mobj = split_url(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
        url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
        mobj = split_url(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return self.url_result(
                f'https://www.youtube.com/watch?v={video_id}', ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

    data, ytcfg = self._extract_data(url, item_id)

    tabs = find_tabs(data)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        tab_name = selected_tab.get('title', '')
        if channel_redirect:
            requested_tab = mobj['tab']
            if requested_tab == '/live':
                # Live tab should have redirected to the video
                raise ExtractorError('The channel is not currently live', expected=True)
            if requested_tab == '/videos' and tab_name.lower() != requested_tab[1:]:
                if not mobj['not_channel'] and item_id[:2] == 'UC':
                    # Topic channels don't have /videos. Use the equivalent playlist instead
                    self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
                    pl_id = 'UU%s' % item_id[2:]
                    pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
                    try:
                        pl_data, pl_ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True)
                    except ExtractorError:
                        self.report_warning('The playlist gave error. Falling back to channel URL')
                    else:
                        # Only switch over once the playlist has actually loaded
                        data, ytcfg, item_id, url = pl_data, pl_ytcfg, pl_id, pl_url
                else:
                    self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))

    self.write_debug('Final URL: %s' % url)

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)

    # The data may have been replaced above, so look the tabs up again
    tabs = find_tabs(data)
    if tabs:
        return self._extract_from_tabs(item_id, ytcfg, data, tabs)

    playlist = try_get(
        data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

    video_id = try_get(
        data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
        compat_str) or video_id
    if not video_id:
        raise ExtractorError('Unable to recognize tab page')

    if mobj['tab'] != '/live':  # live tab is expected to redirect to video
        self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
    return self.url_result(
        f'https://www.youtube.com/watch?v={video_id}', ie=YoutubeIE.ie_key(), video_id=video_id)
c5e8d7af 4383
c5e8d7af 4384
class YoutubePlaylistIE(InfoExtractor):
    """Accept bare playlist ids and generic '?list=' URLs and hand them to YoutubeTabIE."""

    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                %(invidious)s
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 654,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs YoutubeTabIE takes and URLs carrying a 'v' parameter; else use the parent check."""
        if YoutubeTabIE.suitable(url):
            return False
        # NOTE(review): parse_qs is also imported at module level; this local
        # import looks deliberate (presumably so the method still works when
        # its source is reused outside this module) - confirm before removing.
        from ..utils import parse_qs
        video_ids = parse_qs(url).get('v', [None])
        if video_ids[0]:
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Build the equivalent /playlist URL and delegate to YoutubeTabIE.

        The original query string is carried over when there is one;
        otherwise the matched id is sent as the 'list' parameter. Music URLs
        are marked by smuggling {'is_music_url': True} into the new URL.
        """
        playlist_id = self._match_id(url)
        from_music = YoutubeBaseInfoExtractor.is_music_url(url)
        query = parse_qs(url) or {'list': playlist_id}
        tab_url = update_url_query('https://www.youtube.com/playlist', query)
        if from_music:
            tab_url = smuggle_url(tab_url, {'is_music_url': True})
        return self.url_result(tab_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
29f7c58a 4470
4471
class YoutubeYtBeIE(InfoExtractor):
    """Turn youtu.be short links that carry a playlist id into watch URLs for YoutubeTabIE."""

    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Rebuild the link as a www.youtube.com/watch URL and delegate to YoutubeTabIE."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        # The result is identified by the playlist id, not the video id
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 4510
4511
class YoutubeYtUserIE(InfoExtractor):
    """Expand the 'ytuser:<name>' shortcut to that user's /videos page."""

    _VALID_URL = r'ytuser:(?P<id>.+)'
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the /user/<name>/videos URL."""
        user_id = self._match_id(url)
        videos_url = 'https://www.youtube.com/user/%s/videos' % user_id
        return self.url_result(videos_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 4525
b05654f0 4526
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Map the ':ytfav' keyword family onto the 'LL' playlist."""

    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    IE_NAME = 'youtube:favorites'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed 'LL' playlist URL."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
4544
4545
class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor bound to the 'ytsearch' search key."""

    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube search'
    _SEARCH_KEY = 'ytsearch'
    # Sent as the 'params' field of the search request
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'  # Videos only
    _TESTS = []
b05654f0 4552
a61fd4cf 4553
class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor bound to the 'ytsearchdate' search key."""

    # Derived from YoutubeSearchIE's name so the two stay in step
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAISAhAB'  # Videos only, sorted by date
75dff0ee 4559
c9ae7b95 4560
class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Handle /results search URLs, forwarding their 'sp' parameter to the search request."""

    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        }

    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a playlist of search results, using the query text as both id and title."""
        url_params = parse_qs(url)
        # 'search_query' takes precedence; 'q' is the fallback spelling
        terms = url_params.get('search_query') or url_params.get('q')
        query = terms[0]
        # None (no 'sp' in the URL) makes _search_results omit 'params' entirely
        search_params = url_params.get('sp', (None,))[0]
        return self.playlist_result(self._search_results(query, search_params), query, query)
3462ffa8 4589
4590
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Shared base for the feed shortcut extractors.
    Each subclass has to provide _FEED_NAME; it is used to build both
    IE_NAME and the /feed/ URL that gets delegated to YoutubeTabIE.
    """
    _TESTS = []
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        """Extractor name, 'youtube:' followed by the feed name."""
        return 'youtube:%s' % self._FEED_NAME

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with this feed's /feed/<name> URL."""
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
4607
4608
class YoutubeWatchLaterIE(InfoExtractor):
    """Map the ':ytwatchlater' keyword onto the 'WL' playlist."""

    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    IE_NAME = 'youtube:watchlater'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed 'WL' playlist URL."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 4621
4622
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for 'recommended'; matches the bare home page URL and ':ytrec'."""

    _FEED_NAME = 'recommended'
    # Overrides the base class default of True
    _LOGIN_REQUIRED = False
    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
1ed5b5c9 4638
1ed5b5c9 4639
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Subscriptions feed, selected with the ``:ytsubs`` family of keywords."""
    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    # Accepts :ytsub, :ytsubs, :ytsubscription and :ytsubscriptions.
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [
        {
            'url': ':ytsubs',
            'only_matching': True,
        },
        {
            'url': ':ytsubscriptions',
            'only_matching': True,
        },
    ]
1ed5b5c9 4651
1ed5b5c9 4652
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Watch-history feed, selected with ``:ythis`` or ``:ythistory``."""
    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [
        {
            'url': ':ythistory',
            'only_matching': True,
        },
    ]
1ed5b5c9
JMF
4661
4662
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch YouTube URLs that end before any video id is given.

    ``_VALID_URL`` matches, anchored at the end of the string, an optionally
    scheme-less ``youtube.com`` / ``youtube-nocookie.com`` URL (any casing of
    "youtube", optional subdomain) that is either

    * ``watch?`` followed by at most one of the ``feature``,
      ``annotation_id``, ``x-yt-cl``, ``hl`` or ``t`` parameters, or
    * ``attribution_link?a=...`` with no further parameters.

    Such a URL carries no video id, so extraction always fails with a hint
    on how to quote the URL on the command line.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always fail with an explanation of the likely shell-quoting mistake.

        Raises:
            ExtractorError: unconditionally, flagged ``expected=True``.
        """
        # The hint names this project's own executable (yt-dlp) so that the
        # suggested command line can be used as written.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)
772fd5cc
PH
4710
4711
class YoutubeClipIE(InfoExtractor):
    """Placeholder for ``youtube.com/clip/`` URLs: warns, then defers to 'Generic'."""
    IE_NAME = 'youtube:clip'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/'

    def _real_extract(self, url):
        """Emit the not-supported warning and pass *url* on under the 'Generic' key."""
        warning = 'YouTube clips are not currently supported. The entire video will be downloaded instead'
        self.report_warning(warning)
        fallback_ie = 'Generic'
        return self.url_result(url, fallback_ie)
4720
4721
772fd5cc
PH
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose ``v=`` value is only 1 to 10 characters long."""
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [
        {
            'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Always fail, naming the incomplete id and the offending URL.

        Raises:
            ExtractorError: unconditionally, flagged ``expected=True``.
        """
        partial_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (partial_id, url)
        raise ExtractorError(message, expected=True)