]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
[ExtractAudio] Support `alac`
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
2d6659b9 5import base64
d92f5d5a 6import calendar
109dd3b2 7import copy
fe93e2c4 8import datetime
a5c56234 9import hashlib
0ca96d48 10import itertools
c5e8d7af 11import json
720c3099 12import math
c4417ddb 13import os.path
d77ab8e2 14import random
c5e8d7af 15import re
8a784c74 16import time
e0df6211 17import traceback
c5e8d7af 18
b05654f0 19from .common import InfoExtractor, SearchInfoExtractor
4bb4a188 20from ..compat import (
edf3e38e 21 compat_chr,
29f7c58a 22 compat_HTTPError,
c5e8d7af 23 compat_parse_qs,
545cc85d 24 compat_str,
7fd002c0 25 compat_urllib_parse_unquote_plus,
15707c7e 26 compat_urllib_parse_urlencode,
7c80519c 27 compat_urllib_parse_urlparse,
7c61bd36 28 compat_urlparse,
4bb4a188 29)
545cc85d 30from ..jsinterp import JSInterpreter
4bb4a188 31from ..utils import (
720c3099 32 bug_reports_message,
2d6659b9 33 bytes_to_intlist,
c5e8d7af 34 clean_html,
d92f5d5a 35 datetime_from_str,
11f9be09 36 dict_get,
358de58c 37 error_to_compat_str,
c5e8d7af 38 ExtractorError,
2d30521a 39 float_or_none,
11f9be09 40 format_field,
dd27fd17 41 int_or_none,
2d6659b9 42 intlist_to_bytes,
641ad5d8 43 is_html,
34921b43 44 join_nonempty,
94278f72 45 mimetype2ext,
9c0d7f49 46 network_exceptions,
11f9be09 47 orderedSet,
6310acf5 48 parse_codecs,
49bd8c66 49 parse_count,
7c80519c 50 parse_duration,
7ea65411 51 parse_iso8601,
4dfbf869 52 parse_qs,
dca3ff4a 53 qualities,
c0ac49bc 54 remove_end,
3995d37d 55 remove_start,
cf7e015f 56 smuggle_url,
dbdaaa23 57 str_or_none,
c93d53f5 58 str_to_int,
7c365c21 59 traverse_obj,
556dbe7f 60 try_get,
c5e8d7af
PH
61 unescapeHTML,
62 unified_strdate,
cf7e015f 63 unsmuggle_url,
8bdd16b4 64 update_url_query,
21c340b8 65 url_or_none,
fe93e2c4 66 urljoin,
7c365c21 67 variadic,
c5e8d7af
PH
68)
69
5f6a1245 70
def get_first(obj, keys, **kwargs):
    """Look up ``keys`` beneath every top-level element of ``obj``.

    The key path is prefixed with ``...`` and handed to ``traverse_obj``
    together with ``get_all=False``; any extra keyword arguments are
    forwarded unchanged.
    """
    branch_path = (..., *variadic(keys))
    return traverse_obj(obj, branch_path, **kwargs, get_all=False)
73
74
# Per-client InnerTube configuration: API key, optional host override,
# request context and numeric client id.
# Any clients starting with _ cannot be explicitly requested by the user.
# Entries without an INNERTUBE_API_KEY / INNERTUBE_HOST / REQUIRE_JS_PLAYER
# receive their defaults in build_innertube_clients().
INNERTUBE_CLIENTS = {
    'web': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20210622.10.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
    },
    'web_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_EMBEDDED_PLAYER',
                'clientVersion': '1.20210620.0.1',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 56,
    },
    'web_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_REMIX',
                'clientVersion': '1.20210621.00.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
    },
    'web_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_CREATOR',
                'clientVersion': '1.20210621.00.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
    },
    'android': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID',
                'clientVersion': '16.20',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
        'REQUIRE_JS_PLAYER': False,
    },
    'android_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_EMBEDDED_PLAYER',
                'clientVersion': '16.20',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
        'REQUIRE_JS_PLAYER': False,
    },
    'android_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_MUSIC',
                'clientVersion': '4.32',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
        'REQUIRE_JS_PLAYER': False,
    },
    'android_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_CREATOR',
                'clientVersion': '21.24.100',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
        'REQUIRE_JS_PLAYER': False,
    },
    # ios has HLS live streams
    # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680
    'ios': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS',
                'clientVersion': '16.20',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
        'REQUIRE_JS_PLAYER': False,
    },
    'ios_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MESSAGES_EXTENSION',
                'clientVersion': '16.20',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
        'REQUIRE_JS_PLAYER': False,
    },
    'ios_music': {
        'INNERTUBE_API_KEY': 'AIzaSyDK3iBpDP9nHVTk2qL73FLJICfOC3c51Og',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MUSIC',
                'clientVersion': '4.32',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
        'REQUIRE_JS_PLAYER': False,
    },
    'ios_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_CREATOR',
                'clientVersion': '21.24.100',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
        'REQUIRE_JS_PLAYER': False,
    },
    # mweb has 'ultralow' formats
    # See: https://github.com/yt-dlp/yt-dlp/pull/557
    'mweb': {
        'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'MWEB',
                'clientVersion': '2.20210721.07.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 2,
    },
}
221
222
def build_innertube_clients():
    """Complete INNERTUBE_CLIENTS in place.

    Every client gets default values for its API key, host and
    REQUIRE_JS_PLAYER flag, an ``hl`` of ``'en'`` in its client context and a
    ``priority``. For each base client an additional ``<client>_agegate``
    entry is inserted.
    """
    third_party = {'embedUrl': 'https://google.com'}  # Can be any valid URL
    base_clients = ('android', 'web', 'ios', 'mweb')
    priority = qualities(base_clients[::-1])
    defaults = (
        ('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8'),
        ('INNERTUBE_HOST', 'www.youtube.com'),
        ('REQUIRE_JS_PLAYER', True),
    )

    # Iterate over a snapshot: new "*_agegate" entries are added while looping
    for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
        for key, value in defaults:
            ytcfg.setdefault(key, value)
        ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
        # The priority is based on the client family, i.e. the part before the first "_"
        ytcfg['priority'] = 10 * priority(client.partition('_')[0])

        if client in base_clients:
            agegate_ytcfg = copy.deepcopy(ytcfg)
            agegate_ytcfg['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
            agegate_ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            agegate_ytcfg['priority'] -= 1
            INNERTUBE_CLIENTS[f'{client}_agegate'] = agegate_ytcfg
        elif client.endswith('_embedded'):
            ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            ytcfg['priority'] -= 2
        else:
            ytcfg['priority'] -= 3
247
248
249build_innertube_clients()
250
251
de7f3446 252class YoutubeBaseInfoExtractor(InfoExtractor):
b2e8bc1b 253 """Provide base functions for Youtube extractors"""
e00eb564 254
3462ffa8 255 _RESERVED_NAMES = (
3cd786db 256 r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'
3619f78d 257 r'shorts|movies|results|shared|hashtag|trending|feed|feeds|'
258 r'browse|oembed|get_video_info|iframe_api|s/player|'
cd7c66cf 259 r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')
3462ffa8 260
3619f78d 261 _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
262
b2e8bc1b 263 _NETRC_MACHINE = 'youtube'
3619f78d 264
b2e8bc1b
JMF
265 # If True it will raise an error if no login info is provided
266 _LOGIN_REQUIRED = False
267
d9190e44
RH
268 _INVIDIOUS_SITES = (
269 # invidious-redirect websites
270 r'(?:www\.)?redirect\.invidious\.io',
271 r'(?:(?:www|dev)\.)?invidio\.us',
272 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
273 r'(?:www\.)?invidious\.pussthecat\.org',
274 r'(?:www\.)?invidious\.zee\.li',
275 r'(?:www\.)?invidious\.ethibox\.fr',
276 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
277 # youtube-dl invidious instances list
278 r'(?:(?:www|no)\.)?invidiou\.sh',
279 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
280 r'(?:www\.)?invidious\.kabi\.tk',
281 r'(?:www\.)?invidious\.mastodon\.host',
282 r'(?:www\.)?invidious\.zapashcanon\.fr',
283 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
284 r'(?:www\.)?invidious\.tinfoil-hat\.net',
285 r'(?:www\.)?invidious\.himiko\.cloud',
286 r'(?:www\.)?invidious\.reallyancient\.tech',
287 r'(?:www\.)?invidious\.tube',
288 r'(?:www\.)?invidiou\.site',
289 r'(?:www\.)?invidious\.site',
290 r'(?:www\.)?invidious\.xyz',
291 r'(?:www\.)?invidious\.nixnet\.xyz',
292 r'(?:www\.)?invidious\.048596\.xyz',
293 r'(?:www\.)?invidious\.drycat\.fr',
294 r'(?:www\.)?inv\.skyn3t\.in',
295 r'(?:www\.)?tube\.poal\.co',
296 r'(?:www\.)?tube\.connect\.cafe',
297 r'(?:www\.)?vid\.wxzm\.sx',
298 r'(?:www\.)?vid\.mint\.lgbt',
299 r'(?:www\.)?vid\.puffyan\.us',
300 r'(?:www\.)?yewtu\.be',
301 r'(?:www\.)?yt\.elukerio\.org',
302 r'(?:www\.)?yt\.lelux\.fi',
303 r'(?:www\.)?invidious\.ggc-project\.de',
304 r'(?:www\.)?yt\.maisputain\.ovh',
305 r'(?:www\.)?ytprivate\.com',
306 r'(?:www\.)?invidious\.13ad\.de',
307 r'(?:www\.)?invidious\.toot\.koeln',
308 r'(?:www\.)?invidious\.fdn\.fr',
309 r'(?:www\.)?watch\.nettohikari\.com',
310 r'(?:www\.)?invidious\.namazso\.eu',
311 r'(?:www\.)?invidious\.silkky\.cloud',
312 r'(?:www\.)?invidious\.exonip\.de',
313 r'(?:www\.)?invidious\.riverside\.rocks',
314 r'(?:www\.)?invidious\.blamefran\.net',
315 r'(?:www\.)?invidious\.moomoo\.de',
316 r'(?:www\.)?ytb\.trom\.tf',
317 r'(?:www\.)?yt\.cyberhost\.uk',
318 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
319 r'(?:www\.)?qklhadlycap4cnod\.onion',
320 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
321 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
322 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
323 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
324 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
325 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
326 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
327 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
328 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
329 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
330 )
331
b2e8bc1b 332 def _login(self):
83317f69 333 """
334 Attempt to log in to YouTube.
83317f69 335 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
336 """
9d5d4d64 337
982ee69a
MB
338 if (self._LOGIN_REQUIRED
339 and self.get_param('cookiefile') is None
340 and self.get_param('cookiesfrombrowser') is None):
9d5d4d64 341 self.raise_login_required(
342 'Login details are needed to download this content', method='cookies')
68217024 343 username, password = self._get_login_info()
9d5d4d64 344 if username:
24b0a72b 345 self.report_warning(f'Cannot login to YouTube using username and password. {self._LOGIN_HINTS["cookies"]}')
b2e8bc1b 346
cce889b9 347 def _initialize_consent(self):
348 cookies = self._get_cookies('https://www.youtube.com/')
349 if cookies.get('__Secure-3PSID'):
350 return
351 consent_id = None
352 consent = cookies.get('CONSENT')
353 if consent:
354 if 'YES' in consent.value:
355 return
356 consent_id = self._search_regex(
357 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
358 if not consent_id:
359 consent_id = random.randint(100, 999)
360 self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
8d81f3e3 361
b2e8bc1b 362 def _real_initialize(self):
cce889b9 363 self._initialize_consent()
24b0a72b 364 self._login()
c5e8d7af 365
a0566bbf 366 _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
29f7c58a 367 _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
368 _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
a0566bbf 369
def _get_default_ytcfg(self, client='web'):
    """Return an independent deep copy of the built-in ytcfg for ``client``."""
    builtin_ytcfg = INNERTUBE_CLIENTS[client]
    return copy.deepcopy(builtin_ytcfg)
109dd3b2 372
def _get_innertube_host(self, client='web'):
    """Return the INNERTUBE_HOST configured for ``client`` in INNERTUBE_CLIENTS."""
    client_ytcfg = INNERTUBE_CLIENTS[client]
    return client_ytcfg['INNERTUBE_HOST']
109dd3b2 375
def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
    """try_get on ``ytcfg``, falling back to the default ytcfg of ``default_client``.

    The fallback is used whenever the lookup on ``ytcfg`` yields a falsy value.
    """
    value = try_get(ytcfg, getter, expected_type)
    if value:
        return value
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)
380
def _extract_client_name(self, ytcfg, default_client='web'):
    """Return the client name from ``ytcfg`` (or from the default client's ytcfg).

    INNERTUBE_CLIENT_NAME is tried first, then the clientName stored inside
    INNERTUBE_CONTEXT.
    """
    getters = (
        lambda x: x['INNERTUBE_CLIENT_NAME'],
        lambda x: x['INNERTUBE_CONTEXT']['client']['clientName'],
    )
    return self._ytcfg_get_safe(ytcfg, getters, compat_str, default_client)
109dd3b2 385
def _extract_client_version(self, ytcfg, default_client='web'):
    """Return the client version from ``ytcfg`` (or from the default client's ytcfg).

    INNERTUBE_CLIENT_VERSION is tried first, then the clientVersion stored
    inside INNERTUBE_CONTEXT.
    """
    getters = (
        lambda x: x['INNERTUBE_CLIENT_VERSION'],
        lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion'],
    )
    return self._ytcfg_get_safe(ytcfg, getters, compat_str, default_client)
109dd3b2 390
def _extract_api_key(self, ytcfg=None, default_client='web'):
    """Return INNERTUBE_API_KEY from ``ytcfg``, or else from the default client's ytcfg."""
    def api_key_of(cfg):
        return cfg['INNERTUBE_API_KEY']

    return self._ytcfg_get_safe(ytcfg, api_key_of, compat_str, default_client)
393
def _extract_context(self, ytcfg=None, default_client='web'):
    """Return the INNERTUBE_CONTEXT to send with API requests.

    A context dict found in ``ytcfg`` is returned as is. Otherwise the default
    context of ``default_client`` is used; if a ``ytcfg`` was given, its client
    name/version and VISITOR_DATA are copied into that default context.
    """
    def context_of(cfg):
        return try_get(cfg, lambda x: x['INNERTUBE_CONTEXT'], dict)

    own_context = context_of(ytcfg)
    if own_context:
        return own_context

    context = context_of(self._get_default_ytcfg(default_client))
    if not ytcfg:
        return context

    # Recreate the client context (required)
    client_context = context['client']
    client_context['clientVersion'] = self._extract_client_version(ytcfg, default_client)
    client_context['clientName'] = self._extract_client_name(ytcfg, default_client)

    visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
    if visitor_data:
        client_context['visitorData'] = visitor_data
    return context
413
cf87314d 414 _SAPISID = None
415
109dd3b2 416 def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
a5c56234 417 time_now = round(time.time())
cf87314d 418 if self._SAPISID is None:
419 yt_cookies = self._get_cookies('https://www.youtube.com')
420 # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
421 # See: https://github.com/yt-dlp/yt-dlp/issues/393
422 sapisid_cookie = dict_get(
423 yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
424 if sapisid_cookie and sapisid_cookie.value:
425 self._SAPISID = sapisid_cookie.value
426 self.write_debug('Extracted SAPISID cookie')
427 # SAPISID cookie is required if not already present
428 if not yt_cookies.get('SAPISID'):
429 self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
430 self._set_cookie(
431 '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
432 else:
433 self._SAPISID = False
434 if not self._SAPISID:
435 return None
1974e99f 436 # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
437 sapisidhash = hashlib.sha1(
cf87314d 438 f'{time_now} {self._SAPISID} {origin}'.encode('utf-8')).hexdigest()
1974e99f 439 return f'SAPISIDHASH {time_now}_{sapisidhash}'
a5c56234
M
440
441 def _call_api(self, ep, query, video_id, fatal=True, headers=None,
f4f751af 442 note='Downloading API JSON', errnote='Unable to download API page',
000c15a4 443 context=None, api_key=None, api_hostname=None, default_client='web'):
f4f751af 444
109dd3b2 445 data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
8bdd16b4 446 data.update(query)
11f9be09 447 real_headers = self.generate_api_headers(default_client=default_client)
f4f751af 448 real_headers.update({'content-type': 'application/json'})
449 if headers:
450 real_headers.update(headers)
545cc85d 451 return self._download_json(
109dd3b2 452 'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
a5c56234 453 video_id=video_id, fatal=fatal, note=note, errnote=errnote,
f4f751af 454 data=json.dumps(data).encode('utf8'), headers=real_headers,
455 query={'key': api_key or self._extract_api_key()})
456
def extract_yt_initial_data(self, item_id, webpage, fatal=True):
    """Find the ytInitialData JSON in ``webpage`` and return it parsed.

    The pattern anchored by _YT_INITIAL_BOUNDARY_RE is tried before the bare
    _YT_INITIAL_DATA_RE. Returns None when nothing was found.
    """
    patterns = (
        r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
        self._YT_INITIAL_DATA_RE,
    )
    data = self._search_regex(patterns, webpage, 'yt initial data', fatal=fatal)
    if not data:
        return None
    return self._parse_json(data, item_id, fatal=fatal)
0c148415 463
@staticmethod
def _extract_session_index(*data):
    """
    Index of current account in account list.
    See: https://github.com/yt-dlp/yt-dlp/pull/519

    Returns the first SESSION_INDEX among ``data`` that int_or_none does not
    turn into None, or None if there is no such value.
    """
    indices = (
        int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
        for ytcfg in data)
    return next((index for index in indices if index is not None), None)
474
# Deprecated?
def _extract_identity_token(self, ytcfg=None, webpage=None):
    """Return the ID_TOKEN from ``ytcfg`` or, failing that, searched for in ``webpage``."""
    token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str) if ytcfg else None
    if token:
        return token
    if not webpage:
        return None
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None, fatal=False)
a1c5d2ca
M
485
@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg
    """
    datasync_getters = (
        lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
        lambda x: x['DATASYNC_ID'],
    )
    for data in args:
        # ytcfg includes channel_syncid if on secondary channel
        delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
        if delegated_sid:
            return delegated_sid

        datasync_id = try_get(data, datasync_getters, compat_str) or ''
        sync_ids = datasync_id.split('||')
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        if len(sync_ids) >= 2 and sync_ids[1]:
            return sync_ids[0]
    return None
a1c5d2ca 504
@staticmethod
def _extract_visitor_data(*args):
    """
    Extracts visitorData from an API response or ytcfg
    Appears to be used to track session state
    """
    # Alternative locations of the value within each argument
    visitor_data_keys = (
        'VISITOR_DATA',
        ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
        ('responseContext', 'visitorData'),
    )
    return traverse_obj(
        args, (..., visitor_data_keys), expected_type=compat_str, get_all=False)
514
@property
def is_authenticated(self):
    """True if a SAPISIDHASH authorization header can be generated."""
    auth_header = self._generate_sapisidhash_header()
    return bool(auth_header)
518
def extract_ytcfg(self, video_id, webpage):
    """Parse the argument of the ``ytcfg.set({...});`` call in ``webpage``.

    Returns an empty dict if there is no webpage or the parse result is falsy.
    """
    if not webpage:
        return {}
    raw_ytcfg = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
        default='{}')
    return self._parse_json(raw_ytcfg, video_id, fatal=False) or {}
526
def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """Build the HTTP headers for an InnerTube API request.

    Values not passed explicitly are looked up in ``ytcfg`` (client name and
    version additionally fall back to ``default_client``). Headers whose value
    is None are left out of the returned dict.
    """
    host = api_hostname if api_hostname else self._get_innertube_host(default_client)
    origin = 'https://' + host
    client_name = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)

    headers = {
        'X-YouTube-Client-Name': compat_str(client_name),
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
        'Origin': origin,
        'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
        'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
        'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg),
    }

    if session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if account_syncid or session_index is not None:
        # With an account syncid but no known session index, index 0 is sent
        headers['X-Goog-AuthUser'] = 0 if session_index is None else session_index

    auth = self._generate_sapisidhash_header(origin)
    if auth is not None:
        headers.update({'Authorization': auth, 'X-Origin': origin})

    return {name: value for name, value in headers.items() if value is not None}
29f7c58a 551
2d6659b9 552 @staticmethod
553 def _build_api_continuation_query(continuation, ctp=None):
554 query = {
555 'continuation': continuation
556 }
557 # TODO: Inconsistency with clickTrackingParams.
558 # Currently we have a fixed ctp contained within context (from ytcfg)
559 # and a ctp in root query for continuation.
560 if ctp:
561 query['clickTracking'] = {'clickTrackingParams': ctp}
562 return query
563
@classmethod
def _extract_next_continuation_data(cls, renderer):
    """Build a continuation query from the next/reload continuation data of ``renderer``.

    Returns None when the renderer carries no such data or no continuation token.
    """
    next_continuation = try_get(
        renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
                   lambda x: x['continuation']['reloadContinuationData']), dict)
    continuation = next_continuation.get('continuation') if next_continuation else None
    if not continuation:
        return None
    return cls._build_api_continuation_query(
        continuation, next_continuation.get('clickTrackingParams'))
2d6659b9 576
@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """Build a continuation query from a continuation endpoint dict.

    Returns None when ``continuation_ep`` is not a dict or has no
    continuationCommand token.
    """
    if not isinstance(continuation_ep, dict):
        return None
    token = try_get(
        continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
    if not token:
        return None
    return cls._build_api_continuation_query(
        token, continuation_ep.get('clickTrackingParams'))
2d6659b9 586
@classmethod
def _extract_continuation(cls, renderer):
    """Return the continuation query for ``renderer``, or None if there is none.

    The renderer's own continuation data takes precedence; otherwise the
    continuationItemRenderer entries of its 'contents' and 'items' are searched.
    """
    next_continuation = cls._extract_next_continuation_data(renderer)
    if next_continuation:
        return next_continuation

    endpoint_getters = (
        lambda x: x['continuationItemRenderer']['continuationEndpoint'],
        lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command'],
    )
    contents = [
        content
        for key in ('contents', 'items')
        for content in try_get(renderer, lambda x: x[key], list) or []
        if isinstance(content, dict)]

    for content in contents:
        continuation_ep = try_get(content, endpoint_getters, dict)
        continuation = cls._extract_continuation_ep_data(continuation_ep)
        if continuation:
            return continuation
    return None
607
@classmethod
def _extract_alerts(cls, data):
    """Yield ``(alert_type, message)`` for each alert in ``data['alerts']``.

    Entries of the alerts list that are not dicts are skipped, as are alert
    renderers that are not dicts, have no 'type' or yield no text.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # The renderer is expected to be a dict; anything else would
            # break the .get() call below, so ignore it
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = cls._get_text(alert, 'text')
            if message:
                yield alert_type, message
620
c0ac49bc 621 def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
109dd3b2 622 errors = []
623 warnings = []
624 for alert_type, alert_message in alerts:
641ad5d8 625 if alert_type.lower() == 'error' and fatal:
109dd3b2 626 errors.append([alert_type, alert_message])
627 else:
628 warnings.append([alert_type, alert_message])
629
630 for alert_type, alert_message in (warnings + errors[:-1]):
c0ac49bc 631 self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message), only_once=only_once)
109dd3b2 632 if errors:
633 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
634
635 def _extract_and_report_alerts(self, data, *args, **kwargs):
636 return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
637
def _extract_badges(self, renderer: dict):
    """Return the set of lower-cased metadataBadgeRenderer labels in ``renderer['badges']``."""
    labels = (
        try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
        for badge in try_get(renderer, lambda x: x['badges'], list) or [])
    return {label.lower() for label in labels if label}
645
@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """Return the first non-empty text found in ``data`` under any of ``path_list``.

    Without paths, ``data`` itself is inspected. For each candidate object the
    'simpleText' value is preferred; otherwise the 'text' of its 'runs' (or of
    the object itself if it is a list) are joined, limited to the first
    ``max_runs`` runs when given. Returns None if no text is found.
    """
    for path in path_list or [None]:
        if path is None:
            candidates = [data]
        else:
            candidates = traverse_obj(data, path, default=[])
            is_branching = any(
                key is ... or isinstance(key, (list, tuple)) for key in variadic(path))
            if not is_branching:
                # A non-branching path gives a single object rather than a list of them
                candidates = [candidates]

        for item in candidates:
            simple_text = try_get(item, lambda x: x['simpleText'], compat_str)
            if simple_text:
                return simple_text

            runs = try_get(item, lambda x: x['runs'], list) or []
            if not runs and isinstance(item, list):
                runs = item

            limit = min(len(runs), max_runs or len(runs))
            joined = ''.join(traverse_obj(runs[:limit], (..., 'text'), expected_type=str, default=[]))
            if joined:
                return joined
    return None
47193e02 667
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """Call the API endpoint ``ep`` with retries and return the response.

    The request is repeated up to the 'extractor_retries' parameter (default 3)
    on network errors (except HTTP 403/429), on "unknown error" alerts in the
    response, and when none of ``check_get_keys`` is present in the response.

    @param check_get_keys  keys of which at least one must be in a complete
                           response; no completeness check if empty/None
    @param fatal           if True, failures raise ExtractorError; otherwise a
                           warning is reported and None is returned
    The remaining parameters are passed on to _call_api / _extract_context /
    _extract_api_key.
    """
    response = None
    last_error = None
    count = -1
    retries = self.get_param('extractor_retries', 3)
    if check_get_keys is None:
        check_get_keys = []
    while count < retries:
        count += 1
        if last_error:
            self.report_warning('%s. Retrying ...' % remove_end(last_error, '.'))
        try:
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client,
                note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
        except ExtractorError as e:
            if isinstance(e.cause, network_exceptions):
                if isinstance(e.cause, compat_HTTPError) and not is_html(e.cause.read(512)):
                    # Rewind so that the whole error body can be parsed
                    e.cause.seek(0)
                    yt_error = try_get(
                        self._parse_json(e.cause.read().decode(), item_id, fatal=False),
                        lambda x: x['error']['message'], compat_str)
                    if yt_error:
                        self._report_alerts([('ERROR', yt_error)], fatal=False)
                # Downloading page may result in intermittent 5xx HTTP error
                # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                # We also want to catch all other network exceptions since errors in later pages can be troublesome
                # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
                if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
                    last_error = error_to_compat_str(e.cause or e.msg)
                    if count < retries:
                        continue
            if fatal:
                raise
            else:
                self.report_warning(error_to_compat_str(e))
                return

        else:
            try:
                self._extract_and_report_alerts(response, only_once=True)
            except ExtractorError as e:
                # YouTube servers may return errors we want to retry on in a 200 OK response
                # See: https://github.com/yt-dlp/yt-dlp/issues/839
                # Only retry while attempts remain; on the last attempt the error
                # must be handled below instead of silently returning the
                # erroneous response once the loop ends
                if 'unknown error' in e.msg.lower() and count < retries:
                    last_error = e.msg
                    continue
                if fatal:
                    raise
                self.report_warning(error_to_compat_str(e))
                return
            if not check_get_keys or dict_get(response, check_get_keys):
                break
            # Youtube sometimes sends incomplete data
            # See: https://github.com/ytdl-org/youtube-dl/issues/28194
            last_error = 'Incomplete data received'
            if count >= retries:
                if fatal:
                    raise ExtractorError(last_error)
                else:
                    self.report_warning(last_error)
                    return
    return response
737
9297939e 738 @staticmethod
739 def is_music_url(url):
740 return re.match(r'https?://music\.youtube\.com/', url) is not None
741
def _extract_video(self, renderer):
    """Turn a video renderer dict into a 'url' result pointing at the watch page."""
    video_id = renderer.get('videoId')
    title = self._get_text(renderer, 'title')
    description = self._get_text(renderer, 'descriptionSnippet')

    duration_text = self._get_text(
        renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text'))
    duration = parse_duration(duration_text)

    # The count is the leading run of digits/commas once all whitespace is removed
    view_count_text = self._get_text(renderer, 'viewCountText') or ''
    compact_view_count = re.sub(r'\s', '', view_count_text)
    view_count = str_to_int(self._search_regex(
        r'^([\d,]+)', compact_view_count, 'view count', default=None))

    uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': f'https://www.youtube.com/watch?v={video_id}',
        'title': title,
        'description': description,
        'duration': duration,
        'view_count': view_count,
        'uploader': uploader,
    }
766
0c148415 767
360e1ca5 768class YoutubeIE(YoutubeBaseInfoExtractor):
96565c7e 769 IE_DESC = 'YouTube'
cb7dfeea 770 _VALID_URL = r"""(?x)^
c5e8d7af 771 (
edb53e2d 772 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 773 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
774 (?:www\.)?deturl\.com/www\.youtube\.com|
775 (?:www\.)?pwnyoutube\.com|
776 (?:www\.)?hooktube\.com|
777 (?:www\.)?yourepeat\.com|
778 tube\.majestyc\.net|
779 %(invidious)s|
780 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
781 (?:.*?\#/)? # handle anchor (#/) redirect urls
782 (?: # the various things that can precede the ID:
8fc54b12 783 (?:(?:v|embed|e|shorts)/(?!videoseries)) # v/ or embed/ or e/ or shorts/
c5e8d7af 784 |(?: # or the v= param in all its forms
f7000f3a 785 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 786 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 787 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
788 v=
789 )
f4b05232 790 ))
cbaed4bb
S
791 |(?:
792 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
793 vid\.plus| # or vid.plus/xxxx
794 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 795 %(invidious)s
cbaed4bb 796 )/
edb53e2d 797 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 798 )
c5e8d7af 799 )? # all until now is optional -> you can pass the naked ID
201c1459 800 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
c5e8d7af 801 (?(1).+)? # if we found the ID, everything can follow
9297939e 802 (?:\#|$)""" % {
d9190e44 803 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
bc2ca1bb 804 }
e40c758c 805 _PLAYER_INFO_RE = (
cc2db878 806 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
807 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 808 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 809 )
# Static metadata for known format identifiers (the comments below call them
# "itags"). Keys are the identifier as a string; each value lists only the
# fields that are fixed for that identifier (container extension, dimensions,
# codecs, audio bitrate, fps, ...). Fields that vary per video are left out
# on purpose -- see the notes on 36, 138 and 272.
# The 3D and HLS entries carry a negative 'preference' (-20 and -10).
_formats = {
    '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
    '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
    '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
    '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
    '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
    '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
    '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
    '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
    # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
    '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
    '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
    '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
    '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
    '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
    '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
    '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
    '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
    '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

    # 3D videos
    '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
    '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
    '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
    '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
    '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
    '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
    '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

    # Apple HTTP Live Streaming
    '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
    '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
    '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
    '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
    '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
    '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
    '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
    '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},

    # DASH mp4 video
    '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
    '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
    '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
    '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},

    # DASH mp4 audio
    '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
    '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
    '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
    '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
    '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
    '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
    '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},

    # DASH webm video
    '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
    '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
    '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
    '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
    '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
    '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

    # DASH webm audio
    '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
    '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},

    # DASH webm audio with opus inside
    '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
    '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
    '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},

    # RTMP (unnamed)
    '_rtmp': {'protocol': 'rtmp'},

    # av01 video only formats sometimes served with "unknown" codecs
    '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
    '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
    '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
    '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
    '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
    '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
    '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
    '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
}
# Subtitle format names known to this extractor.
# NOTE(review): the code that consumes this tuple is outside this chunk;
# whether the order expresses a preference is not visible here -- confirm.
_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')

# Geo-bypass flag, switched off for this extractor.
# NOTE(review): presumably read by the InfoExtractor base class -- confirm.
_GEO_BYPASS = False

# Name of this extractor.
IE_NAME = 'youtube'
2eb88d95
PH
924 _TESTS = [
925 {
2d3d2997 926 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
927 'info_dict': {
928 'id': 'BaW_jenozKc',
929 'ext': 'mp4',
3867038a 930 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
931 'uploader': 'Philipp Hagemeister',
932 'uploader_id': 'phihag',
ec85ded8 933 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
934 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
935 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 936 'upload_date': '20121002',
3867038a 937 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
4bc3a23e 938 'categories': ['Science & Technology'],
3867038a 939 'tags': ['youtube-dl'],
556dbe7f 940 'duration': 10,
dbdaaa23 941 'view_count': int,
3e7c1224
PH
942 'like_count': int,
943 'dislike_count': int,
7c80519c 944 'start_time': 1,
297a564b 945 'end_time': 9,
2eb88d95 946 }
0e853ca4 947 },
fccd3771 948 {
4bc3a23e
PH
949 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
950 'note': 'Embed-only video (#1746)',
951 'info_dict': {
952 'id': 'yZIXLfi8CZQ',
953 'ext': 'mp4',
954 'upload_date': '20120608',
955 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
956 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
957 'uploader': 'SET India',
94bfcd23 958 'uploader_id': 'setindia',
ec85ded8 959 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 960 'age_limit': 18,
545cc85d 961 },
962 'skip': 'Private video',
fccd3771 963 },
11b56058 964 {
8bdd16b4 965 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
966 'note': 'Use the first video ID in the URL',
967 'info_dict': {
968 'id': 'BaW_jenozKc',
969 'ext': 'mp4',
3867038a 970 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
971 'uploader': 'Philipp Hagemeister',
972 'uploader_id': 'phihag',
ec85ded8 973 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 974 'upload_date': '20121002',
3867038a 975 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
11b56058 976 'categories': ['Science & Technology'],
3867038a 977 'tags': ['youtube-dl'],
556dbe7f 978 'duration': 10,
dbdaaa23 979 'view_count': int,
11b56058
PM
980 'like_count': int,
981 'dislike_count': int,
34a7de29
S
982 },
983 'params': {
984 'skip_download': True,
985 },
11b56058 986 },
dd27fd17 987 {
2d3d2997 988 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
989 'note': '256k DASH audio (format 141) via DASH manifest',
990 'info_dict': {
991 'id': 'a9LDPn-MO4I',
992 'ext': 'm4a',
993 'upload_date': '20121002',
994 'uploader_id': '8KVIDEO',
ec85ded8 995 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
996 'description': '',
997 'uploader': '8KVIDEO',
998 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 999 },
4bc3a23e
PH
1000 'params': {
1001 'youtube_include_dash_manifest': True,
1002 'format': '141',
4919603f 1003 },
de3c7fe0 1004 'skip': 'format 141 not served anymore',
dd27fd17 1005 },
8bdd16b4 1006 # DASH manifest with encrypted signature
1007 {
1008 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1009 'info_dict': {
1010 'id': 'IB3lcPjvWLA',
1011 'ext': 'm4a',
1012 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1013 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1014 'duration': 244,
1015 'uploader': 'AfrojackVEVO',
1016 'uploader_id': 'AfrojackVEVO',
1017 'upload_date': '20131011',
cc2db878 1018 'abr': 129.495,
8bdd16b4 1019 },
1020 'params': {
1021 'youtube_include_dash_manifest': True,
1022 'format': '141/bestaudio[ext=m4a]',
1023 },
1024 },
65c2fde2 1025 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
c522adb1 1026 {
65c2fde2 1027 'note': 'Embed allowed age-gate video',
2d3d2997 1028 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
1029 'info_dict': {
1030 'id': 'HtVdAasjOgU',
1031 'ext': 'mp4',
1032 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 1033 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 1034 'duration': 142,
c522adb1
JMF
1035 'uploader': 'The Witcher',
1036 'uploader_id': 'WitcherGame',
ec85ded8 1037 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 1038 'upload_date': '20140605',
34952f09 1039 'age_limit': 18,
c522adb1
JMF
1040 },
1041 },
65c2fde2 1042 {
1043 'note': 'Age-gate video with embed allowed in public site',
1044 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1045 'info_dict': {
1046 'id': 'HsUATh_Nc2U',
1047 'ext': 'mp4',
1048 'title': 'Godzilla 2 (Official Video)',
1049 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1050 'upload_date': '20200408',
1051 'uploader_id': 'FlyingKitty900',
1052 'uploader': 'FlyingKitty',
1053 'age_limit': 18,
1054 },
1055 },
1056 {
1057 'note': 'Age-gate video embedable only with clientScreen=EMBED',
1058 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1059 'info_dict': {
1060 'id': 'Tq92D6wQ1mg',
1061 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
3619f78d 1062 'ext': 'mp4',
1063 'upload_date': '20191227',
65c2fde2 1064 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1065 'uploader': 'Projekt Melody',
1066 'description': 'md5:17eccca93a786d51bc67646756894066',
1067 'age_limit': 18,
1068 },
1069 },
1070 {
1071 'note': 'Non-Agegated non-embeddable video',
1072 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1073 'info_dict': {
1074 'id': 'MeJVWBSsPAY',
1075 'ext': 'mp4',
1076 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1077 'uploader': 'Herr Lurik',
1078 'uploader_id': 'st3in234',
1079 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1080 'upload_date': '20130730',
1081 },
1082 },
1083 {
1084 'note': 'Non-bypassable age-gated video',
1085 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1086 'only_matching': True,
1087 },
8bdd16b4 1088 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1089 # YouTube Red ad is not captured for creator
1090 {
1091 'url': '__2ABJjxzNo',
1092 'info_dict': {
1093 'id': '__2ABJjxzNo',
1094 'ext': 'mp4',
1095 'duration': 266,
1096 'upload_date': '20100430',
1097 'uploader_id': 'deadmau5',
1098 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 1099 'creator': 'deadmau5',
1100 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 1101 'uploader': 'deadmau5',
1102 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 1103 'alt_title': 'Some Chords',
8bdd16b4 1104 },
1105 'expected_warnings': [
1106 'DASH manifest missing',
1107 ]
1108 },
067aa17e 1109 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
1110 {
1111 'url': 'lqQg6PlCWgI',
1112 'info_dict': {
1113 'id': 'lqQg6PlCWgI',
1114 'ext': 'mp4',
556dbe7f 1115 'duration': 6085,
90227264 1116 'upload_date': '20150827',
cbe2bd91 1117 'uploader_id': 'olympic',
ec85ded8 1118 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 1119 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
11f9be09 1120 'uploader': 'Olympics',
cbe2bd91
PH
1121 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1122 },
1123 'params': {
1124 'skip_download': 'requires avconv',
e52a40ab 1125 }
cbe2bd91 1126 },
6271f1ca
PH
1127 # Non-square pixels
1128 {
1129 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1130 'info_dict': {
1131 'id': '_b-2C3KPAM0',
1132 'ext': 'mp4',
1133 'stretched_ratio': 16 / 9.,
556dbe7f 1134 'duration': 85,
6271f1ca
PH
1135 'upload_date': '20110310',
1136 'uploader_id': 'AllenMeow',
ec85ded8 1137 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 1138 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 1139 'uploader': '孫ᄋᄅ',
6271f1ca
PH
1140 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1141 },
06b491eb
S
1142 },
1143 # url_encoded_fmt_stream_map is empty string
1144 {
1145 'url': 'qEJwOuvDf7I',
1146 'info_dict': {
1147 'id': 'qEJwOuvDf7I',
f57b7835 1148 'ext': 'webm',
06b491eb
S
1149 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1150 'description': '',
1151 'upload_date': '20150404',
1152 'uploader_id': 'spbelect',
1153 'uploader': 'Наблюдатели Петербурга',
1154 },
1155 'params': {
1156 'skip_download': 'requires avconv',
e323cf3f
S
1157 },
1158 'skip': 'This live event has ended.',
06b491eb 1159 },
067aa17e 1160 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
1161 {
1162 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1163 'info_dict': {
1164 'id': 'FIl7x6_3R5Y',
eb6793ba 1165 'ext': 'webm',
da77d856
S
1166 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1167 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 1168 'duration': 220,
da77d856
S
1169 'upload_date': '20150625',
1170 'uploader_id': 'dorappi2000',
ec85ded8 1171 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 1172 'uploader': 'dorappi2000',
eb6793ba 1173 'formats': 'mincount:31',
da77d856 1174 },
eb6793ba 1175 'skip': 'not actual anymore',
2ee8f5d8 1176 },
8a1a26ce
YCH
1177 # DASH manifest with segment_list
1178 {
1179 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1180 'md5': '8ce563a1d667b599d21064e982ab9e31',
1181 'info_dict': {
1182 'id': 'CsmdDsKjzN8',
1183 'ext': 'mp4',
17ee98e1 1184 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
1185 'uploader': 'Airtek',
1186 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1187 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1188 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1189 },
1190 'params': {
1191 'youtube_include_dash_manifest': True,
1192 'format': '135', # bestvideo
be49068d
S
1193 },
1194 'skip': 'This live event has ended.',
2ee8f5d8 1195 },
cf7e015f
S
1196 {
1197 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 1198 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 1199 'info_dict': {
545cc85d 1200 'id': 'jvGDaLqkpTg',
1201 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1202 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
1203 },
1204 'playlist': [{
1205 'info_dict': {
545cc85d 1206 'id': 'jvGDaLqkpTg',
cf7e015f 1207 'ext': 'mp4',
545cc85d 1208 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1209 'description': 'md5:e03b909557865076822aa169218d6a5d',
1210 'duration': 10643,
1211 'upload_date': '20161111',
1212 'uploader': 'Team PGP',
1213 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1214 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1215 },
1216 }, {
1217 'info_dict': {
545cc85d 1218 'id': '3AKt1R1aDnw',
cf7e015f 1219 'ext': 'mp4',
545cc85d 1220 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1221 'description': 'md5:e03b909557865076822aa169218d6a5d',
1222 'duration': 10991,
1223 'upload_date': '20161111',
1224 'uploader': 'Team PGP',
1225 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1226 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1227 },
1228 }, {
1229 'info_dict': {
545cc85d 1230 'id': 'RtAMM00gpVc',
cf7e015f 1231 'ext': 'mp4',
545cc85d 1232 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1233 'description': 'md5:e03b909557865076822aa169218d6a5d',
1234 'duration': 10995,
1235 'upload_date': '20161111',
1236 'uploader': 'Team PGP',
1237 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1238 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1239 },
1240 }, {
1241 'info_dict': {
545cc85d 1242 'id': '6N2fdlP3C5U',
cf7e015f 1243 'ext': 'mp4',
545cc85d 1244 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1245 'description': 'md5:e03b909557865076822aa169218d6a5d',
1246 'duration': 10990,
1247 'upload_date': '20161111',
1248 'uploader': 'Team PGP',
1249 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1250 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1251 },
1252 }],
1253 'params': {
1254 'skip_download': True,
1255 },
65c2fde2 1256 'skip': 'Not multifeed anymore',
cbaed4bb 1257 },
f9f49d87 1258 {
067aa17e 1259 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
1260 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1261 'info_dict': {
1262 'id': 'gVfLd0zydlo',
1263 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1264 },
1265 'playlist_count': 2,
be49068d 1266 'skip': 'Not multifeed anymore',
f9f49d87 1267 },
cbaed4bb 1268 {
2d3d2997 1269 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 1270 'only_matching': True,
0e49d9a6 1271 },
6d4fc66b 1272 {
2d3d2997 1273 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
1274 'only_matching': True,
1275 },
0e49d9a6 1276 {
067aa17e 1277 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 1278 # Also tests cut-off URL expansion in video description (see
067aa17e
S
1279 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1280 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
1281 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1282 'info_dict': {
1283 'id': 'lsguqyKfVQg',
1284 'ext': 'mp4',
1285 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
11f9be09 1286 'alt_title': 'Dark Walk',
0e49d9a6 1287 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 1288 'duration': 133,
0e49d9a6
LL
1289 'upload_date': '20151119',
1290 'uploader_id': 'IronSoulElf',
ec85ded8 1291 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 1292 'uploader': 'IronSoulElf',
11f9be09 1293 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1294 'track': 'Dark Walk',
1295 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
92bc97d3 1296 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
1297 },
1298 'params': {
1299 'skip_download': True,
1300 },
1301 },
61f92af1 1302 {
067aa17e 1303 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
1304 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1305 'only_matching': True,
1306 },
313dfc45
LL
1307 {
1308 # Video with yt:stretch=17:0
1309 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1310 'info_dict': {
1311 'id': 'Q39EVAstoRM',
1312 'ext': 'mp4',
1313 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1314 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1315 'upload_date': '20151107',
1316 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1317 'uploader': 'CH GAMER DROID',
1318 },
1319 'params': {
1320 'skip_download': True,
1321 },
be49068d 1322 'skip': 'This video does not exist.',
313dfc45 1323 },
201c1459 1324 {
1325 # Video with incomplete 'yt:stretch=16:'
1326 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1327 'only_matching': True,
1328 },
7caf9830
S
1329 {
1330 # Video licensed under Creative Commons
1331 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1332 'info_dict': {
1333 'id': 'M4gD1WSo5mA',
1334 'ext': 'mp4',
1335 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1336 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1337 'duration': 721,
7caf9830
S
1338 'upload_date': '20150127',
1339 'uploader_id': 'BerkmanCenter',
ec85ded8 1340 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 1341 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
1342 'license': 'Creative Commons Attribution license (reuse allowed)',
1343 },
1344 'params': {
1345 'skip_download': True,
1346 },
1347 },
fd050249
S
1348 {
1349 # Channel-like uploader_url
1350 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1351 'info_dict': {
1352 'id': 'eQcmzGIKrzg',
1353 'ext': 'mp4',
1354 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 1355 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1356 'duration': 4060,
fd050249 1357 'upload_date': '20151119',
eb6793ba 1358 'uploader': 'Bernie Sanders',
fd050249 1359 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1360 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
1361 'license': 'Creative Commons Attribution license (reuse allowed)',
1362 },
1363 'params': {
1364 'skip_download': True,
1365 },
1366 },
040ac686
S
1367 {
1368 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1369 'only_matching': True,
7f29cf54
S
1370 },
1371 {
067aa17e 1372 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1373 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1374 'only_matching': True,
6496ccb4
S
1375 },
1376 {
1377 # Rental video preview
1378 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1379 'info_dict': {
1380 'id': 'uGpuVWrhIzE',
1381 'ext': 'mp4',
1382 'title': 'Piku - Trailer',
1383 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1384 'upload_date': '20150811',
1385 'uploader': 'FlixMatrix',
1386 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1387 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1388 'license': 'Standard YouTube License',
1389 },
1390 'params': {
1391 'skip_download': True,
1392 },
eb6793ba 1393 'skip': 'This video is not available.',
022a5d66 1394 },
12afdc2a
S
1395 {
1396 # YouTube Red video with episode data
1397 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1398 'info_dict': {
1399 'id': 'iqKdEhx-dD4',
1400 'ext': 'mp4',
1401 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1402 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1403 'duration': 2085,
12afdc2a
S
1404 'upload_date': '20170118',
1405 'uploader': 'Vsauce',
1406 'uploader_id': 'Vsauce',
1407 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1408 'series': 'Mind Field',
1409 'season_number': 1,
1410 'episode_number': 1,
1411 },
1412 'params': {
1413 'skip_download': True,
1414 },
1415 'expected_warnings': [
1416 'Skipping DASH manifest',
1417 ],
1418 },
c7121fa7
S
1419 {
1420 # The following content has been identified by the YouTube community
1421 # as inappropriate or offensive to some audiences.
1422 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1423 'info_dict': {
1424 'id': '6SJNVb0GnPI',
1425 'ext': 'mp4',
1426 'title': 'Race Differences in Intelligence',
1427 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1428 'duration': 965,
1429 'upload_date': '20140124',
1430 'uploader': 'New Century Foundation',
1431 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1432 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1433 },
1434 'params': {
1435 'skip_download': True,
1436 },
545cc85d 1437 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1438 },
022a5d66
S
1439 {
1440 # itag 212
1441 'url': '1t24XAntNCY',
1442 'only_matching': True,
fd5c4aab
S
1443 },
1444 {
1445 # geo restricted to JP
1446 'url': 'sJL6WA-aGkQ',
1447 'only_matching': True,
1448 },
cd5a74a2
S
1449 {
1450 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1451 'only_matching': True,
1452 },
bc2ca1bb 1453 {
1454 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1455 'only_matching': True,
1456 },
1457 {
1458 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1459 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1460 'only_matching': True,
1461 },
825cd268
RA
1462 {
1463 # DRM protected
1464 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1465 'only_matching': True,
4fe54c12
S
1466 },
1467 {
1468 # Video with unsupported adaptive stream type formats
1469 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1470 'info_dict': {
1471 'id': 'Z4Vy8R84T1U',
1472 'ext': 'mp4',
1473 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1474 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1475 'duration': 433,
1476 'upload_date': '20130923',
1477 'uploader': 'Amelia Putri Harwita',
1478 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1479 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1480 'formats': 'maxcount:10',
1481 },
1482 'params': {
1483 'skip_download': True,
1484 'youtube_include_dash_manifest': False,
1485 },
5429d6a9 1486 'skip': 'not actual anymore',
5caabd3c 1487 },
1488 {
822b9d9c 1489 # Youtube Music Auto-generated description
5caabd3c 1490 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1491 'info_dict': {
1492 'id': 'MgNrAu2pzNs',
1493 'ext': 'mp4',
1494 'title': 'Voyeur Girl',
1495 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1496 'upload_date': '20190312',
5429d6a9
S
1497 'uploader': 'Stephen - Topic',
1498 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1499 'artist': 'Stephen',
1500 'track': 'Voyeur Girl',
1501 'album': 'it\'s too much love to know my dear',
1502 'release_date': '20190313',
1503 'release_year': 2019,
1504 },
1505 'params': {
1506 'skip_download': True,
1507 },
1508 },
66b48727
RA
1509 {
1510 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1511 'only_matching': True,
1512 },
011e75e6
S
1513 {
1514 # invalid -> valid video id redirection
1515 'url': 'DJztXj2GPfl',
1516 'info_dict': {
1517 'id': 'DJztXj2GPfk',
1518 'ext': 'mp4',
1519 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1520 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1521 'upload_date': '20090125',
1522 'uploader': 'Prochorowka',
1523 'uploader_id': 'Prochorowka',
1524 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1525 'artist': 'Panjabi MC',
1526 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1527 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1528 },
1529 'params': {
1530 'skip_download': True,
1531 },
545cc85d 1532 'skip': 'Video unavailable',
ea74e00b
DP
1533 },
1534 {
1535 # empty description results in an empty string
1536 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1537 'info_dict': {
1538 'id': 'x41yOUIvK2k',
1539 'ext': 'mp4',
1540 'title': 'IMG 3456',
1541 'description': '',
1542 'upload_date': '20170613',
1543 'uploader_id': 'ElevageOrVert',
1544 'uploader': 'ElevageOrVert',
1545 },
1546 'params': {
1547 'skip_download': True,
1548 },
1549 },
a0566bbf 1550 {
29f7c58a 1551 # with '};' inside yt initial data (see [1])
1552 # see [2] for an example with '};' inside ytInitialPlayerResponse
1553 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1554 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1555 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1556 'info_dict': {
1557 'id': 'CHqg6qOn4no',
1558 'ext': 'mp4',
1559 'title': 'Part 77 Sort a list of simple types in c#',
1560 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1561 'upload_date': '20130831',
1562 'uploader_id': 'kudvenkat',
1563 'uploader': 'kudvenkat',
1564 },
1565 'params': {
1566 'skip_download': True,
1567 },
1568 },
29f7c58a 1569 {
1570 # another example of '};' in ytInitialData
1571 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1572 'only_matching': True,
1573 },
1574 {
1575 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1576 'only_matching': True,
1577 },
545cc85d 1578 {
cc2db878 1579 # https://github.com/ytdl-org/youtube-dl/pull/28094
1580 'url': 'OtqTfy26tG0',
1581 'info_dict': {
1582 'id': 'OtqTfy26tG0',
1583 'ext': 'mp4',
1584 'title': 'Burn Out',
1585 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1586 'upload_date': '20141120',
1587 'uploader': 'The Cinematic Orchestra - Topic',
1588 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1589 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1590 'artist': 'The Cinematic Orchestra',
1591 'track': 'Burn Out',
1592 'album': 'Every Day',
1593 'release_data': None,
1594 'release_year': None,
1595 },
1596 'params': {
1597 'skip_download': True,
1598 },
545cc85d 1599 },
bc2ca1bb 1600 {
1601 # controversial video, only works with bpctr when authenticated with cookies
1602 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1603 'only_matching': True,
1604 },
a1a7907b 1605 {
1606 # controversial video, requires bpctr/contentCheckOk
1607 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
1608 'info_dict': {
1609 'id': 'SZJvDhaSDnc',
1610 'ext': 'mp4',
1611 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
1612 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
1613 'uploader': 'CBS This Morning',
11f9be09 1614 'uploader_id': 'CBSThisMorning',
a1a7907b 1615 'upload_date': '20140716',
1616 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7'
1617 }
1618 },
f7ad7160 1619 {
1620 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1621 'url': 'cBvYw8_A0vQ',
1622 'info_dict': {
1623 'id': 'cBvYw8_A0vQ',
1624 'ext': 'mp4',
1625 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1626 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1627 'upload_date': '20201120',
1628 'uploader': 'Walk around Japan',
1629 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1630 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1631 },
1632 'params': {
1633 'skip_download': True,
1634 },
0fb983f6 1635 }, {
1636 # Has multiple audio streams
1637 'url': 'WaOKSUlf4TM',
1638 'only_matching': True
9297939e 1639 }, {
1640 # Requires Premium: has format 141 when requested using YTM url
1641 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1642 'only_matching': True
1643 }, {
120916da 1644 # multiple subtitles with same lang_code
1645 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1646 'only_matching': True,
109dd3b2 1647 }, {
1648 # Force use android client fallback
1649 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1650 'info_dict': {
1651 'id': 'YOelRv7fMxY',
11f9be09 1652 'title': 'DIGGING A SECRET TUNNEL Part 1',
109dd3b2 1653 'ext': '3gp',
1654 'upload_date': '20210624',
1655 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1656 'uploader': 'colinfurze',
11f9be09 1657 'uploader_id': 'colinfurze',
109dd3b2 1658 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
11f9be09 1659 'description': 'md5:b5096f56af7ccd7a555c84db81738b22'
109dd3b2 1660 },
1661 'params': {
1662 'format': '17', # 3gp format available on android
1663 'extractor_args': {'youtube': {'player_client': ['android']}},
1664 },
120916da 1665 },
109dd3b2 1666 {
1667 # Skip download of additional client configs (remix client config in this case)
1668 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1669 'only_matching': True,
1670 'params': {
1671 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1672 },
8fc54b12 1673 }, {
1674 # shorts
1675 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
1676 'only_matching': True,
1677 },
2eb88d95
PH
1678 ]
1679
@classmethod
def suitable(cls, url):
    """Reject URLs that carry a non-empty ``list`` query value; otherwise defer to the parent class."""
    # NOTE(review): parse_qs is also imported at module level; the local
    # import is kept as-is - confirm it is still needed before removing.
    from ..utils import parse_qs

    list_values = parse_qs(url).get('list', [None])
    if list_values[0]:
        return False
    return super(YoutubeIE, cls).suitable(url)
1688
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Initialise the extractor and create the per-instance player caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # _player_cache is keyed by tuples (see _decrypt_signature / _decrypt_nsig),
    # _code_cache by player id (see _load_player).
    self._player_cache = {}
    self._code_cache = {}
e0df6211 1693
def _extract_player_url(self, *ytcfgs, webpage=None):
    """Return an absolute player JS URL found in any of *ytcfgs*, or None.

    ``webpage`` is accepted but not used by this implementation.
    """
    candidate = traverse_obj(
        ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
        get_all=False, expected_type=compat_str)
    if not candidate:
        return None
    # Protocol-relative URL: pin it to https
    if candidate.startswith('//'):
        return 'https:' + candidate
    # Already absolute
    if re.match(r'https?://', candidate):
        return candidate
    # Anything else is resolved against the main site
    return compat_urlparse.urljoin('https://www.youtube.com', candidate)
1706
def _download_player_url(self, video_id, fatal=False):
    """Derive the player JS URL from the player version embedded in the iframe API script.

    Returns None when the script cannot be downloaded or carries no version.
    """
    iframe_js = self._download_webpage(
        'https://www.youtube.com/iframe_api',
        note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
    if not iframe_js:
        return None
    version = self._search_regex(
        r'player\\?/([0-9a-fA-F]{8})\\?/', iframe_js, 'player version', fatal=fatal)
    if not version:
        return None
    return f'https://www.youtube.com/s/player/{version}/player_ias.vflset/en_US/base.js'
1716
60064c53
PH
def _signature_cache_id(self, example_sig):
    """ Return a string representation of a signature """
    # The id is the dot-joined lengths of the dot-separated signature parts
    part_lengths = [compat_str(len(part)) for part in example_sig.split('.')]
    return '.'.join(part_lengths)
60064c53 1720
e40c758c
S
@classmethod
def _extract_player_info(cls, player_url):
    """Return the ``id`` group of the first ``_PLAYER_INFO_RE`` pattern matching *player_url*.

    Raises ExtractorError when none of the patterns match.
    """
    for pattern in cls._PLAYER_INFO_RE:
        found = re.search(pattern, player_url)
        if found:
            return found.group('id')
    raise ExtractorError('Cannot identify player %r' % player_url)
e40c758c 1730
def _load_player(self, video_id, player_url, fatal=True):
    """Return the player JS for *player_url*, downloading it once per player id.

    Returns None if the download yields nothing (possible when ``fatal`` is false).
    """
    player_id = self._extract_player_info(player_url)
    if player_id in self._code_cache:
        return self._code_cache.get(player_id)

    js_source = self._download_webpage(
        player_url, video_id, fatal=fatal,
        note='Downloading player ' + player_id,
        errnote='Download of %s failed' % player_url)
    # Only successful downloads are remembered
    if js_source:
        self._code_cache[player_id] = js_source
    return self._code_cache.get(player_id)
109dd3b2 1741
def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable that decrypts signatures shaped like *example_sig*.

    The function is stored in the downloader cache as a list of character
    indices; when such a list is found it is replayed instead of
    re-parsing the player JS.
    """
    player_id = self._extract_player_info(player_url)

    # Read from filesystem cache
    func_id = 'js_%s_%s' % (player_id, self._signature_cache_id(example_sig))
    assert os.path.basename(func_id) == func_id

    cache = self._downloader.cache
    stored_spec = cache.load('youtube-sigfuncs', func_id)
    if stored_spec is not None:
        return lambda s: ''.join(s[i] for i in stored_spec)

    code = self._load_player(video_id, player_url)
    if not code:
        return None
    sig_func = self._parse_sig_js(code)

    # Run the function over a string of distinct characters so that the
    # output reveals which input index lands at each output position.
    probe = ''.join(map(compat_chr, range(len(example_sig))))
    index_spec = [ord(c) for c in sig_func(probe)]

    cache.store('youtube-sigfuncs', func_id, index_spec)
    return sig_func
83799698 1764
def _print_sig_code(self, func, example_sig):
    """Print Python code equivalent to signature function *func*.

    Does nothing unless the 'youtube_print_sig_code' param is set.
    *func* is run over a string of distinct characters of the same length
    as *example_sig*; the resulting index list is rendered as a sum of
    ``s[...]`` index/slice expressions and written with to_screen.
    """
    if not self.get_param('youtube_print_sig_code'):
        return

    def gen_sig_code(idxs):
        # Yields 's[i]' / 's[a:b:c]' expressions covering idxs, merging
        # runs of consecutive indices (difference +1 or -1) into one slice.
        def _genslice(start, end, step):
            starts = '' if start == 0 else str(start)
            # A negative stop would be read as "from the end", so leave it open
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return 's[%s%s%s]' % (starts, ends, steps)

        step = None
        # Squelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                # Inside a run: keep going while the stride is unchanged
                if i - prev == step:
                    continue
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                # A new run starts at prev
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        # Flush the last element / the still-open run
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    # Tuple of part lengths, used as the guard condition in the printed code
    signature_id_tuple = '(%s)' % (
        ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1806
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Locate the signature function in player JS *jscode* and wrap it.

    The function name is taken from the ``sig`` group of the first
    matching pattern below; the function itself is extracted with
    JSInterpreter. Returns a callable taking the signature string.
    """
    funcname = self._search_regex(
        (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
         r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         # Obsolete patterns
         r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
         r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         # NOTE(review): identical to the pattern directly above - redundant
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
        jscode, 'Initial JS player signature function name', group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    # The extracted function is called with its arguments as a list
    return lambda s: initial_function([s])
1830
def _decrypt_signature(self, s, video_id, player_url):
    """Turn the encrypted s field into a working signature"""
    if player_url is None:
        raise ExtractorError('Cannot decrypt signature without player_url')

    try:
        # One function per (player, signature shape)
        cache_key = (player_url, self._signature_cache_id(s))
        if cache_key not in self._player_cache:
            self._player_cache[cache_key] = self._extract_signature_function(
                video_id, player_url, s)
        sig_func = self._player_cache[cache_key]
        self._print_sig_code(sig_func, s)
        return sig_func(s)
    except Exception as e:
        raise ExtractorError('Signature extraction failed: ' + traceback.format_exc(), cause=e)
1849
def _decrypt_nsig(self, s, video_id, player_url):
    """Turn the encrypted n field into a working signature"""
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')
    # Make the player URL absolute
    if player_url.startswith('//'):
        player_url = 'https:' + player_url
    elif not re.match(r'https?://', player_url):
        player_url = compat_urlparse.urljoin(
            'https://www.youtube.com', player_url)

    cache = self._player_cache
    value_key = ('nsig_value', s)
    # Results are memoised per input value
    if value_key in cache:
        return cache[value_key]

    try:
        func_key = ('nsig', player_url)
        if func_key not in cache:
            cache[func_key] = self._extract_n_function(video_id, player_url)
        n_func = cache[func_key]
        cache[value_key] = n_func(s)
        self.write_debug(f'Decrypted nsig {s} => {cache[value_key]}')
        return cache[value_key]
    except Exception as e:
        raise ExtractorError(traceback.format_exc(), cause=e, video_id=video_id)
404f611f 1874
def _extract_n_function_name(self, jscode):
    """Return the name of the n-parameter function referenced in player JS *jscode*."""
    name_patterns = (r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]{3})\([a-zA-Z0-9]\)',)
    return self._search_regex(
        name_patterns, jscode, 'Initial JS player n function name', group='nfunc')
1879
def _extract_n_function(self, video_id, player_url):
    """Return a callable that transforms an n value using the player's n function.

    The extracted function code is kept in the downloader cache under the
    player id and reused when present.
    """
    player_id = self._extract_player_info(player_url)
    cache = self._downloader.cache
    func_code = cache.load('youtube-nsig', player_id)

    if not func_code:
        # Nothing cached: pull the function out of the player JS and store it
        jscode = self._load_player(video_id, player_url)
        funcname = self._extract_n_function_name(jscode)
        jsi = JSInterpreter(jscode)
        func_code = jsi.extract_function_code(funcname)
        cache.store('youtube-nsig', player_id, func_code)
    else:
        jsi = JSInterpreter(func_code)

    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    def _run(s):
        # The function is rebuilt from its code on every call, then invoked
        # with its arguments as a list
        return jsi.extract_function_from_code(*func_code)([s])

    return _run
e0df6211 1897
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    The ytcfg 'STS' value is preferred; otherwise the player JS is searched.
    Without a player_url this raises ExtractorError when fatal, else warns
    and returns None.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return None

    code = self._load_player(video_id, player_url, fatal=fatal)
    if not code:
        return sts
    return int_or_none(self._search_regex(
        r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
        'JS player signature timestamp', group='sts', fatal=fatal))
1921
def _mark_watched(self, video_id, player_responses):
    """Request the playback-tracking URL found in *player_responses*.

    Warns and returns when no such URL is present; the request itself is
    non-fatal.
    """
    playback_url = traverse_obj(
        player_responses, (..., 'playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
        expected_type=url_or_none, get_all=False)
    if not playback_url:
        self.report_warning('Unable to mark watched')
        return

    url_parts = compat_urlparse.urlparse(playback_url)
    query = compat_urlparse.parse_qs(url_parts.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16))

    query['ver'] = ['2']
    query['cpn'] = [cpn]
    encoded_query = compat_urllib_parse_urlencode(query, True)
    playback_url = compat_urlparse.urlunparse(url_parts._replace(query=encoded_query))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1947
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """Return YouTube embed URLs / video ids found in *webpage*.

    Three sources are scanned, in order: generic player embeds (iframe,
    embed, object, data-video-url, SWFObject calls), lazyYT placeholders
    and the Wordpress "YouTube Video Importer" plugin. The latter two
    contribute the attribute value as-is. Duplicates are not removed.
    """
    # Embedded YouTube player
    # The embedSWF alternative used to read `embedSWF\(?:\s*`, i.e. an
    # optional "(" followed by a mandatory ":", which can never match an
    # actual `embedSWF("...")` call.
    entries = [
        unescapeHTML(mobj.group('url'))
        for mobj in re.finditer(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
        (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''', webpage)]

    # lazyYT YouTube embed
    entries.extend(map(
        unescapeHTML,
        re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)))

    # Wordpress "YouTube Video Importer" plugin
    matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # findall yields (q1, q2, id) tuples; only the id is wanted
    entries.extend(m[-1] for m in matches)

    return entries
1979
@staticmethod
def _extract_url(webpage):
    """Return the first entry found by _extract_urls, or None when there is none."""
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]
1984
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the ``id`` group of *url* matched against ``_VALID_URL``.

    Raises ExtractorError when the URL does not match.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group('id')
    raise ExtractorError('Invalid URL: %s' % url)
c5e8d7af 1991
def _extract_chapters_from_json(self, data, duration):
    """Build chapters from the chaptered player bar in *data* (see _extract_chapters)."""
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
    )
    chapter_list = traverse_obj(data, chapters_path, expected_type=list)

    def start_of(chapter):
        # Start is given in milliseconds
        millis = traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis'))
        return float_or_none(millis, scale=1000)

    def title_of(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    return self._extract_chapters(
        chapter_list, chapter_time=start_of, chapter_title=title_of, duration=duration)
2006
def _extract_chapters_from_engagement_panel(self, data, duration):
    """Build chapters from the macro-markers lists of the engagement panels in *data*.

    Returns the first non-empty chapter list, or [] when none yields chapters.
    """
    content_list = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    for contents in content_list:
        markers = traverse_obj(contents, (..., 'macroMarkersListItemRenderer'))
        chapters = self._extract_chapters(markers, chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []
2022
2023 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
84213ea8 2024 chapters = []
7c365c21 2025 last_chapter = {'start_time': 0}
2026 for idx, chapter in enumerate(chapter_list or []):
2027 title = chapter_title(chapter)
84213ea8
S
2028 start_time = chapter_time(chapter)
2029 if start_time is None:
2030 continue
7c365c21 2031 last_chapter['end_time'] = start_time
2032 if start_time < last_chapter['start_time']:
2033 if idx == 1:
2034 chapters.pop()
2035 self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])
2036 else:
2037 self.report_warning(f'Invalid start time for chapter "{title}"')
2038 continue
2039 last_chapter = {'start_time': start_time, 'title': title}
2040 chapters.append(last_chapter)
2041 last_chapter['end_time'] = duration
84213ea8
S
2042 return chapters
2043
def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
    """Find a JSON blob in *webpage* via *regex* and parse it (non-fatal).

    The regex followed by the initial-data boundary pattern is tried first,
    then the bare regex; '{}' is parsed when neither matches.
    """
    patterns = (
        r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
        regex)
    raw_json = self._search_regex(patterns, webpage, name, default='{}')
    return self._parse_json(raw_json, video_id, fatal=False)
84213ea8 2048
d92f5d5a 2049 @staticmethod
2050 def parse_time_text(time_text):
2051 """
2052 Parse the comment time text
2053 time_text is in the format 'X units ago (edited)'
2054 """
2055 time_text_split = time_text.split(' ')
2056 if len(time_text_split) >= 3:
da503b7a 2057 try:
2058 return datetime_from_str('now-%s%s' % (time_text_split[0], time_text_split[1]), precision='auto')
2059 except ValueError:
2060 return None
d92f5d5a 2061
a1c5d2ca
M
def _extract_comment(self, comment_renderer, parent=None):
    """Convert a comment renderer dict into a comment info dict.

    Returns None when the renderer has no 'commentId'. 'timestamp' is None
    when the published time text is missing or cannot be parsed. 'parent'
    is the given parent id, or 'root'.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return

    text = self._get_text(comment_renderer, 'contentText')

    # note: timestamp is an estimate calculated from the current time and time_text
    time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
    time_text_dt = self.parse_time_text(time_text)
    # Must be bound even when the time text is unusable; it is read
    # unconditionally when building the result below.
    timestamp = None
    if isinstance(time_text_dt, datetime.datetime):
        timestamp = calendar.timegm(time_text_dt.timetuple())
    author = self._get_text(comment_renderer, 'authorText')
    author_id = try_get(comment_renderer,
                        lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)

    votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
                                                   lambda x: x['likeCount']), compat_str)) or 0
    author_thumbnail = try_get(comment_renderer,
                               lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)

    author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
    is_favorited = 'creatorHeart' in (try_get(
        comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
    return {
        'id': comment_id,
        'text': text,
        'timestamp': timestamp,
        'time_text': time_text,
        'like_count': votes,
        'is_favorited': is_favorited,
        'author': author,
        'author_id': author_id,
        'author_thumbnail': author_thumbnail,
        'author_is_uploader': author_is_uploader,
        'parent': parent or 'root'
    }
2099
def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, comment_counts=None):
    """Generator yielding comment dicts for a comment section or a reply thread.

    Continuations starting from *root_continuation_data* are followed via
    the 'next' endpoint. Replies are obtained by calling this method
    recursively with *parent* set to the parent comment id.

    *comment_counts* is a mutable list shared with the recursive calls:
    [comments so far, estimated total comments, current comment thread #].
    """

    def extract_header(contents):
        # Reads the expected comment count from the header and returns the
        # continuation of the requested sort order (None if not found).
        _continuation = None
        for content in contents:
            comments_header_renderer = try_get(content, lambda x: x['commentsHeaderRenderer'])
            expected_comment_count = parse_count(self._get_text(
                comments_header_renderer, 'countText', 'commentsCount', max_runs=1))

            if expected_comment_count:
                comment_counts[1] = expected_comment_count
                self.to_screen('Downloading ~%d comments' % expected_comment_count)
            sort_mode_str = self._configuration_arg('comment_sort', [''])[0]
            comment_sort_index = int(sort_mode_str != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = sort_menu_item.get('title')
            if isinstance(sort_text, compat_str):
                sort_text = sort_text.lower()
            else:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text)
            break
        return _continuation

    def extract_thread(contents):
        # Yields each comment in contents, followed by its replies
        if not parent:
            comment_counts[2] = 0
        for content in contents:
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            # Either a comment nested in a thread renderer, or a bare comment renderer
            comment_renderer = try_get(
                comment_thread_renderer, (lambda x: x['comment']['commentRenderer'], dict)) or try_get(
                content, (lambda x: x['commentRenderer'], dict))

            if not comment_renderer:
                continue
            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue
            comment_counts[0] += 1
            yield comment
            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                comment_counts[2] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, ytcfg, video_id,
                    parent=comment.get('id'), comment_counts=comment_counts)

                for reply_comment in comment_entries_iter:
                    yield reply_comment

    # YouTube comments have a max depth of 2
    max_depth = int_or_none(self._configuration_arg('max_comment_depth', [''])[0]) or float('inf')
    if max_depth == 1 and parent:
        return
    if not comment_counts:
        # comment so far, est. total comments, current comment thread #
        comment_counts = [0, 0, 0]

    continuation = self._extract_continuation(root_continuation_data)
    if continuation and len(continuation['continuation']) < 27:
        self.write_debug('Detected old API continuation token. Generating new API compatible token.')
        continuation_token = self._generate_comment_continuation(video_id)
        continuation = self._build_api_continuation_query(continuation_token, None)

    # Surface any message shown in place of the comments (top level only)
    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent:
        self.report_warning(message, video_id=video_id)

    visitor_data = None
    # For the top-level section the first response is used for the header only
    is_first_continuation = parent is None

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=visitor_data)
        comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                    comment_counts[2], comment_prog_str)
        else:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)

        response = self._extract_response(
            item_id=None, query=continuation,
            ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
            check_get_keys=('onResponseReceivedEndpoints', 'continuationContents'))
        if not response:
            break
        # Carry visitor data over to the following requests
        visitor_data = try_get(
            response,
            lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
            compat_str) or visitor_data

        continuation_contents = dict_get(response, ('onResponseReceivedEndpoints', 'continuationContents'))

        continuation = None
        if isinstance(continuation_contents, list):
            for continuation_section in continuation_contents:
                if not isinstance(continuation_section, dict):
                    continue
                continuation_items = try_get(
                    continuation_section,
                    (lambda x: x['reloadContinuationItemsCommand']['continuationItems'],
                     lambda x: x['appendContinuationItemsAction']['continuationItems']),
                    list) or []
                if is_first_continuation:
                    continuation = extract_header(continuation_items)
                    is_first_continuation = False
                    if continuation:
                        break
                    continue
                count = 0
                for count, entry in enumerate(extract_thread(continuation_items)):
                    yield entry
                continuation = self._extract_continuation({'contents': continuation_items})
                if continuation:
                    # Sometimes YouTube provides a continuation without any comments
                    # In most cases we end up just downloading these with very little comments to come.
                    # NOTE(review): count is the last enumerate index, so it is
                    # also 0 when exactly one entry was yielded - confirm intent.
                    if count == 0:
                        if not parent:
                            self.report_warning('No comments received - assuming end of comments')
                        continuation = None
                    break

        # Deprecated response structure
        elif isinstance(continuation_contents, dict):
            known_continuation_renderers = ('itemSectionContinuation', 'commentRepliesContinuation')
            for key, continuation_renderer in continuation_contents.items():
                if key not in known_continuation_renderers:
                    continue
                if not isinstance(continuation_renderer, dict):
                    continue
                if is_first_continuation:
                    header_continuation_items = [continuation_renderer.get('header') or {}]
                    continuation = extract_header(header_continuation_items)
                    is_first_continuation = False
                    if continuation:
                        break

                # Sometimes YouTube provides a continuation without any comments
                # In most cases we end up just downloading these with very little comments to come.
                count = 0
                for count, entry in enumerate(extract_thread(continuation_renderer.get('contents') or {})):
                    yield entry
                continuation = self._extract_continuation(continuation_renderer)
                if count == 0:
                    if not parent:
                        self.report_warning('No comments received - assuming end of comments')
                    continuation = None
                break
a1c5d2ca 2267
2d6659b9 2268 @staticmethod
2269 def _generate_comment_continuation(video_id):
2270 """
2271 Generates initial comment section continuation token from given video id
2272 """
2273 b64_vid_id = base64.b64encode(bytes(video_id.encode('utf-8')))
2274 parts = ('Eg0SCw==', b64_vid_id, 'GAYyJyIRIgs=', b64_vid_id, 'MAB4AjAAQhBjb21tZW50cy1zZWN0aW9u')
2275 new_continuation_intlist = list(itertools.chain.from_iterable(
2276 [bytes_to_intlist(base64.b64decode(part)) for part in parts]))
2277 return base64.b64encode(intlist_to_bytes(new_continuation_intlist)).decode('utf-8')
2278
def _get_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction"""
    def _real_comment_extract(contents):
        # Pick the item section that holds the comments (None if absent)
        sections = traverse_obj(contents, (..., 'itemSectionRenderer'), default={})
        renderer = None
        for item in sections:
            if item.get('sectionIdentifier') == 'comment-item-section':
                renderer = item
                break
        yield from self._comment_entries(renderer, ytcfg, video_id)

    max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0])
    # Force English regardless of account setting to prevent parsing issues
    # See: https://github.com/yt-dlp/yt-dlp/issues/532
    ytcfg = copy.deepcopy(ytcfg)
    client_context = traverse_obj(
        ytcfg, ('INNERTUBE_CONTEXT', 'client'), expected_type=dict, default={})
    client_context['hl'] = 'en'
    return itertools.islice(_real_comment_extract(contents), 0, max_comments)
a1c5d2ca 2294
109dd3b2 2295 @staticmethod
99e9e001 2296 def _get_checkok_params():
2297 return {'contentCheckOk': True, 'racyCheckOk': True}
2298
2299 @classmethod
2300 def _generate_player_context(cls, sts=None):
109dd3b2 2301 context = {
2302 'html5Preference': 'HTML5_PREF_WANTS',
2303 }
2304 if sts is not None:
2305 context['signatureTimestamp'] = sts
2306 return {
2307 'playbackContext': {
2308 'contentPlaybackContext': context
a1a7907b 2309 },
99e9e001 2310 **cls._get_checkok_params()
109dd3b2 2311 }
2312
e7e94f2a
D
@staticmethod
def _is_agegated(player_response):
    """
    Check whether a player response indicates an age-gated video

    @param player_response  Player response to inspect
    @returns                True if `playabilityStatus` carries a legacy age gate
                            reason, or if its `status`/`reason` contains one of
                            the known age gate markers
    """
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    # The markers are lower-case, but statuses are upper-case identifiers
    # (e.g. AGE_VERIFICATION_REQUIRED), so the comparison must ignore case.
    # Non-string values cannot contain a marker and are skipped
    normalized = [reason.lower() for reason in reasons if isinstance(reason, str)]
    return any(expected in reason for expected in AGE_GATE_REASONS for reason in normalized)
2324
@staticmethod
def _is_unplayable(player_response):
    """Tell whether the playability status of the player response is exactly 'UNPLAYABLE'"""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'
9275f62c 2328
99e9e001 2329 def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr):
109dd3b2 2330
11f9be09 2331 session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
2332 syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
b6de707d 2333 sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False) if player_url else None
11f9be09 2334 headers = self.generate_api_headers(
99e9e001 2335 ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)
9297939e 2336
11f9be09 2337 yt_query = {'videoId': video_id}
2338 yt_query.update(self._generate_player_context(sts))
2339 return self._extract_response(
2340 item_id=video_id, ep='player', query=yt_query,
379e44ed 2341 ytcfg=player_ytcfg, headers=headers, fatal=True,
000c15a4 2342 default_client=client,
11f9be09 2343 note='Downloading %s player API JSON' % client.replace('_', ' ').strip()
2344 ) or None
2345
def _get_requested_clients(self, url, smuggled_data):
    """
    Determine the ordered list of player clients to query

    The clients come from the `player_client` extractor argument, where
    `default` and `all` expand to the default and to all allowed clients.
    Unsupported names are skipped with a warning. For music URLs the
    `<client>_music` variant of every selected client is added when it exists.

    @param url            URL being extracted
    @param smuggled_data  Smuggled data dict; `is_music_url` forces the music clients
    @returns              The selected clients, passed through `orderedSet`
    """
    requested_clients = []
    default = ['android', 'web']
    # Clients whose name starts with "_" are not selectable
    allowed_clients = sorted(
        [client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'],
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'default':
            requested_clients.extend(default)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        requested_clients = default

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Build the additions first: extending a list from a generator that
        # iterates over that same list would also visit the new entries
        music_clients = [
            f'{client}_music' for client in requested_clients if f'{client}_music' in INNERTUBE_CLIENTS]
        requested_clients.extend(music_clients)

    return orderedSet(requested_clients)
cf7e015f 2369
c0bc527b
M
2370 def _extract_player_ytcfg(self, client, video_id):
2371 url = {
2372 'web_music': 'https://music.youtube.com',
2373 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
2374 }.get(client)
2375 if not url:
2376 return {}
2377 webpage = self._download_webpage(url, video_id, fatal=False, note=f'Downloading {client} config')
2378 return self.extract_ytcfg(video_id, webpage) or {}
2379
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
    """
    Collect the player responses of all requested clients

    Additional `_agegate`/`_creator` clients are queued while processing when a
    response turns out to be age-gated. Errors of individual clients are only
    reported as warnings as long as at least one response was obtained.

    @param clients       Requested client names, in order of preference
    @param video_id      Id of the video
    @param webpage       Watch page, or a falsy value if it was not downloaded
    @param master_ytcfg  ytcfg used as fallback for every client
    @returns             Tuple (list of player responses, player URL or None)
    """
    initial_pr = None
    if webpage:
        initial_pr = self._extract_yt_initial_variable(
            webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
            video_id, 'initial player response')

    requested = clients
    # Reversed copy used as a stack, so that pop() follows the requested order
    pending = clients[::-1]
    prs = []

    def append_client(client_name):
        # Only queue known clients that the user did not request already
        if client_name not in INNERTUBE_CLIENTS or client_name in requested:
            return
        pending.append(client_name)

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr:
        prs.append({**initial_pr, 'streamingData': None})

    last_error = None
    tried_iframe_fallback = False
    player_url = None
    while pending:
        client = pending.pop()
        player_ytcfg = master_ytcfg if client == 'web' else {}
        if 'configs' not in self._configuration_arg('player_skip'):
            player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg

        player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        # The iframe fallback is attempted at most once per extraction
        if require_js_player and not (player_url or tried_iframe_fallback):
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            if client == 'web' and initial_pr:
                pr = initial_pr
            else:
                pr = self._extract_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg,
                    player_url if require_js_player else None, initial_pr)
        except ExtractorError as e:
            # Only the most recent error is kept; the previous one becomes a warning
            if last_error:
                self.report_warning(last_error)
            last_error = e
            continue

        if pr:
            prs.append(pr)

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if client.endswith('_agegate') and self._is_unplayable(pr) and self.is_authenticated:
            append_client(client.replace('_agegate', '_creator'))
        elif self._is_agegated(pr):
            append_client(f'{client}_agegate')

    if last_error:
        if not prs:
            raise last_error
        self.report_warning(last_error)
    return prs, player_url
11f9be09 2446
2447 def _extract_formats(self, streaming_data, video_id, player_url, is_live):
a0bb6ce5 2448 itags, stream_ids = {}, []
2a9c6dcd 2449 itag_qualities, res_qualities = {}, {}
d3fc8074 2450 q = qualities([
2a9c6dcd 2451 # Normally tiny is the smallest video-only formats. But
2452 # audio-only formats with unknown quality may get tagged as tiny
2453 'tiny',
2454 'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high', # Audio only formats
d3fc8074 2455 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
2456 ])
11f9be09 2457 streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])
9297939e 2458
545cc85d 2459 for fmt in streaming_formats:
2460 if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
2461 continue
321bf820 2462
cc2db878 2463 itag = str_or_none(fmt.get('itag'))
9297939e 2464 audio_track = fmt.get('audioTrack') or {}
2465 stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
2466 if stream_id in stream_ids:
2467 continue
2468
cc2db878 2469 quality = fmt.get('quality')
2a9c6dcd 2470 height = int_or_none(fmt.get('height'))
d3fc8074 2471 if quality == 'tiny' or not quality:
2472 quality = fmt.get('audioQuality', '').lower() or quality
2a9c6dcd 2473 # The 3gp format (17) in android client has a quality of "small",
2474 # but is actually worse than other formats
2475 if itag == '17':
2476 quality = 'tiny'
2477 if quality:
2478 if itag:
2479 itag_qualities[itag] = quality
2480 if height:
2481 res_qualities[height] = quality
cc2db878 2482 # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
2483 # (adding `&sq=0` to the URL) and parsing emsg box to determine the
2484 # number of fragment that would subsequently requested with (`&sq=N`)
2485 if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
2486 continue
2487
545cc85d 2488 fmt_url = fmt.get('url')
2489 if not fmt_url:
2490 sc = compat_parse_qs(fmt.get('signatureCipher'))
2491 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
2492 encrypted_sig = try_get(sc, lambda x: x['s'][0])
2493 if not (sc and fmt_url and encrypted_sig):
2494 continue
545cc85d 2495 if not player_url:
201e9eaa 2496 continue
545cc85d 2497 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
2498 sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
2499 fmt_url += '&' + sp + '=' + signature
2500
404f611f 2501 query = parse_qs(fmt_url)
2502 throttled = False
2503 if query.get('ratebypass') != ['yes'] and query.get('n'):
2504 try:
2505 fmt_url = update_url_query(fmt_url, {
2506 'n': self._decrypt_nsig(query['n'][0], video_id, player_url)})
2507 except ExtractorError as e:
aa9369a2 2508 self.report_warning(
2509 f'nsig extraction failed: You may experience throttling for some formats\n'
2510 f'n = {query["n"][0]} ; player = {player_url}\n{e}', only_once=True)
404f611f 2511 throttled = True
2512
545cc85d 2513 if itag:
a0bb6ce5 2514 itags[itag] = 'https'
9297939e 2515 stream_ids.append(stream_id)
2516
cc2db878 2517 tbr = float_or_none(
2518 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
545cc85d 2519 dct = {
2520 'asr': int_or_none(fmt.get('audioSampleRate')),
2521 'filesize': int_or_none(fmt.get('contentLength')),
2522 'format_id': itag,
34921b43 2523 'format_note': join_nonempty(
26e8e044 2524 '%s%s' % (audio_track.get('displayName') or '',
2525 ' (default)' if audio_track.get('audioIsDefault') else ''),
404f611f 2526 fmt.get('qualityLabel') or quality.replace('audio_quality_', ''),
34921b43 2527 throttled and 'THROTTLED', delim=', '),
c18d4482 2528 'source_preference': -10 if throttled else -1,
a4211baf 2529 'fps': int_or_none(fmt.get('fps')) or None,
2a9c6dcd 2530 'height': height,
dca3ff4a 2531 'quality': q(quality),
cc2db878 2532 'tbr': tbr,
545cc85d 2533 'url': fmt_url,
2a9c6dcd 2534 'width': int_or_none(fmt.get('width')),
0fb983f6 2535 'language': audio_track.get('id', '').split('.')[0],
26e8e044 2536 'language_preference': 1 if audio_track.get('audioIsDefault') else -1,
545cc85d 2537 }
60bdb7bd 2538 mime_mobj = re.match(
2539 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
2540 if mime_mobj:
2541 dct['ext'] = mimetype2ext(mime_mobj.group(1))
2542 dct.update(parse_codecs(mime_mobj.group(2)))
cc2db878 2543 no_audio = dct.get('acodec') == 'none'
2544 no_video = dct.get('vcodec') == 'none'
2545 if no_audio:
2546 dct['vbr'] = tbr
2547 if no_video:
2548 dct['abr'] = tbr
2549 if no_audio or no_video:
545cc85d 2550 dct['downloader_options'] = {
2551 # Youtube throttles chunks >~10M
2552 'http_chunk_size': 10485760,
bf1317d2 2553 }
7c60c33e 2554 if dct.get('ext'):
2555 dct['container'] = dct['ext'] + '_dash'
11f9be09 2556 yield dct
545cc85d 2557
4bb6b02f 2558 skip_manifests = self._configuration_arg('skip')
57015a4a 2559 get_dash = (
2560 (not is_live or self._configuration_arg('include_live_dash'))
2561 and 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True))
5d3a0e79 2562 get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)
2563
a0bb6ce5 2564 def process_manifest_format(f, proto, itag):
2565 if itag in itags:
2566 if itags[itag] == proto or f'{itag}-{proto}' in itags:
2567 return False
2568 itag = f'{itag}-{proto}'
2569 if itag:
2570 f['format_id'] = itag
2571 itags[itag] = proto
2572
2573 f['quality'] = next((
2574 q(qdict[val])
e339d25a 2575 for val, qdict in ((f.get('format_id', '').split('-')[0], itag_qualities), (f.get('height'), res_qualities))
a0bb6ce5 2576 if val in qdict), -1)
2577 return True
2a9c6dcd 2578
11f9be09 2579 for sd in streaming_data:
5d3a0e79 2580 hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
9297939e 2581 if hls_manifest_url:
2a9c6dcd 2582 for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
a0bb6ce5 2583 if process_manifest_format(f, 'hls', self._search_regex(
2584 r'/itag/(\d+)', f['url'], 'itag', default=None)):
2585 yield f
545cc85d 2586
5d3a0e79 2587 dash_manifest_url = get_dash and sd.get('dashManifestUrl')
2588 if dash_manifest_url:
2a9c6dcd 2589 for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
a0bb6ce5 2590 if process_manifest_format(f, 'dash', f['format_id']):
2591 f['filesize'] = int_or_none(self._search_regex(
2592 r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
2593 yield f
11f9be09 2594
def _extract_storyboard(self, player_responses, duration):
    """
    Yield one storyboard format per level of the storyboard spec

    The spec is a "|" separated string: the first field is the URL template and
    every following field describes one level as 8 "#" separated values.

    @param player_responses  Player responses to take the storyboard spec from
    @param duration          Duration of the video; needed to compute the duration
                             of the fragments, so nothing is yielded without it
    """
    spec = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
    # str.split never returns an empty list, so the popped URL template has to
    # be checked instead of the list itself
    base_url = spec.pop()
    if not base_url or not duration:
        return
    L = len(spec) - 1
    for i, args in enumerate(spec):
        args = args.split('#')
        counts = list(map(int_or_none, args[:5]))
        if len(args) != 8 or not all(counts):
            self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
            continue
        width, height, frame_count, cols, rows = counts
        N, sigh = args[6:]

        url = base_url.replace('$L', str(L - i)).replace('$N', N) + f'&sigh={sigh}'
        # Each fragment is a grid of cols x rows frames; the last one may be partial
        fragment_count = frame_count / (cols * rows)
        fragment_duration = duration / fragment_count
        yield {
            'format_id': f'sb{i}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': url,
            'width': width,
            'height': height,
            'fragments': [{
                'path': url.replace('$M', str(j)),
                # The final fragment only covers the remaining time
                'duration': min(fragment_duration, duration - (j * fragment_duration)),
            } for j in range(math.ceil(fragment_count))],
        }
2629
11f9be09 2630 def _real_extract(self, url):
2631 url, smuggled_data = unsmuggle_url(url, {})
2632 video_id = self._match_id(url)
2633
2634 base_url = self.http_scheme() + '//www.youtube.com/'
2635 webpage_url = base_url + 'watch?v=' + video_id
b6de707d 2636 webpage = None
2637 if 'webpage' not in self._configuration_arg('player_skip'):
2638 webpage = self._download_webpage(
2639 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
11f9be09 2640
2641 master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
11f9be09 2642
b6de707d 2643 player_responses, player_url = self._extract_player_responses(
11f9be09 2644 self._get_requested_clients(url, smuggled_data),
99e9e001 2645 video_id, webpage, master_ytcfg)
11f9be09 2646
11f9be09 2647 playability_statuses = traverse_obj(
2648 player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])
2649
2650 trailer_video_id = get_first(
2651 playability_statuses,
2652 ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
2653 expected_type=str)
2654 if trailer_video_id:
2655 return self.url_result(
2656 trailer_video_id, self.ie_key(), trailer_video_id)
2657
2658 search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
2659 if webpage else (lambda x: None))
2660
2661 video_details = traverse_obj(
2662 player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
2663 microformats = traverse_obj(
2664 player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
2665 expected_type=dict, default=[])
2666 video_title = (
2667 get_first(video_details, 'title')
2668 or self._get_text(microformats, (..., 'title'))
2669 or search_meta(['og:title', 'twitter:title', 'title']))
2670 video_description = get_first(video_details, 'shortDescription')
2671
d89257f3 2672 multifeed_metadata_list = get_first(
2673 player_responses,
2674 ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
2675 expected_type=str)
2676 if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):
2677 if self.get_param('noplaylist'):
11f9be09 2678 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
d89257f3 2679 else:
2680 entries = []
2681 feed_ids = []
2682 for feed in multifeed_metadata_list.split(','):
2683 # Unquote should take place before split on comma (,) since textual
2684 # fields may contain comma as well (see
2685 # https://github.com/ytdl-org/youtube-dl/issues/8536)
2686 feed_data = compat_parse_qs(
2687 compat_urllib_parse_unquote_plus(feed))
2688
2689 def feed_entry(name):
2690 return try_get(
2691 feed_data, lambda x: x[name][0], compat_str)
2692
2693 feed_id = feed_entry('id')
2694 if not feed_id:
2695 continue
2696 feed_title = feed_entry('title')
2697 title = video_title
2698 if feed_title:
2699 title += ' (%s)' % feed_title
2700 entries.append({
2701 '_type': 'url_transparent',
2702 'ie_key': 'Youtube',
2703 'url': smuggle_url(
2704 '%swatch?v=%s' % (base_url, feed_data['id'][0]),
2705 {'force_singlefeed': True}),
2706 'title': title,
2707 })
2708 feed_ids.append(feed_id)
2709 self.to_screen(
2710 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2711 % (', '.join(feed_ids), video_id))
2712 return self.playlist_result(
2713 entries, video_id, video_title, video_description)
11f9be09 2714
7ea65411 2715 live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
11f9be09 2716 is_live = get_first(video_details, 'isLive')
7ea65411 2717 if is_live is None:
2718 is_live = get_first(live_broadcast_details, 'isLiveNow')
11f9be09 2719
2720 streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
2721 formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live))
bf1317d2 2722
545cc85d 2723 if not formats:
11f9be09 2724 if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
88acdbc2 2725 self.report_drm(video_id)
11f9be09 2726 pemr = get_first(
2727 playability_statuses,
2728 ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
2729 reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
2730 subreason = clean_html(self._get_text(pemr, 'subreason') or '')
545cc85d 2731 if subreason:
545cc85d 2732 if subreason == 'The uploader has not made this video available in your country.':
11f9be09 2733 countries = get_first(microformats, 'availableCountries')
545cc85d 2734 if not countries:
2735 regions_allowed = search_meta('regionsAllowed')
2736 countries = regions_allowed.split(',') if regions_allowed else None
b7da73eb 2737 self.raise_geo_restricted(subreason, countries, metadata_available=True)
11f9be09 2738 reason += f'. {subreason}'
545cc85d 2739 if reason:
b7da73eb 2740 self.raise_no_formats(reason, expected=True)
bf1317d2 2741
11f9be09 2742 keywords = get_first(video_details, 'keywords', expected_type=list) or []
545cc85d 2743 if not keywords and webpage:
2744 keywords = [
2745 unescapeHTML(m.group('content'))
2746 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2747 for keyword in keywords:
2748 if keyword.startswith('yt:stretch='):
201c1459 2749 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2750 if mobj:
2751 # NB: float is intentional for forcing float division
2752 w, h = (float(v) for v in mobj.groups())
2753 if w > 0 and h > 0:
2754 ratio = w / h
2755 for f in formats:
2756 if f.get('vcodec') != 'none':
2757 f['stretched_ratio'] = ratio
2758 break
6449cd80 2759
545cc85d 2760 thumbnails = []
11f9be09 2761 thumbnail_dicts = traverse_obj(
2762 (video_details, microformats), (..., ..., 'thumbnail', 'thumbnails', ...),
2763 expected_type=dict, default=[])
2764 for thumbnail in thumbnail_dicts:
2765 thumbnail_url = thumbnail.get('url')
2766 if not thumbnail_url:
2767 continue
2768 # Sometimes youtube gives a wrong thumbnail URL. See:
2769 # https://github.com/yt-dlp/yt-dlp/issues/233
2770 # https://github.com/ytdl-org/youtube-dl/issues/28023
2771 if 'maxresdefault' in thumbnail_url:
2772 thumbnail_url = thumbnail_url.split('?')[0]
2773 thumbnails.append({
2774 'url': thumbnail_url,
2775 'height': int_or_none(thumbnail.get('height')),
2776 'width': int_or_none(thumbnail.get('width')),
2777 })
ff2751ac 2778 thumbnail_url = search_meta(['og:image', 'twitter:image'])
2779 if thumbnail_url:
2780 thumbnails.append({
2781 'url': thumbnail_url,
ff2751ac 2782 })
fccf5021 2783 original_thumbnails = thumbnails.copy()
2784
0ba692ac 2785 # The best resolution thumbnails sometimes does not appear in the webpage
2786 # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
cca80fe6 2787 # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
e820fbaa 2788 thumbnail_names = [
2789 'maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3',
cca80fe6 2790 'hqdefault', 'hq1', 'hq2', 'hq3', '0',
2791 'mqdefault', 'mq1', 'mq2', 'mq3',
2792 'default', '1', '2', '3'
2793 ]
cca80fe6 2794 n_thumbnail_names = len(thumbnail_names)
0ba692ac 2795 thumbnails.extend({
2796 'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
2797 video_id=video_id, name=name, ext=ext,
2798 webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
cca80fe6 2799 } for name in thumbnail_names for ext in ('webp', 'jpg'))
0ba692ac 2800 for thumb in thumbnails:
cca80fe6 2801 i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
0ba692ac 2802 thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
ff2751ac 2803 self._remove_duplicate_formats(thumbnails)
fccf5021 2804 self._downloader._sort_thumbnails(original_thumbnails)
545cc85d 2805
7ea65411 2806 category = get_first(microformats, 'category') or search_meta('genre')
2807 channel_id = str_or_none(
2808 get_first(video_details, 'channelId')
2809 or get_first(microformats, 'externalChannelId')
2810 or search_meta('channelId'))
2811 duration = int_or_none(
2812 get_first(video_details, 'lengthSeconds')
2813 or get_first(microformats, 'lengthSeconds')
2814 or parse_duration(search_meta('duration'))) or None
2815 owner_profile_url = get_first(microformats, 'ownerProfileUrl')
2816
2817 live_content = get_first(video_details, 'isLiveContent')
2818 is_upcoming = get_first(video_details, 'isUpcoming')
2819 if is_live is None:
2820 if is_upcoming or live_content is False:
2821 is_live = False
2822 if is_upcoming is None and (live_content or is_live):
2823 is_upcoming = False
2824 live_starttime = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
2825 live_endtime = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
2826 if not duration and live_endtime and live_starttime:
2827 duration = live_endtime - live_starttime
2828
720c3099 2829 formats.extend(self._extract_storyboard(player_responses, duration))
2830
2831 # Source is given priority since formats that throttle are given lower source_preference
2832 # When throttling issue is fully fixed, remove this
2833 self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang', 'proto'))
2834
545cc85d 2835 info = {
2836 'id': video_id,
2837 'title': self._live_title(video_title) if is_live else video_title,
2838 'formats': formats,
2839 'thumbnails': thumbnails,
fccf5021 2840 # The best thumbnail that we are sure exists. Prevents unnecessary
2841 # URL checking if user don't care about getting the best possible thumbnail
2842 'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
545cc85d 2843 'description': video_description,
2844 'upload_date': unified_strdate(
11f9be09 2845 get_first(microformats, 'uploadDate')
545cc85d 2846 or search_meta('uploadDate')),
11f9be09 2847 'uploader': get_first(video_details, 'author'),
545cc85d 2848 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2849 'uploader_url': owner_profile_url,
2850 'channel_id': channel_id,
11f9be09 2851 'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
545cc85d 2852 'duration': duration,
2853 'view_count': int_or_none(
11f9be09 2854 get_first((video_details, microformats), (..., 'viewCount'))
545cc85d 2855 or search_meta('interactionCount')),
11f9be09 2856 'average_rating': float_or_none(get_first(video_details, 'averageRating')),
545cc85d 2857 'age_limit': 18 if (
11f9be09 2858 get_first(microformats, 'isFamilySafe') is False
545cc85d 2859 or search_meta('isFamilyFriendly') == 'false'
2860 or search_meta('og:restrictions:age') == '18+') else 0,
2861 'webpage_url': webpage_url,
2862 'categories': [category] if category else None,
2863 'tags': keywords,
11f9be09 2864 'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
7ea65411 2865 'is_live': is_live,
2866 'was_live': (False if is_live or is_upcoming or live_content is False
2867 else None if is_live is None or is_upcoming is None
2868 else live_content),
2869 'live_status': 'is_upcoming' if is_upcoming else None, # rest will be set by YoutubeDL
2870 'release_timestamp': live_starttime,
545cc85d 2871 }
b477fc13 2872
3944e7af 2873 pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
545cc85d 2874 if pctr:
ecdc9049 2875 def get_lang_code(track):
2876 return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
2877 or track.get('languageCode'))
2878
2879 # Converted into dicts to remove duplicates
2880 captions = {
2881 get_lang_code(sub): sub
2882 for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
2883 translation_languages = {
2884 lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
2885 for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}
2886
774d79cc 2887 def process_language(container, base_url, lang_code, sub_name, query):
120916da 2888 lang_subs = container.setdefault(lang_code, [])
545cc85d 2889 for fmt in self._SUBTITLE_FORMATS:
2890 query.update({
2891 'fmt': fmt,
2892 })
2893 lang_subs.append({
2894 'ext': fmt,
2895 'url': update_url_query(base_url, query),
774d79cc 2896 'name': sub_name,
545cc85d 2897 })
7e72694b 2898
ecdc9049 2899 subtitles, automatic_captions = {}, {}
2900 for lang_code, caption_track in captions.items():
2901 base_url = caption_track.get('baseUrl')
545cc85d 2902 if not base_url:
2903 continue
ecdc9049 2904 lang_name = self._get_text(caption_track, 'name', max_runs=1)
545cc85d 2905 if caption_track.get('kind') != 'asr':
545cc85d 2906 if not lang_code:
2907 continue
2908 process_language(
ecdc9049 2909 subtitles, base_url, lang_code, lang_name, {})
2910 if not caption_track.get('isTranslatable'):
2911 continue
3944e7af 2912 for trans_code, trans_name in translation_languages.items():
2913 if not trans_code:
545cc85d 2914 continue
ecdc9049 2915 if caption_track.get('kind') != 'asr':
2916 trans_code += f'-{lang_code}'
2917 trans_name += format_field(lang_name, template=' from %s')
545cc85d 2918 process_language(
ecdc9049 2919 automatic_captions, base_url, trans_code, trans_name, {'tlang': trans_code})
2920 info['automatic_captions'] = automatic_captions
2921 info['subtitles'] = subtitles
7e72694b 2922
545cc85d 2923 parsed_url = compat_urllib_parse_urlparse(url)
2924 for component in [parsed_url.fragment, parsed_url.query]:
2925 query = compat_parse_qs(component)
2926 for k, v in query.items():
2927 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
2928 d_k += '_time'
2929 if d_k not in info and k in s_ks:
2930 info[d_k] = parse_duration(query[k][0])
822b9d9c
RA
2931
2932 # Youtube Music Auto-generated description
822b9d9c 2933 if video_description:
38d70284 2934 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
822b9d9c 2935 if mobj:
822b9d9c
RA
2936 release_year = mobj.group('release_year')
2937 release_date = mobj.group('release_date')
2938 if release_date:
2939 release_date = release_date.replace('-', '')
2940 if not release_year:
545cc85d 2941 release_year = release_date[:4]
2942 info.update({
2943 'album': mobj.group('album'.strip()),
2944 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
2945 'track': mobj.group('track').strip(),
2946 'release_date': release_date,
cc2db878 2947 'release_year': int_or_none(release_year),
545cc85d 2948 })
7e72694b 2949
545cc85d 2950 initial_data = None
2951 if webpage:
2952 initial_data = self._extract_yt_initial_variable(
2953 webpage, self._YT_INITIAL_DATA_RE, video_id,
2954 'yt initial data')
2955 if not initial_data:
99e9e001 2956 query = {'videoId': video_id}
2957 query.update(self._get_checkok_params())
109dd3b2 2958 initial_data = self._extract_response(
2959 item_id=video_id, ep='next', fatal=False,
99e9e001 2960 ytcfg=master_ytcfg, query=query,
2961 headers=self.generate_api_headers(ytcfg=master_ytcfg),
109dd3b2 2962 note='Downloading initial data API JSON')
545cc85d 2963
c60ee3a2 2964 try:
2965 # This will error if there is no livechat
2966 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
ecdc9049 2967 info.setdefault('subtitles', {})['live_chat'] = [{
c60ee3a2 2968 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
2969 'video_id': video_id,
2970 'ext': 'json',
f6745c49 2971 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
c60ee3a2 2972 }]
2973 except (KeyError, IndexError, TypeError):
2974 pass
545cc85d 2975
2976 if initial_data:
7c365c21 2977 info['chapters'] = (
2978 self._extract_chapters_from_json(initial_data, duration)
2979 or self._extract_chapters_from_engagement_panel(initial_data, duration)
2980 or None)
545cc85d 2981
2982 contents = try_get(
2983 initial_data,
2984 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
2985 list) or []
2986 for content in contents:
2987 vpir = content.get('videoPrimaryInfoRenderer')
2988 if vpir:
2989 stl = vpir.get('superTitleLink')
2990 if stl:
fe93e2c4 2991 stl = self._get_text(stl)
545cc85d 2992 if try_get(
2993 vpir,
2994 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
2995 info['location'] = stl
2996 else:
2997 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
2998 if mobj:
2999 info.update({
3000 'series': mobj.group(1),
3001 'season_number': int(mobj.group(2)),
3002 'episode_number': int(mobj.group(3)),
3003 })
3004 for tlb in (try_get(
3005 vpir,
3006 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
3007 list) or []):
3008 tbr = tlb.get('toggleButtonRenderer') or {}
3009 for getter, regex in [(
3010 lambda x: x['defaultText']['accessibility']['accessibilityData'],
3011 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
3012 lambda x: x['accessibility'],
3013 lambda x: x['accessibilityData']['accessibilityData'],
3014 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
3015 label = (try_get(tbr, getter, dict) or {}).get('label')
3016 if label:
3017 mobj = re.match(regex, label)
3018 if mobj:
3019 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
3020 break
3021 sbr_tooltip = try_get(
3022 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
3023 if sbr_tooltip:
3024 like_count, dislike_count = sbr_tooltip.split(' / ')
3025 info.update({
3026 'like_count': str_to_int(like_count),
3027 'dislike_count': str_to_int(dislike_count),
3028 })
3029 vsir = content.get('videoSecondaryInfoRenderer')
3030 if vsir:
052e1350 3031 info['channel'] = self._get_text(vsir, ('owner', 'videoOwnerRenderer', 'title'))
545cc85d 3032 rows = try_get(
3033 vsir,
3034 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
3035 list) or []
3036 multiple_songs = False
3037 for row in rows:
3038 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
3039 multiple_songs = True
3040 break
3041 for row in rows:
3042 mrr = row.get('metadataRowRenderer') or {}
3043 mrr_title = mrr.get('title')
3044 if not mrr_title:
3045 continue
052e1350 3046 mrr_title = self._get_text(mrr, 'title')
3047 mrr_contents_text = self._get_text(mrr, ('contents', 0))
545cc85d 3048 if mrr_title == 'License':
3049 info['license'] = mrr_contents_text
3050 elif not multiple_songs:
3051 if mrr_title == 'Album':
3052 info['album'] = mrr_contents_text
3053 elif mrr_title == 'Artist':
3054 info['artist'] = mrr_contents_text
3055 elif mrr_title == 'Song':
3056 info['track'] = mrr_contents_text
3057
3058 fallbacks = {
3059 'channel': 'uploader',
3060 'channel_id': 'uploader_id',
3061 'channel_url': 'uploader_url',
3062 }
3063 for to, frm in fallbacks.items():
3064 if not info.get(to):
3065 info[to] = info.get(frm)
3066
3067 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
3068 v = info.get(s_k)
3069 if v:
3070 info[d_k] = v
b84071c0 3071
11f9be09 3072 is_private = get_first(video_details, 'isPrivate', expected_type=bool)
3073 is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
c224251a 3074 is_membersonly = None
b28f8d24 3075 is_premium = None
c224251a
M
3076 if initial_data and is_private is not None:
3077 is_membersonly = False
b28f8d24 3078 is_premium = False
47193e02 3079 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
3080 badge_labels = set()
3081 for content in contents:
3082 if not isinstance(content, dict):
3083 continue
3084 badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
3085 for badge_label in badge_labels:
3086 if badge_label.lower() == 'members only':
3087 is_membersonly = True
3088 elif badge_label.lower() == 'premium':
3089 is_premium = True
3090 elif badge_label.lower() == 'unlisted':
3091 is_unlisted = True
c224251a 3092
c224251a
M
3093 info['availability'] = self._availability(
3094 is_private=is_private,
b28f8d24 3095 needs_premium=is_premium,
c224251a
M
3096 needs_subscription=is_membersonly,
3097 needs_auth=info['age_limit'] >= 18,
3098 is_unlisted=None if is_private is None else is_unlisted)
3099
a2160aa4 3100 info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)
4ea3be0a 3101
11f9be09 3102 self.mark_watched(video_id, player_responses)
d77ab8e2 3103
545cc85d 3104 return info
c5e8d7af 3105
5f6a1245 3106
8bdd16b4 3107class YoutubeTabIE(YoutubeBaseInfoExtractor):
96565c7e 3108 IE_DESC = 'YouTube Tabs'
70d5c17b 3109 _VALID_URL = r'''(?x)
3110 https?://
3111 (?:\w+\.)?
3112 (?:
3113 youtube(?:kids)?\.com|
d9190e44 3114 %(invidious)s
70d5c17b 3115 )/
3116 (?:
fe03a6cd 3117 (?P<channel_type>channel|c|user|browse)/|
70d5c17b 3118 (?P<not_channel>
9ba5705a 3119 feed/|hashtag/|
70d5c17b 3120 (?:playlist|watch)\?.*?\blist=
3121 )|
d9190e44 3122 (?!(?:%(reserved_names)s)\b) # Direct URLs
70d5c17b 3123 )
3124 (?P<id>[^/?\#&]+)
d9190e44
RH
3125 ''' % {
3126 'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,
3127 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
3128 }
8bdd16b4 3129 IE_NAME = 'youtube:tab'
3130
81127aa5 3131 _TESTS = [{
da692b79 3132 'note': 'playlists, multipage',
8bdd16b4 3133 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
3134 'playlist_mincount': 94,
3135 'info_dict': {
3136 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3137 'title': 'Игорь Клейнер - Playlists',
3138 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 3139 'uploader': 'Игорь Клейнер',
3140 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
8bdd16b4 3141 },
3142 }, {
da692b79 3143 'note': 'playlists, multipage, different order',
8bdd16b4 3144 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3145 'playlist_mincount': 94,
3146 'info_dict': {
3147 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3148 'title': 'Игорь Клейнер - Playlists',
3149 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 3150 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3151 'uploader': 'Игорь Клейнер',
8bdd16b4 3152 },
201c1459 3153 }, {
da692b79 3154 'note': 'playlists, series',
201c1459 3155 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
3156 'playlist_mincount': 5,
3157 'info_dict': {
3158 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3159 'title': '3Blue1Brown - Playlists',
3160 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
da692b79 3161 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3162 'uploader': '3Blue1Brown',
201c1459 3163 },
8bdd16b4 3164 }, {
da692b79 3165 'note': 'playlists, singlepage',
8bdd16b4 3166 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3167 'playlist_mincount': 4,
3168 'info_dict': {
3169 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3170 'title': 'ThirstForScience - Playlists',
3171 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
deaec5af 3172 'uploader': 'ThirstForScience',
3173 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
8bdd16b4 3174 }
3175 }, {
3176 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3177 'only_matching': True,
3178 }, {
da692b79 3179 'note': 'basic, single video playlist',
0e30a7b9 3180 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
81127aa5 3181 'info_dict': {
0e30a7b9 3182 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3183 'uploader': 'Sergey M.',
3184 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3867038a 3185 'title': 'youtube-dl public playlist',
81127aa5 3186 },
0e30a7b9 3187 'playlist_count': 1,
9291475f 3188 }, {
da692b79 3189 'note': 'empty playlist',
0e30a7b9 3190 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
9291475f 3191 'info_dict': {
0e30a7b9 3192 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3193 'uploader': 'Sergey M.',
3194 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3867038a 3195 'title': 'youtube-dl empty playlist',
9291475f
PH
3196 },
3197 'playlist_count': 0,
3198 }, {
da692b79 3199 'note': 'Home tab',
8bdd16b4 3200 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
9291475f 3201 'info_dict': {
8bdd16b4 3202 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3203 'title': 'lex will - Home',
3204 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3205 'uploader': 'lex will',
3206 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3207 },
8bdd16b4 3208 'playlist_mincount': 2,
9291475f 3209 }, {
da692b79 3210 'note': 'Videos tab',
8bdd16b4 3211 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
9291475f 3212 'info_dict': {
8bdd16b4 3213 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3214 'title': 'lex will - Videos',
3215 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3216 'uploader': 'lex will',
3217 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3218 },
8bdd16b4 3219 'playlist_mincount': 975,
9291475f 3220 }, {
da692b79 3221 'note': 'Videos tab, sorted by popular',
8bdd16b4 3222 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
9291475f 3223 'info_dict': {
8bdd16b4 3224 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3225 'title': 'lex will - Videos',
3226 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3227 'uploader': 'lex will',
3228 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3229 },
8bdd16b4 3230 'playlist_mincount': 199,
9291475f 3231 }, {
da692b79 3232 'note': 'Playlists tab',
8bdd16b4 3233 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
9291475f 3234 'info_dict': {
8bdd16b4 3235 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3236 'title': 'lex will - Playlists',
3237 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3238 'uploader': 'lex will',
3239 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3240 },
8bdd16b4 3241 'playlist_mincount': 17,
ac7553d0 3242 }, {
da692b79 3243 'note': 'Community tab',
8bdd16b4 3244 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
ac7553d0 3245 'info_dict': {
8bdd16b4 3246 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3247 'title': 'lex will - Community',
3248 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3249 'uploader': 'lex will',
3250 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 3251 },
3252 'playlist_mincount': 18,
87dadd45 3253 }, {
da692b79 3254 'note': 'Channels tab',
8bdd16b4 3255 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
87dadd45 3256 'info_dict': {
8bdd16b4 3257 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3258 'title': 'lex will - Channels',
3259 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3260 'uploader': 'lex will',
3261 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 3262 },
deaec5af 3263 'playlist_mincount': 12,
cd684175 3264 }, {
3265 'note': 'Search tab',
3266 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
3267 'playlist_mincount': 40,
3268 'info_dict': {
3269 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3270 'title': '3Blue1Brown - Search - linear algebra',
3271 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3272 'uploader': '3Blue1Brown',
3273 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3274 },
6b08cdf6 3275 }, {
a0566bbf 3276 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3277 'only_matching': True,
3278 }, {
a0566bbf 3279 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3280 'only_matching': True,
3281 }, {
a0566bbf 3282 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3283 'only_matching': True,
3284 }, {
3285 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
3286 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3287 'info_dict': {
3288 'title': '29C3: Not my department',
3289 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3290 'uploader': 'Christiaan008',
3291 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
deaec5af 3292 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
8bdd16b4 3293 },
3294 'playlist_count': 96,
3295 }, {
3296 'note': 'Large playlist',
3297 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
6b08cdf6 3298 'info_dict': {
8bdd16b4 3299 'title': 'Uploads from Cauchemar',
3300 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
3301 'uploader': 'Cauchemar',
3302 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
13a75688 3303 },
8bdd16b4 3304 'playlist_mincount': 1123,
3305 }, {
da692b79 3306 'note': 'even larger playlist, 8832 videos',
8bdd16b4 3307 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
3308 'only_matching': True,
4b7df0d3
JMF
3309 }, {
3310 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
3311 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
3312 'info_dict': {
acf757f4
PH
3313 'title': 'Uploads from Interstellar Movie',
3314 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
13a75688 3315 'uploader': 'Interstellar Movie',
8bdd16b4 3316 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4b7df0d3 3317 },
481cc733 3318 'playlist_mincount': 21,
358de58c 3319 }, {
3320 'note': 'Playlist with "show unavailable videos" button',
3321 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
3322 'info_dict': {
3323 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
3324 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
3325 'uploader': 'Phim Siêu Nhân Nhật Bản',
3326 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
3327 },
da692b79 3328 'playlist_mincount': 200,
5d342002 3329 }, {
da692b79 3330 'note': 'Playlist with unavailable videos in page 7',
5d342002 3331 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
3332 'info_dict': {
3333 'title': 'Uploads from BlankTV',
3334 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
3335 'uploader': 'BlankTV',
3336 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
3337 },
da692b79 3338 'playlist_mincount': 1000,
8bdd16b4 3339 }, {
da692b79 3340 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
8bdd16b4 3341 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3342 'info_dict': {
3343 'title': 'Data Analysis with Dr Mike Pound',
3344 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3345 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
3346 'uploader': 'Computerphile',
deaec5af 3347 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
8bdd16b4 3348 },
3349 'playlist_mincount': 11,
3350 }, {
a0566bbf 3351 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
8bdd16b4 3352 'only_matching': True,
dacb3a86 3353 }, {
da692b79 3354 'note': 'Playlist URL that does not actually serve a playlist',
dacb3a86
S
3355 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
3356 'info_dict': {
3357 'id': 'FqZTN594JQw',
3358 'ext': 'webm',
3359 'title': "Smiley's People 01 detective, Adventure Series, Action",
3360 'uploader': 'STREEM',
3361 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
ec85ded8 3362 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
dacb3a86
S
3363 'upload_date': '20150526',
3364 'license': 'Standard YouTube License',
3365 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
3366 'categories': ['People & Blogs'],
3367 'tags': list,
dbdaaa23 3368 'view_count': int,
dacb3a86
S
3369 'like_count': int,
3370 'dislike_count': int,
3371 },
3372 'params': {
3373 'skip_download': True,
3374 },
13a75688 3375 'skip': 'This video is not available.',
dacb3a86 3376 'add_ie': [YoutubeIE.ie_key()],
481cc733 3377 }, {
8bdd16b4 3378 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
c0345b82 3379 'only_matching': True,
66b48727 3380 }, {
8bdd16b4 3381 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
66b48727 3382 'only_matching': True,
a0566bbf 3383 }, {
3384 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
3385 'info_dict': {
57015a4a 3386 'id': '3yImotZU3tw', # This will keep changing
a0566bbf 3387 'ext': 'mp4',
deaec5af 3388 'title': compat_str,
a0566bbf 3389 'uploader': 'Sky News',
3390 'uploader_id': 'skynews',
3391 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
da692b79 3392 'upload_date': r're:\d{8}',
3393 'description': compat_str,
a0566bbf 3394 'categories': ['News & Politics'],
3395 'tags': list,
3396 'like_count': int,
3397 'dislike_count': int,
3398 },
3399 'params': {
3400 'skip_download': True,
3401 },
da692b79 3402 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
a0566bbf 3403 }, {
3404 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
3405 'info_dict': {
3406 'id': 'a48o2S1cPoo',
3407 'ext': 'mp4',
3408 'title': 'The Young Turks - Live Main Show',
3409 'uploader': 'The Young Turks',
3410 'uploader_id': 'TheYoungTurks',
3411 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
3412 'upload_date': '20150715',
3413 'license': 'Standard YouTube License',
3414 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
3415 'categories': ['News & Politics'],
3416 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
3417 'like_count': int,
3418 'dislike_count': int,
3419 },
3420 'params': {
3421 'skip_download': True,
3422 },
3423 'only_matching': True,
3424 }, {
3425 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
3426 'only_matching': True,
3427 }, {
3428 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
3429 'only_matching': True,
09f1580e 3430 }, {
3431 'note': 'A channel that is not live. Should raise error',
3432 'url': 'https://www.youtube.com/user/numberphile/live',
3433 'only_matching': True,
3d3dddc9 3434 }, {
3435 'url': 'https://www.youtube.com/feed/trending',
3436 'only_matching': True,
3437 }, {
3d3dddc9 3438 'url': 'https://www.youtube.com/feed/library',
3439 'only_matching': True,
3440 }, {
3d3dddc9 3441 'url': 'https://www.youtube.com/feed/history',
3442 'only_matching': True,
3443 }, {
3d3dddc9 3444 'url': 'https://www.youtube.com/feed/subscriptions',
3445 'only_matching': True,
3446 }, {
3d3dddc9 3447 'url': 'https://www.youtube.com/feed/watch_later',
3448 'only_matching': True,
3449 }, {
ac56cf38 3450 'note': 'Recommended - redirects to home page.',
3d3dddc9 3451 'url': 'https://www.youtube.com/feed/recommended',
3452 'only_matching': True,
29f7c58a 3453 }, {
da692b79 3454 'note': 'inline playlist with not always working continuations',
29f7c58a 3455 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
3456 'only_matching': True,
3457 }, {
3458 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
3459 'only_matching': True,
3460 }, {
3461 'url': 'https://www.youtube.com/course',
3462 'only_matching': True,
3463 }, {
3464 'url': 'https://www.youtube.com/zsecurity',
3465 'only_matching': True,
3466 }, {
3467 'url': 'http://www.youtube.com/NASAgovVideo/videos',
3468 'only_matching': True,
3469 }, {
3470 'url': 'https://www.youtube.com/TheYoungTurks/live',
3471 'only_matching': True,
39ed931e 3472 }, {
3473 'url': 'https://www.youtube.com/hashtag/cctv9',
3474 'info_dict': {
3475 'id': 'cctv9',
3476 'title': '#cctv9',
3477 },
3478 'playlist_mincount': 350,
201c1459 3479 }, {
3480 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
3481 'only_matching': True,
9297939e 3482 }, {
da692b79 3483 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
9297939e 3484 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3485 'only_matching': True
fe03a6cd 3486 }, {
3487 'note': '/browse/ should redirect to /channel/',
3488 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
3489 'only_matching': True
3490 }, {
3491 'note': 'VLPL, should redirect to playlist?list=PL...',
3492 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3493 'info_dict': {
3494 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3495 'uploader': 'NoCopyrightSounds',
3496 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
3497 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
3498 'title': 'NCS Releases',
3499 },
3500 'playlist_mincount': 166,
18db7548 3501 }, {
3502 'note': 'Topic, should redirect to playlist?list=UU...',
3503 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
3504 'info_dict': {
3505 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
3506 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
3507 'title': 'Uploads from Royalty Free Music - Topic',
3508 'uploader': 'Royalty Free Music - Topic',
3509 },
3510 'expected_warnings': [
3511 'A channel/user page was given',
3512 'The URL does not have a videos tab',
3513 ],
3514 'playlist_mincount': 101,
3515 }, {
3516 'note': 'Topic without a UU playlist',
3517 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
3518 'info_dict': {
3519 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
3520 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
3521 },
3522 'expected_warnings': [
3523 'A channel/user page was given',
3524 'The URL does not have a videos tab',
3525 'Falling back to channel URL',
3526 ],
3527 'playlist_mincount': 9,
abcdd12b 3528 }, {
3529 'note': 'Youtube music Album',
3530 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
3531 'info_dict': {
3532 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
3533 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
3534 },
3535 'playlist_count': 50,
47193e02 3536 }, {
3537 'note': 'unlisted single video playlist',
3538 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3539 'info_dict': {
3540 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
3541 'uploader': 'colethedj',
3542 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3543 'title': 'yt-dlp unlisted playlist test',
3544 'availability': 'unlisted'
3545 },
3546 'playlist_count': 1,
ac56cf38 3547 }, {
3548 'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
3549 'url': 'https://www.youtube.com/feed/recommended',
3550 'info_dict': {
3551 'id': 'recommended',
3552 'title': 'recommended',
3553 },
3554 'playlist_mincount': 50,
3555 'params': {
3556 'skip_download': True,
3557 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
3558 },
3559 }, {
3560 'note': 'API Fallback: /videos tab, sorted by oldest first',
3561 'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',
3562 'info_dict': {
3563 'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
3564 'title': 'Cody\'sLab - Videos',
3565 'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',
3566 'uploader': 'Cody\'sLab',
3567 'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
3568 },
3569 'playlist_mincount': 650,
3570 'params': {
3571 'skip_download': True,
3572 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
3573 },
3574 }, {
3575 'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
3576 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
3577 'info_dict': {
3578 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
3579 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
3580 'title': 'Uploads from Royalty Free Music - Topic',
3581 'uploader': 'Royalty Free Music - Topic',
3582 },
3583 'expected_warnings': [
3584 'A channel/user page was given',
3585 'The URL does not have a videos tab',
3586 ],
3587 'playlist_mincount': 101,
3588 'params': {
3589 'skip_download': True,
3590 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
3591 },
29f7c58a 3592 }]
3593
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle ``url``.

    URLs that ``YoutubeIE`` reports as suitable are always rejected here;
    for every other URL the decision is delegated to the inherited
    ``suitable`` implementation.
    """
    if YoutubeIE.suitable(url):
        # Single-video URLs belong to YoutubeIE, never to the tab extractor
        return False
    return super(YoutubeTabIE, cls).suitable(url)
8bdd16b4 3598
3599 def _extract_channel_id(self, webpage):
3600 channel_id = self._html_search_meta(
3601 'channelId', webpage, 'channel id', default=None)
3602 if channel_id:
3603 return channel_id
3604 channel_url = self._html_search_meta(
3605 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3606 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3607 'twitter:app:url:googleplay'), webpage, 'channel url')
3608 return self._search_regex(
3609 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
3610 channel_url, 'channel id')
15f6397c 3611
8bdd16b4 3612 @staticmethod
cd7c66cf 3613 def _extract_basic_item_renderer(item):
3614 # Modified from _extract_grid_item_renderer
201c1459 3615 known_basic_renderers = (
3616 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
cd7c66cf 3617 )
3618 for key, renderer in item.items():
201c1459 3619 if not isinstance(renderer, dict):
cd7c66cf 3620 continue
201c1459 3621 elif key in known_basic_renderers:
3622 return renderer
3623 elif key.startswith('grid') and key.endswith('Renderer'):
3624 return renderer
8bdd16b4 3625
def _grid_entries(self, grid_renderer):
    """Yield one result per recognised item in ``grid_renderer['items']``.

    Each item is reduced to its renderer dict and then classified, in
    order of precedence, as a playlist, a video, a channel, or an item
    that only carries a generic navigation endpoint URL. Items that are
    not dicts, or that have no usable renderer, are skipped.
    """
    for grid_item in grid_renderer['items']:
        if not isinstance(grid_item, dict):
            continue
        renderer = self._extract_basic_item_renderer(grid_item)
        if not isinstance(renderer, dict):
            continue
        title = self._get_text(renderer, 'title')

        playlist_id = renderer.get('playlistId')
        video_id = renderer.get('videoId')
        channel_id = renderer.get('channelId')

        if playlist_id:
            # playlist
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=title)
        elif video_id:
            # video
            yield self._extract_video(renderer)
        elif channel_id:
            # channel
            yield self.url_result(
                'https://www.youtube.com/channel/%s' % channel_id,
                ie=YoutubeTabIE.ie_key(), video_title=title)
        else:
            # generic endpoint URL support
            endpoint_url = urljoin('https://www.youtube.com/', try_get(
                renderer,
                lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
                compat_str))
            if not endpoint_url:
                continue
            # The first extractor (in this order) that accepts the URL wins
            matching_ie = next(
                (ie for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE)
                 if ie.suitable(endpoint_url)),
                None)
            if matching_ie is not None:
                yield self.url_result(
                    endpoint_url, ie=matching_ie.ie_key(),
                    video_id=matching_ie._match_id(endpoint_url), video_title=title)
8bdd16b4 3665
3d3dddc9 3666 def _shelf_entries_from_content(self, shelf_renderer):
3667 content = shelf_renderer.get('content')
3668 if not isinstance(content, dict):
8bdd16b4 3669 return
cd7c66cf 3670 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3d3dddc9 3671 if renderer:
3672 # TODO: add support for nested playlists so each shelf is processed
3673 # as separate playlist
3674 # TODO: this includes only first N items
3675 for entry in self._grid_entries(renderer):
3676 yield entry
3677 renderer = content.get('horizontalListRenderer')
3678 if renderer:
3679 # TODO
3680 pass
8bdd16b4 3681
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield results for a shelf: its own URL first, then its content.

    When the shelf has an endpoint URL, a url result for it is yielded.
    If ``skip_channels`` is true and that URL contains ``/channels?``, the
    shelf is dropped entirely and nothing is yielded. Entries found in the
    shelf content are yielded afterwards in every other case.
    """
    endpoint = try_get(
        shelf_renderer,
        lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint)
    if shelf_url:
        # Skipping links to another channels, note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        points_to_channels = skip_channels and '/channels?' in shelf_url
        if points_to_channels:
            return
        yield self.url_result(
            shelf_url, video_title=self._get_text(shelf_renderer, 'title'))
    # Shelf may not contain shelf URL, fallback to extraction from content
    for entry in self._shelf_entries_from_content(shelf_renderer):
        yield entry
c5e8d7af 3698
8bdd16b4 3699 def _playlist_entries(self, video_list_renderer):
3700 for content in video_list_renderer['contents']:
3701 if not isinstance(content, dict):
3702 continue
3703 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3704 if not isinstance(renderer, dict):
3705 continue
3706 video_id = renderer.get('videoId')
3707 if not video_id:
3708 continue
3709 yield self._extract_video(renderer)
07aeced6 3710
def _rich_entries(self, rich_grid_renderer):
    """Yield the video nested under ``content.videoRenderer``, if any.

    Nothing is yielded when that renderer is missing or has no video id.
    """
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
3718
8bdd16b4 3719 def _video_entry(self, video_renderer):
3720 video_id = video_renderer.get('videoId')
3721 if video_id:
3722 return self._extract_video(video_renderer)
dacb3a86 3723
def _post_thread_entries(self, post_thread_renderer):
    """Yield the media referenced by one community post.

    In order: the attached video (if any), the attached playlist (if any),
    and then every link in the post text that ``YoutubeIE`` accepts, except
    a link to the video that was already found as the attachment.
    Nothing is yielded when the post renderer is missing.
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        video_entry = self._extract_video(attached_video)
        if video_entry:
            yield video_entry

    # playlist attachment
    playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    for text_run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(text_run, dict):
            continue
        link = try_get(
            text_run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not link or not YoutubeIE.suitable(link):
            continue
        linked_id = YoutubeIE._match_id(link)
        # Do not repeat the video already yielded as the attachment
        if linked_id != video_id:
            yield self.url_result(link, ie=YoutubeIE.ie_key(), video_id=linked_id)
dacb3a86 3759
8bdd16b4 3760 def _post_thread_continuation_entries(self, post_thread_continuation):
3761 contents = post_thread_continuation.get('contents')
3762 if not isinstance(contents, list):
3763 return
3764 for content in contents:
3765 renderer = content.get('backstagePostThreadRenderer')
3766 if not isinstance(renderer, dict):
3767 continue
3768 for entry in self._post_thread_entries(renderer):
3769 yield entry
07aeced6 3770
39ed931e 3771 r''' # unused
3772 def _rich_grid_entries(self, contents):
3773 for content in contents:
3774 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3775 if video_renderer:
3776 entry = self._video_entry(video_renderer)
3777 if entry:
3778 yield entry
3779 '''
ac56cf38 3780 def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
3462ffa8 3781
70d5c17b 3782 def extract_entries(parent_renderer): # this needs to called again for continuation to work with feeds
3783 contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
3784 for content in contents:
3785 if not isinstance(content, dict):
8bdd16b4 3786 continue
70d5c17b 3787 is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
3462ffa8 3788 if not is_renderer:
70d5c17b 3789 renderer = content.get('richItemRenderer')
3462ffa8 3790 if renderer:
3791 for entry in self._rich_entries(renderer):
3792 yield entry
3793 continuation_list[0] = self._extract_continuation(parent_renderer)
8bdd16b4 3794 continue
3462ffa8 3795 isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
3796 for isr_content in isr_contents:
3797 if not isinstance(isr_content, dict):
3798 continue
69184e41 3799
3800 known_renderers = {
3801 'playlistVideoListRenderer': self._playlist_entries,
3802 'gridRenderer': self._grid_entries,
3803 'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
3804 'backstagePostThreadRenderer': self._post_thread_entries,
3805 'videoRenderer': lambda x: [self._video_entry(x)],
3806 }
3807 for key, renderer in isr_content.items():
3808 if key not in known_renderers:
3809 continue
3810 for entry in known_renderers[key](renderer):
3811 if entry:
3812 yield entry
3462ffa8 3813 continuation_list[0] = self._extract_continuation(renderer)
69184e41 3814 break
70d5c17b 3815
3462ffa8 3816 if not continuation_list[0]:
3817 continuation_list[0] = self._extract_continuation(is_renderer)
70d5c17b 3818
3819 if not continuation_list[0]:
3820 continuation_list[0] = self._extract_continuation(parent_renderer)
3462ffa8 3821
ac56cf38 3822 continuation_list = [None] # Python 2 does not support nonlocal
29f7c58a 3823 tab_content = try_get(tab, lambda x: x['content'], dict)
3824 if not tab_content:
3825 return
3462ffa8 3826 parent_renderer = (
29f7c58a 3827 try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
3828 or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
70d5c17b 3829 for entry in extract_entries(parent_renderer):
3830 yield entry
3462ffa8 3831 continuation = continuation_list[0]
d069eca7 3832
8bdd16b4 3833 for page_num in itertools.count(1):
3834 if not continuation:
3835 break
99e9e001 3836 headers = self.generate_api_headers(
3837 ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
79360d99 3838 response = self._extract_response(
3839 item_id='%s page %s' % (item_id, page_num),
fe93e2c4 3840 query=continuation, headers=headers, ytcfg=ytcfg,
79360d99 3841 check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
a5c56234
M
3842
3843 if not response:
8bdd16b4 3844 break
ac56cf38 3845 # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
3846 # See: https://github.com/ytdl-org/youtube-dl/issues/28702
3847 visitor_data = self._extract_visitor_data(response) or visitor_data
ebf1b291 3848
69184e41 3849 known_continuation_renderers = {
3850 'playlistVideoListContinuation': self._playlist_entries,
3851 'gridContinuation': self._grid_entries,
3852 'itemSectionContinuation': self._post_thread_continuation_entries,
3853 'sectionListContinuation': extract_entries, # for feeds
3854 }
8bdd16b4 3855 continuation_contents = try_get(
69184e41 3856 response, lambda x: x['continuationContents'], dict) or {}
3857 continuation_renderer = None
3858 for key, value in continuation_contents.items():
3859 if key not in known_continuation_renderers:
3462ffa8 3860 continue
69184e41 3861 continuation_renderer = value
3862 continuation_list = [None]
3863 for entry in known_continuation_renderers[key](continuation_renderer):
3864 yield entry
3865 continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
3866 break
3867 if continuation_renderer:
3868 continue
c5e8d7af 3869
a1b535bd 3870 known_renderers = {
3871 'gridPlaylistRenderer': (self._grid_entries, 'items'),
3872 'gridVideoRenderer': (self._grid_entries, 'items'),
d61fc646 3873 'gridChannelRenderer': (self._grid_entries, 'items'),
a1b535bd 3874 'playlistVideoRenderer': (self._playlist_entries, 'contents'),
cd7c66cf 3875 'itemSectionRenderer': (extract_entries, 'contents'), # for feeds
9ba5705a 3876 'richItemRenderer': (extract_entries, 'contents'), # for hashtag
26fe8ffe 3877 'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
a1b535bd 3878 }
cce889b9 3879 on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
8bdd16b4 3880 continuation_items = try_get(
cce889b9 3881 on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
a1b535bd 3882 continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
3883 video_items_renderer = None
3884 for key, value in continuation_item.items():
3885 if key not in known_renderers:
8bdd16b4 3886 continue
a1b535bd 3887 video_items_renderer = {known_renderers[key][1]: continuation_items}
9ba5705a 3888 continuation_list = [None]
a1b535bd 3889 for entry in known_renderers[key][0](video_items_renderer):
3890 yield entry
9ba5705a 3891 continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
a1b535bd 3892 break
3893 if video_items_renderer:
3894 continue
8bdd16b4 3895 break
9558dcec 3896
@staticmethod
def _extract_selected_tab(tabs):
    """
    Return the renderer of the first tab whose 'selected' flag is True.
    Both 'tabRenderer' and 'expandableTabRenderer' entries are considered.
    Raises ExtractorError when none of the tabs is selected.
    """
    for tab_item in tabs:
        tab_renderer = dict_get(tab_item, ('tabRenderer', 'expandableTabRenderer')) or {}
        if tab_renderer.get('selected') is True:
            return tab_renderer
    raise ExtractorError('Unable to find selected tab')
b82f815f 3905
@classmethod
def _extract_uploader(cls, data):
    """
    Read the playlist owner from the secondary sidebar info renderer.
    Returns a dict that may hold 'uploader', 'uploader_id' and 'uploader_url';
    keys whose value is None are left out. An empty dict is returned when
    no owner entry is found.
    """
    sidebar = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner_run = try_get(
        sidebar, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if not owner_run:
        return {}

    fields = (
        ('uploader', owner_run.get('text')),
        ('uploader_id', try_get(
            owner_run, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)),
        ('uploader_url', urljoin(
            'https://www.youtube.com/',
            try_get(owner_run, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))),
    )
    return {name: value for name, value in fields if value is not None}
8bdd16b4 3920
def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """
    Build the playlist result for a page made of tabs.

    Metadata is taken from 'channelMetadataRenderer' when present, otherwise
    from 'playlistMetadataRenderer'. The entries come from the selected tab.
    Raises ExtractorError (via _extract_selected_tab) if no tab is selected.
    """
    active_tab = self._extract_selected_tab(tabs)

    channel_name = channel_url = channel_id = None
    meta_renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if meta_renderer:
        channel_name = meta_renderer.get('title')
        channel_url = meta_renderer.get('channelUrl')
        channel_id = meta_renderer.get('externalId')
    else:
        meta_renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    playlist_id = title = description = None
    tags, raw_thumbnails = [], []
    if meta_renderer:
        title = meta_renderer.get('title')
        description = meta_renderer.get('description', '')
        # Only set for channels; stays None for playlists
        playlist_id = channel_id
        tags = meta_renderer.get('keywords', '').split()
        # Prefer the avatar, fall back to the thumbnail of the primary sidebar
        raw_thumbnails = (
            try_get(meta_renderer, lambda x: x['avatar']['thumbnails'], list)
            or try_get(
                self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
                lambda x: x['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                list)
            or [])

    thumbnails = []
    for thumb in raw_thumbnails:
        if isinstance(thumb, dict):
            thumb_url = url_or_none(thumb.get('url'))
            if thumb_url:
                thumbnails.append({
                    'url': thumb_url,
                    'width': int_or_none(thumb.get('width')),
                    'height': int_or_none(thumb.get('height')),
                })

    if playlist_id is None:
        playlist_id = item_id
    if title is None:
        hashtag_title = try_get(
            data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
        title = hashtag_title or playlist_id
    # Append the name of the selected tab to the title
    title += format_field(active_tab, 'title', ' - %s')
    title += format_field(active_tab, 'expandedText', ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
    }
    availability = self._extract_availability(data)
    if availability:
        metadata['availability'] = availability
    if not channel_id:
        # No channel metadata: take the uploader from the sidebar instead
        metadata.update(self._extract_uploader(data))
    # The channel fields mirror the uploader fields
    for suffix in ('', '_id', '_url'):
        metadata['channel' + suffix] = metadata['uploader' + suffix]

    entries = self._entries(
        active_tab, playlist_id, ytcfg,
        self._extract_account_syncid(ytcfg, data),
        self._extract_visitor_data(data, ytcfg))
    return self.playlist_result(entries, **metadata)
73c4ac2c 3995
def _extract_mix_playlist(self, playlist, playlist_id, data, ytcfg):
    """
    Generate the entries of a Mix playlist, page by page.

    Each page is requested from the 'next' endpoint, starting from the last
    video of the previous page. Extraction stops when a page has no videos,
    has no videos after the last one already yielded, when the first video of
    the Mix shows up again, or when a response carries no playlist panel.

    @param playlist: playlist panel renderer of the first page
    @param playlist_id: id of the Mix
    @param data: initial response, used for account sync id and visitor data
    @param ytcfg: ytcfg used to build the API request
    """
    first_id = last_id = response = None
    for page_num in itertools.count(1):
        if not playlist:
            # The previous response had no playlist panel; nothing more to extract
            return
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Skip everything up to and including the last video already yielded
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        for video in videos[start:]:
            if video['id'] == first_id:
                self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                return
            yield video
        first_id = first_id or videos[0]['id']
        last_id = videos[-1]['id']
        # The last item may not be a playlistPanelVideoRenderer, in which case
        # the defaults below (last_id, len(videos), 'OAE%3D') are used
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
cd7c66cf 4030
def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """
    Build the result for a playlist panel found on a watch page.

    If the panel points to a playlist URL different from `url`, extraction is
    delegated to that URL through YoutubeTabIE; otherwise the panel is
    extracted here as a Mix playlist.
    """
    playlist_id = playlist.get('playlistId') or item_id
    title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], compat_str)

    endpoint_path = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, endpoint_path)

    if not playlist_url or playlist_url == url:
        # No distinct playlist URL to delegate to: extract as a Mix playlist
        return self.playlist_result(
            self._extract_mix_playlist(playlist, playlist_id, data, ytcfg),
            playlist_id=playlist_id, playlist_title=title)

    # Everything else is handled by the regular tab-based playlist URL
    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
c5e8d7af 4048
def _extract_availability(self, data):
    """
    Work out the availability of a playlist/tab from its primary sidebar renderer.
    Note: the playlist is only treated as public when a 'public' label is found
    @param data: response
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    badge_labels = self._extract_badges(sidebar)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge.
    # The label of the first selected dropdown entry is handled like a badge label
    dropdown_entries = try_get(
        sidebar, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
    selected_entry = next((
        entry for entry in dropdown_entries
        if try_get(entry, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool)), None)
    if selected_entry is not None:
        selected_label = self._get_text(selected_entry, ('privacyDropdownItemRenderer', 'label'))
        if selected_label:
            badge_labels.add(selected_label.lower())

    is_private = is_unlisted = None
    for badge in badge_labels:
        if badge == 'public':
            is_unlisted = is_private = False
        elif badge == 'private':
            is_private = True
        elif badge == 'unlisted':
            is_unlisted = True
    return self._availability(is_private, False, False, False, is_unlisted)
4080
@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """
    Return the first truthy `info_renderer` entry of the playlist sidebar items.
    Entries that are not of `expected_type` are ignored.
    Returns None when no such entry exists.
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    candidates = (
        try_get(sidebar_item, lambda x: x[info_renderer], expected_type)
        for sidebar_item in sidebar_items)
    return next((candidate for candidate in candidates if candidate), None)
4089
def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Request the playlist again so that unavailable videos are included.

    The browse id and params are taken from the 'show unavailable videos'
    menu button when it exists; otherwise default values are used.
    Returns None when the primary sidebar renderer is missing. The API
    request is made with fatal=False.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not sidebar:
        return

    browse_endpoint = {}
    menu_items = try_get(
        sidebar, lambda x: x['menu']['menuRenderer']['items'], list) or []
    for menu_item in menu_items:
        if not isinstance(menu_item, dict):
            continue
        nav_renderer = menu_item.get('menuNavigationItemRenderer')
        label = try_get(
            nav_renderer, lambda x: x['text']['simpleText'], compat_str) or ''
        if label.lower() == 'show unavailable videos':
            browse_endpoint = try_get(
                nav_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            break

    headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        visitor_data=self._extract_visitor_data(data, ytcfg))
    query = {
        'params': browse_endpoint.get('params') or 'wgYCCAA=',
        'browseId': browse_endpoint.get('browseId') or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')
358de58c 4125
def _extract_webpage(self, url, item_id, fatal=True):
    """
    Download the webpage and extract its initial data, retrying when needed.

    Up to 'extractor_retries' retries (default 3) are made after a network
    error (except HTTP 403/429) or when the initial data has neither
    'contents' nor 'currentVideoEndpoint'.

    @param fatal: raise on failure instead of only reporting a warning
    @returns tuple (webpage, data); either may be None if extraction failed
             and fatal is False
    """
    retries = self.get_param('extractor_retries', 3)
    # Starts at -1 so that the first attempt is number 0 and does not count as a retry
    count = -1
    webpage = data = last_error = None
    while count < retries:
        count += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if last_error:
            self.report_warning('%s. Retrying ...' % last_error)
        try:
            webpage = self._download_webpage(
                url, item_id,
                note='Downloading webpage%s' % (' (retry #%d)' % count if count else '',))
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as e:
            if isinstance(e.cause, network_exceptions):
                # HTTP 403 and 429 are not retried
                if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
                    last_error = error_to_compat_str(e.cause or e.msg)
                    if count < retries:
                        continue
            # Not retryable, or out of retries
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            break
        else:
            try:
                self._extract_and_report_alerts(data)
            except ExtractorError as e:
                if fatal:
                    raise
                self.report_warning(error_to_compat_str(e))
                break

            # The data is considered complete if either of these keys has a value
            if dict_get(data, ('contents', 'currentVideoEndpoint')):
                break

            last_error = 'Incomplete yt initial data received'
            if count >= retries:
                if fatal:
                    raise ExtractorError(last_error)
                self.report_warning(last_error)
                break

    return webpage, data
4171
def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """
    Get the initial data of a tab page.

    The webpage is tried first, unless 'webpage' is listed in the 'skip'
    extractor argument. If that gives no data, the data is requested through
    _extract_tab_endpoint instead.

    @returns tuple (data, ytcfg)
    """
    data = None
    if 'webpage' not in self._configuration_arg('skip'):
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)
    if data:
        return data, ytcfg

    # No data from the webpage: fall back to the API
    if not ytcfg and self.is_authenticated:
        msg = 'Playlists that require authentication may not extract correctly without a successful webpage download.'
        if 'authcheck' not in self._configuration_arg('skip') and fatal:
            raise ExtractorError(
                msg + ' If you are not downloading private content, or your cookies are only for the first account and channel,'
                      ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
                expected=True)
        self.report_warning(msg, only_once=True)
    api_data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return api_data, ytcfg
4188
def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """
    Resolve `url` with the 'navigation/resolve_url' API and request the
    endpoint it resolves to ('browse' or 'next').

    Raises an expected ExtractorError if the URL does not resolve to either
    endpoint and fatal is True; otherwise a warning is reported and None is
    returned.
    """
    headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolve_response = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)

    # (key in the resolve response, API endpoint to query), in order of preference
    for endpoint_key, api_ep in (('browseEndpoint', 'browse'), ('watchEndpoint', 'next')):
        params = try_get(resolve_response, lambda x: x['endpoint'][endpoint_key], dict)
        if not params:
            continue
        return self._extract_response(
            item_id=item_id, query=params, ep=api_ep, headers=headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint'))

    err_note = 'Failed to resolve url (does the playlist exist?)'
    if not fatal:
        self.report_warning(err_note, item_id)
        return
    raise ExtractorError(err_note, expected=True)
4206
@staticmethod
def _smuggle_data(entries, data):
    """
    Lazily yield each of `entries`, smuggling `data` into its 'url' first.
    The entries are yielded unchanged when `data` is empty.
    """
    for item in entries:
        if data:
            item['url'] = smuggle_url(item['url'], data)
        yield item
4213
def _real_extract(self, url):
    """
    Extract `url` and pass the smuggled data on to every resulting entry.
    Music URLs are flagged with 'is_music_url' in the smuggled data.
    """
    url, smuggled_data = unsmuggle_url(url, {})
    if self.is_music_url(url):
        smuggled_data['is_music_url'] = True
    result = self.__real_extract(url, smuggled_data)
    if result.get('entries'):
        result['entries'] = self._smuggle_data(result['entries'], smuggled_data)
    return result
4222
# Splits a URL into 'pre' (the part matched by _VALID_URL), an optional 'tab'
# ('/name', only tried when the 'channel_type' group matched) and 'post' (the rest).
# NOTE(review): 'channel_type' is presumably a named group of _VALID_URL - confirm
_url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)
4224
def __real_extract(self, url, smuggled_data):
    """
    Extract a tab, playlist or channel URL.

    The URL is first normalized (host, music URLs, channel home pages and
    watch URLs without video id), then its data is downloaded and handed to
    _extract_from_tabs or _extract_from_playlist. If neither applies but a
    video id is known, the result is a redirect to that video.

    @param smuggled_data: dict; only the 'is_music_url' key is read here
    Raises ExtractorError if the page cannot be recognized.
    """
    item_id = self._match_id(url)
    # Always query www.youtube.com, whatever host was given
    url = compat_urlparse.urlunparse(
        compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    def get_mobj(url):
        # Match groups of _url_re as a dict, with unmatched groups set to ''
        mobj = self._url_re.match(url).groupdict()
        mobj.update((k, '') for k, v in mobj.items() if v is None)
        return mobj

    mobj = get_mobj(url)
    # Youtube returns incomplete data if tabname is not lower case
    pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']
    if is_channel:
        if smuggled_data.get('is_music_url'):
            if item_id[:2] == 'VL':
                # Youtube music VL channels have an equivalent playlist
                item_id = item_id[2:]
                pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
            elif item_id[:2] == 'MP':
                # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
                mdata = self._extract_tab_endpoint(
                    'https://music.youtube.com/channel/%s' % item_id, item_id, default_client='web_music')
                murl = traverse_obj(
                    mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'), get_all=False, expected_type=compat_str)
                if not murl:
                    raise ExtractorError('Failed to resolve album to playlist.')
                return self.url_result(murl, ie=YoutubeTabIE.ie_key())
            elif mobj['channel_type'] == 'browse':
                # Youtube music /browse/ should be changed to /channel/
                pre = 'https://www.youtube.com/channel/%s' % item_id
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # Home URLs should redirect to /videos/
        self.report_warning(
            'A channel/user page was given. All the channel\'s videos will be downloaded. '
            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    # Rebuild the URL from the (possibly modified) parts and match it again
    url = ''.join((pre, tab, post))
    mobj = get_mobj(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
        url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
        mobj = get_mobj(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return self.url_result(f'https://www.youtube.com/watch?v={video_id}', ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

    data, ytcfg = self._extract_data(url, item_id)

    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        tab_name = selected_tab.get('title', '')
        if 'no-youtube-channel-redirect' not in compat_opts:
            if mobj['tab'] == '/live':
                # Live tab should have redirected to the video
                raise ExtractorError('The channel is not currently live', expected=True)
            # The /videos tab was requested, but a different tab is selected
            if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
                if not mobj['not_channel'] and item_id[:2] == 'UC':
                    # Topic channels don't have /videos. Use the equivalent playlist instead
                    self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
                    pl_id = 'UU%s' % item_id[2:]
                    pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
                    try:
                        # All four names are only rebound if the playlist extraction succeeds
                        data, ytcfg, item_id, url = *self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True), pl_id, pl_url
                    except ExtractorError:
                        self.report_warning('The playlist gave error. Falling back to channel URL')
                else:
                    self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))

    self.write_debug('Final URL: %s' % url)

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)
    # `data` may have been replaced above, so look up the tabs again
    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        return self._extract_from_tabs(item_id, ytcfg, data, tabs)

    playlist = try_get(
        data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

    # Neither tabs nor a playlist panel: fall back to the video, if any
    video_id = try_get(
        data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
        compat_str) or video_id
    if video_id:
        if mobj['tab'] != '/live':  # live tab is expected to redirect to video
            self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
        return self.url_result(f'https://www.youtube.com/watch?v={video_id}', ie=YoutubeIE.ie_key(), video_id=video_id)

    raise ExtractorError('Unable to recognize tab page')
c5e8d7af 4336
c5e8d7af 4337
class YoutubePlaylistIE(InfoExtractor):
    # Matches bare playlist ids and playlist URLs that YoutubeTabIE does not
    # handle, and hands them over to YoutubeTabIE as a /playlist URL
    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                %(invidious)s
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 654,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """
        Reject URLs that YoutubeTabIE handles and URLs that carry a video id
        ('v' query parameter); otherwise defer to the default URL matching.
        """
        if YoutubeTabIE.suitable(url):
            return False
        # NOTE(review): kept as a function-level import as in the original;
        # presumably needed when this method is used outside this module - confirm
        from ..utils import parse_qs
        has_video_id = bool(parse_qs(url).get('v', [None])[0])
        return not has_video_id and super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """
        Redirect to the /playlist URL handled by YoutubeTabIE. The query of the
        given URL is kept; a bare id becomes the 'list' parameter. Music URLs
        are marked by smuggling 'is_music_url' into the new URL.
        """
        playlist_id = self._match_id(url)
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        query = parse_qs(url) or {'list': playlist_id}
        playlist_url = update_url_query('https://www.youtube.com/playlist', query)
        if is_music_url:
            playlist_url = smuggle_url(playlist_url, {'is_music_url': True})
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
29f7c58a 4423
4424
class YoutubeYtBeIE(InfoExtractor):
    # Handles youtu.be short links that also carry a playlist id
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """
        Redirect to the equivalent watch URL (with both video and playlist id),
        to be handled by YoutubeTabIE.
        """
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 4463
4464
class YoutubeYtUserIE(InfoExtractor):
    # Maps the "ytuser:<name>" pseudo-URL to the /videos page of that user
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Redirect to the user's /videos page, handled by YoutubeTabIE."""
        user_id = self._match_id(url)
        videos_url = 'https://www.youtube.com/user/%s/videos' % user_id
        return self.url_result(videos_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 4478
b05654f0 4479
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    # Maps the ":ytfav" keyword (and its spelling variants) to the 'LL' playlist
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Redirect to the 'LL' playlist, handled by YoutubeTabIE."""
        liked_videos_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_videos_url, ie=YoutubeTabIE.ie_key())
4497
4498
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    IE_DESC = 'YouTube searches'
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Value of the 'params' field of the search request; not sent when empty
    _SEARCH_PARAMS = None
    _TESTS = []

    def _search_results(self, query):
        """
        Generate the videos found for `query`, requesting result pages from the
        'search' endpoint until a page has no results or no continuation.
        """
        request_query = {'query': query}
        if self._SEARCH_PARAMS:
            request_query['params'] = self._SEARCH_PARAMS
        continuation = {}
        for page_num in itertools.count(1):
            # From the second page on, this adds the continuation of the previous page
            request_query.update(continuation)
            search = self._extract_response(
                item_id='query "%s" page %s' % (query, page_num), ep='search', query=request_query,
                check_get_keys=('contents', 'onResponseReceivedCommands')
            )
            if not search:
                break
            # First page and continuation pages keep the results in different places
            sections = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not sections:
                break

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            continuation = None
            for section in sections:
                if not continuation:
                    continuation = self._extract_continuation({'contents': [section]})

                items = try_get(
                    section,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list) or []
                for item in items:
                    video = item.get('videoRenderer') if isinstance(item, dict) else None
                    # Only video renderers that have a video id are results
                    if isinstance(video, dict) and video.get('videoId'):
                        yield self._extract_video(video)

            if not continuation:
                break
b05654f0 4555
c9ae7b95 4556
class YoutubeSearchDateIE(YoutubeSearchIE):
    # Same as YoutubeSearchIE, with a fixed 'params' value for the search request
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube searches, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAI%3D'
75dff0ee 4562
c9ae7b95 4563
class YoutubeSearchURLIE(YoutubeSearchIE):
    # Handles /results URLs; the search terms and the 'sp' parameter are read from the URL
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    _SEARCH_KEY = None
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        """Return _VALID_URL as it is."""
        return cls._VALID_URL

    def _real_extract(self, url):
        """
        Search for the terms given in the 'search_query' (or 'q') parameter,
        using the 'sp' parameter of the URL as search params.
        """
        qs = parse_qs(url)
        search_terms = qs.get('search_query') or qs.get('q')
        query = search_terms[0]
        self._SEARCH_PARAMS = qs.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
3462ffa8 4591
4592
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Common base for the feed extractors.
    Each subclass has to provide a _FEED_NAME attribute; it is used to
    build both the extractor name and the feed URL.
    """
    _TESTS = []
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        """Extractor name, derived from the subclass's _FEED_NAME."""
        feed_name = self._FEED_NAME
        return 'youtube:%s' % feed_name

    def _real_extract(self, url):
        """Hand over to YoutubeTabIE using the feed's youtube.com URL."""
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
4609
4610
class YoutubeWatchLaterIE(InfoExtractor):
    """Shortcut extractor mapping ":ytwatchlater" to the "WL" playlist URL."""
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    IE_NAME = 'youtube:watchlater'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'only_matching': True,
        'url': ':ytwatchlater',
    }]

    def _real_extract(self, url):
        """Redirect to the watch-later playlist, handled by YoutubeTabIE."""
        playlist_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key())
3462ffa8 4623
4624
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the "recommended" feed.

    Matches the ":ytrec" / ":ytrecommended" keywords as well as the bare
    youtube.com homepage URL.
    """
    _FEED_NAME = 'recommended'
    # Overrides the base-class default of True
    _LOGIN_REQUIRED = False
    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    # Every sample is a match-only test
    _TESTS = [
        {'url': sample_url, 'only_matching': True}
        for sample_url in (
            ':ytrec',
            ':ytrecommended',
            'https://youtube.com',
        )
    ]
1ed5b5c9 4640
1ed5b5c9 4641
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the "subscriptions" feed (":ytsubs" keyword)."""
    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    # Accepts :ytsub, :ytsubs, :ytsubscription and :ytsubscriptions
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [
        {'url': ':ytsubs', 'only_matching': True},
        {'url': ':ytsubscriptions', 'only_matching': True},
    ]
1ed5b5c9 4653
1ed5b5c9 4654
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the "history" feed (":ythis" / ":ythistory")."""
    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [
        {'url': ':ythistory', 'only_matching': True},
    ]
1ed5b5c9
JMF
4663
4664
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch/attribution URLs that end before any video id.

    Such URLs typically result from an unquoted "&" on the command line;
    extraction always fails with an explanatory, expected error.
    """
    IE_DESC = False  # Do not list
    IE_NAME = 'youtube:truncated_url'
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    # Every sample is a match-only test
    _TESTS = [
        {'url': truncated_url, 'only_matching': True}
        for truncated_url in (
            'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
            'https://www.youtube.com/watch?',
            'https://www.youtube.com/watch?x-yt-cl=84503534',
            'https://www.youtube.com/watch?feature=foo',
            'https://www.youtube.com/watch?hl=en-GB',
            'https://www.youtube.com/watch?t=2372',
        )
    ]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError hinting at URL quoting."""
        hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .')
        raise ExtractorError(hint, expected=True)
772fd5cc
PH
4712
4713
class YoutubeClipIE(InfoExtractor):
    """Placeholder for youtube.com/clip/ URLs.

    Emits a warning and passes the URL on to the 'Generic' extractor.
    """
    IE_DESC = False  # Do not list
    IE_NAME = 'youtube:clip'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/'

    def _real_extract(self, url):
        """Warn that clips are unsupported, then delegate the same URL."""
        notice = 'YouTube clips are not currently supported. The entire video will be downloaded instead'
        self.report_warning(notice)
        return self.url_result(url, 'Generic')
4722
4723
772fd5cc
PH
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose ``v=`` value is only 1 to 10 characters long.

    Extraction always fails with an expected error naming the partial id.
    """
    IE_DESC = False  # Do not list
    IE_NAME = 'youtube:truncated_id'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [
        {'url': 'https://www.youtube.com/watch?v=N_708QY7Ob', 'only_matching': True},
    ]

    def _real_extract(self, url):
        """Raise an expected ExtractorError reporting the incomplete id."""
        video_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url)
        raise ExtractorError(message, expected=True)