]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
[ThisOldHouseIE] Add support for premium videos (#2358)
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
d92f5d5a 5import calendar
109dd3b2 6import copy
fe93e2c4 7import datetime
adbc4ec4 8import functools
a5c56234 9import hashlib
0ca96d48 10import itertools
c5e8d7af 11import json
720c3099 12import math
c4417ddb 13import os.path
d77ab8e2 14import random
c5e8d7af 15import re
46383212 16import sys
8a784c74 17import time
e0df6211 18import traceback
adbc4ec4 19import threading
c5e8d7af 20
b05654f0 21from .common import InfoExtractor, SearchInfoExtractor
4bb4a188 22from ..compat import (
edf3e38e 23 compat_chr,
29f7c58a 24 compat_HTTPError,
c5e8d7af 25 compat_parse_qs,
545cc85d 26 compat_str,
7fd002c0 27 compat_urllib_parse_unquote_plus,
15707c7e 28 compat_urllib_parse_urlencode,
7c80519c 29 compat_urllib_parse_urlparse,
7c61bd36 30 compat_urlparse,
4bb4a188 31)
545cc85d 32from ..jsinterp import JSInterpreter
4bb4a188 33from ..utils import (
720c3099 34 bug_reports_message,
c5e8d7af 35 clean_html,
d92f5d5a 36 datetime_from_str,
11f9be09 37 dict_get,
358de58c 38 error_to_compat_str,
c5e8d7af 39 ExtractorError,
2d30521a 40 float_or_none,
11f9be09 41 format_field,
dd27fd17 42 int_or_none,
641ad5d8 43 is_html,
34921b43 44 join_nonempty,
94278f72 45 mimetype2ext,
9c0d7f49 46 network_exceptions,
a6213a49 47 NO_DEFAULT,
11f9be09 48 orderedSet,
6310acf5 49 parse_codecs,
49bd8c66 50 parse_count,
7c80519c 51 parse_duration,
7ea65411 52 parse_iso8601,
4dfbf869 53 parse_qs,
dca3ff4a 54 qualities,
c0ac49bc 55 remove_end,
3995d37d 56 remove_start,
cf7e015f 57 smuggle_url,
dbdaaa23 58 str_or_none,
c93d53f5 59 str_to_int,
f3aa3c3f 60 strftime_or_none,
7c365c21 61 traverse_obj,
556dbe7f 62 try_get,
c5e8d7af
PH
63 unescapeHTML,
64 unified_strdate,
f0d785d3 65 unified_timestamp,
cf7e015f 66 unsmuggle_url,
8bdd16b4 67 update_url_query,
21c340b8 68 url_or_none,
fe93e2c4 69 urljoin,
7c365c21 70 variadic,
c5e8d7af
PH
71)
72
5f6a1245 73
def get_first(obj, keys, **kwargs):
    """
    Look up `keys` inside every member of `obj` and return only the first match.
    `keys` may be a single key or a path of keys; any other keyword arguments
    are passed through to traverse_obj unchanged.
    """
    path = (..., *variadic(keys))
    return traverse_obj(obj, path, **kwargs, get_all=False)
76
77
# Per-client InnerTube configuration, keyed by client name.
# Entries that omit INNERTUBE_API_KEY, INNERTUBE_HOST or REQUIRE_JS_PLAYER
# receive defaults from build_innertube_clients(), which also assigns a
# 'priority' to every entry and adds '<client>_agegate' variants.
# any clients starting with _ cannot be explicitly requested by the user
INNERTUBE_CLIENTS = {
    'web': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20211221.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1
    },
    'web_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_EMBEDDED_PLAYER',
                'clientVersion': '1.20211215.00.01',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 56
    },
    'web_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        # The only entry with an explicit host; the others get the default
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_REMIX',
                'clientVersion': '1.20211213.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
    },
    'web_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_CREATOR',
                'clientVersion': '1.20211220.02.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
    },
    'android': {
        'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID',
                'clientVersion': '16.49',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
        'REQUIRE_JS_PLAYER': False
    },
    'android_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_EMBEDDED_PLAYER',
                'clientVersion': '16.49',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
        'REQUIRE_JS_PLAYER': False
    },
    'android_music': {
        'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_MUSIC',
                'clientVersion': '4.57',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
        'REQUIRE_JS_PLAYER': False
    },
    'android_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_CREATOR',
                'clientVersion': '21.47',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
        'REQUIRE_JS_PLAYER': False
    },
    # iOS clients have HLS live streams. Setting device model to get 60fps formats.
    # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
    'ios': {
        'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS',
                'clientVersion': '16.46',
                'deviceModel': 'iPhone14,3',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
        'REQUIRE_JS_PLAYER': False
    },
    # No INNERTUBE_API_KEY here: build_innertube_clients() supplies the default
    'ios_embedded': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MESSAGES_EXTENSION',
                'clientVersion': '16.46',
                'deviceModel': 'iPhone14,3',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
        'REQUIRE_JS_PLAYER': False
    },
    'ios_music': {
        'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MUSIC',
                'clientVersion': '4.57',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
        'REQUIRE_JS_PLAYER': False
    },
    # No INNERTUBE_API_KEY here: build_innertube_clients() supplies the default
    'ios_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_CREATOR',
                'clientVersion': '21.47',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
        'REQUIRE_JS_PLAYER': False
    },
    # mweb has 'ultralow' formats
    # See: https://github.com/yt-dlp/yt-dlp/pull/557
    'mweb': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'MWEB',
                'clientVersion': '2.20211221.01.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 2
    }
}
224
225
def build_innertube_clients():
    """
    Complete the INNERTUBE_CLIENTS table in place.

    Every entry gets default API key / host / REQUIRE_JS_PLAYER values, an 'hl'
    of 'en' in its client context and a 'priority' derived from its base client
    (the part of the name before the first underscore). Each base client also
    gets an '<client>_agegate' copy with an embedded screen and thirdParty
    context; '_embedded' clients get the thirdParty context as well.
    """
    embed_context = {
        'embedUrl': 'https://google.com',  # Can be any valid URL
    }
    base_clients = ('android', 'web', 'ios', 'mweb')
    rank = qualities(base_clients[::-1])
    fallbacks = (
        ('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8'),
        ('INNERTUBE_HOST', 'www.youtube.com'),
        ('REQUIRE_JS_PLAYER', True),
    )

    # Iterate over a snapshot: agegate variants are inserted during the loop
    for name, cfg in tuple(INNERTUBE_CLIENTS.items()):
        for key, value in fallbacks:
            cfg.setdefault(key, value)
        context = cfg['INNERTUBE_CONTEXT']
        context['client'].setdefault('hl', 'en')
        cfg['priority'] = 10 * rank(name.partition('_')[0])

        if name in base_clients:
            variant = copy.deepcopy(cfg)
            variant['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
            variant['INNERTUBE_CONTEXT']['thirdParty'] = embed_context
            variant['priority'] -= 1
            INNERTUBE_CLIENTS[f'{name}_agegate'] = variant
            continue

        if name.endswith('_embedded'):
            context['thirdParty'] = embed_context
            cfg['priority'] -= 2
        else:
            cfg['priority'] -= 3


build_innertube_clients()
253
254
de7f3446 255class YoutubeBaseInfoExtractor(InfoExtractor):
b2e8bc1b 256 """Provide base functions for Youtube extractors"""
e00eb564 257
3462ffa8 258 _RESERVED_NAMES = (
3cd786db 259 r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'
3619f78d 260 r'shorts|movies|results|shared|hashtag|trending|feed|feeds|'
261 r'browse|oembed|get_video_info|iframe_api|s/player|'
cd7c66cf 262 r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')
3462ffa8 263
3619f78d 264 _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
265
b2e8bc1b 266 _NETRC_MACHINE = 'youtube'
3619f78d 267
b2e8bc1b
JMF
268 # If True it will raise an error if no login info is provided
269 _LOGIN_REQUIRED = False
270
d9190e44
RH
271 _INVIDIOUS_SITES = (
272 # invidious-redirect websites
273 r'(?:www\.)?redirect\.invidious\.io',
274 r'(?:(?:www|dev)\.)?invidio\.us',
275 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
276 r'(?:www\.)?invidious\.pussthecat\.org',
277 r'(?:www\.)?invidious\.zee\.li',
278 r'(?:www\.)?invidious\.ethibox\.fr',
279 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
4c968755
U
280 r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
281 r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
d9190e44
RH
282 # youtube-dl invidious instances list
283 r'(?:(?:www|no)\.)?invidiou\.sh',
284 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
285 r'(?:www\.)?invidious\.kabi\.tk',
286 r'(?:www\.)?invidious\.mastodon\.host',
287 r'(?:www\.)?invidious\.zapashcanon\.fr',
288 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
289 r'(?:www\.)?invidious\.tinfoil-hat\.net',
290 r'(?:www\.)?invidious\.himiko\.cloud',
291 r'(?:www\.)?invidious\.reallyancient\.tech',
292 r'(?:www\.)?invidious\.tube',
293 r'(?:www\.)?invidiou\.site',
294 r'(?:www\.)?invidious\.site',
295 r'(?:www\.)?invidious\.xyz',
296 r'(?:www\.)?invidious\.nixnet\.xyz',
297 r'(?:www\.)?invidious\.048596\.xyz',
298 r'(?:www\.)?invidious\.drycat\.fr',
299 r'(?:www\.)?inv\.skyn3t\.in',
300 r'(?:www\.)?tube\.poal\.co',
301 r'(?:www\.)?tube\.connect\.cafe',
302 r'(?:www\.)?vid\.wxzm\.sx',
303 r'(?:www\.)?vid\.mint\.lgbt',
304 r'(?:www\.)?vid\.puffyan\.us',
305 r'(?:www\.)?yewtu\.be',
306 r'(?:www\.)?yt\.elukerio\.org',
307 r'(?:www\.)?yt\.lelux\.fi',
308 r'(?:www\.)?invidious\.ggc-project\.de',
309 r'(?:www\.)?yt\.maisputain\.ovh',
310 r'(?:www\.)?ytprivate\.com',
311 r'(?:www\.)?invidious\.13ad\.de',
312 r'(?:www\.)?invidious\.toot\.koeln',
313 r'(?:www\.)?invidious\.fdn\.fr',
314 r'(?:www\.)?watch\.nettohikari\.com',
315 r'(?:www\.)?invidious\.namazso\.eu',
316 r'(?:www\.)?invidious\.silkky\.cloud',
317 r'(?:www\.)?invidious\.exonip\.de',
318 r'(?:www\.)?invidious\.riverside\.rocks',
319 r'(?:www\.)?invidious\.blamefran\.net',
320 r'(?:www\.)?invidious\.moomoo\.de',
321 r'(?:www\.)?ytb\.trom\.tf',
322 r'(?:www\.)?yt\.cyberhost\.uk',
323 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
324 r'(?:www\.)?qklhadlycap4cnod\.onion',
325 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
326 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
327 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
328 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
329 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
330 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
331 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
332 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
333 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
334 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
335 )
336
def _login(self):
    """
    Check the authentication options for YouTube.
    When _LOGIN_REQUIRED is set and neither the 'cookiefile' nor the
    'cookiesfrombrowser' param is given, raise_login_required() is called.
    A configured username only produces a warning pointing to cookies.
    """
    if self._LOGIN_REQUIRED:
        cookie_params = ('cookiefile', 'cookiesfrombrowser')
        if all(self.get_param(name) is None for name in cookie_params):
            self.raise_login_required(
                'Login details are needed to download this content', method='cookies')

    username, _ = self._get_login_info()
    if not username:
        return
    self.report_warning(f'Cannot login to YouTube using username and password. {self._LOGIN_HINTS["cookies"]}')
b2e8bc1b 351
def _initialize_consent(self):
    """
    Set an accepting CONSENT cookie for .youtube.com unless one is not needed.
    Nothing is done when a __Secure-3PSID cookie exists or the CONSENT cookie
    already contains 'YES'. The id of a PENDING consent cookie is reused when
    present, otherwise a random three-digit id is used.
    """
    jar = self._get_cookies('https://www.youtube.com/')
    if jar.get('__Secure-3PSID'):
        return

    pending_id = None
    consent_cookie = jar.get('CONSENT')
    if consent_cookie:
        current = consent_cookie.value
        if 'YES' in current:
            return
        pending_id = self._search_regex(
            r'PENDING\+(\d+)', current, 'consent', default=None)

    consent_id = pending_id or random.randint(100, 999)
    self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
8d81f3e3 366
def _initialize_pref(self):
    """
    Rewrite the PREF cookie for .youtube.com so that 'hl' is 'en'.
    Values from an existing PREF cookie are kept when it can be parsed;
    a parse failure is reported as a warning and the old values are dropped.
    """
    existing = self._get_cookies('https://www.youtube.com/').get('PREF')
    settings = {}
    if existing:
        try:
            settings = dict(compat_urlparse.parse_qsl(existing.value))
        except ValueError:
            self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
    settings['hl'] = 'en'
    self._set_cookie('.youtube.com', name='PREF', value=compat_urllib_parse_urlencode(settings))
378
def _real_initialize(self):
    """Prepare the PREF and CONSENT cookies, then run the login checks."""
    for step in (self._initialize_pref, self._initialize_consent, self._login):
        step()
c5e8d7af 383
a0566bbf 384 _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
29f7c58a 385 _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
386 _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
a0566bbf 387
def _get_default_ytcfg(self, client='web'):
    """Return an independent deep copy of the built-in config for `client`."""
    builtin = INNERTUBE_CLIENTS[client]
    return copy.deepcopy(builtin)
109dd3b2 390
def _get_innertube_host(self, client='web'):
    """Return the INNERTUBE_HOST configured for `client` in INNERTUBE_CLIENTS."""
    client_config = INNERTUBE_CLIENTS[client]
    return client_config['INNERTUBE_HOST']
109dd3b2 393
def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
    """
    try_get on `ytcfg`, falling back to the built-in config of `default_client`
    whenever the lookup on `ytcfg` gives a falsy result.
    """
    found = try_get(ytcfg, getter, expected_type)
    if found:
        return found
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)
398
def _extract_client_name(self, ytcfg, default_client='web'):
    """
    Return the client name from `ytcfg` (INNERTUBE_CLIENT_NAME, then the
    context's clientName), falling back to the `default_client` config.
    """
    getters = (
        lambda x: x['INNERTUBE_CLIENT_NAME'],
        lambda x: x['INNERTUBE_CONTEXT']['client']['clientName'],
    )
    return self._ytcfg_get_safe(ytcfg, getters, compat_str, default_client)
109dd3b2 403
def _extract_client_version(self, ytcfg, default_client='web'):
    """
    Return the client version from `ytcfg` (INNERTUBE_CLIENT_VERSION, then the
    context's clientVersion), falling back to the `default_client` config.
    """
    getters = (
        lambda x: x['INNERTUBE_CLIENT_VERSION'],
        lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion'],
    )
    return self._ytcfg_get_safe(ytcfg, getters, compat_str, default_client)
109dd3b2 408
def _extract_api_key(self, ytcfg=None, default_client='web'):
    """Return INNERTUBE_API_KEY from `ytcfg`, else from the `default_client` config."""
    def api_key_of(config):
        return config['INNERTUBE_API_KEY']

    return self._ytcfg_get_safe(ytcfg, api_key_of, compat_str, default_client)
411
def _extract_context(self, ytcfg=None, default_client='web'):
    """
    Return the first INNERTUBE_CONTEXT dict found in `ytcfg` or, failing that,
    in the built-in config of `default_client`, with its client language
    set to 'en'.
    """
    sources = (ytcfg, self._get_default_ytcfg(default_client))
    context = get_first(sources, 'INNERTUBE_CONTEXT', expected_type=dict)
    # Enforce language for extraction
    client = traverse_obj(context, 'client', expected_type=dict, default={})
    client['hl'] = 'en'
    return context
418
cf87314d 419 _SAPISID = None
420
def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
    """
    Build the 'SAPISIDHASH <time>_<sha1>' authorization value for `origin`.
    The SAPISID value is read from the YouTube cookies on first use and cached
    on self._SAPISID (False when no usable cookie exists).
    Returns None when no SAPISID is available.
    """
    now = round(time.time())
    if self._SAPISID is None:
        jar = self._get_cookies('https://www.youtube.com')
        # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
        # See: https://github.com/yt-dlp/yt-dlp/issues/393
        cookie = dict_get(jar, ('__Secure-3PAPISID', 'SAPISID'))
        if not (cookie and cookie.value):
            self._SAPISID = False
        else:
            self._SAPISID = cookie.value
            self.write_debug('Extracted SAPISID cookie')
            # SAPISID cookie is required if not already present
            if not jar.get('SAPISID'):
                self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
                self._set_cookie(
                    '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=now + 3600)

    sapisid = self._SAPISID
    if not sapisid:
        return None
    # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
    payload = f'{now} {sapisid} {origin}'
    digest = hashlib.sha1(payload.encode('utf-8')).hexdigest()
    return f'SAPISIDHASH {now}_{digest}'
a5c56234
M
445
446 def _call_api(self, ep, query, video_id, fatal=True, headers=None,
f4f751af 447 note='Downloading API JSON', errnote='Unable to download API page',
000c15a4 448 context=None, api_key=None, api_hostname=None, default_client='web'):
f4f751af 449
109dd3b2 450 data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
8bdd16b4 451 data.update(query)
11f9be09 452 real_headers = self.generate_api_headers(default_client=default_client)
f4f751af 453 real_headers.update({'content-type': 'application/json'})
454 if headers:
455 real_headers.update(headers)
545cc85d 456 return self._download_json(
109dd3b2 457 'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
a5c56234 458 video_id=video_id, fatal=fatal, note=note, errnote=errnote,
f4f751af 459 data=json.dumps(data).encode('utf8'), headers=real_headers,
460 query={'key': api_key or self._extract_api_key()})
461
def extract_yt_initial_data(self, item_id, webpage, fatal=True):
    """
    Find the ytInitialData JSON in `webpage` and return it parsed.
    The pattern followed by a known boundary is tried first, then the bare
    pattern. Returns None when nothing was found.
    """
    patterns = (
        r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
        self._YT_INITIAL_DATA_RE)
    raw = self._search_regex(patterns, webpage, 'yt initial data', fatal=fatal)
    if not raw:
        return None
    return self._parse_json(raw, item_id, fatal=fatal)
0c148415 468
@staticmethod
def _extract_session_index(*data):
    """
    Index of current account in account list.
    Returns the first SESSION_INDEX among the given ytcfgs that converts to an
    int, or None.
    See: https://github.com/yt-dlp/yt-dlp/pull/519
    """
    candidates = (
        int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
        for ytcfg in data)
    return next((index for index in candidates if index is not None), None)
479
480 # Deprecated?
def _extract_identity_token(self, ytcfg=None, webpage=None):
    """
    Return the ID_TOKEN string from `ytcfg` if it has one, otherwise search
    `webpage` for it. Returns None when neither source provides a token.
    """
    token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str) if ytcfg else None
    if token:
        return token
    if not webpage:
        return None
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None, fatal=False)
a1c5d2ca
M
490
@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg
    """
    datasync_getters = (
        lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
        lambda x: x['DATASYNC_ID'],
    )
    for source in args:
        # ytcfg includes channel_syncid if on secondary channel
        delegated = try_get(source, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
        if delegated:
            return delegated
        datasync_id = try_get(source, datasync_getters, compat_str) or ''
        parts = datasync_id.split('||')
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        if len(parts) < 2 or not parts[1]:
            continue
        return parts[0]
    return None
a1c5d2ca 509
@staticmethod
def _extract_visitor_data(*args):
    """
    Extracts visitorData from an API response or ytcfg
    Appears to be used to track session state
    @params response and/or ytcfg
    @returns the first string found at one of the known locations, else None
    """
    # The alternatives must be wrapped in their own tuple (note the trailing
    # comma) so they are tried as separate branches. Redundant parentheses
    # alone do not nest: `((a, b, c))` is just `(a, b, c)`, which would be
    # followed as one sequential path VISITOR_DATA -> ... -> ...
    alternatives = (
        'VISITOR_DATA',
        ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
        ('responseContext', 'visitorData'),
    )
    return get_first(args, (alternatives,), expected_type=str)
ac56cf38 519
@property
def is_authenticated(self):
    """True when a SAPISIDHASH authorization header can be generated."""
    auth_header = self._generate_sapisidhash_header()
    return bool(auth_header)
523
def extract_ytcfg(self, video_id, webpage):
    """
    Parse the argument of the first `ytcfg.set({...});` call in `webpage`.
    Returns an empty dict when there is no webpage, no match or invalid JSON.
    """
    if not webpage:
        return {}
    raw = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
        default='{}')
    parsed = self._parse_json(raw, video_id, fatal=False)
    return parsed or {}
531
def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """
    Build the HTTP headers for an API request.
    Explicit arguments take precedence; anything missing is extracted from
    `ytcfg` (client name and version fall back to the `default_client` config).
    Authorization and X-Origin are added when a SAPISIDHASH can be generated.
    Headers whose value is None are left out of the result.
    """
    origin = 'https://' + (api_hostname or self._get_innertube_host(default_client))
    client_name = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)
    headers = {
        'X-YouTube-Client-Name': compat_str(client_name),
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
        'Origin': origin,
        'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
        'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
        'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg)
    }

    if session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if session_index is not None:
        headers['X-Goog-AuthUser'] = session_index
    elif account_syncid:
        # An account sync id without a known session index uses index 0
        headers['X-Goog-AuthUser'] = 0

    auth = self._generate_sapisidhash_header(origin)
    if auth is not None:
        headers['Authorization'] = auth
        headers['X-Origin'] = origin

    return {name: value for name, value in headers.items() if value is not None}
29f7c58a 556
2d6659b9 557 @staticmethod
558 def _build_api_continuation_query(continuation, ctp=None):
559 query = {
560 'continuation': continuation
561 }
562 # TODO: Inconsistency with clickTrackingParams.
563 # Currently we have a fixed ctp contained within context (from ytcfg)
564 # and a ctp in root query for continuation.
565 if ctp:
566 query['clickTracking'] = {'clickTrackingParams': ctp}
567 return query
568
@classmethod
def _extract_next_continuation_data(cls, renderer):
    """
    Build a continuation query from the renderer's nextContinuationData or
    reloadContinuationData. Returns None when no continuation token is found.
    """
    getters = (
        lambda x: x['continuations'][0]['nextContinuationData'],
        lambda x: x['continuation']['reloadContinuationData'],
    )
    data = try_get(renderer, getters, dict)
    token = data.get('continuation') if data else None
    if not token:
        return None
    return cls._build_api_continuation_query(token, data.get('clickTrackingParams'))
2d6659b9 581
@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """
    Build a continuation query from a continuation endpoint dict.
    Returns None when the argument is not a dict or has no command token.
    """
    if not isinstance(continuation_ep, dict):
        return None
    token = try_get(
        continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
    if not token:
        return None
    return cls._build_api_continuation_query(token, continuation_ep.get('clickTrackingParams'))
2d6659b9 591
@classmethod
def _extract_continuation(cls, renderer):
    """
    Return the continuation query for `renderer`.
    Next/reload continuation data is preferred; otherwise the 'contents' and
    'items' lists are scanned for the first usable continuationItemRenderer.
    Returns None when there is no continuation.
    """
    direct = cls._extract_next_continuation_data(renderer)
    if direct:
        return direct

    children = [
        child
        for key in ('contents', 'items')
        for child in try_get(renderer, lambda x, k=key: x[k], list) or []
    ]
    endpoint_getters = (
        lambda x: x['continuationItemRenderer']['continuationEndpoint'],
        lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command'],
    )
    for child in children:
        if not isinstance(child, dict):
            continue
        endpoint = try_get(child, endpoint_getters, dict)
        query = cls._extract_continuation_ep_data(endpoint)
        if query:
            return query
    return None
612
@classmethod
def _extract_alerts(cls, data):
    """
    Yield (alert_type, message) for every alert in data['alerts'] that has
    both a type and a text. Malformed entries are skipped.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # The renderer values come from the server response as well, so
            # they get the same type check as the outer entries; a non-dict
            # value would otherwise raise AttributeError on .get()
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = cls._get_text(alert, 'text')
            if message:
                yield alert_type, message
625
c0ac49bc 626 def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
109dd3b2 627 errors = []
628 warnings = []
629 for alert_type, alert_message in alerts:
641ad5d8 630 if alert_type.lower() == 'error' and fatal:
109dd3b2 631 errors.append([alert_type, alert_message])
632 else:
633 warnings.append([alert_type, alert_message])
634
635 for alert_type, alert_message in (warnings + errors[:-1]):
c0ac49bc 636 self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message), only_once=only_once)
109dd3b2 637 if errors:
638 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
639
640 def _extract_and_report_alerts(self, data, *args, **kwargs):
641 return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
642
def _extract_badges(self, renderer: dict):
    """Return the set of lower-cased metadataBadgeRenderer labels in renderer['badges']."""
    labels = (
        try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
        for badge in try_get(renderer, lambda x: x['badges'], list) or [])
    return {label.lower() for label in labels if label}
650
@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """
    Return the first non-empty text found under any of the given paths.
    For each candidate, 'simpleText' is preferred; otherwise the 'text' of its
    'runs' (or of the candidate itself when it is a list) are joined, limited
    to the first `max_runs` runs when given. Without paths, `data` itself is
    the only candidate. Returns None when no text is found.
    """
    for path in path_list or [None]:
        if path is None:
            candidates = [data]
        else:
            candidates = traverse_obj(data, path, default=[])
            # Only a branching path yields a list of results; wrap single results
            branching = any(
                key is ... or isinstance(key, (list, tuple)) for key in variadic(path))
            if not branching:
                candidates = [candidates]

        for candidate in candidates:
            simple = try_get(candidate, lambda x: x['simpleText'], compat_str)
            if simple:
                return simple

            runs = try_get(candidate, lambda x: x['runs'], list) or []
            if not runs and isinstance(candidate, list):
                runs = candidate

            limit = min(len(runs), max_runs or len(runs))
            joined = ''.join(traverse_obj(runs[:limit], (..., 'text'), expected_type=str, default=[]))
            if joined:
                return joined
    return None
47193e02 672
def _get_count(self, data, *path_list):
    """
    Parse a count from the text found under `path_list`.
    parse_count is tried first; if it gives None, the leading run of digits
    and commas (after removing all whitespace) is converted instead.
    """
    count_text = self._get_text(data, *path_list) or ''
    parsed = parse_count(count_text)
    if parsed is not None:
        return parsed
    leading_digits = self._search_regex(
        r'^([\d,]+)', re.sub(r'\s', '', count_text), 'count', default=None)
    return str_to_int(leading_digits)
680
@staticmethod
def _extract_thumbnails(data, *path_list):
    """
    Collect thumbnail dicts (url, height, width) from `data`
    @param path_list: paths to the levels that contain a 'thumbnails' key;
                      `data` itself is used when no path is given
    """
    collected = []
    for path in path_list or [()]:
        entries = traverse_obj(data, (*variadic(path), 'thumbnails', ...), default=[])
        for entry in entries:
            url = url_or_none(entry.get('url'))
            if not url:
                continue
            # Sometimes youtube gives a wrong thumbnail URL. See:
            # https://github.com/yt-dlp/yt-dlp/issues/233
            # https://github.com/ytdl-org/youtube-dl/issues/28023
            if 'maxresdefault' in url:
                url = url.split('?')[0]
            collected.append({
                'url': url,
                'height': int_or_none(entry.get('height')),
                'width': int_or_none(entry.get('width')),
            })
    return collected
704
@staticmethod
def extract_relative_time(relative_time_text):
    """
    Extracts a relative time from string and converts to dt object
    e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today'
    Returns None when the text has no relative time or it cannot be converted.
    """
    match = re.search(
        r'(?P<start>today|yesterday|now)|(?P<time>\d+)\s*(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?\s*ago',
        relative_time_text)
    if not match:
        return None
    keyword = match.group('start')
    if keyword:
        return datetime_from_str(keyword)
    try:
        return datetime_from_str('now-%s%s' % (match.group('time'), match.group('unit')))
    except ValueError:
        return None
720
def _extract_time_text(self, renderer, *path_list):
    """
    Return (timestamp, text) for the time text found under `path_list`.
    A relative time ('3 days ago') is tried first, then absolute date parsing
    of the whole text and of a date-like part of it. A warning is issued when
    a non-empty text cannot be parsed; the timestamp is then None.
    """
    text = self._get_text(renderer, *path_list) or ''
    relative = self.extract_relative_time(text)

    if isinstance(relative, datetime.datetime):
        timestamp = calendar.timegm(relative.timetuple())
    else:
        timestamp = unified_timestamp(text)
        if not timestamp:
            date_part = self._search_regex(
                (r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*on)?\s*(.+\d)', r'\w+[\s,\.-]*\w+[\s,\.-]+20\d{2}'),
                text.lower(), 'time text', default=None)
            timestamp = unified_timestamp(date_part)

    if text and timestamp is None:
        self.report_warning('Cannot parse localized time text' + bug_reports_message(), only_once=True)
    return timestamp, text
737
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """
    Call the API endpoint `ep` with retries and return the JSON response.

    A request is retried up to 'extractor_retries' (default 3) extra times when
    - a network error other than HTTP 403/429 occurs,
    - the response carries an alert containing 'unknown error', or
    - none of `check_get_keys` is present in the response (incomplete data).
    Once the retries are used up, or on any other error, the error is raised
    if `fatal`; otherwise a warning is reported and None is returned.

    @param check_get_keys  keys of which at least one must have a value in the
                           response; no check is done when empty or None
    The context and API key are taken from `ytcfg`, with `default_client`
    supplying the fallbacks.
    """
    response = None
    last_error = None
    count = -1
    retries = self.get_param('extractor_retries', 3)
    if check_get_keys is None:
        check_get_keys = []
    while count < retries:
        count += 1
        if last_error:
            self.report_warning('%s. Retrying ...' % remove_end(last_error, '.'))
        try:
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client,
                note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
        except ExtractorError as e:
            if isinstance(e.cause, network_exceptions):
                if isinstance(e.cause, compat_HTTPError) and not is_html(e.cause.read(512)):
                    # A non-HTML error body may be JSON carrying YouTube's own message
                    e.cause.seek(0)
                    yt_error = try_get(
                        self._parse_json(e.cause.read().decode(), item_id, fatal=False),
                        lambda x: x['error']['message'], compat_str)
                    if yt_error:
                        self._report_alerts([('ERROR', yt_error)], fatal=False)
                # Downloading page may result in intermittent 5xx HTTP error
                # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                # We also want to catch all other network exceptions since errors in later pages can be troublesome
                # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
                if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
                    last_error = error_to_compat_str(e.cause or e.msg)
                    if count < retries:
                        continue
            if fatal:
                raise
            else:
                self.report_warning(error_to_compat_str(e))
                return

        else:
            try:
                self._extract_and_report_alerts(response, only_once=True)
            except ExtractorError as e:
                # YouTube servers may return errors we want to retry on in a 200 OK response
                # See: https://github.com/yt-dlp/yt-dlp/issues/839
                # Only retry while attempts remain: on the last attempt the
                # error must be raised/reported below, not silently dropped
                # by leaving the loop and returning the failed response
                if 'unknown error' in e.msg.lower() and count < retries:
                    last_error = e.msg
                    continue
                if fatal:
                    raise
                self.report_warning(error_to_compat_str(e))
                return
            if not check_get_keys or dict_get(response, check_get_keys):
                break
            # Youtube sometimes sends incomplete data
            # See: https://github.com/ytdl-org/youtube-dl/issues/28194
            last_error = 'Incomplete data received'
            if count >= retries:
                if fatal:
                    raise ExtractorError(last_error)
                else:
                    self.report_warning(last_error)
                    return
    return response
807
9297939e 808 @staticmethod
809 def is_music_url(url):
810 return re.match(r'https?://music\.youtube\.com/', url) is not None
811
def _extract_video(self, renderer):
    """
    Build a 'url' result (handled by YoutubeIE) from a video renderer dict,
    including the metadata available in the renderer: title, description,
    duration, view count, uploader, channel id, thumbnails, upload date,
    live status, release timestamp and availability.
    """
    video_id = renderer.get('videoId')
    title = self._get_text(renderer, 'title')
    description = self._get_text(renderer, 'descriptionSnippet')
    length_text = self._get_text(
        renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text'))
    duration = parse_duration(length_text)
    view_count = self._get_count(renderer, 'viewCountText')

    uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
    channel_id = traverse_obj(
        renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
        expected_type=str, get_all=False)
    timestamp, time_text = self._extract_time_text(renderer, 'publishedTimeText')
    scheduled_timestamp = str_to_int(
        traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))
    overlay_style = traverse_obj(
        renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
        get_all=False, expected_type=str)
    badges = self._extract_badges(renderer)
    thumbnails = self._extract_thumbnails(renderer, 'thumbnail')

    # A scheduled start time wins over "streamed ..." text, which wins over live markers
    if scheduled_timestamp is not None:
        live_status = 'is_upcoming'
    elif 'streamed' in time_text.lower():
        live_status = 'was_live'
    elif overlay_style == 'LIVE' or 'live now' in badges:
        live_status = 'is_live'
    else:
        live_status = None

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': f'https://www.youtube.com/watch?v={video_id}',
        'title': title,
        'description': description,
        'duration': duration,
        'view_count': view_count,
        'uploader': uploader,
        'channel_id': channel_id,
        'thumbnails': thumbnails,
        'upload_date': strftime_or_none(timestamp, '%Y%m%d'),
        'live_status': live_status,
        'release_timestamp': scheduled_timestamp,
        'availability': self._availability(
            needs_premium='premium' in badges, needs_subscription='members only' in badges)
    }
850
0c148415 851
360e1ca5 852class YoutubeIE(YoutubeBaseInfoExtractor):
96565c7e 853 IE_DESC = 'YouTube'
cb7dfeea 854 _VALID_URL = r"""(?x)^
c5e8d7af 855 (
edb53e2d 856 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 857 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
858 (?:www\.)?deturl\.com/www\.youtube\.com|
859 (?:www\.)?pwnyoutube\.com|
860 (?:www\.)?hooktube\.com|
861 (?:www\.)?yourepeat\.com|
862 tube\.majestyc\.net|
863 %(invidious)s|
864 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
865 (?:.*?\#/)? # handle anchor (#/) redirect urls
866 (?: # the various things that can precede the ID:
8fc54b12 867 (?:(?:v|embed|e|shorts)/(?!videoseries)) # v/ or embed/ or e/ or shorts/
c5e8d7af 868 |(?: # or the v= param in all its forms
f7000f3a 869 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 870 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 871 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
872 v=
873 )
f4b05232 874 ))
cbaed4bb
S
875 |(?:
876 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
877 vid\.plus| # or vid.plus/xxxx
878 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 879 %(invidious)s
cbaed4bb 880 )/
edb53e2d 881 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 882 )
c5e8d7af 883 )? # all until now is optional -> you can pass the naked ID
201c1459 884 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
c5e8d7af 885 (?(1).+)? # if we found the ID, everything can follow
9297939e 886 (?:\#|$)""" % {
d9190e44 887 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
bc2ca1bb 888 }
e40c758c 889 _PLAYER_INFO_RE = (
cc2db878 890 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
891 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 892 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 893 )
2c62dc26 894 _formats = {
c2d3cb4c 895 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
896 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
897 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
898 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
899 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
900 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
901 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
902 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 903 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 904 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
905 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
906 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
907 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
908 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
909 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 910 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 911 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
912 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 913
914
915 # 3D videos
c2d3cb4c 916 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
917 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
918 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
919 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 920 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
921 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
922 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 923
96fb5605 924 # Apple HTTP Live Streaming
11f12195 925 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 926 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
927 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
928 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
929 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
930 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 931 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
932 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
933
934 # DASH mp4 video
d23028a8
S
935 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
936 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
937 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
938 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
939 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 940 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
941 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
942 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
943 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
944 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
945 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
946 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 947
f6f1fc92 948 # Dash mp4 audio
d23028a8
S
949 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
950 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
951 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
952 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
953 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
954 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
955 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
956
957 # Dash webm
d23028a8
S
958 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
959 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
960 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
961 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
962 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
963 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
964 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
965 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
966 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
967 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
968 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
969 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
970 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
971 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
972 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 973 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
974 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
975 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
976 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
977 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
978 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
979 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
980
981 # Dash webm audio
d23028a8
S
982 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
983 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 984
0857baad 985 # Dash webm audio with opus inside
d23028a8
S
986 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
987 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
988 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 989
ce6b9a2d
PH
990 # RTMP (unnamed)
991 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
992
993 # av01 video only formats sometimes served with "unknown" codecs
9b5fa9ee
TOH
994 '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
995 '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
996 '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
997 '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
998 '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
999 '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
1000 '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
1001 '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
c5e8d7af 1002 }
29f7c58a 1003 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 1004
fd5c4aab
S
1005 _GEO_BYPASS = False
1006
78caa52a 1007 IE_NAME = 'youtube'
2eb88d95
PH
1008 _TESTS = [
1009 {
2d3d2997 1010 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
1011 'info_dict': {
1012 'id': 'BaW_jenozKc',
1013 'ext': 'mp4',
3867038a 1014 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
1015 'uploader': 'Philipp Hagemeister',
1016 'uploader_id': 'phihag',
ec85ded8 1017 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
ff9f925b 1018 'channel': 'Philipp Hagemeister',
dd4c4492
S
1019 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1020 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 1021 'upload_date': '20121002',
ff9f925b 1022 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
4bc3a23e 1023 'categories': ['Science & Technology'],
3867038a 1024 'tags': ['youtube-dl'],
556dbe7f 1025 'duration': 10,
dbdaaa23 1026 'view_count': int,
3e7c1224 1027 'like_count': int,
ff9f925b 1028 'availability': 'public',
1029 'playable_in_embed': True,
1030 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1031 'live_status': 'not_live',
1032 'age_limit': 0,
7c80519c 1033 'start_time': 1,
297a564b 1034 'end_time': 9,
2eb88d95 1035 }
0e853ca4 1036 },
fccd3771 1037 {
4bc3a23e
PH
1038 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1039 'note': 'Embed-only video (#1746)',
1040 'info_dict': {
1041 'id': 'yZIXLfi8CZQ',
1042 'ext': 'mp4',
1043 'upload_date': '20120608',
1044 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1045 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1046 'uploader': 'SET India',
94bfcd23 1047 'uploader_id': 'setindia',
ec85ded8 1048 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 1049 'age_limit': 18,
545cc85d 1050 },
1051 'skip': 'Private video',
fccd3771 1052 },
11b56058 1053 {
8bdd16b4 1054 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
1055 'note': 'Use the first video ID in the URL',
1056 'info_dict': {
1057 'id': 'BaW_jenozKc',
1058 'ext': 'mp4',
3867038a 1059 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
1060 'uploader': 'Philipp Hagemeister',
1061 'uploader_id': 'phihag',
ec85ded8 1062 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
976ae3ea 1063 'channel': 'Philipp Hagemeister',
1064 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1065 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
11b56058 1066 'upload_date': '20121002',
976ae3ea 1067 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
11b56058 1068 'categories': ['Science & Technology'],
3867038a 1069 'tags': ['youtube-dl'],
556dbe7f 1070 'duration': 10,
dbdaaa23 1071 'view_count': int,
11b56058 1072 'like_count': int,
976ae3ea 1073 'availability': 'public',
1074 'playable_in_embed': True,
1075 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1076 'live_status': 'not_live',
1077 'age_limit': 0,
34a7de29
S
1078 },
1079 'params': {
1080 'skip_download': True,
1081 },
11b56058 1082 },
dd27fd17 1083 {
2d3d2997 1084 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
1085 'note': '256k DASH audio (format 141) via DASH manifest',
1086 'info_dict': {
1087 'id': 'a9LDPn-MO4I',
1088 'ext': 'm4a',
1089 'upload_date': '20121002',
1090 'uploader_id': '8KVIDEO',
ec85ded8 1091 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
1092 'description': '',
1093 'uploader': '8KVIDEO',
1094 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 1095 },
4bc3a23e
PH
1096 'params': {
1097 'youtube_include_dash_manifest': True,
1098 'format': '141',
4919603f 1099 },
de3c7fe0 1100 'skip': 'format 141 not served anymore',
dd27fd17 1101 },
8bdd16b4 1102 # DASH manifest with encrypted signature
1103 {
1104 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1105 'info_dict': {
1106 'id': 'IB3lcPjvWLA',
1107 'ext': 'm4a',
1108 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1109 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1110 'duration': 244,
1111 'uploader': 'AfrojackVEVO',
1112 'uploader_id': 'AfrojackVEVO',
1113 'upload_date': '20131011',
cc2db878 1114 'abr': 129.495,
976ae3ea 1115 'like_count': int,
1116 'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',
1117 'playable_in_embed': True,
1118 'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',
1119 'view_count': int,
1120 'track': 'The Spark',
1121 'live_status': 'not_live',
1122 'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',
1123 'channel': 'Afrojack',
1124 'uploader_url': 'http://www.youtube.com/user/AfrojackVEVO',
1125 'tags': 'count:19',
1126 'availability': 'public',
1127 'categories': ['Music'],
1128 'age_limit': 0,
1129 'alt_title': 'The Spark',
8bdd16b4 1130 },
1131 'params': {
1132 'youtube_include_dash_manifest': True,
1133 'format': '141/bestaudio[ext=m4a]',
1134 },
1135 },
65c2fde2 1136 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
c522adb1 1137 {
65c2fde2 1138 'note': 'Embed allowed age-gate video',
2d3d2997 1139 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
1140 'info_dict': {
1141 'id': 'HtVdAasjOgU',
1142 'ext': 'mp4',
1143 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 1144 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 1145 'duration': 142,
c522adb1
JMF
1146 'uploader': 'The Witcher',
1147 'uploader_id': 'WitcherGame',
ec85ded8 1148 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 1149 'upload_date': '20140605',
34952f09 1150 'age_limit': 18,
976ae3ea 1151 'categories': ['Gaming'],
1152 'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',
1153 'availability': 'needs_auth',
1154 'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',
1155 'like_count': int,
1156 'channel': 'The Witcher',
1157 'live_status': 'not_live',
1158 'tags': 'count:17',
1159 'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',
1160 'playable_in_embed': True,
1161 'view_count': int,
c522adb1
JMF
1162 },
1163 },
65c2fde2 1164 {
1165 'note': 'Age-gate video with embed allowed in public site',
1166 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1167 'info_dict': {
1168 'id': 'HsUATh_Nc2U',
1169 'ext': 'mp4',
1170 'title': 'Godzilla 2 (Official Video)',
1171 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1172 'upload_date': '20200408',
1173 'uploader_id': 'FlyingKitty900',
1174 'uploader': 'FlyingKitty',
1175 'age_limit': 18,
976ae3ea 1176 'availability': 'needs_auth',
1177 'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',
1178 'uploader_url': 'http://www.youtube.com/user/FlyingKitty900',
1179 'channel': 'FlyingKitty',
1180 'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',
1181 'view_count': int,
1182 'categories': ['Entertainment'],
1183 'live_status': 'not_live',
1184 'tags': ['Flyingkitty', 'godzilla 2'],
1185 'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',
1186 'like_count': int,
1187 'duration': 177,
1188 'playable_in_embed': True,
65c2fde2 1189 },
1190 },
1191 {
1192 'note': 'Age-gate video embedable only with clientScreen=EMBED',
1193 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1194 'info_dict': {
1195 'id': 'Tq92D6wQ1mg',
1196 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
3619f78d 1197 'ext': 'mp4',
1198 'upload_date': '20191227',
65c2fde2 1199 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1200 'uploader': 'Projekt Melody',
1201 'description': 'md5:17eccca93a786d51bc67646756894066',
1202 'age_limit': 18,
976ae3ea 1203 'like_count': int,
1204 'availability': 'needs_auth',
1205 'uploader_url': 'http://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
1206 'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1207 'view_count': int,
1208 'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',
1209 'channel': 'Projekt Melody',
1210 'live_status': 'not_live',
1211 'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],
1212 'playable_in_embed': True,
1213 'categories': ['Entertainment'],
1214 'duration': 106,
1215 'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
65c2fde2 1216 },
1217 },
1218 {
1219 'note': 'Non-Agegated non-embeddable video',
1220 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1221 'info_dict': {
1222 'id': 'MeJVWBSsPAY',
1223 'ext': 'mp4',
1224 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1225 'uploader': 'Herr Lurik',
1226 'uploader_id': 'st3in234',
1227 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1228 'upload_date': '20130730',
976ae3ea 1229 'track': 'Such mich find mich',
1230 'age_limit': 0,
1231 'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],
1232 'like_count': int,
1233 'playable_in_embed': False,
1234 'creator': 'OOMPH!',
1235 'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',
1236 'view_count': int,
1237 'alt_title': 'Such mich find mich',
1238 'duration': 210,
1239 'channel': 'Herr Lurik',
1240 'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',
1241 'categories': ['Music'],
1242 'availability': 'public',
1243 'uploader_url': 'http://www.youtube.com/user/st3in234',
1244 'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',
1245 'live_status': 'not_live',
1246 'artist': 'OOMPH!',
65c2fde2 1247 },
1248 },
1249 {
1250 'note': 'Non-bypassable age-gated video',
1251 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1252 'only_matching': True,
1253 },
8bdd16b4 1254 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1255 # YouTube Red ad is not captured for creator
1256 {
1257 'url': '__2ABJjxzNo',
1258 'info_dict': {
1259 'id': '__2ABJjxzNo',
1260 'ext': 'mp4',
1261 'duration': 266,
1262 'upload_date': '20100430',
1263 'uploader_id': 'deadmau5',
1264 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 1265 'creator': 'deadmau5',
1266 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 1267 'uploader': 'deadmau5',
1268 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 1269 'alt_title': 'Some Chords',
976ae3ea 1270 'availability': 'public',
1271 'tags': 'count:14',
1272 'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',
1273 'view_count': int,
1274 'live_status': 'not_live',
1275 'channel': 'deadmau5',
1276 'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',
1277 'like_count': int,
1278 'track': 'Some Chords',
1279 'artist': 'deadmau5',
1280 'playable_in_embed': True,
1281 'age_limit': 0,
1282 'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',
1283 'categories': ['Music'],
1284 'album': 'Some Chords',
8bdd16b4 1285 },
1286 'expected_warnings': [
1287 'DASH manifest missing',
1288 ]
1289 },
067aa17e 1290 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
1291 {
1292 'url': 'lqQg6PlCWgI',
1293 'info_dict': {
1294 'id': 'lqQg6PlCWgI',
1295 'ext': 'mp4',
556dbe7f 1296 'duration': 6085,
90227264 1297 'upload_date': '20150827',
cbe2bd91 1298 'uploader_id': 'olympic',
ec85ded8 1299 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 1300 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
11f9be09 1301 'uploader': 'Olympics',
cbe2bd91 1302 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
976ae3ea 1303 'like_count': int,
1304 'release_timestamp': 1343767800,
1305 'playable_in_embed': True,
1306 'categories': ['Sports'],
1307 'release_date': '20120731',
1308 'channel': 'Olympics',
1309 'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],
1310 'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',
1311 'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',
1312 'age_limit': 0,
1313 'availability': 'public',
1314 'live_status': 'was_live',
1315 'view_count': int,
1316 'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',
cbe2bd91
PH
1317 },
1318 'params': {
1319 'skip_download': 'requires avconv',
e52a40ab 1320 }
cbe2bd91 1321 },
6271f1ca
PH
1322 # Non-square pixels
1323 {
1324 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1325 'info_dict': {
1326 'id': '_b-2C3KPAM0',
1327 'ext': 'mp4',
1328 'stretched_ratio': 16 / 9.,
556dbe7f 1329 'duration': 85,
6271f1ca
PH
1330 'upload_date': '20110310',
1331 'uploader_id': 'AllenMeow',
ec85ded8 1332 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 1333 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 1334 'uploader': '孫ᄋᄅ',
6271f1ca 1335 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
976ae3ea 1336 'playable_in_embed': True,
1337 'channel': '孫ᄋᄅ',
1338 'age_limit': 0,
1339 'tags': 'count:11',
1340 'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',
1341 'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',
1342 'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',
1343 'view_count': int,
1344 'categories': ['People & Blogs'],
1345 'like_count': int,
1346 'live_status': 'not_live',
1347 'availability': 'unlisted',
6271f1ca 1348 },
06b491eb
S
1349 },
1350 # url_encoded_fmt_stream_map is empty string
1351 {
1352 'url': 'qEJwOuvDf7I',
1353 'info_dict': {
1354 'id': 'qEJwOuvDf7I',
f57b7835 1355 'ext': 'webm',
06b491eb
S
1356 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1357 'description': '',
1358 'upload_date': '20150404',
1359 'uploader_id': 'spbelect',
1360 'uploader': 'Наблюдатели Петербурга',
1361 },
1362 'params': {
1363 'skip_download': 'requires avconv',
e323cf3f
S
1364 },
1365 'skip': 'This live event has ended.',
06b491eb 1366 },
067aa17e 1367 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
1368 {
1369 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1370 'info_dict': {
1371 'id': 'FIl7x6_3R5Y',
eb6793ba 1372 'ext': 'webm',
da77d856
S
1373 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1374 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 1375 'duration': 220,
da77d856
S
1376 'upload_date': '20150625',
1377 'uploader_id': 'dorappi2000',
ec85ded8 1378 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 1379 'uploader': 'dorappi2000',
eb6793ba 1380 'formats': 'mincount:31',
da77d856 1381 },
eb6793ba 1382 'skip': 'not actual anymore',
2ee8f5d8 1383 },
8a1a26ce
YCH
1384 # DASH manifest with segment_list
1385 {
1386 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1387 'md5': '8ce563a1d667b599d21064e982ab9e31',
1388 'info_dict': {
1389 'id': 'CsmdDsKjzN8',
1390 'ext': 'mp4',
17ee98e1 1391 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
1392 'uploader': 'Airtek',
1393 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1394 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1395 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1396 },
1397 'params': {
1398 'youtube_include_dash_manifest': True,
1399 'format': '135', # bestvideo
be49068d
S
1400 },
1401 'skip': 'This live event has ended.',
2ee8f5d8 1402 },
cf7e015f
S
1403 {
1404 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 1405 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 1406 'info_dict': {
545cc85d 1407 'id': 'jvGDaLqkpTg',
1408 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1409 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
1410 },
1411 'playlist': [{
1412 'info_dict': {
545cc85d 1413 'id': 'jvGDaLqkpTg',
cf7e015f 1414 'ext': 'mp4',
545cc85d 1415 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1416 'description': 'md5:e03b909557865076822aa169218d6a5d',
1417 'duration': 10643,
1418 'upload_date': '20161111',
1419 'uploader': 'Team PGP',
1420 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1421 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1422 },
1423 }, {
1424 'info_dict': {
545cc85d 1425 'id': '3AKt1R1aDnw',
cf7e015f 1426 'ext': 'mp4',
545cc85d 1427 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1428 'description': 'md5:e03b909557865076822aa169218d6a5d',
1429 'duration': 10991,
1430 'upload_date': '20161111',
1431 'uploader': 'Team PGP',
1432 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1433 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1434 },
1435 }, {
1436 'info_dict': {
545cc85d 1437 'id': 'RtAMM00gpVc',
cf7e015f 1438 'ext': 'mp4',
545cc85d 1439 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1440 'description': 'md5:e03b909557865076822aa169218d6a5d',
1441 'duration': 10995,
1442 'upload_date': '20161111',
1443 'uploader': 'Team PGP',
1444 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1445 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1446 },
1447 }, {
1448 'info_dict': {
545cc85d 1449 'id': '6N2fdlP3C5U',
cf7e015f 1450 'ext': 'mp4',
545cc85d 1451 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1452 'description': 'md5:e03b909557865076822aa169218d6a5d',
1453 'duration': 10990,
1454 'upload_date': '20161111',
1455 'uploader': 'Team PGP',
1456 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1457 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1458 },
1459 }],
1460 'params': {
1461 'skip_download': True,
1462 },
65c2fde2 1463 'skip': 'Not multifeed anymore',
cbaed4bb 1464 },
f9f49d87 1465 {
067aa17e 1466 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
1467 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1468 'info_dict': {
1469 'id': 'gVfLd0zydlo',
1470 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1471 },
1472 'playlist_count': 2,
be49068d 1473 'skip': 'Not multifeed anymore',
f9f49d87 1474 },
cbaed4bb 1475 {
2d3d2997 1476 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 1477 'only_matching': True,
0e49d9a6 1478 },
6d4fc66b 1479 {
2d3d2997 1480 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
1481 'only_matching': True,
1482 },
0e49d9a6 1483 {
067aa17e 1484 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 1485 # Also tests cut-off URL expansion in video description (see
067aa17e
S
1486 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1487 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
1488 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1489 'info_dict': {
1490 'id': 'lsguqyKfVQg',
1491 'ext': 'mp4',
1492 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
11f9be09 1493 'alt_title': 'Dark Walk',
0e49d9a6 1494 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 1495 'duration': 133,
0e49d9a6
LL
1496 'upload_date': '20151119',
1497 'uploader_id': 'IronSoulElf',
ec85ded8 1498 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 1499 'uploader': 'IronSoulElf',
11f9be09 1500 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1501 'track': 'Dark Walk',
1502 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
92bc97d3 1503 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
976ae3ea 1504 'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',
1505 'categories': ['Film & Animation'],
1506 'view_count': int,
1507 'live_status': 'not_live',
1508 'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',
1509 'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',
1510 'tags': 'count:13',
1511 'availability': 'public',
1512 'channel': 'IronSoulElf',
1513 'playable_in_embed': True,
1514 'like_count': int,
1515 'age_limit': 0,
0e49d9a6
LL
1516 },
1517 'params': {
1518 'skip_download': True,
1519 },
1520 },
61f92af1 1521 {
067aa17e 1522 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
1523 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1524 'only_matching': True,
1525 },
313dfc45
LL
1526 {
1527 # Video with yt:stretch=17:0
1528 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1529 'info_dict': {
1530 'id': 'Q39EVAstoRM',
1531 'ext': 'mp4',
1532 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1533 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1534 'upload_date': '20151107',
1535 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1536 'uploader': 'CH GAMER DROID',
1537 },
1538 'params': {
1539 'skip_download': True,
1540 },
be49068d 1541 'skip': 'This video does not exist.',
313dfc45 1542 },
201c1459 1543 {
1544 # Video with incomplete 'yt:stretch=16:'
1545 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1546 'only_matching': True,
1547 },
7caf9830
S
1548 {
1549 # Video licensed under Creative Commons
1550 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1551 'info_dict': {
1552 'id': 'M4gD1WSo5mA',
1553 'ext': 'mp4',
1554 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1555 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1556 'duration': 721,
7caf9830
S
1557 'upload_date': '20150127',
1558 'uploader_id': 'BerkmanCenter',
ec85ded8 1559 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 1560 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830 1561 'license': 'Creative Commons Attribution license (reuse allowed)',
976ae3ea 1562 'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',
1563 'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',
1564 'like_count': int,
1565 'age_limit': 0,
1566 'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],
1567 'channel': 'The Berkman Klein Center for Internet & Society',
1568 'availability': 'public',
1569 'view_count': int,
1570 'categories': ['Education'],
1571 'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',
1572 'live_status': 'not_live',
1573 'playable_in_embed': True,
7caf9830
S
1574 },
1575 'params': {
1576 'skip_download': True,
1577 },
1578 },
fd050249
S
1579 {
1580 # Channel-like uploader_url
1581 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1582 'info_dict': {
1583 'id': 'eQcmzGIKrzg',
1584 'ext': 'mp4',
1585 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 1586 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1587 'duration': 4060,
fd050249 1588 'upload_date': '20151119',
eb6793ba 1589 'uploader': 'Bernie Sanders',
fd050249 1590 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1591 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249 1592 'license': 'Creative Commons Attribution license (reuse allowed)',
976ae3ea 1593 'playable_in_embed': True,
1594 'tags': 'count:12',
1595 'like_count': int,
1596 'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1597 'age_limit': 0,
1598 'availability': 'public',
1599 'categories': ['News & Politics'],
1600 'channel': 'Bernie Sanders',
1601 'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',
1602 'view_count': int,
1603 'live_status': 'not_live',
1604 'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
1605 },
1606 'params': {
1607 'skip_download': True,
1608 },
1609 },
040ac686
S
1610 {
1611 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1612 'only_matching': True,
7f29cf54
S
1613 },
1614 {
067aa17e 1615 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1616 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1617 'only_matching': True,
6496ccb4
S
1618 },
1619 {
1620 # Rental video preview
1621 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1622 'info_dict': {
1623 'id': 'uGpuVWrhIzE',
1624 'ext': 'mp4',
1625 'title': 'Piku - Trailer',
1626 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1627 'upload_date': '20150811',
1628 'uploader': 'FlixMatrix',
1629 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1630 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1631 'license': 'Standard YouTube License',
1632 },
1633 'params': {
1634 'skip_download': True,
1635 },
eb6793ba 1636 'skip': 'This video is not available.',
022a5d66 1637 },
12afdc2a
S
1638 {
1639 # YouTube Red video with episode data
1640 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1641 'info_dict': {
1642 'id': 'iqKdEhx-dD4',
1643 'ext': 'mp4',
1644 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1645 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1646 'duration': 2085,
12afdc2a
S
1647 'upload_date': '20170118',
1648 'uploader': 'Vsauce',
1649 'uploader_id': 'Vsauce',
1650 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1651 'series': 'Mind Field',
1652 'season_number': 1,
1653 'episode_number': 1,
976ae3ea 1654 'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',
1655 'tags': 'count:12',
1656 'view_count': int,
1657 'availability': 'public',
1658 'age_limit': 0,
1659 'channel': 'Vsauce',
1660 'episode': 'Episode 1',
1661 'categories': ['Entertainment'],
1662 'season': 'Season 1',
1663 'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',
1664 'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',
1665 'like_count': int,
1666 'playable_in_embed': True,
1667 'live_status': 'not_live',
12afdc2a
S
1668 },
1669 'params': {
1670 'skip_download': True,
1671 },
1672 'expected_warnings': [
1673 'Skipping DASH manifest',
1674 ],
1675 },
c7121fa7
S
1676 {
1677 # The following content has been identified by the YouTube community
1678 # as inappropriate or offensive to some audiences.
1679 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1680 'info_dict': {
1681 'id': '6SJNVb0GnPI',
1682 'ext': 'mp4',
1683 'title': 'Race Differences in Intelligence',
1684 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1685 'duration': 965,
1686 'upload_date': '20140124',
1687 'uploader': 'New Century Foundation',
1688 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1689 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1690 },
1691 'params': {
1692 'skip_download': True,
1693 },
545cc85d 1694 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1695 },
022a5d66
S
1696 {
1697 # itag 212
1698 'url': '1t24XAntNCY',
1699 'only_matching': True,
fd5c4aab
S
1700 },
1701 {
1702 # geo restricted to JP
1703 'url': 'sJL6WA-aGkQ',
1704 'only_matching': True,
1705 },
cd5a74a2
S
1706 {
1707 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1708 'only_matching': True,
1709 },
bc2ca1bb 1710 {
1711 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1712 'only_matching': True,
1713 },
1714 {
1715 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1716 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1717 'only_matching': True,
1718 },
825cd268
RA
1719 {
1720 # DRM protected
1721 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1722 'only_matching': True,
4fe54c12
S
1723 },
1724 {
1725 # Video with unsupported adaptive stream type formats
1726 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1727 'info_dict': {
1728 'id': 'Z4Vy8R84T1U',
1729 'ext': 'mp4',
1730 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1731 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1732 'duration': 433,
1733 'upload_date': '20130923',
1734 'uploader': 'Amelia Putri Harwita',
1735 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1736 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1737 'formats': 'maxcount:10',
1738 },
1739 'params': {
1740 'skip_download': True,
1741 'youtube_include_dash_manifest': False,
1742 },
5429d6a9 1743 'skip': 'not actual anymore',
5caabd3c 1744 },
1745 {
822b9d9c 1746 # Youtube Music Auto-generated description
5caabd3c 1747 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1748 'info_dict': {
1749 'id': 'MgNrAu2pzNs',
1750 'ext': 'mp4',
1751 'title': 'Voyeur Girl',
1752 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1753 'upload_date': '20190312',
5429d6a9
S
1754 'uploader': 'Stephen - Topic',
1755 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1756 'artist': 'Stephen',
1757 'track': 'Voyeur Girl',
1758 'album': 'it\'s too much love to know my dear',
1759 'release_date': '20190313',
1760 'release_year': 2019,
976ae3ea 1761 'alt_title': 'Voyeur Girl',
1762 'view_count': int,
1763 'uploader_url': 'http://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
1764 'playable_in_embed': True,
1765 'like_count': int,
1766 'categories': ['Music'],
1767 'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
1768 'channel': 'Stephen',
1769 'availability': 'public',
1770 'creator': 'Stephen',
1771 'duration': 169,
1772 'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',
1773 'age_limit': 0,
1774 'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1775 'tags': 'count:11',
1776 'live_status': 'not_live',
5caabd3c 1777 },
1778 'params': {
1779 'skip_download': True,
1780 },
1781 },
66b48727
RA
1782 {
1783 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1784 'only_matching': True,
1785 },
011e75e6
S
1786 {
1787 # invalid -> valid video id redirection
1788 'url': 'DJztXj2GPfl',
1789 'info_dict': {
1790 'id': 'DJztXj2GPfk',
1791 'ext': 'mp4',
1792 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1793 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1794 'upload_date': '20090125',
1795 'uploader': 'Prochorowka',
1796 'uploader_id': 'Prochorowka',
1797 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1798 'artist': 'Panjabi MC',
1799 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1800 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1801 },
1802 'params': {
1803 'skip_download': True,
1804 },
545cc85d 1805 'skip': 'Video unavailable',
ea74e00b
DP
1806 },
1807 {
1808 # empty description results in an empty string
1809 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1810 'info_dict': {
1811 'id': 'x41yOUIvK2k',
1812 'ext': 'mp4',
1813 'title': 'IMG 3456',
1814 'description': '',
1815 'upload_date': '20170613',
1816 'uploader_id': 'ElevageOrVert',
1817 'uploader': 'ElevageOrVert',
976ae3ea 1818 'view_count': int,
1819 'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',
1820 'uploader_url': 'http://www.youtube.com/user/ElevageOrVert',
1821 'like_count': int,
1822 'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',
1823 'tags': [],
1824 'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',
1825 'availability': 'public',
1826 'age_limit': 0,
1827 'categories': ['Pets & Animals'],
1828 'duration': 7,
1829 'playable_in_embed': True,
1830 'live_status': 'not_live',
1831 'channel': 'ElevageOrVert',
ea74e00b
DP
1832 },
1833 'params': {
1834 'skip_download': True,
1835 },
1836 },
a0566bbf 1837 {
29f7c58a 1838 # with '};' inside yt initial data (see [1])
1839 # see [2] for an example with '};' inside ytInitialPlayerResponse
1840 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1841 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1842 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1843 'info_dict': {
1844 'id': 'CHqg6qOn4no',
1845 'ext': 'mp4',
1846 'title': 'Part 77 Sort a list of simple types in c#',
1847 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1848 'upload_date': '20130831',
1849 'uploader_id': 'kudvenkat',
1850 'uploader': 'kudvenkat',
976ae3ea 1851 'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',
1852 'like_count': int,
1853 'uploader_url': 'http://www.youtube.com/user/kudvenkat',
1854 'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',
1855 'live_status': 'not_live',
1856 'categories': ['Education'],
1857 'availability': 'public',
1858 'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',
1859 'tags': 'count:12',
1860 'playable_in_embed': True,
1861 'age_limit': 0,
1862 'view_count': int,
1863 'duration': 522,
1864 'channel': 'kudvenkat',
a0566bbf 1865 },
1866 'params': {
1867 'skip_download': True,
1868 },
1869 },
29f7c58a 1870 {
1871 # another example of '};' in ytInitialData
1872 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1873 'only_matching': True,
1874 },
1875 {
1876 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1877 'only_matching': True,
1878 },
545cc85d 1879 {
cc2db878 1880 # https://github.com/ytdl-org/youtube-dl/pull/28094
1881 'url': 'OtqTfy26tG0',
1882 'info_dict': {
1883 'id': 'OtqTfy26tG0',
1884 'ext': 'mp4',
1885 'title': 'Burn Out',
1886 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1887 'upload_date': '20141120',
1888 'uploader': 'The Cinematic Orchestra - Topic',
1889 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1890 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1891 'artist': 'The Cinematic Orchestra',
1892 'track': 'Burn Out',
1893 'album': 'Every Day',
976ae3ea 1894 'like_count': int,
1895 'live_status': 'not_live',
1896 'alt_title': 'Burn Out',
1897 'duration': 614,
1898 'age_limit': 0,
1899 'view_count': int,
1900 'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1901 'creator': 'The Cinematic Orchestra',
1902 'channel': 'The Cinematic Orchestra',
1903 'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],
1904 'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1905 'availability': 'public',
1906 'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',
1907 'categories': ['Music'],
1908 'playable_in_embed': True,
cc2db878 1909 },
1910 'params': {
1911 'skip_download': True,
1912 },
545cc85d 1913 },
bc2ca1bb 1914 {
1915 # controversial video, only works with bpctr when authenticated with cookies
1916 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1917 'only_matching': True,
1918 },
a1a7907b 1919 {
1920 # controversial video, requires bpctr/contentCheckOk
1921 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
1922 'info_dict': {
1923 'id': 'SZJvDhaSDnc',
1924 'ext': 'mp4',
1925 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
1926 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
976ae3ea 1927 'uploader': 'CBS Mornings',
11f9be09 1928 'uploader_id': 'CBSThisMorning',
a1a7907b 1929 'upload_date': '20140716',
976ae3ea 1930 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',
1931 'duration': 170,
1932 'categories': ['News & Politics'],
1933 'uploader_url': 'http://www.youtube.com/user/CBSThisMorning',
1934 'view_count': int,
1935 'channel': 'CBS Mornings',
1936 'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],
1937 'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',
1938 'age_limit': 18,
1939 'availability': 'needs_auth',
1940 'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',
1941 'like_count': int,
1942 'live_status': 'not_live',
1943 'playable_in_embed': True,
a1a7907b 1944 }
1945 },
f7ad7160 1946 {
1947 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1948 'url': 'cBvYw8_A0vQ',
1949 'info_dict': {
1950 'id': 'cBvYw8_A0vQ',
1951 'ext': 'mp4',
1952 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1953 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1954 'upload_date': '20201120',
1955 'uploader': 'Walk around Japan',
1956 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1957 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
976ae3ea 1958 'duration': 1456,
1959 'categories': ['Travel & Events'],
1960 'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1961 'view_count': int,
1962 'channel': 'Walk around Japan',
1963 'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],
1964 'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp',
1965 'age_limit': 0,
1966 'availability': 'public',
1967 'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1968 'live_status': 'not_live',
1969 'playable_in_embed': True,
f7ad7160 1970 },
1971 'params': {
1972 'skip_download': True,
1973 },
0fb983f6 1974 }, {
1975 # Has multiple audio streams
1976 'url': 'WaOKSUlf4TM',
1977 'only_matching': True
9297939e 1978 }, {
1979 # Requires Premium: has format 141 when requested using YTM url
1980 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1981 'only_matching': True
1982 }, {
120916da 1983 # multiple subtitles with same lang_code
1984 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1985 'only_matching': True,
109dd3b2 1986 }, {
1987 # Force use android client fallback
1988 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1989 'info_dict': {
1990 'id': 'YOelRv7fMxY',
11f9be09 1991 'title': 'DIGGING A SECRET TUNNEL Part 1',
109dd3b2 1992 'ext': '3gp',
1993 'upload_date': '20210624',
1994 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1995 'uploader': 'colinfurze',
11f9be09 1996 'uploader_id': 'colinfurze',
109dd3b2 1997 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
976ae3ea 1998 'description': 'md5:5d5991195d599b56cd0c4148907eec50',
1999 'duration': 596,
2000 'categories': ['Entertainment'],
2001 'uploader_url': 'http://www.youtube.com/user/colinfurze',
2002 'view_count': int,
2003 'channel': 'colinfurze',
2004 'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],
2005 'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',
2006 'age_limit': 0,
2007 'availability': 'public',
2008 'like_count': int,
2009 'live_status': 'not_live',
2010 'playable_in_embed': True,
109dd3b2 2011 },
2012 'params': {
2013 'format': '17', # 3gp format available on android
2014 'extractor_args': {'youtube': {'player_client': ['android']}},
2015 },
120916da 2016 },
109dd3b2 2017 {
2018 # Skip download of additional client configs (remix client config in this case)
2019 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
2020 'only_matching': True,
2021 'params': {
2022 'extractor_args': {'youtube': {'player_skip': ['configs']}},
2023 },
8fc54b12 2024 }, {
2025 # shorts
2026 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
2027 'only_matching': True,
9222c381 2028 }, {
2029 'note': 'Storyboards',
2030 'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',
2031 'info_dict': {
2032 'id': '5KLPxDtMqe8',
2033 'ext': 'mhtml',
2034 'format_id': 'sb0',
2035 'title': 'Your Brain is Plastic',
2036 'uploader_id': 'scishow',
2037 'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',
2038 'upload_date': '20140324',
2039 'uploader': 'SciShow',
976ae3ea 2040 'like_count': int,
2041 'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',
2042 'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',
2043 'view_count': int,
2044 'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',
2045 'playable_in_embed': True,
2046 'tags': 'count:12',
2047 'uploader_url': 'http://www.youtube.com/user/scishow',
2048 'availability': 'public',
2049 'channel': 'SciShow',
2050 'live_status': 'not_live',
2051 'duration': 248,
2052 'categories': ['Education'],
2053 'age_limit': 0,
9222c381 2054 }, 'params': {'format': 'mhtml', 'skip_download': True}
2055 }
2eb88d95
PH
2056 ]
2057
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    A URL whose query string carries a non-empty ``list`` parameter is
    rejected here; every other URL is judged by the parent class.
    """
    from ..utils import parse_qs

    list_values = parse_qs(url).get('list', [None])
    if list_values[0]:
        return False
    return super(YoutubeIE, cls).suitable(url)
2066
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Forward all arguments to the parent initialiser and create empty caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # _code_cache maps a player id to the downloaded player code (see _load_player).
    # _player_cache starts empty as well; presumably filled by code outside this view.
    self._code_cache, self._player_cache = {}, {}
e0df6211 2071
adbc4ec4 2072 def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data):
adbc4ec4
THD
2073 lock = threading.Lock()
2074
2075 is_live = True
185bf310 2076 start_time = time.time()
adbc4ec4
THD
2077 formats = [f for f in formats if f.get('is_from_start')]
2078
185bf310 2079 def refetch_manifest(format_id, delay):
2080 nonlocal formats, start_time, is_live
2081 if time.time() <= start_time + delay:
adbc4ec4
THD
2082 return
2083
2084 _, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
2085 video_details = traverse_obj(
2086 prs, (..., 'videoDetails'), expected_type=dict, default=[])
2087 microformats = traverse_obj(
2088 prs, (..., 'microformat', 'playerMicroformatRenderer'),
2089 expected_type=dict, default=[])
2090 _, is_live, _, formats = self._list_formats(video_id, microformats, video_details, prs, player_url)
185bf310 2091 start_time = time.time()
adbc4ec4 2092
185bf310 2093 def mpd_feed(format_id, delay):
adbc4ec4
THD
2094 """
2095 @returns (manifest_url, manifest_stream_number, is_live) or None
2096 """
2097 with lock:
185bf310 2098 refetch_manifest(format_id, delay)
adbc4ec4
THD
2099
2100 f = next((f for f in formats if f['format_id'] == format_id), None)
2101 if not f:
185bf310 2102 if not is_live:
2103 self.to_screen(f'{video_id}: Video is no longer live')
2104 else:
2105 self.report_warning(
2106 f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}')
adbc4ec4
THD
2107 return None
2108 return f['manifest_url'], f['manifest_stream_number'], is_live
2109
2110 for f in formats:
2111 f['protocol'] = 'http_dash_segments_generator'
2112 f['fragments'] = functools.partial(
2113 self._live_dash_fragments, f['format_id'], live_start_time, mpd_feed)
2114
2115 def _live_dash_fragments(self, format_id, live_start_time, mpd_feed, ctx):
2116 FETCH_SPAN, MAX_DURATION = 5, 432000
2117
2118 mpd_url, stream_number, is_live = None, None, True
2119
2120 begin_index = 0
2121 download_start_time = ctx.get('start') or time.time()
2122
2123 lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION
2124 if lack_early_segments:
2125 self.report_warning(bug_reports_message(
2126 'Starting download from the last 120 hours of the live stream since '
2127 'YouTube does not have data before that. If you think this is wrong,'), only_once=True)
2128 lack_early_segments = True
2129
2130 known_idx, no_fragment_score, last_segment_url = begin_index, 0, None
2131 fragments, fragment_base_url = None, None
2132
2133 def _extract_sequence_from_mpd(refresh_sequence):
2134 nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
2135 # Obtain from MPD's maximum seq value
2136 old_mpd_url = mpd_url
185bf310 2137 last_error = ctx.pop('last_error', None)
2138 expire_fast = last_error and isinstance(last_error, compat_HTTPError) and last_error.code == 403
2139 mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
2140 or (mpd_url, stream_number, False))
2141 if not refresh_sequence:
2142 if expire_fast and not is_live:
2143 return False, last_seq
2144 elif old_mpd_url == mpd_url:
2145 return True, last_seq
adbc4ec4
THD
2146 try:
2147 fmts, _ = self._extract_mpd_formats_and_subtitles(
2148 mpd_url, None, note=False, errnote=False, fatal=False)
2149 except ExtractorError:
2150 fmts = None
2151 if not fmts:
2152 no_fragment_score += 1
2153 return False, last_seq
2154 fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)
2155 fragments = fmt_info['fragments']
2156 fragment_base_url = fmt_info['fragment_base_url']
2157 assert fragment_base_url
2158
2159 _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
2160 return True, _last_seq
2161
2162 while is_live:
2163 fetch_time = time.time()
2164 if no_fragment_score > 30:
2165 return
2166 if last_segment_url:
2167 # Obtain from "X-Head-Seqnum" header value from each segment
2168 try:
2169 urlh = self._request_webpage(
2170 last_segment_url, None, note=False, errnote=False, fatal=False)
2171 except ExtractorError:
2172 urlh = None
2173 last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))
2174 if last_seq is None:
2175 no_fragment_score += 1
2176 last_segment_url = None
2177 continue
2178 else:
185bf310 2179 should_continue, last_seq = _extract_sequence_from_mpd(True)
2180 if not should_continue:
adbc4ec4
THD
2181 continue
2182
2183 if known_idx > last_seq:
2184 last_segment_url = None
2185 continue
2186
2187 last_seq += 1
2188
2189 if begin_index < 0 and known_idx < 0:
2190 # skip from the start when it's negative value
2191 known_idx = last_seq + begin_index
2192 if lack_early_segments:
2193 known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))
2194 try:
2195 for idx in range(known_idx, last_seq):
2196 # do not update sequence here or you'll get skipped some part of it
185bf310 2197 should_continue, _ = _extract_sequence_from_mpd(False)
2198 if not should_continue:
adbc4ec4
THD
2199 known_idx = idx - 1
2200 raise ExtractorError('breaking out of outer loop')
2201 last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)
2202 yield {
2203 'url': last_segment_url,
2204 }
2205 if known_idx == last_seq:
2206 no_fragment_score += 5
2207 else:
2208 no_fragment_score = 0
2209 known_idx = last_seq
2210 except ExtractorError:
2211 continue
2212
2213 time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))
2214
def _extract_player_url(self, *ytcfgs, webpage=None):
    """Return an absolute player JS URL found in the given ytcfg dicts, if any.

    The first string found under 'PLAYER_JS_URL' or under
    'WEB_PLAYER_CONTEXT_CONFIGS' -> * -> 'jsUrl' is used. Returns None when
    neither yields a value. `webpage` is accepted but not used here.
    """
    player_url = traverse_obj(
        ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
        get_all=False, expected_type=compat_str)
    if not player_url:
        return None
    # Protocol-relative URL: just add the scheme
    if player_url.startswith('//'):
        return 'https:' + player_url
    # Already absolute
    if re.match(r'https?://', player_url):
        return player_url
    # Anything else is resolved against the YouTube origin
    return compat_urlparse.urljoin(
        'https://www.youtube.com', player_url)
2227
b6de707d 2228 def _download_player_url(self, video_id, fatal=False):
2229 res = self._download_webpage(
2230 'https://www.youtube.com/iframe_api',
2231 note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
2232 if res:
2233 player_version = self._search_regex(
2234 r'player\\?/([0-9a-fA-F]{8})\\?/', res, 'player version', fatal=fatal)
2235 if player_version:
2236 return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'
2237
60064c53
PH
def _signature_cache_id(self, example_sig):
    """ Return the dot-joined lengths of the dot-separated parts of a signature """
    part_lengths = [len(chunk) for chunk in example_sig.split('.')]
    return '.'.join(map(compat_str, part_lengths))
60064c53 2241
e40c758c
S
@classmethod
def _extract_player_info(cls, player_url):
    """Return the 'id' group of the first `_PLAYER_INFO_RE` pattern matching *player_url*.

    Raises ExtractorError when none of the patterns match.
    """
    # Lazy: patterns after the first hit are never searched
    attempts = (re.search(player_re, player_url) for player_re in cls._PLAYER_INFO_RE)
    id_match = next(filter(None, attempts), None)
    if id_match is None:
        raise ExtractorError('Cannot identify player %r' % player_url)
    return id_match.group('id')
e40c758c 2251
404f611f 2252 def _load_player(self, video_id, player_url, fatal=True):
109dd3b2 2253 player_id = self._extract_player_info(player_url)
2254 if player_id not in self._code_cache:
1276a43a 2255 code = self._download_webpage(
109dd3b2 2256 player_url, video_id, fatal=fatal,
2257 note='Downloading player ' + player_id,
2258 errnote='Download of %s failed' % player_url)
1276a43a 2259 if code:
2260 self._code_cache[player_id] = code
404f611f 2261 return self._code_cache.get(player_id)
109dd3b2 2262
e40c758c 2263 def _extract_signature_function(self, video_id, player_url, example_sig):
545cc85d 2264 player_id = self._extract_player_info(player_url)
e0df6211 2265
c4417ddb 2266 # Read from filesystem cache
545cc85d 2267 func_id = 'js_%s_%s' % (
2268 player_id, self._signature_cache_id(example_sig))
c4417ddb 2269 assert os.path.basename(func_id) == func_id
a0e07d31 2270
69ea8ca4 2271 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
a0e07d31 2272 if cache_spec is not None:
78caa52a 2273 return lambda s: ''.join(s[i] for i in cache_spec)
83799698 2274
404f611f 2275 code = self._load_player(video_id, player_url)
2276 if code:
109dd3b2 2277 res = self._parse_sig_js(code)
e0df6211 2278
109dd3b2 2279 test_string = ''.join(map(compat_chr, range(len(example_sig))))
2280 cache_res = res(test_string)
2281 cache_spec = [ord(c) for c in cache_res]
83799698 2282
109dd3b2 2283 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
2284 return res
83799698 2285
def _print_sig_code(self, func, example_sig):
    """Print Python code equivalent to the signature function *func*.

    Does nothing unless the 'youtube_print_sig_code' param is set. The code
    is derived by running *func* on a probe string and expressing the
    resulting index permutation as slices and single indexes of ``s``.
    """
    if not self.get_param('youtube_print_sig_code'):
        return

    def gen_sig_code(idxs):
        def _genslice(start, end, step):
            starts = '' if start == 0 else str(start)
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return 's[%s%s%s]' % (starts, ends, steps)

        # Nothing to describe; also keeps idxs[-1] below safe
        if not idxs:
            return

        step = None
        # Quelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                if i - prev == step:
                    continue
                # Run ended at `prev`: emit it as one slice
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                # Start of an ascending/descending run
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        # Use idxs[-1] rather than the loop variable, which is unbound when
        # there is only one index (the zip above is then empty).
        last = idxs[-1]
        if step is None:
            yield 's[%d]' % last
        else:
            yield _genslice(start, last, step)

    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    signature_id_tuple = '(%s)' % (
        ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            '    return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 2327
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Return a callable that runs the signature function found in *jscode*.

    The function name is located with the first matching pattern below and
    the function itself is extracted through JSInterpreter.
    """
    name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    sig_func_name = self._search_regex(
        name_patterns, jscode, 'Initial JS player signature function name', group='sig')

    interpreter = JSInterpreter(jscode)
    js_function = interpreter.extract_function(sig_func_name)

    def run_signature_function(s):
        # The extracted function takes its arguments as a list
        return js_function([s])

    return run_signature_function
2351
def _decrypt_signature(self, s, video_id, player_url):
    """Turn the encrypted s field into a working signature

    The signature function is kept in self._player_cache, keyed by the player
    URL and the signature cache id of *s*. Any failure is re-raised as an
    ExtractorError that carries the formatted traceback.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt signature without player_url')

    try:
        cache_key = (player_url, self._signature_cache_id(s))
        if cache_key not in self._player_cache:
            self._player_cache[cache_key] = self._extract_signature_function(
                video_id, player_url, s)
        sig_func = self._player_cache[cache_key]
        self._print_sig_code(sig_func, s)
        return sig_func(s)
    except Exception as e:
        raise ExtractorError('Signature extraction failed: ' + traceback.format_exc(), cause=e)
2370
def _decrypt_nsig(self, s, video_id, player_url):
    """Turn the encrypted n field into a working signature

    Scheme-relative and relative player URLs are made absolute first.
    Both the n function (per player URL) and every decrypted value (per
    input) are stored in self._player_cache.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')
    if player_url.startswith('//'):
        player_url = 'https:' + player_url
    elif not re.match(r'https?://', player_url):
        player_url = compat_urlparse.urljoin('https://www.youtube.com', player_url)

    value_key = ('nsig_value', s)
    if value_key in self._player_cache:
        return self._player_cache[value_key]

    try:
        function_key = ('nsig', player_url)
        if function_key not in self._player_cache:
            self._player_cache[function_key] = self._extract_n_function(video_id, player_url)
        n_function = self._player_cache[function_key]
        self._player_cache[value_key] = n_function(s)
        self.write_debug(f'Decrypted nsig {s} => {self._player_cache[value_key]}')
        return self._player_cache[value_key]
    except Exception as e:
        raise ExtractorError(traceback.format_exc(), cause=e, video_id=video_id)
404f611f 2395
def _extract_n_function_name(self, jscode):
    """Return the name of the n function referenced in the player *jscode*."""
    name_patterns = (
        r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]{3})\([a-zA-Z0-9]\)',
    )
    return self._search_regex(
        name_patterns, jscode, 'Initial JS player n function name', group='nfunc')
2400
def _extract_n_function(self, video_id, player_url):
    """Return a callable that applies the player's n function to a string.

    The function code is read from the 'youtube-nsig' cache section when
    available; otherwise it is extracted from the player JS and stored there.
    """
    player_id = self._extract_player_info(player_url)
    func_code = self._downloader.cache.load('youtube-nsig', player_id)

    if not func_code:
        # Nothing cached: pull the function out of the player code
        jscode = self._load_player(video_id, player_url)
        funcname = self._extract_n_function_name(jscode)
        jsi = JSInterpreter(jscode)
        func_code = jsi.extract_function_code(funcname)
        self._downloader.cache.store('youtube-nsig', player_id, func_code)
    else:
        jsi = JSInterpreter(func_code)

    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    def run_n_function(s):
        # The function object is rebuilt from its code on every call
        return jsi.extract_function_from_code(*func_code)([s])

    return run_n_function
e0df6211 2418
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    The 'STS' entry of *ytcfg* is preferred; the player JS is only
    searched when that yields nothing.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return

    code = self._load_player(video_id, player_url, fatal=fatal)
    if not code:
        return sts
    return int_or_none(self._search_regex(
        r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
        'JS player signature timestamp', group='sts', fatal=fatal))
2442
def _mark_watched(self, video_id, player_responses):
    """Request the playback tracking URL of the video.

    The URL is the first valid 'playbackTracking' -> 'videostatsPlaybackUrl'
    -> 'baseUrl' in *player_responses*, extended with 'ver' and a random
    'cpn' parameter. A warning is reported if no such URL exists, and the
    download itself is non-fatal.
    """
    playback_url = get_first(
        player_responses, ('playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
        expected_type=url_or_none)
    if not playback_url:
        self.report_warning('Unable to mark watched')
        return
    parsed_playback_url = compat_urlparse.urlparse(playback_url)
    qs = compat_urlparse.parse_qs(parsed_playback_url.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    # randint() includes both endpoints: stopping at 255 keeps all 64
    # alphabet positions equally likely after masking with 63
    cpn = ''.join(CPN_ALPHABET[random.randint(0, 255) & 63] for _ in range(16))

    qs.update({
        'ver': ['2'],
        'cpn': [cpn],
    })
    playback_url = compat_urlparse.urlunparse(
        parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
2468
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """Return the YouTube embeds found in *webpage*, in order of discovery.

    The list holds embed URLs from player markup, followed by the values of
    lazyYT 'data-youtube-id' and "YouTube Video Importer" 'data-video_id'
    attributes.
    """
    # Embedded YouTube player
    entries = [
        unescapeHTML(mobj.group('url'))
        for mobj in re.finditer(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
        (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''', webpage)]

    # lazyYT YouTube embed
    entries.extend(
        unescapeHTML(video_id)
        for video_id in re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))

    # Wordpress "YouTube Video Importer" plugin
    matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # With several groups findall() returns tuples; the id is the last group
    entries.extend(m[-1] for m in matches)

    return entries
2500
@staticmethod
def _extract_url(webpage):
    """Return the first embed found by _extract_urls(), or None if there is none."""
    return next(iter(YoutubeIE._extract_urls(webpage)), None)
2505
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the 'id' group of *url* matched against cls._VALID_URL.

    Raises ExtractorError when the URL does not match.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group('id')
    raise ExtractorError('Invalid URL: %s' % url)
c5e8d7af 2512
def _extract_chapters_from_json(self, data, duration):
    """Build the chapter list from the chaptered player bar in *data*."""
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
    )
    chapter_list = traverse_obj(data, chapters_path, expected_type=list)

    def start_of(chapter):
        # The start is given in milliseconds
        millis = traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis'))
        return float_or_none(millis, scale=1000)

    def title_of(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    return self._extract_chapters(
        chapter_list, chapter_time=start_of, chapter_title=title_of, duration=duration)
2527
def _extract_chapters_from_engagement_panel(self, data, duration):
    """Return the first non-empty chapter list found in the engagement panels.

    Every 'macroMarkersListRenderer' contents list in *data* is tried in
    order; an empty list is returned when none of them produces chapters.
    """
    content_list = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    for contents in content_list:
        chapters = self._extract_chapters(
            traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
            chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []
2543
2544 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
84213ea8 2545 chapters = []
7c365c21 2546 last_chapter = {'start_time': 0}
2547 for idx, chapter in enumerate(chapter_list or []):
2548 title = chapter_title(chapter)
84213ea8
S
2549 start_time = chapter_time(chapter)
2550 if start_time is None:
2551 continue
7c365c21 2552 last_chapter['end_time'] = start_time
2553 if start_time < last_chapter['start_time']:
2554 if idx == 1:
2555 chapters.pop()
2556 self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])
2557 else:
2558 self.report_warning(f'Invalid start time for chapter "{title}"')
2559 continue
2560 last_chapter = {'start_time': start_time, 'title': title}
2561 chapters.append(last_chapter)
2562 last_chapter['end_time'] = duration
84213ea8
S
2563 return chapters
2564
def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
    """Parse the JSON matched by *regex* in *webpage*.

    The pattern followed by the initial-data boundary is tried before the
    bare pattern; '{}' is parsed when neither matches.
    """
    patterns = (
        r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
        regex,
    )
    json_text = self._search_regex(patterns, webpage, name, default='{}')
    return self._parse_json(json_text, video_id, fatal=False)
84213ea8 2569
a1c5d2ca
M
def _extract_comment(self, comment_renderer, parent=None):
    """Convert *comment_renderer* into a comment dict.

    Returns None when the renderer has no 'commentId'. 'parent' is set to
    'root' for comments without a parent.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return

    text = self._get_text(comment_renderer, 'contentText')

    # note: timestamp is an estimate calculated from the current time and time_text
    timestamp, time_text = self._extract_time_text(comment_renderer, 'publishedTimeText')
    author = self._get_text(comment_renderer, 'authorText')
    author_id = try_get(
        comment_renderer, lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)

    vote_getters = (
        lambda x: x['voteCount']['simpleText'],
        lambda x: x['likeCount'],
    )
    votes = parse_count(try_get(comment_renderer, vote_getters, compat_str)) or 0
    author_thumbnail = try_get(
        comment_renderer, lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)

    author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
    action_buttons = try_get(
        comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {}
    is_favorited = 'creatorHeart' in action_buttons

    comment = {
        'id': comment_id,
        'text': text,
        'timestamp': timestamp,
        'time_text': time_text,
        'like_count': votes,
        'is_favorited': is_favorited,
        'author': author,
        'author_id': author_id,
        'author_thumbnail': author_thumbnail,
        'author_is_uploader': author_is_uploader,
        'parent': parent or 'root'
    }
    return comment
2604
def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """Yield comment dicts by following the comment continuations.

    Called without *parent* for top-level comments and recursively, with the
    parent comment id, for the replies of a thread. *tracker* is a dict of
    counters shared across the recursive calls; it is created here when not
    supplied.
    """

    get_single_config_arg = lambda c: self._configuration_arg(c, [''])[0]

    def extract_header(contents):
        """Return the continuation of the requested sort order, if any."""
        _continuation = None
        for content in contents:
            comments_header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
            expected_comment_count = self._get_count(
                comments_header_renderer, 'countText', 'commentsCount')

            if expected_comment_count:
                tracker['est_total'] = expected_comment_count
                self.to_screen(f'Downloading ~{expected_comment_count} comments')
            comment_sort_index = int(get_single_config_arg('comment_sort') != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = str_or_none(sort_menu_item.get('title'))
            if not sort_text:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text.lower())
            break
        return _continuation

    def extract_thread(contents):
        """Yield the comments in *contents*, each followed by its replies."""
        if not parent:
            tracker['current_page_thread'] = 0
        for content in contents:
            if not parent and tracker['total_parent_comments'] >= max_parents:
                # A bare yield (None) tells the consuming loop below to stop
                yield
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            comment_renderer = get_first(
                (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                expected_type=dict, default={})

            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue

            tracker['running_total'] += 1
            tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
            yield comment

            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                tracker['current_page_thread'] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, ytcfg, video_id,
                    parent=comment.get('id'), tracker=tracker)
                # Replies are capped both per thread and by the remaining overall reply budget
                for reply_comment in itertools.islice(comment_entries_iter, min(max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments']))):
                    yield reply_comment

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = dict(
            running_total=0,
            est_total=0,
            current_page_thread=0,
            total_parent_comments=0,
            total_reply_comments=0)

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
    if max_depth:
        self._downloader.deprecation_warning(
            '[youtube] max_comment_depth extractor argument is deprecated. Set max replies in the max-comments extractor argument instead.')
    if max_depth == 1 and parent:
        return

    # Missing or unparsable limits fall back to sys.maxsize.
    # max_comments is unpacked for position only and is not used in this function.
    max_comments, max_parents, max_replies, max_replies_per_thread, *_ = map(
        lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4)

    continuation = self._extract_continuation(root_continuation_data)
    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent:
        self.report_warning(message, video_id=video_id)

    response = None
    # Only the top-level call has a header (comment count and sort menu) to process first
    is_first_continuation = parent is None

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        comment_prog_str = f"({tracker['running_total']}/{tracker['est_total']})"
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                    tracker['current_page_thread'], comment_prog_str)
        else:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)

        response = self._extract_response(
            item_id=None, query=continuation,
            ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
            check_get_keys='onResponseReceivedEndpoints')

        continuation_contents = traverse_obj(
            response, 'onResponseReceivedEndpoints', expected_type=list, default=[])

        # Cleared so that the loop ends unless this page provides a new continuation
        continuation = None
        for continuation_section in continuation_contents:
            continuation_items = traverse_obj(
                continuation_section,
                (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
                get_all=False, expected_type=list) or []
            if is_first_continuation:
                continuation = extract_header(continuation_items)
                is_first_continuation = False
                if continuation:
                    break
                continue

            for entry in extract_thread(continuation_items):
                if not entry:
                    # Sentinel from extract_thread: the parent comment limit was reached
                    return
                yield entry
            continuation = self._extract_continuation({'contents': continuation_items})
            if continuation:
                break
a1c5d2ca 2741
def _get_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction

    Returns an iterator over the comments of the 'comment-item-section'
    found in *contents*, limited to the first 'max_comments' value.
    """
    def iter_comments(page_contents):
        comment_section = None
        for section in traverse_obj(page_contents, (..., 'itemSectionRenderer'), default={}):
            if section.get('sectionIdentifier') == 'comment-item-section':
                comment_section = section
                break
        yield from self._comment_entries(comment_section, ytcfg, video_id)

    limit = int_or_none(self._configuration_arg('max_comments', [''])[0])
    return itertools.islice(iter_comments(contents), 0, limit)
a1c5d2ca 2752
109dd3b2 2753 @staticmethod
99e9e001 2754 def _get_checkok_params():
2755 return {'contentCheckOk': True, 'racyCheckOk': True}
2756
2757 @classmethod
2758 def _generate_player_context(cls, sts=None):
109dd3b2 2759 context = {
2760 'html5Preference': 'HTML5_PREF_WANTS',
2761 }
2762 if sts is not None:
2763 context['signatureTimestamp'] = sts
2764 return {
2765 'playbackContext': {
2766 'contentPlaybackContext': context
a1a7907b 2767 },
99e9e001 2768 **cls._get_checkok_params()
109dd3b2 2769 }
2770
e7e94f2a
D
@staticmethod
def _is_agegated(player_response):
    """Return True if *player_response* indicates an age-gated video.

    That is the case when 'playabilityStatus' has a
    'desktopLegacyAgeGateReason', or when its 'status' or 'reason' contains
    one of the known age-gate markers.
    """
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    # The markers are lowercase while status values are written in upper case
    # elsewhere in this extractor (e.g. 'UNPLAYABLE'), so compare case-insensitively
    lowered_reasons = [reason.lower() for reason in reasons if isinstance(reason, str)]
    return any(expected in reason for expected in AGE_GATE_REASONS for reason in lowered_reasons)
2782
@staticmethod
def _is_unplayable(player_response):
    """Return True if the playability status of *player_response* is 'UNPLAYABLE'."""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'
9275f62c 2786
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr):
    """Download the 'player' API response of *video_id* for *client*.

    The signature timestamp is only looked up when *player_url* is given.
    Returns None instead of an empty response.
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)

    sts = None
    if player_url:
        sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)

    headers = self.generate_api_headers(
        ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)

    yt_query = {'videoId': video_id, **self._generate_player_context(sts)}
    note = 'Downloading %s player API JSON' % client.replace('_', ' ').strip()
    response = self._extract_response(
        item_id=video_id, ep='player', query=yt_query,
        ytcfg=player_ytcfg, headers=headers, fatal=True,
        default_client=client, note=note)
    return response or None
2803
def _get_requested_clients(self, url, smuggled_data):
    """Return the de-duplicated list of player clients to query.

    Clients come from the 'player_client' extractor argument, where
    'default' and 'all' expand to lists; unknown names are reported and
    skipped. For music URLs the existing '<client>_music' variants are added.
    """
    default_clients = ['android', 'web']
    # Clients whose name starts with an underscore cannot be requested
    selectable = [name for name in INNERTUBE_CLIENTS.keys() if name[:1] != '_']
    selectable.sort(key=lambda name: INNERTUBE_CLIENTS[name]['priority'], reverse=True)

    chosen = []
    for name in self._configuration_arg('player_client'):
        if name in selectable:
            chosen.append(name)
        elif name == 'default':
            chosen.extend(default_clients)
        elif name == 'all':
            chosen.extend(selectable)
        else:
            self.report_warning(f'Skipping unsupported client {name}')
    if not chosen:
        chosen = default_clients

    is_music = smuggled_data.get('is_music_url') or self.is_music_url(url)
    if is_music:
        chosen.extend(
            f'{name}_music' for name in chosen if f'{name}_music' in INNERTUBE_CLIENTS)

    return orderedSet(chosen)
cf7e015f 2827
c0bc527b
M
def _extract_player_ytcfg(self, client, video_id):
    """Download and return the ytcfg of *client*.

    Only 'web_music' and 'web_embedded' have a page to fetch; an empty
    dict is returned for other clients or when nothing could be extracted.
    """
    config_pages = {
        'web_music': 'https://music.youtube.com',
        'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
    }
    url = config_pages.get(client)
    if not url:
        return {}

    note = 'Downloading %s config' % client.replace('_', ' ').strip()
    webpage = self._download_webpage(url, video_id, fatal=False, note=note)
    return self.extract_ytcfg(video_id, webpage) or {}
2837
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
    """Collect the player responses of all *clients*.

    Returns a tuple (player_responses, player_url). The initial player
    response embedded in *webpage*, if any, is always included. An error is
    raised only when no response at all could be obtained; other errors are
    reported as warnings.
    """
    initial_pr = None
    if webpage:
        initial_pr = self._extract_yt_initial_variable(
            webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
            video_id, 'initial player response')

    original_clients = clients
    # Reversed copy used as a stack: pop() returns the clients in their original order
    clients = clients[::-1]
    prs = []

    def append_client(client_name):
        # Queue a fallback client, unless it is unknown or was requested explicitly
        if client_name in INNERTUBE_CLIENTS and client_name not in original_clients:
            clients.append(client_name)

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr:
        pr = dict(initial_pr)
        pr['streamingData'] = None
        prs.append(pr)

    last_error = None
    tried_iframe_fallback = False
    player_url = None
    while clients:
        client = clients.pop()
        player_ytcfg = master_ytcfg if client == 'web' else {}
        if 'configs' not in self._configuration_arg('player_skip'):
            player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg

        # A player URL found for an earlier client is reused
        player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        # The fallback download of the player URL is attempted at most once
        if not player_url and not tried_iframe_fallback and require_js_player:
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            # For the web client the response embedded in the webpage is used when available
            pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
                client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr)
        except ExtractorError as e:
            # Only the most recent error is kept; earlier ones are downgraded to warnings
            if last_error:
                self.report_warning(last_error)
            last_error = e
            continue

        if pr:
            prs.append(pr)

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if client.endswith('_agegate') and self._is_unplayable(pr) and self.is_authenticated:
            append_client(client.replace('_agegate', '_creator'))
        elif self._is_agegated(pr):
            append_client(f'{client}_agegate')

    if last_error:
        if not len(prs):
            raise last_error
        self.report_warning(last_error)
    return prs, player_url
11f9be09 2904
def _extract_formats(self, streaming_data, video_id, player_url, is_live):
    """Yield format dicts for all entries of *streaming_data*.

    The direct 'formats' / 'adaptiveFormats' entries are yielded first,
    with their signature and n parameter decrypted through *player_url*
    where needed. The formats of the HLS and DASH manifests follow, unless
    they are skipped by configuration or duplicate an already seen itag
    for the same protocol.
    """
    itags, stream_ids = {}, []
    itag_qualities, res_qualities = {}, {}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        # The same itag may occur once per audio track
        stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            quality = fmt.get('audioQuality', '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            # Remembered so that manifest formats can inherit the quality later
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            sc = compat_parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not (sc and fmt_url and encrypted_sig):
                continue
            if not player_url:
                continue
            signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
            sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
            fmt_url += '&' + sp + '=' + signature

        query = parse_qs(fmt_url)
        throttled = False
        if query.get('n'):
            try:
                fmt_url = update_url_query(fmt_url, {
                    'n': self._decrypt_nsig(query['n'][0], video_id, player_url)})
            except ExtractorError as e:
                self.report_warning(
                    f'nsig extraction failed: You may experience throttling for some formats\n'
                    f'n = {query["n"][0]} ; player = {player_url}\n{e}', only_once=True)
                throttled = True

        if itag:
            itags[itag] = 'https'
            stream_ids.append(stream_id)

        tbr = float_or_none(
            fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': join_nonempty(
                '%s%s' % (audio_track.get('displayName') or '',
                          ' (default)' if audio_track.get('audioIsDefault') else ''),
                # quality is None when the format has neither 'quality' nor 'audioQuality'
                fmt.get('qualityLabel') or (quality or '').replace('audio_quality_', ''),
                throttled and 'THROTTLED', delim=', '),
            'source_preference': -10 if throttled else -1,
            'fps': int_or_none(fmt.get('fps')) or None,
            'height': height,
            'quality': q(quality),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': audio_track.get('id', '').split('.')[0],
            'language_preference': 1 if audio_track.get('audioIsDefault') else -1,
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        # For single-stream formats the total bitrate is that of the one stream
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        yield dct

    live_from_start = is_live and self.get_param('live_from_start')
    skip_manifests = self._configuration_arg('skip')
    if not self.get_param('youtube_include_hls_manifest', True):
        skip_manifests.append('hls')
    get_dash = 'dash' not in skip_manifests and (
        not is_live or live_from_start or self._configuration_arg('include_live_dash'))
    get_hls = not live_from_start and 'hls' not in skip_manifests

    def process_manifest_format(f, proto, itag):
        """Set id and quality of manifest format *f*; False means it is a duplicate."""
        if itag in itags:
            if itags[itag] == proto or f'{itag}-{proto}' in itags:
                return False
            # Same itag seen through another protocol: disambiguate the id
            itag = f'{itag}-{proto}'
        if itag:
            f['format_id'] = itag
            itags[itag] = proto

        # Prefer the quality recorded for the itag, then the one for the height
        f['quality'] = next((
            q(qdict[val])
            for val, qdict in ((f.get('format_id', '').split('-')[0], itag_qualities), (f.get('height'), res_qualities))
            if val in qdict), -1)
        return True

    for sd in streaming_data:
        hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
                if process_manifest_format(f, 'hls', self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)):
                    yield f

        dash_manifest_url = get_dash and sd.get('dashManifestUrl')
        if dash_manifest_url:
            for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
                if process_manifest_format(f, 'dash', f['format_id']):
                    f['filesize'] = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                    if live_from_start:
                        f['is_from_start'] = True

                    yield f
11f9be09 3057
def _extract_storyboard(self, player_responses, duration):
    """Yield one ``mhtml`` storyboard format per level found in the storyboard spec.

    The spec string is read from ``storyboards.playerStoryboardSpecRenderer.spec``
    of the player responses. It is ``|``-separated: the first field is the base
    URL template and every following field describes one storyboard level as
    eight ``#``-separated values.

    @param player_responses  player response objects to search for the spec
    @param duration          video duration; it is divided by the fragment
                             count, so it must be a number
    """
    spec = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
    # str.split() never returns an empty list (a missing spec gives ['']), so
    # emptiness has to be checked on the base URL rather than on the list
    base_url = spec.pop()
    if not base_url:
        return
    L = len(spec) - 1
    for i, args in enumerate(spec):
        args = args.split('#')
        counts = list(map(int_or_none, args[:5]))
        if len(args) != 8 or not all(counts):
            self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
            continue
        width, height, frame_count, cols, rows = counts
        N, sigh = args[6:]

        # The list was reversed above, hence the level placeholder counts down
        url = base_url.replace('$L', str(L - i)).replace('$N', N) + f'&sigh={sigh}'
        # Each fragment is a grid of cols x rows frames; the last one may be partial
        fragment_count = frame_count / (cols * rows)
        fragment_duration = duration / fragment_count
        yield {
            'format_id': f'sb{i}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': url,
            'width': width,
            'height': height,
            'fragments': [{
                'path': url.replace('$M', str(j)),
                # Clamp so that the final fragment does not run past the video end
                'duration': min(fragment_duration, duration - (j * fragment_duration)),
            } for j in range(math.ceil(fragment_count))],
        }
3092
def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
    """Fetch the watch page (unless skipped) and the player responses.

    Returns the tuple ``(webpage, master_ytcfg, player_responses, player_url)``;
    ``webpage`` is None when 'webpage' is listed in the ``player_skip``
    extractor argument.
    """
    skip_webpage = 'webpage' in self._configuration_arg('player_skip')
    if skip_webpage:
        webpage = None
    else:
        webpage = self._download_webpage(
            webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)

    # Fall back to the default ytcfg when none can be extracted from the page
    master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()

    player_responses, player_url = self._extract_player_responses(
        self._get_requested_clients(url, smuggled_data),
        video_id, webpage, master_ytcfg)

    return webpage, master_ytcfg, player_responses, player_url
3106
def _list_formats(self, video_id, microformats, video_details, player_responses, player_url):
    """Determine liveness and extract the formats of a video.

    Returns ``(live_broadcast_details, is_live, streaming_data, formats)``.
    """
    broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))

    # videoDetails takes precedence; the microformat is only a fallback
    live_now = get_first(video_details, 'isLive')
    if live_now is None:
        live_now = get_first(broadcast_details, 'isLiveNow')

    all_streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
    format_list = list(self._extract_formats(all_streaming_data, video_id, player_url, live_now))

    return broadcast_details, live_now, all_streaming_data, format_list
3117
def _real_extract(self, url):
    """Extract the info dict of a single video.

    Downloads the player responses (and the watch page unless skipped), then
    builds formats, thumbnails, subtitles and metadata from them. Returns a
    ``url_result`` for a trailer when one is offered, a playlist result for
    multifeed videos (unless --no-playlist or ``force_singlefeed`` is set),
    and the video info dict otherwise.
    """
    url, smuggled_data = unsmuggle_url(url, {})
    video_id = self._match_id(url)

    base_url = self.http_scheme() + '//www.youtube.com/'
    webpage_url = base_url + 'watch?v=' + video_id

    webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)

    playability_statuses = traverse_obj(
        player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])

    trailer_video_id = get_first(
        playability_statuses,
        ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
        expected_type=str)
    if trailer_video_id:
        return self.url_result(
            trailer_video_id, self.ie_key(), trailer_video_id)

    # Without a webpage there are no meta tags to search
    search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
                   if webpage else (lambda x: None))

    video_details = traverse_obj(
        player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
    microformats = traverse_obj(
        player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
        expected_type=dict, default=[])
    video_title = (
        get_first(video_details, 'title')
        or self._get_text(microformats, (..., 'title'))
        or search_meta(['og:title', 'twitter:title', 'title']))
    video_description = get_first(video_details, 'shortDescription')

    multifeed_metadata_list = get_first(
        player_responses,
        ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
        expected_type=str)
    if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):
        if self.get_param('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
        else:
            entries = []
            feed_ids = []
            for feed in multifeed_metadata_list.split(','):
                # Unquote should take place before split on comma (,) since textual
                # fields may contain comma as well (see
                # https://github.com/ytdl-org/youtube-dl/issues/8536)
                feed_data = compat_parse_qs(
                    compat_urllib_parse_unquote_plus(feed))

                def feed_entry(name):
                    return try_get(
                        feed_data, lambda x: x[name][0], compat_str)

                feed_id = feed_entry('id')
                if not feed_id:
                    continue
                feed_title = feed_entry('title')
                title = video_title
                if feed_title:
                    title += ' (%s)' % feed_title
                entries.append({
                    '_type': 'url_transparent',
                    'ie_key': 'Youtube',
                    'url': smuggle_url(
                        '%swatch?v=%s' % (base_url, feed_data['id'][0]),
                        {'force_singlefeed': True}),
                    'title': title,
                })
                feed_ids.append(feed_id)
            self.to_screen(
                'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
                % (', '.join(feed_ids), video_id))
            return self.playlist_result(
                entries, video_id, video_title, video_description)

    live_broadcast_details, is_live, streaming_data, formats = self._list_formats(video_id, microformats, video_details, player_responses, player_url)

    if not formats:
        if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
            self.report_drm(video_id)
        pemr = get_first(
            playability_statuses,
            ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
        reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
        subreason = clean_html(self._get_text(pemr, 'subreason') or '')
        if subreason:
            if subreason == 'The uploader has not made this video available in your country.':
                countries = get_first(microformats, 'availableCountries')
                if not countries:
                    regions_allowed = search_meta('regionsAllowed')
                    countries = regions_allowed.split(',') if regions_allowed else None
                self.raise_geo_restricted(subreason, countries, metadata_available=True)
            # reason may be missing even though a subreason is present
            reason = f'{reason}. {subreason}' if reason else subreason
        if reason:
            self.raise_no_formats(reason, expected=True)

    keywords = get_first(video_details, 'keywords', expected_type=list) or []
    if not keywords and webpage:
        keywords = [
            unescapeHTML(m.group('content'))
            for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
    for keyword in keywords:
        if keyword.startswith('yt:stretch='):
            mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
            if mobj:
                # NB: float is intentional for forcing float division
                w, h = (float(v) for v in mobj.groups())
                if w > 0 and h > 0:
                    ratio = w / h
                    for f in formats:
                        if f.get('vcodec') != 'none':
                            f['stretched_ratio'] = ratio
                    break
    thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))
    thumbnail_url = search_meta(['og:image', 'twitter:image'])
    if thumbnail_url:
        thumbnails.append({
            'url': thumbnail_url,
        })
    # Snapshot of the thumbnails that were actually advertised, taken before
    # the guessed URLs are appended below
    original_thumbnails = thumbnails.copy()

    # The best resolution thumbnails sometimes does not appear in the webpage
    # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
    # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
    thumbnail_names = [
        'maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3',
        'hqdefault', 'hq1', 'hq2', 'hq3', '0',
        'mqdefault', 'mq1', 'mq2', 'mq3',
        'default', '1', '2', '3'
    ]
    n_thumbnail_names = len(thumbnail_names)
    thumbnails.extend({
        'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
            video_id=video_id, name=name, ext=ext,
            webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
    } for name in thumbnail_names for ext in ('webp', 'jpg'))
    for thumb in thumbnails:
        # Rank by position in thumbnail_names; unknown names rank last, webp beats jpg
        i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
        thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
    self._remove_duplicate_formats(thumbnails)
    self._downloader._sort_thumbnails(original_thumbnails)

    category = get_first(microformats, 'category') or search_meta('genre')
    channel_id = str_or_none(
        get_first(video_details, 'channelId')
        or get_first(microformats, 'externalChannelId')
        or search_meta('channelId'))
    duration = int_or_none(
        get_first(video_details, 'lengthSeconds')
        or get_first(microformats, 'lengthSeconds')
        or parse_duration(search_meta('duration'))) or None
    owner_profile_url = get_first(microformats, 'ownerProfileUrl')

    live_content = get_first(video_details, 'isLiveContent')
    is_upcoming = get_first(video_details, 'isUpcoming')
    if is_live is None:
        if is_upcoming or live_content is False:
            is_live = False
    if is_upcoming is None and (live_content or is_live):
        is_upcoming = False
    live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
    live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
    if not duration and live_end_time and live_start_time:
        duration = live_end_time - live_start_time

    if is_live and self.get_param('live_from_start'):
        self._prepare_live_from_start_formats(formats, video_id, live_start_time, url, webpage_url, smuggled_data)

    formats.extend(self._extract_storyboard(player_responses, duration))

    # Source is given priority since formats that throttle are given lower source_preference
    # When throttling issue is fully fixed, remove this
    self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang', 'proto'))

    info = {
        'id': video_id,
        'title': video_title,
        'formats': formats,
        'thumbnails': thumbnails,
        # The best thumbnail that we are sure exists. Prevents unnecessary
        # URL checking if user don't care about getting the best possible thumbnail
        'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
        'description': video_description,
        'upload_date': unified_strdate(
            get_first(microformats, 'uploadDate')
            or search_meta('uploadDate')),
        'uploader': get_first(video_details, 'author'),
        'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
        'uploader_url': owner_profile_url,
        'channel_id': channel_id,
        'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
        'duration': duration,
        'view_count': int_or_none(
            get_first((video_details, microformats), (..., 'viewCount'))
            or search_meta('interactionCount')),
        'average_rating': float_or_none(get_first(video_details, 'averageRating')),
        'age_limit': 18 if (
            get_first(microformats, 'isFamilySafe') is False
            or search_meta('isFamilyFriendly') == 'false'
            or search_meta('og:restrictions:age') == '18+') else 0,
        'webpage_url': webpage_url,
        'categories': [category] if category else None,
        'tags': keywords,
        'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
        'is_live': is_live,
        'was_live': (False if is_live or is_upcoming or live_content is False
                     else None if is_live is None or is_upcoming is None
                     else live_content),
        'live_status': 'is_upcoming' if is_upcoming else None,  # rest will be set by YoutubeDL
        'release_timestamp': live_start_time,
    }

    pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
    if pctr:
        def get_lang_code(track):
            return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
                    or track.get('languageCode'))

        # Converted into dicts to remove duplicates
        captions = {
            get_lang_code(sub): sub
            for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
        translation_languages = {
            lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
            for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}

        def process_language(container, base_url, lang_code, sub_name, query):
            lang_subs = container.setdefault(lang_code, [])
            for fmt in self._SUBTITLE_FORMATS:
                query.update({
                    'fmt': fmt,
                })
                lang_subs.append({
                    'ext': fmt,
                    'url': update_url_query(base_url, query),
                    'name': sub_name,
                })

        subtitles, automatic_captions = {}, {}
        for lang_code, caption_track in captions.items():
            base_url = caption_track.get('baseUrl')
            if not base_url:
                continue
            lang_name = self._get_text(caption_track, 'name', max_runs=1)
            if caption_track.get('kind') != 'asr':
                if not lang_code:
                    continue
                process_language(
                    subtitles, base_url, lang_code, lang_name, {})
                if not caption_track.get('isTranslatable'):
                    continue
            for trans_code, trans_name in translation_languages.items():
                if not trans_code:
                    continue
                # The 'tlang' query must stay the plain target language code;
                # only the dict key gets the '-<source language>' suffix
                target_lang = trans_code
                if caption_track.get('kind') != 'asr':
                    trans_code += f'-{lang_code}'
                    trans_name += format_field(lang_name, template=' from %s')
                process_language(
                    automatic_captions, base_url, trans_code, trans_name, {'tlang': target_lang})
        info['automatic_captions'] = automatic_captions
        info['subtitles'] = subtitles

    # Start/end times given in the URL; the fragment takes precedence over the query
    parsed_url = compat_urllib_parse_urlparse(url)
    for component in [parsed_url.fragment, parsed_url.query]:
        query = compat_parse_qs(component)
        for k, v in query.items():
            for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
                d_k += '_time'
                if d_k not in info and k in s_ks:
                    info[d_k] = parse_duration(query[k][0])

    # Youtube Music Auto-generated description
    if video_description:
        mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
        if mobj:
            release_year = mobj.group('release_year')
            release_date = mobj.group('release_date')
            if release_date:
                release_date = release_date.replace('-', '')
                if not release_year:
                    release_year = release_date[:4]
            info.update({
                # strip() must apply to the matched text, not to the group name
                'album': mobj.group('album').strip(),
                'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
                'track': mobj.group('track').strip(),
                'release_date': release_date,
                'release_year': int_or_none(release_year),
            })

    initial_data = None
    if webpage:
        initial_data = self._extract_yt_initial_variable(
            webpage, self._YT_INITIAL_DATA_RE, video_id,
            'yt initial data')
    if not initial_data:
        query = {'videoId': video_id}
        query.update(self._get_checkok_params())
        initial_data = self._extract_response(
            item_id=video_id, ep='next', fatal=False,
            ytcfg=master_ytcfg, query=query,
            headers=self.generate_api_headers(ytcfg=master_ytcfg),
            note='Downloading initial data API JSON')

    try:
        # This will error if there is no livechat
        initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
        info.setdefault('subtitles', {})['live_chat'] = [{
            'url': 'https://www.youtube.com/watch?v=%s' % video_id,  # url is needed to set cookies
            'video_id': video_id,
            'ext': 'json',
            'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
        }]
    except (KeyError, IndexError, TypeError):
        pass

    # Bound unconditionally: besides the metadata loop it is needed by the
    # availability check and by extract_comments() even without initial data
    contents = try_get(
        initial_data,
        lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
        list) or []

    if initial_data:
        info['chapters'] = (
            self._extract_chapters_from_json(initial_data, duration)
            or self._extract_chapters_from_engagement_panel(initial_data, duration)
            or None)

        for content in contents:
            vpir = content.get('videoPrimaryInfoRenderer')
            if vpir:
                stl = vpir.get('superTitleLink')
                if stl:
                    stl = self._get_text(stl)
                    if try_get(
                            vpir,
                            lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
                        info['location'] = stl
                    else:
                        mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
                        if mobj:
                            info.update({
                                'series': mobj.group(1),
                                'season_number': int(mobj.group(2)),
                                'episode_number': int(mobj.group(3)),
                            })
                for tlb in (try_get(
                        vpir,
                        lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
                        list) or []):
                    tbr = tlb.get('toggleButtonRenderer') or {}
                    for getter, regex in [(
                            lambda x: x['defaultText']['accessibility']['accessibilityData'],
                            r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
                                lambda x: x['accessibility'],
                                lambda x: x['accessibilityData']['accessibilityData'],
                            ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
                        label = (try_get(tbr, getter, dict) or {}).get('label')
                        if label:
                            mobj = re.match(regex, label)
                            if mobj:
                                info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
                                break
                sbr_tooltip = try_get(
                    vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
                if sbr_tooltip:
                    like_count, dislike_count = sbr_tooltip.split(' / ')
                    info.update({
                        'like_count': str_to_int(like_count),
                        'dislike_count': str_to_int(dislike_count),
                    })
            vsir = content.get('videoSecondaryInfoRenderer')
            if vsir:
                info['channel'] = self._get_text(vsir, ('owner', 'videoOwnerRenderer', 'title'))
                rows = try_get(
                    vsir,
                    lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
                    list) or []
                # A divider line is taken as the sign of several songs, in which
                # case the album/artist/track rows below are not used
                multiple_songs = False
                for row in rows:
                    if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
                        multiple_songs = True
                        break
                for row in rows:
                    mrr = row.get('metadataRowRenderer') or {}
                    mrr_title = mrr.get('title')
                    if not mrr_title:
                        continue
                    mrr_title = self._get_text(mrr, 'title')
                    mrr_contents_text = self._get_text(mrr, ('contents', 0))
                    if mrr_title == 'License':
                        info['license'] = mrr_contents_text
                    elif not multiple_songs:
                        if mrr_title == 'Album':
                            info['album'] = mrr_contents_text
                        elif mrr_title == 'Artist':
                            info['artist'] = mrr_contents_text
                        elif mrr_title == 'Song':
                            info['track'] = mrr_contents_text

    fallbacks = {
        'channel': 'uploader',
        'channel_id': 'uploader_id',
        'channel_url': 'uploader_url',
    }
    for to, frm in fallbacks.items():
        if not info.get(to):
            info[to] = info.get(frm)

    for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
        v = info.get(s_k)
        if v:
            info[d_k] = v

    is_private = get_first(video_details, 'isPrivate', expected_type=bool)
    is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
    is_membersonly = None
    is_premium = None
    if initial_data and is_private is not None:
        is_membersonly = False
        is_premium = False
        badge_labels = set()
        for content in contents:
            if not isinstance(content, dict):
                continue
            badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
        for badge_label in badge_labels:
            if badge_label.lower() == 'members only':
                is_membersonly = True
            elif badge_label.lower() == 'premium':
                is_premium = True
            elif badge_label.lower() == 'unlisted':
                is_unlisted = True

    info['availability'] = self._availability(
        is_private=is_private,
        needs_premium=is_premium,
        needs_subscription=is_membersonly,
        needs_auth=info['age_limit'] >= 18,
        is_unlisted=None if is_private is None else is_unlisted)

    info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)

    self.mark_watched(video_id, player_responses)

    return info
c5e8d7af 3564
a61fd4cf 3565
a6213a49 3566class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
5f6a1245 3567
def _extract_channel_id(self, webpage):
    """Return the channel ID found in the webpage.

    The ``channelId`` meta tag is preferred. Otherwise the ID is parsed out of
    the first channel URL found in the known URL meta tags.
    """
    channel_id = self._html_search_meta(
        'channelId', webpage, 'channel id', default=None)
    if channel_id:
        return channel_id
    channel_url = self._html_search_meta(
        ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
         'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
         'twitter:app:url:googleplay'), webpage, 'channel url')
    # The quantifier has to be inside the group: a repeated group only keeps
    # its last repetition, which would be the final character of the ID
    return self._search_regex(
        r'https?://(?:www\.)?youtube\.com/channel/([^/?#&]+)',
        channel_url, 'channel id')
15f6397c 3580
8bdd16b4 3581 @staticmethod
cd7c66cf 3582 def _extract_basic_item_renderer(item):
3583 # Modified from _extract_grid_item_renderer
201c1459 3584 known_basic_renderers = (
3585 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
cd7c66cf 3586 )
3587 for key, renderer in item.items():
201c1459 3588 if not isinstance(renderer, dict):
cd7c66cf 3589 continue
201c1459 3590 elif key in known_basic_renderers:
3591 return renderer
3592 elif key.startswith('grid') and key.endswith('Renderer'):
3593 return renderer
8bdd16b4 3594
def _grid_entries(self, grid_renderer):
    """Yield a result for every usable item of a grid renderer.

    Each item is resolved, in this order, as a playlist, a video, a channel
    or a generic navigation endpoint URL handled by one of the YouTube
    extractors. Items matching none of these yield nothing.
    """
    for item in grid_renderer['items']:
        if not isinstance(item, dict):
            continue
        renderer = self._extract_basic_item_renderer(item)
        if not isinstance(renderer, dict):
            continue
        title = self._get_text(renderer, 'title')

        playlist_id = renderer.get('playlistId')
        video_id = renderer.get('videoId')
        channel_id = renderer.get('channelId')

        if playlist_id:
            # playlist
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=title)
        elif video_id:
            # video
            yield self._extract_video(renderer)
        elif channel_id:
            # channel
            yield self.url_result(
                'https://www.youtube.com/channel/%s' % channel_id,
                ie=YoutubeTabIE.ie_key(), video_title=title)
        else:
            # generic endpoint URL support
            ep_url = urljoin('https://www.youtube.com/', try_get(
                renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
                compat_str))
            if not ep_url:
                continue
            # Only the first extractor that accepts the URL is used
            matching_ie = next(
                (ie for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE) if ie.suitable(ep_url)),
                None)
            if matching_ie is not None:
                yield self.url_result(
                    ep_url, ie=matching_ie.ie_key(), video_id=matching_ie._match_id(ep_url), video_title=title)
8bdd16b4 3634
def _shelf_entries_from_content(self, shelf_renderer):
    """Yield the entries embedded directly in a shelf renderer's content.

    Only ``gridRenderer`` and ``expandedShelfContentsRenderer`` content is
    processed; ``horizontalListRenderer`` is not supported yet.
    """
    content = shelf_renderer.get('content')
    if not isinstance(content, dict):
        return
    grid_like = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
    if grid_like:
        # TODO: add support for nested playlists so each shelf is processed
        # as separate playlist
        # TODO: this includes only first N items
        yield from self._grid_entries(grid_like)
    # TODO: handle content['horizontalListRenderer'] (currently ignored)
8bdd16b4 3650
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield the shelf's own URL (if any) followed by its embedded entries.

    With *skip_channels* set, a shelf whose URL points to a '/channels?'
    page yields nothing at all.
    """
    endpoint_path = try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Skipping links to other channels. Note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        links_to_channels = '/channels?' in shelf_url
        if skip_channels and links_to_channels:
            return
        yield self.url_result(
            shelf_url, video_title=self._get_text(shelf_renderer, 'title'))
    # Shelf may not contain shelf URL, fallback to extraction from content
    yield from self._shelf_entries_from_content(shelf_renderer)
c5e8d7af 3667
def _playlist_entries(self, video_list_renderer):
    """Yield an extracted video for every playlist item that carries a video ID."""
    for item in video_list_renderer['contents']:
        if not isinstance(item, dict):
            continue
        video_renderer = item.get('playlistVideoRenderer') or item.get('playlistPanelVideoRenderer')
        # Items without a usable renderer or without a video ID are skipped
        if isinstance(video_renderer, dict) and video_renderer.get('videoId'):
            yield self._extract_video(video_renderer)
07aeced6 3679
def _rich_entries(self, rich_grid_renderer):
    """Yield the video held in a rich item's content, if it has a video ID."""
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
3687
def _video_entry(self, video_renderer):
    """Return the extracted video of *video_renderer*, or None without a video ID."""
    if not video_renderer.get('videoId'):
        return None
    return self._extract_video(video_renderer)
dacb3a86 3692
def _post_thread_entries(self, post_thread_renderer):
    """Yield the entries referenced by a community post.

    In order: the attached video, the attached playlist, and every video
    linked inline in the post text (except the attached video itself).
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        video_entry = self._extract_video(attached_video)
        if video_entry:
            yield video_entry

    # playlist attachment
    playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    for run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(run, dict):
            continue
        link = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not link or not YoutubeIE.suitable(link):
            continue
        linked_id = YoutubeIE._match_id(link)
        # The attached video has already been yielded above
        if linked_id != video_id:
            yield self.url_result(link, ie=YoutubeIE.ie_key(), video_id=linked_id)
dacb3a86 3728
def _post_thread_continuation_entries(self, post_thread_continuation):
    """Yield the entries of every post thread in a continuation response.

    Items that are not dicts, or that carry no ``backstagePostThreadRenderer``
    dict, are skipped.
    """
    contents = post_thread_continuation.get('contents')
    if not isinstance(contents, list):
        return
    for content in contents:
        # Guard like the sibling iterators do: a non-dict item must not
        # abort the whole extraction with an AttributeError
        if not isinstance(content, dict):
            continue
        renderer = content.get('backstagePostThreadRenderer')
        if not isinstance(renderer, dict):
            continue
        yield from self._post_thread_entries(renderer)
07aeced6 3739
39ed931e 3740 r''' # unused
3741 def _rich_grid_entries(self, contents):
3742 for content in contents:
3743 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3744 if video_renderer:
3745 entry = self._video_entry(video_renderer)
3746 if entry:
3747 yield entry
3748 '''
def _extract_entries(self, parent_renderer, continuation_list):
    """Yield the entries found in ``parent_renderer['contents']``.

    The continuation token is not returned; it is stored in
    ``continuation_list[0]``, which is updated while the generator runs and
    is only final once the generator is exhausted.
    """
    # continuation_list is modified in-place with continuation_list = [continuation_token]
    continuation_list[:] = [None]
    contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
    for content in contents:
        if not isinstance(content, dict):
            continue
        is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
        if not is_renderer:
            # Not an item section: only rich items are handled here, and their
            # continuation is looked up on the parent renderer
            renderer = content.get('richItemRenderer')
            if renderer:
                for entry in self._rich_entries(renderer):
                    yield entry
                continuation_list[0] = self._extract_continuation(parent_renderer)
            continue
        isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
        for isr_content in isr_contents:
            if not isinstance(isr_content, dict):
                continue

            # Maps a renderer key to the callable producing its entries
            known_renderers = {
                'playlistVideoListRenderer': self._playlist_entries,
                'gridRenderer': self._grid_entries,
                'shelfRenderer': lambda x: self._shelf_entries(x),
                'backstagePostThreadRenderer': self._post_thread_entries,
                'videoRenderer': lambda x: [self._video_entry(x)],
                'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
                'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
            }
            for key, renderer in isr_content.items():
                if key not in known_renderers:
                    continue
                for entry in known_renderers[key](renderer):
                    # _video_entry may return None, so falsy entries are dropped
                    if entry:
                        yield entry
                continuation_list[0] = self._extract_continuation(renderer)
                # Only the first known renderer of each item is processed
                break

        # Fall back to the item section itself ...
        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(is_renderer)

    # ... and finally to the parent renderer
    if not continuation_list[0]:
        continuation_list[0] = self._extract_continuation(parent_renderer)
3792
def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
    """
    Yield all entries of a tab, following continuations page by page.

    The tab's own content is parsed first. Afterwards every continuation is
    requested through `_extract_response` until there is no continuation left,
    a request yields no response, or a response contains nothing that one of
    the known handlers can parse.

    @param tab: tab renderer; only its 'content' dict is read
    @param item_id: used to label each page request
    @param ytcfg: forwarded to header generation and to the API requests
    @param account_syncid: forwarded to `generate_api_headers`
    @param visitor_data: initial visitor data; replaced by the value found in
                         a response whenever one is present
    """
    # One-element list used as an out-parameter: `_extract_entries` writes the
    # continuation it finds into slot 0. The closure below resolves the name
    # at call time, so it always sees the most recently bound list.
    continuation_list = [None]

    def extract_entries(renderer):
        return self._extract_entries(renderer, continuation_list)

    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return

    parent_renderer = {}
    for renderer_key in ('sectionListRenderer', 'richGridRenderer'):
        candidate = try_get(tab_content, lambda x: x[renderer_key], dict)
        if candidate:
            parent_renderer = candidate
            break

    yield from extract_entries(parent_renderer)
    continuation = continuation_list[0]

    # Handlers for the 'continuationContents' response format
    continuation_handlers = {
        'playlistVideoListContinuation': self._playlist_entries,
        'gridContinuation': self._grid_entries,
        'itemSectionContinuation': self._post_thread_continuation_entries,
        'sectionListContinuation': extract_entries,  # for feeds
    }
    # Handlers for the 'appendContinuationItemsAction' response format, paired
    # with the key under which the handler expects the list of items
    item_handlers = {
        'gridPlaylistRenderer': (self._grid_entries, 'items'),
        'gridVideoRenderer': (self._grid_entries, 'items'),
        'gridChannelRenderer': (self._grid_entries, 'items'),
        'playlistVideoRenderer': (self._playlist_entries, 'contents'),
        'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
        'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
        'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
    }

    page_num = 0
    while continuation:
        page_num += 1
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
        response = self._extract_response(
            item_id='%s page %s' % (item_id, page_num),
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        if not response:
            break

        # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
        # See: https://github.com/ytdl-org/youtube-dl/issues/28702
        visitor_data = self._extract_visitor_data(response) or visitor_data

        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        contents_key = next(
            (key for key in continuation_contents if key in continuation_handlers), None)
        if contents_key is not None:
            continuation_renderer = continuation_contents[contents_key]
            # Fresh slot, so that a handler which does not report a
            # continuation falls through to `_extract_continuation`
            continuation_list = [None]
            yield from continuation_handlers[contents_key](continuation_renderer)
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            if continuation_renderer:
                continue

        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        # The type of the first item decides how the whole list is parsed
        continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        item_key = next(
            (key for key in continuation_item if key in item_handlers), None)
        if item_key is None:
            # Nothing recognisable in this response; stop paging
            break

        handler, list_key = item_handlers[item_key]
        video_items_renderer = {list_key: continuation_items}
        continuation_list = [None]
        yield from handler(video_items_renderer)
        continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
9558dcec 3869
@staticmethod
def _extract_selected_tab(tabs):
    """
    Return the renderer of the first tab whose 'selected' flag is True.

    For every tab, the 'tabRenderer' / 'expandableTabRenderer' value is looked
    up with `dict_get`.

    @param tabs: iterable of tab dicts
    @raises ExtractorError: when no tab is marked as selected
    """
    renderers = (
        dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
        for tab in tabs)
    selected = next(
        (renderer for renderer in renderers if renderer.get('selected') is True), None)
    if selected is None:
        raise ExtractorError('Unable to find selected tab')
    return selected
b82f815f 3878
@classmethod
def _extract_uploader(cls, data):
    """
    Extract uploader information from the secondary playlist sidebar.

    @param data: response containing the sidebar
    @returns: dict with whichever of 'uploader', 'uploader_id' and
              'uploader_url' could be found; empty when there is no video owner
    """
    sidebar = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner = try_get(
        sidebar, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if not owner:
        return {}

    def browse_endpoint_field(name):
        # String field of the owner's browse endpoint, or None
        return try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint'][name], compat_str)

    fields = {
        'uploader': owner.get('text'),
        'uploader_id': browse_endpoint_field('browseId'),
        'uploader_url': urljoin(
            'https://www.youtube.com/', browse_endpoint_field('canonicalBaseUrl')),
    }
    # Drop fields that could not be extracted
    return {key: value for key, value in fields.items() if value is not None}
8bdd16b4 3893
def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """
    Build the playlist result for the selected tab of a tabbed page.

    Metadata is read from the channel metadata renderer when present, otherwise
    from the playlist metadata renderer; uploader details fall back to the
    sidebar when no channel id is known.

    @param item_id: id used for the playlist when no channel id is found
    @param ytcfg: forwarded to `_entries` and the account/visitor lookups
    @param data: response the metadata is read from
    @param tabs: tab list; the selected one provides the entries
    """
    selected_tab = self._extract_selected_tab(tabs)
    primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')

    channel_name = channel_url = channel_id = None
    renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if renderer:
        channel_name = renderer.get('title')
        channel_url = renderer.get('channelUrl')
        channel_id = renderer.get('externalId')
    else:
        renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    title = description = None
    tags = []
    playlist_id = item_id
    if renderer:
        title = renderer.get('title')
        description = renderer.get('description', '')
        tags = renderer.get('keywords', '').split()
        # A known channel id takes precedence over the id from the URL
        if channel_id is not None:
            playlist_id = channel_id

    thumbnails = (
        self._extract_thumbnails(renderer, 'avatar')
        or self._extract_thumbnails(
            primary_sidebar_renderer, ('thumbnailRenderer', 'playlistVideoThumbnailRenderer', 'thumbnail')))

    playlist_stats = traverse_obj(primary_sidebar_renderer, 'stats')
    last_updated_unix, _ = self._extract_time_text(playlist_stats, 2)

    if title is None:
        title = self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag')) or playlist_id
    # Append the tab name (and expanded text, if any) to the title
    title = (
        title
        + format_field(selected_tab, 'title', ' - %s')
        + format_field(selected_tab, 'expandedText', ' - %s'))

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
        'view_count': self._get_count(playlist_stats, 1),
        'availability': self._extract_availability(data),
        'modified_date': strftime_or_none(last_updated_unix, '%Y%m%d'),
        'playlist_count': self._get_count(playlist_stats, 0)
    }
    if not channel_id:
        metadata.update(self._extract_uploader(data))
    # The channel fields mirror the (possibly just updated) uploader fields
    for suffix in ('', '_id', '_url'):
        metadata['channel' + suffix] = metadata['uploader' + suffix]

    return self.playlist_result(
        self._entries(
            selected_tab, playlist_id, ytcfg,
            self._extract_account_syncid(ytcfg, data),
            self._extract_visitor_data(data, ytcfg)),
        **metadata)
73c4ac2c 3957
def _extract_mix_playlist(self, playlist, playlist_id, data, ytcfg):
    """
    Yield the videos of a Mix playlist, requesting further pages as needed.

    Each page is parsed with `_playlist_entries`. Videos up to and including
    the last one yielded from the previous page are skipped. Extraction ends
    when a page has no (new) videos or when the very first video shows up
    again.

    @param playlist: playlist renderer of the first page
    @param playlist_id: id sent with every 'next' request
    @param data: initial response, used for the account sync id and visitor data
    @param ytcfg: forwarded to header generation and to the API requests
    """
    first_id = last_id = response = None
    for page_num in itertools.count(1):
        # The 'next' response may lack a playlist renderer, in which case
        # `playlist` is None; stop instead of handing that to `_playlist_entries`
        if not playlist:
            return
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Index just after the last video already yielded (0 if it is not on this page)
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        for video in videos[start:]:
            if video['id'] == first_id:
                self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                return
            yield video
        first_id = first_id or videos[0]['id']
        last_id = videos[-1]['id']

        # Fall back to an empty dict so that the `.get` lookups below reach
        # their defaults when the last item carries no watch endpoint
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
cd7c66cf 3992
def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """
    Build the result for a playlist renderer found in a response.

    When the playlist's endpoint resolves to a URL different from `url`, a URL
    result pointing at that URL is returned for YoutubeTabIE to handle.
    Otherwise the playlist is extracted as a Mix.

    @param item_id: fallback id when the playlist carries no 'playlistId'
    @param url: URL being extracted
    @param data: response; supplies the fallback title
    @param playlist: playlist renderer
    @param ytcfg: forwarded to the Mix extraction
    """
    playlist_id = playlist.get('playlistId') or item_id
    title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], compat_str)

    # Delegating everything except mix playlists to regular tab-based playlist URL
    endpoint_url = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, endpoint_url)
    if not playlist_url or playlist_url == url:
        return self.playlist_result(
            self._extract_mix_playlist(playlist, playlist_id, data, ytcfg),
            playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
c5e8d7af 4010
def _extract_availability(self, data):
    """
    Gets the availability of a given playlist/tab.
    Note: Unless YouTube tells us explicitly, we do not assume it is public

    The labels come from the badges of the primary sidebar renderer, plus the
    label of the selected entry of the privacy dropdown when there is one.
    @param data: response
    """
    renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    badge_labels = self._extract_badges(renderer)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    dropdown_entries = try_get(
        renderer, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
    for dropdown_entry in dropdown_entries:
        if not try_get(
                dropdown_entry, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool):
            continue
        label = self._get_text(dropdown_entry, ('privacyDropdownItemRenderer', 'label'))
        if label:
            badge_labels.add(label.lower())
            break

    # None means "not stated"; only an explicit label sets a flag
    is_private = is_unlisted = None
    for badge_label in badge_labels:
        if badge_label == 'public':
            is_unlisted = is_private = False
        elif badge_label == 'private':
            is_private = True
        elif badge_label == 'unlisted':
            is_unlisted = True
    return self._availability(is_private, False, False, False, is_unlisted)
4042
@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """
    Find a renderer among the items of the playlist sidebar.

    @param data: response containing the sidebar
    @param info_renderer: key of the wanted renderer
    @param expected_type: type the renderer value must have
    @returns: the first truthy matching renderer, or None when there is none
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    candidates = (
        try_get(item, lambda x: x[info_renderer], expected_type)
        for item in sidebar_items)
    return next((candidate for candidate in candidates if candidate), None)
4051
def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Request the playlist again with unavailable videos included.

    The browse id and params are taken from the 'show unavailable videos' menu
    button of the primary sidebar renderer. When the button (or one of its
    values) is missing, 'VL' + item_id and 'wgYCCAA=' are used instead.

    @returns: the API response, or None when there is no primary sidebar
              renderer; the request itself is non-fatal
    """
    renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not renderer:
        return

    browse_endpoint = {}
    menu_items = try_get(
        renderer, lambda x: x['menu']['menuRenderer']['items'], list) or []
    for menu_item in menu_items:
        if not isinstance(menu_item, dict):
            continue
        nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
        text = try_get(
            nav_item_renderer, lambda x: x['text']['simpleText'], compat_str)
        if text and text.lower() == 'show unavailable videos':
            browse_endpoint = try_get(
                nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            break

    headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        visitor_data=self._extract_visitor_data(data, ytcfg))
    query = {
        'params': browse_endpoint.get('params') or 'wgYCCAA=',
        'browseId': browse_endpoint.get('browseId') or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')
358de58c 4087
def _extract_webpage(self, url, item_id, fatal=True):
    """
    Download the webpage and parse its initial data, retrying when needed.

    An attempt is repeated (up to the 'extractor_retries' param, default 3)
    after a network error other than HTTP 403/429, and when the parsed data
    has neither 'contents' nor 'currentVideoEndpoint'.

    @param fatal: raise on failure instead of only reporting a warning
    @returns: (webpage, data) as far as they could be obtained
    """
    retries = self.get_param('extractor_retries', 3)
    webpage = data = last_error = None
    attempt = -1
    while attempt < retries:
        attempt += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if last_error:
            self.report_warning('%s. Retrying ...' % last_error)
        try:
            webpage = self._download_webpage(
                url, item_id,
                note='Downloading webpage%s' % (' (retry #%d)' % attempt if attempt else '',))
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as e:
            cause = e.cause
            is_http_403_or_429 = isinstance(cause, compat_HTTPError) and cause.code in (403, 429)
            if isinstance(cause, network_exceptions) and not is_http_403_or_429:
                last_error = error_to_compat_str(cause or e.msg)
                if attempt < retries:
                    continue
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            break

        # Only reached when the download and parsing above raised nothing,
        # since every path through the handler leaves this iteration
        try:
            self._extract_and_report_alerts(data)
        except ExtractorError as e:
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            break

        if dict_get(data, ('contents', 'currentVideoEndpoint')):
            break

        last_error = 'Incomplete yt initial data received'
        if attempt >= retries:
            if fatal:
                raise ExtractorError(last_error)
            self.report_warning(last_error)
            break

    return webpage, data
4133
def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """
    Get the initial data for `url`, from the webpage or else from the API.

    The webpage is tried first unless 'webpage' is listed in the 'skip'
    extractor argument. When that gives no data, the tab endpoint is queried.

    @param ytcfg: used as given; otherwise extracted from the downloaded webpage
    @param fatal: passed to the API fallback; also required for the
                  authentication check to raise
    @param webpage_fatal: whether a webpage failure is fatal
    @raises ExtractorError: when authenticated without a ytcfg, `fatal` is set
                            and 'authcheck' is not skipped
    @returns: (data, ytcfg)
    """
    data = None
    if 'webpage' not in self._configuration_arg('skip'):
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)
    if data:
        return data, ytcfg

    if not ytcfg and self.is_authenticated:
        msg = 'Playlists that require authentication may not extract correctly without a successful webpage download.'
        authcheck_enabled = 'authcheck' not in self._configuration_arg('skip')
        if authcheck_enabled and fatal:
            raise ExtractorError(
                msg + ' If you are not downloading private content, or your cookies are only for the first account and channel,'
                ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
                expected=True)
        self.report_warning(msg, only_once=True)

    data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return data, ytcfg
4150
def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """
    Resolve `url` through the API and fetch the response of its endpoint.

    The URL is first sent to 'navigation/resolve_url'. A browse endpoint in
    the answer is fetched via 'browse', a watch endpoint via 'next'; the
    browse endpoint is checked first.

    @raises ExtractorError: when `fatal` is set and no endpoint was resolved
    @returns: the endpoint response, or None (after a warning) when nothing
              was resolved and `fatal` is not set
    """
    headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolve_response = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)

    for ep_key, ep in (('browseEndpoint', 'browse'), ('watchEndpoint', 'next')):
        params = try_get(resolve_response, lambda x: x['endpoint'][ep_key], dict)
        if not params:
            continue
        return self._extract_response(
            item_id=item_id, query=params, ep=ep, headers=headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint'))

    err_note = 'Failed to resolve url (does the playlist exist?)'
    if not fatal:
        self.report_warning(err_note, item_id)
        return
    raise ExtractorError(err_note, expected=True)
4168
@staticmethod
def _smuggle_data(entries, data):
    """
    Yield every entry, with `data` smuggled into its 'url' when `data` is truthy.

    Entries are modified in place; with falsy `data` they pass through untouched.
    """
    for item in entries:
        if data:
            item['url'] = smuggle_url(item['url'], data)
        yield item
4175
# Value used for the search 'params' when `_search_results` is called without
# an explicit one; a falsy value leaves 'params' out of the request
_SEARCH_PARAMS = None
4177
def _search_results(self, query, params=NO_DEFAULT):
    """
    Yield the results of a search, requesting pages until no continuation is left.

    @param query: search string
    @param params: search 'params' value; defaults to `_SEARCH_PARAMS`, and is
                   left out of the request when falsy
    """
    if params is NO_DEFAULT:
        params = self._SEARCH_PARAMS
    data = {'query': query}
    if params:
        data['params'] = params

    # Where the result list lives in a first page and in a continuation page
    content_getters = (
        lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
        lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems'])

    # Slot 0 receives the continuation found by `_extract_entries`
    continuation_list = [None]
    page_num = 0
    while True:
        page_num += 1
        # Merge the continuation of the previous page (if any) into the request
        data.update(continuation_list[0] or {})
        search = self._extract_response(
            item_id='query "%s" page %s' % (query, page_num), ep='search', query=data,
            check_get_keys=('contents', 'onResponseReceivedCommands'))
        slr_contents = try_get(search, content_getters, list)
        yield from self._extract_entries({'contents': slr_contents}, continuation_list)
        if not continuation_list[0]:
            return
4198
4199
4200class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
4201 IE_DESC = 'YouTube Tabs'
4202 _VALID_URL = r'''(?x:
4203 https?://
4204 (?:\w+\.)?
4205 (?:
4206 youtube(?:kids)?\.com|
4207 %(invidious)s
4208 )/
4209 (?:
4210 (?P<channel_type>channel|c|user|browse)/|
4211 (?P<not_channel>
4212 feed/|hashtag/|
4213 (?:playlist|watch)\?.*?\blist=
4214 )|
4215 (?!(?:%(reserved_names)s)\b) # Direct URLs
4216 )
4217 (?P<id>[^/?\#&]+)
4218 )''' % {
4219 'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,
4220 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
4221 }
4222 IE_NAME = 'youtube:tab'
4223
4224 _TESTS = [{
4225 'note': 'playlists, multipage',
4226 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
4227 'playlist_mincount': 94,
4228 'info_dict': {
4229 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
976ae3ea 4230 'title': 'Igor Kleiner - Playlists',
a6213a49 4231 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
976ae3ea 4232 'uploader': 'Igor Kleiner',
a6213a49 4233 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
976ae3ea 4234 'channel': 'Igor Kleiner',
4235 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
4236 'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],
4237 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
4238 'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
a6213a49 4239 },
4240 }, {
4241 'note': 'playlists, multipage, different order',
4242 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
4243 'playlist_mincount': 94,
4244 'info_dict': {
4245 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
976ae3ea 4246 'title': 'Igor Kleiner - Playlists',
a6213a49 4247 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
4248 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
976ae3ea 4249 'uploader': 'Igor Kleiner',
4250 'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
4251 'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],
4252 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
4253 'channel': 'Igor Kleiner',
4254 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
a6213a49 4255 },
4256 }, {
4257 'note': 'playlists, series',
4258 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
4259 'playlist_mincount': 5,
4260 'info_dict': {
4261 'id': 'UCYO_jab_esuFRV4b17AJtAw',
4262 'title': '3Blue1Brown - Playlists',
4263 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
4264 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
4265 'uploader': '3Blue1Brown',
976ae3ea 4266 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
4267 'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
4268 'channel': '3Blue1Brown',
4269 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
4270 'tags': ['Mathematics'],
a6213a49 4271 },
4272 }, {
4273 'note': 'playlists, singlepage',
4274 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
4275 'playlist_mincount': 4,
4276 'info_dict': {
4277 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
4278 'title': 'ThirstForScience - Playlists',
4279 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
4280 'uploader': 'ThirstForScience',
4281 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
976ae3ea 4282 'uploader_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
4283 'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
4284 'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
4285 'tags': 'count:13',
4286 'channel': 'ThirstForScience',
a6213a49 4287 }
4288 }, {
4289 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
4290 'only_matching': True,
4291 }, {
4292 'note': 'basic, single video playlist',
4293 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
4294 'info_dict': {
4295 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
4296 'uploader': 'Sergey M.',
4297 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
4298 'title': 'youtube-dl public playlist',
976ae3ea 4299 'description': '',
4300 'tags': [],
4301 'view_count': int,
4302 'modified_date': '20201130',
4303 'channel': 'Sergey M.',
4304 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
4305 'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4306 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
a6213a49 4307 },
4308 'playlist_count': 1,
4309 }, {
4310 'note': 'empty playlist',
4311 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
4312 'info_dict': {
4313 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
4314 'uploader': 'Sergey M.',
4315 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
4316 'title': 'youtube-dl empty playlist',
976ae3ea 4317 'tags': [],
4318 'channel': 'Sergey M.',
4319 'description': '',
4320 'modified_date': '20160902',
4321 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
4322 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4323 'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
a6213a49 4324 },
4325 'playlist_count': 0,
4326 }, {
4327 'note': 'Home tab',
4328 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
4329 'info_dict': {
4330 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4331 'title': 'lex will - Home',
4332 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4333 'uploader': 'lex will',
4334 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 4335 'channel': 'lex will',
4336 'tags': ['bible', 'history', 'prophesy'],
4337 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4338 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4339 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
a6213a49 4340 },
4341 'playlist_mincount': 2,
4342 }, {
4343 'note': 'Videos tab',
4344 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
4345 'info_dict': {
4346 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4347 'title': 'lex will - Videos',
4348 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4349 'uploader': 'lex will',
4350 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 4351 'tags': ['bible', 'history', 'prophesy'],
4352 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4353 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4354 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4355 'channel': 'lex will',
a6213a49 4356 },
4357 'playlist_mincount': 975,
4358 }, {
4359 'note': 'Videos tab, sorted by popular',
4360 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
4361 'info_dict': {
4362 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4363 'title': 'lex will - Videos',
4364 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4365 'uploader': 'lex will',
4366 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 4367 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4368 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4369 'channel': 'lex will',
4370 'tags': ['bible', 'history', 'prophesy'],
4371 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
a6213a49 4372 },
4373 'playlist_mincount': 199,
4374 }, {
4375 'note': 'Playlists tab',
4376 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
4377 'info_dict': {
4378 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4379 'title': 'lex will - Playlists',
4380 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4381 'uploader': 'lex will',
4382 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 4383 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4384 'channel': 'lex will',
4385 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4386 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4387 'tags': ['bible', 'history', 'prophesy'],
a6213a49 4388 },
4389 'playlist_mincount': 17,
4390 }, {
4391 'note': 'Community tab',
4392 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
4393 'info_dict': {
4394 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4395 'title': 'lex will - Community',
4396 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4397 'uploader': 'lex will',
4398 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 4399 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4400 'channel': 'lex will',
4401 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4402 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4403 'tags': ['bible', 'history', 'prophesy'],
a6213a49 4404 },
4405 'playlist_mincount': 18,
4406 }, {
4407 'note': 'Channels tab',
4408 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
4409 'info_dict': {
4410 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4411 'title': 'lex will - Channels',
4412 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4413 'uploader': 'lex will',
4414 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 4415 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4416 'channel': 'lex will',
4417 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4418 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4419 'tags': ['bible', 'history', 'prophesy'],
a6213a49 4420 },
4421 'playlist_mincount': 12,
4422 }, {
4423 'note': 'Search tab',
4424 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
4425 'playlist_mincount': 40,
4426 'info_dict': {
4427 'id': 'UCYO_jab_esuFRV4b17AJtAw',
4428 'title': '3Blue1Brown - Search - linear algebra',
4429 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
4430 'uploader': '3Blue1Brown',
4431 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
976ae3ea 4432 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
4433 'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
4434 'tags': ['Mathematics'],
4435 'channel': '3Blue1Brown',
4436 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
a6213a49 4437 },
4438 }, {
4439 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4440 'only_matching': True,
4441 }, {
4442 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4443 'only_matching': True,
4444 }, {
4445 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4446 'only_matching': True,
4447 }, {
4448 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
4449 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
4450 'info_dict': {
4451 'title': '29C3: Not my department',
4452 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
4453 'uploader': 'Christiaan008',
4454 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
4455 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
976ae3ea 4456 'tags': [],
4457 'uploader_url': 'https://www.youtube.com/c/ChRiStIaAn008',
4458 'view_count': int,
4459 'modified_date': '20150605',
4460 'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
4461 'channel_url': 'https://www.youtube.com/c/ChRiStIaAn008',
4462 'channel': 'Christiaan008',
a6213a49 4463 },
4464 'playlist_count': 96,
4465 }, {
4466 'note': 'Large playlist',
4467 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
4468 'info_dict': {
4469 'title': 'Uploads from Cauchemar',
4470 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
4471 'uploader': 'Cauchemar',
4472 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
976ae3ea 4473 'channel_url': 'https://www.youtube.com/c/Cauchemar89',
4474 'tags': [],
4475 'modified_date': r're:\d{8}',
4476 'channel': 'Cauchemar',
4477 'uploader_url': 'https://www.youtube.com/c/Cauchemar89',
4478 'view_count': int,
4479 'description': '',
4480 'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
a6213a49 4481 },
4482 'playlist_mincount': 1123,
976ae3ea 4483 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 4484 }, {
4485 'note': 'even larger playlist, 8832 videos',
4486 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
4487 'only_matching': True,
4488 }, {
4489 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
4490 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
4491 'info_dict': {
4492 'title': 'Uploads from Interstellar Movie',
4493 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
4494 'uploader': 'Interstellar Movie',
4495 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
976ae3ea 4496 'uploader_url': 'https://www.youtube.com/c/InterstellarMovie',
4497 'tags': [],
4498 'view_count': int,
4499 'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4500 'channel_url': 'https://www.youtube.com/c/InterstellarMovie',
4501 'channel': 'Interstellar Movie',
4502 'description': '',
4503 'modified_date': r're:\d{8}',
a6213a49 4504 },
4505 'playlist_mincount': 21,
4506 }, {
4507 'note': 'Playlist with "show unavailable videos" button',
4508 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
4509 'info_dict': {
4510 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
4511 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
4512 'uploader': 'Phim Siêu Nhân Nhật Bản',
4513 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
976ae3ea 4514 'view_count': int,
4515 'channel': 'Phim Siêu Nhân Nhật Bản',
4516 'tags': [],
4517 'uploader_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
4518 'description': '',
4519 'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
4520 'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
4521 'modified_date': r're:\d{8}',
a6213a49 4522 },
4523 'playlist_mincount': 200,
976ae3ea 4524 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 4525 }, {
4526 'note': 'Playlist with unavailable videos in page 7',
4527 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
4528 'info_dict': {
4529 'title': 'Uploads from BlankTV',
4530 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
4531 'uploader': 'BlankTV',
4532 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
976ae3ea 4533 'channel': 'BlankTV',
4534 'channel_url': 'https://www.youtube.com/c/blanktv',
4535 'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',
4536 'view_count': int,
4537 'tags': [],
4538 'uploader_url': 'https://www.youtube.com/c/blanktv',
4539 'modified_date': r're:\d{8}',
4540 'description': '',
a6213a49 4541 },
4542 'playlist_mincount': 1000,
976ae3ea 4543 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 4544 }, {
4545 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
4546 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
4547 'info_dict': {
4548 'title': 'Data Analysis with Dr Mike Pound',
4549 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
4550 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
4551 'uploader': 'Computerphile',
4552 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
976ae3ea 4553 'uploader_url': 'https://www.youtube.com/user/Computerphile',
4554 'tags': [],
4555 'view_count': int,
4556 'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',
4557 'channel_url': 'https://www.youtube.com/user/Computerphile',
4558 'channel': 'Computerphile',
a6213a49 4559 },
4560 'playlist_mincount': 11,
4561 }, {
4562 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
4563 'only_matching': True,
4564 }, {
4565 'note': 'Playlist URL that does not actually serve a playlist',
4566 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
4567 'info_dict': {
4568 'id': 'FqZTN594JQw',
4569 'ext': 'webm',
4570 'title': "Smiley's People 01 detective, Adventure Series, Action",
4571 'uploader': 'STREEM',
4572 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
4573 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
4574 'upload_date': '20150526',
4575 'license': 'Standard YouTube License',
4576 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
4577 'categories': ['People & Blogs'],
4578 'tags': list,
4579 'view_count': int,
4580 'like_count': int,
a6213a49 4581 },
4582 'params': {
4583 'skip_download': True,
4584 },
4585 'skip': 'This video is not available.',
4586 'add_ie': [YoutubeIE.ie_key()],
4587 }, {
4588 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
4589 'only_matching': True,
4590 }, {
4591 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
4592 'only_matching': True,
4593 }, {
4594 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
4595 'info_dict': {
976ae3ea 4596 'id': 'zpsbVPFwsqk', # This will keep changing
a6213a49 4597 'ext': 'mp4',
976ae3ea 4598 'title': str,
a6213a49 4599 'uploader': 'Sky News',
4600 'uploader_id': 'skynews',
4601 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
4602 'upload_date': r're:\d{8}',
976ae3ea 4603 'description': str,
a6213a49 4604 'categories': ['News & Politics'],
4605 'tags': list,
4606 'like_count': int,
976ae3ea 4607 'release_timestamp': 1640164857,
4608 'channel': 'Sky News',
4609 'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',
4610 'age_limit': 0,
4611 'view_count': int,
4612 'thumbnail': 'https://i.ytimg.com/vi/zpsbVPFwsqk/maxresdefault_live.jpg',
4613 'playable_in_embed': True,
4614 'release_date': '20211222',
4615 'availability': 'public',
4616 'live_status': 'is_live',
4617 'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',
a6213a49 4618 },
4619 'params': {
4620 'skip_download': True,
4621 },
976ae3ea 4622 'expected_warnings': ['Ignoring subtitle tracks found in '],
a6213a49 4623 }, {
4624 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
4625 'info_dict': {
4626 'id': 'a48o2S1cPoo',
4627 'ext': 'mp4',
4628 'title': 'The Young Turks - Live Main Show',
4629 'uploader': 'The Young Turks',
4630 'uploader_id': 'TheYoungTurks',
4631 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
4632 'upload_date': '20150715',
4633 'license': 'Standard YouTube License',
4634 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
4635 'categories': ['News & Politics'],
4636 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
4637 'like_count': int,
a6213a49 4638 },
4639 'params': {
4640 'skip_download': True,
4641 },
4642 'only_matching': True,
4643 }, {
4644 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
4645 'only_matching': True,
4646 }, {
4647 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
4648 'only_matching': True,
4649 }, {
4650 'note': 'A channel that is not live. Should raise error',
4651 'url': 'https://www.youtube.com/user/numberphile/live',
4652 'only_matching': True,
4653 }, {
4654 'url': 'https://www.youtube.com/feed/trending',
4655 'only_matching': True,
4656 }, {
4657 'url': 'https://www.youtube.com/feed/library',
4658 'only_matching': True,
4659 }, {
4660 'url': 'https://www.youtube.com/feed/history',
4661 'only_matching': True,
4662 }, {
4663 'url': 'https://www.youtube.com/feed/subscriptions',
4664 'only_matching': True,
4665 }, {
4666 'url': 'https://www.youtube.com/feed/watch_later',
4667 'only_matching': True,
4668 }, {
4669 'note': 'Recommended - redirects to home page.',
4670 'url': 'https://www.youtube.com/feed/recommended',
4671 'only_matching': True,
4672 }, {
4673 'note': 'inline playlist with not always working continuations',
4674 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
4675 'only_matching': True,
4676 }, {
4677 'url': 'https://www.youtube.com/course',
4678 'only_matching': True,
4679 }, {
4680 'url': 'https://www.youtube.com/zsecurity',
4681 'only_matching': True,
4682 }, {
4683 'url': 'http://www.youtube.com/NASAgovVideo/videos',
4684 'only_matching': True,
4685 }, {
4686 'url': 'https://www.youtube.com/TheYoungTurks/live',
4687 'only_matching': True,
4688 }, {
4689 'url': 'https://www.youtube.com/hashtag/cctv9',
4690 'info_dict': {
4691 'id': 'cctv9',
4692 'title': '#cctv9',
976ae3ea 4693 'tags': [],
a6213a49 4694 },
4695 'playlist_mincount': 350,
4696 }, {
4697 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
4698 'only_matching': True,
4699 }, {
4700 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
4701 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
4702 'only_matching': True
4703 }, {
4704 'note': '/browse/ should redirect to /channel/',
4705 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
4706 'only_matching': True
4707 }, {
4708 'note': 'VLPL, should redirect to playlist?list=PL...',
4709 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
4710 'info_dict': {
4711 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
4712 'uploader': 'NoCopyrightSounds',
4713 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
4714 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
4715 'title': 'NCS Releases',
976ae3ea 4716 'uploader_url': 'https://www.youtube.com/c/NoCopyrightSounds',
4717 'channel_url': 'https://www.youtube.com/c/NoCopyrightSounds',
4718 'modified_date': r're:\d{8}',
4719 'view_count': int,
4720 'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
4721 'tags': [],
4722 'channel': 'NoCopyrightSounds',
a6213a49 4723 },
4724 'playlist_mincount': 166,
976ae3ea 4725 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 4726 }, {
4727 'note': 'Topic, should redirect to playlist?list=UU...',
4728 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
4729 'info_dict': {
4730 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
4731 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
4732 'title': 'Uploads from Royalty Free Music - Topic',
4733 'uploader': 'Royalty Free Music - Topic',
976ae3ea 4734 'tags': [],
4735 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
4736 'channel': 'Royalty Free Music - Topic',
4737 'view_count': int,
4738 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
4739 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
4740 'modified_date': r're:\d{8}',
4741 'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
4742 'description': '',
a6213a49 4743 },
4744 'expected_warnings': [
a6213a49 4745 'The URL does not have a videos tab',
976ae3ea 4746 r'[Uu]navailable videos (are|will be) hidden',
a6213a49 4747 ],
4748 'playlist_mincount': 101,
4749 }, {
4750 'note': 'Topic without a UU playlist',
4751 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
4752 'info_dict': {
4753 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
4754 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
976ae3ea 4755 'tags': [],
a6213a49 4756 },
4757 'expected_warnings': [
976ae3ea 4758 'the playlist redirect gave error',
a6213a49 4759 ],
4760 'playlist_mincount': 9,
4761 }, {
4762 'note': 'Youtube music Album',
4763 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
4764 'info_dict': {
4765 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
4766 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
976ae3ea 4767 'tags': [],
4768 'view_count': int,
4769 'description': '',
4770 'availability': 'unlisted',
4771 'modified_date': r're:\d{8}',
a6213a49 4772 },
4773 'playlist_count': 50,
4774 }, {
4775 'note': 'unlisted single video playlist',
4776 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
4777 'info_dict': {
4778 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
4779 'uploader': 'colethedj',
4780 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
4781 'title': 'yt-dlp unlisted playlist test',
976ae3ea 4782 'availability': 'unlisted',
4783 'tags': [],
4784 'modified_date': '20211208',
4785 'channel': 'colethedj',
4786 'view_count': int,
4787 'description': '',
4788 'uploader_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
4789 'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
4790 'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
a6213a49 4791 },
4792 'playlist_count': 1,
4793 }, {
4794 'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
4795 'url': 'https://www.youtube.com/feed/recommended',
4796 'info_dict': {
4797 'id': 'recommended',
4798 'title': 'recommended',
4799 },
4800 'playlist_mincount': 50,
4801 'params': {
4802 'skip_download': True,
4803 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
4804 },
4805 }, {
4806 'note': 'API Fallback: /videos tab, sorted by oldest first',
4807 'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',
4808 'info_dict': {
4809 'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
4810 'title': 'Cody\'sLab - Videos',
4811 'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',
4812 'uploader': 'Cody\'sLab',
4813 'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
976ae3ea 4814 'channel': 'Cody\'sLab',
4815 'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
4816 'tags': [],
4817 'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
4818 'uploader_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
a6213a49 4819 },
4820 'playlist_mincount': 650,
4821 'params': {
4822 'skip_download': True,
4823 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
4824 },
4825 }, {
4826 'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
4827 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
4828 'info_dict': {
4829 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
4830 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
4831 'title': 'Uploads from Royalty Free Music - Topic',
4832 'uploader': 'Royalty Free Music - Topic',
976ae3ea 4833 'modified_date': r're:\d{8}',
4834 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
4835 'description': '',
4836 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
4837 'tags': [],
4838 'channel': 'Royalty Free Music - Topic',
4839 'view_count': int,
4840 'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
a6213a49 4841 },
4842 'expected_warnings': [
976ae3ea 4843 'does not have a videos tab',
4844 r'[Uu]navailable videos (are|will be) hidden',
a6213a49 4845 ],
4846 'playlist_mincount': 101,
4847 'params': {
4848 'skip_download': True,
4849 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
4850 },
4851 }]
4852
@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle *url*.

    URLs that YoutubeIE accepts are always rejected here so that single
    videos are handled by the video extractor; everything else is decided
    by the inherited ``suitable`` check.
    """
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
9297939e 4857
def _real_extract(self, url):
    """Extract a tab/playlist page, carrying smuggled data through to its entries.

    The URL is unsmuggled first; music URLs are flagged in the smuggled data
    before the actual extraction is delegated to ``__real_extract``. When the
    result has entries, they are passed through ``_smuggle_data`` together
    with the same smuggled data.
    """
    url, smuggled_data = unsmuggle_url(url, {})
    if self.is_music_url(url):
        smuggled_data['is_music_url'] = True

    info_dict = self.__real_extract(url, smuggled_data)
    entries = info_dict.get('entries')
    if entries:
        info_dict['entries'] = self._smuggle_data(entries, smuggled_data)
    return info_dict
4866
# _VALID_URL followed by an optional "/tab" component (only attempted when the
# "channel_type" group matched) and whatever remains of the URL
_URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$')

def __real_extract(self, url, smuggled_data):
    """Resolve *url* to a tab, playlist or single-video result.

    The URL is normalised (host forced to www.youtube.com, YouTube Music
    channel/album/browse forms rewritten, bare channel pages redirected to
    "/videos" unless the "no-youtube-channel-redirect" compat option is set),
    the page data is fetched, and the result is built from whichever of the
    following is found first: browse tabs, a watch-page playlist, or a
    video id.

    Raises ExtractorError when the page cannot be recognised, when a music
    album cannot be resolved to a playlist, or when a "/live" URL did not
    redirect to a video.
    """
    item_id = self._match_id(url)
    parsed_url = compat_urlparse.urlparse(url)
    url = compat_urlparse.urlunparse(parsed_url._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])
    channel_redirect = 'no-youtube-channel-redirect' not in compat_opts
    tabs_path = ('contents', 'twoColumnBrowseResultsRenderer', 'tabs')

    def get_mobj(target):
        # Unmatched groups become '' so that callers can use str methods directly
        groups = self._URL_RE.match(target).groupdict()
        return {name: '' if value is None else value for name, value in groups.items()}

    mobj = get_mobj(url)
    redirect_warning = None
    pre = mobj['pre']
    # Youtube returns incomplete data if tabname is not lower case
    tab = mobj['tab'].lower()
    post = mobj['post']
    is_channel = not mobj['not_channel']

    if is_channel and smuggled_data.get('is_music_url'):
        id_prefix = item_id[:2]
        if id_prefix == 'VL':
            # Youtube music VL channels have an equivalent playlist
            item_id = item_id[2:]
            pre = f'https://www.youtube.com/playlist?list={item_id}'
            tab, post, is_channel = '', '', False
        elif id_prefix == 'MP':
            # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
            mdata = self._extract_tab_endpoint(
                f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
            murl = traverse_obj(
                mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                get_all=False, expected_type=compat_str)
            if not murl:
                raise ExtractorError('Failed to resolve album to playlist')
            return self.url_result(murl, ie=YoutubeTabIE.ie_key())
        elif mobj['channel_type'] == 'browse':
            # Youtube music /browse/ should be changed to /channel/
            pre = f'https://www.youtube.com/channel/{item_id}'

    if is_channel and not tab and channel_redirect:
        # Home URLs should redirect to /videos/
        redirect_warning = ('A channel/user page was given. All the channel\'s videos will be downloaded. '
                            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    url = f'{pre}{tab}{post}'
    mobj = get_mobj(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
        url = f'https://www.youtube.com/playlist?list={playlist_id}'
        mobj = get_mobj(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen(f'Downloading just video {video_id} because of --no-playlist')
            return self.url_result(
                f'https://www.youtube.com/watch?v={video_id}',
                ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen(f'Downloading playlist {playlist_id}; add --no-playlist to just download video {video_id}')

    data, ytcfg = self._extract_data(url, item_id)

    tabs = traverse_obj(data, tabs_path, expected_type=list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        tab_name = selected_tab.get('title', '')
        if channel_redirect:
            if mobj['tab'] == '/live':
                # Live tab should have redirected to the video
                raise ExtractorError('The channel is not currently live', expected=True)
            requested_tab = mobj['tab'][1:]
            if mobj['tab'] == '/videos' and tab_name.lower() != requested_tab:
                redirect_warning = f'The URL does not have a {requested_tab} tab'
                if not mobj['not_channel'] and item_id[:2] == 'UC':
                    # Topic channels don't have /videos. Use the equivalent playlist instead
                    pl_id = f'UU{item_id[2:]}'
                    pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
                    try:
                        data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True)
                    except ExtractorError:
                        redirect_warning += ' and the playlist redirect gave error'
                    else:
                        item_id, url, tab_name = pl_id, pl_url, requested_tab
                        redirect_warning += f'. Redirecting to playlist {pl_id} instead'
                if tab_name.lower() != requested_tab:
                    redirect_warning += f'. {tab_name} tab is being downloaded instead'

    if redirect_warning:
        self.report_warning(redirect_warning)
    self.write_debug(f'Final URL: {url}')

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)

    # The data may have been replaced above, so the tabs are looked up again
    tabs = traverse_obj(data, tabs_path, expected_type=list)
    if tabs:
        return self._extract_from_tabs(item_id, ytcfg, data, tabs)

    playlist = traverse_obj(
        data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

    video_id = traverse_obj(
        data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
    if not video_id:
        raise ExtractorError('Unable to recognize tab page')

    if mobj['tab'] != '/live':  # live tab is expected to redirect to video
        self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
    return self.url_result(
        f'https://www.youtube.com/watch?v={video_id}',
        ie=YoutubeIE.ie_key(), video_id=video_id)
c5e8d7af 4980
c5e8d7af 4981
class YoutubePlaylistIE(InfoExtractor):
    """Accepts bare playlist ids and ``...?list=<id>`` URLs and hands them to YoutubeTabIE."""

    IE_DESC = 'YouTube playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                %(invidious)s
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickman',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
            'view_count': int,
            'uploader_url': 'https://www.youtube.com/user/Wickydoo',
            'modified_date': r're:\d{8}',
            'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'channel': 'Wickman',
            'tags': [],
            'channel_url': 'https://www.youtube.com/user/Wickydoo',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'description': '',
            'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'tags': [],
            'modified_date': '20140919',
            'view_count': int,
            'channel': 'milan',
            'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'uploader_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 654,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
            'channel': 'LBK',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/c/愛低音的國王',
            'tags': [],
            'uploader_url': 'https://www.youtube.com/c/愛低音的國王',
            'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'modified_date': r're:\d{8}',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs claimed by YoutubeTabIE or carrying a ``v`` query value."""
        if YoutubeTabIE.suitable(url):
            return False
        # Imported inside the method, as in the original implementation
        from ..utils import parse_qs
        has_video_id = bool(parse_qs(url).get('v', [None])[0])
        return False if has_video_id else super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Rebuild the input as a canonical /playlist URL and delegate to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        # Checked on the input before it is replaced by the canonical URL below
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        # A bare playlist id has no query string, so fall back to the matched id
        query = parse_qs(url) or {'list': playlist_id}
        playlist_url = update_url_query('https://www.youtube.com/playlist', query)
        if is_music_url:
            playlist_url = smuggle_url(playlist_url, {'is_music_url': True})
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
29f7c58a 5091
5092
class YoutubeYtBeIE(InfoExtractor):
    """Turns youtu.be links that carry a ``list=`` parameter into watch URLs for YoutubeTabIE."""

    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'age_limit': 0,
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi_webp/yeWKywCrFtk/maxresdefault.webp',
            'channel': 'Backus-Page House Museum',
            'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
            'live_status': 'not_live',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
            'availability': 'public',
            'duration': 59,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the equivalent watch URL (video + playlist) and delegate to YoutubeTabIE."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        query = {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        }
        watch_url = update_url_query('https://www.youtube.com/watch', query)
        # The playlist id, not the video id, is passed on as video_id
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 5140
5141
class YoutubeYtUserIE(InfoExtractor):
    """Maps the ``ytuser:<name>`` shorthand onto the user's /videos page."""

    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the /user/<id>/videos URL."""
        user_id = self._match_id(url)
        videos_url = f'https://www.youtube.com/user/{user_id}/videos'
        return self.url_result(videos_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 5155
b05654f0 5156
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Maps the ``:ytfav`` keyword (and its spelling variants) onto the ``LL`` playlist."""

    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed ``list=LL`` playlist URL."""
        liked_videos_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_videos_url, ie=YoutubeTabIE.ie_key())
5174
5175
class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor registered under the ``ytsearch`` key."""

    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube search'
    _SEARCH_KEY = 'ytsearch'
    # Filter parameter: videos only
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'
    _TESTS = []
b05654f0 5182
a61fd4cf 5183
class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor registered under the ``ytsearchdate`` key."""

    IE_NAME = f'{YoutubeSearchIE.IE_NAME}:date'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    # Filter parameter: videos only, sorted by date
    _SEARCH_PARAMS = 'CAISAhAB'
75dff0ee 5189
c9ae7b95 5190
class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Handles youtube.com/results URLs, forwarding the query and its ``sp`` filter value."""

    IE_DESC = 'YouTube search URLs with sorting and filter support'
    IE_NAME = f'{YoutubeSearchIE.IE_NAME}_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a playlist of search results; the query doubles as playlist id and title."""
        params = parse_qs(url)
        # "search_query" takes precedence over "q" when both are present
        query = (params.get('search_query') or params.get('q'))[0]
        search_params = params.get('sp', (None,))[0]
        results = self._search_results(query, search_params)
        return self.playlist_result(results, query, query)
3462ffa8 5219
5220
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Common base for the feed keyword extractors.
    Each subclass has to provide _FEED_NAME, which is used both for the
    extractor name and for the /feed/<name> URL that is delegated to.
    """
    _LOGIN_REQUIRED = True
    _TESTS = []

    @property
    def IE_NAME(self):
        """Extractor name derived from the subclass's _FEED_NAME."""
        return f'youtube:{self._FEED_NAME}'

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the feed's URL; the input URL itself is not used."""
        feed_url = f'https://www.youtube.com/feed/{self._FEED_NAME}'
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
5237
5238
class YoutubeWatchLaterIE(InfoExtractor):
    """Maps the ``:ytwatchlater`` keyword onto the ``WL`` playlist."""

    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed ``list=WL`` playlist URL."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 5251
5252
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for "recommended"; matches the bare youtube.com home URL and ``:ytrec``."""

    _FEED_NAME = 'recommended'
    # Overrides the base class, which requires login
    _LOGIN_REQUIRED = False
    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
1ed5b5c9 5268
1ed5b5c9 5269
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for "subscriptions"; matches ``:ytsub``, ``:ytsubs``, ``:ytsubscriptions`` etc."""

    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
1ed5b5c9 5281
1ed5b5c9 5282
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for "history"; matches ``:ythis`` and ``:ythistory``."""

    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
1ed5b5c9
JMF
5291
5292
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catches watch/attribution URLs that end before any video id and explains the likely cause.

    The pattern only matches when the URL ends right after ``watch?`` or
    after a single one of the listed non-video parameters, which is what
    remains when an unquoted ``&`` makes the shell cut the URL short.
    """

    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError telling the user to quote the URL."""
        # The suggested command names this project's executable (yt-dlp)
        # so that the advice can be followed as written
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)
772fd5cc
PH
5340
5341
class YoutubeClipIE(InfoExtractor):
    """Matches /clip/ URLs, warns that clips are unsupported and passes the URL on to 'Generic'."""

    IE_NAME = 'youtube:clip'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/'

    def _real_extract(self, url):
        """Emit the warning, then return a url_result for the unchanged URL."""
        self.report_warning(
            'YouTube clips are not currently supported. The entire video will be downloaded instead')
        return self.url_result(url, ie='Generic')
5350
5351
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catches watch URLs whose ``v`` value is only 1 to 10 characters long and reports them."""

    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete id and the URL."""
        video_id = self._match_id(url)
        message = f'Incomplete YouTube ID {video_id}. URL {url} looks truncated.'
        raise ExtractorError(message, expected=True)