]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
[extractor/huya] Fix stream extraction (#4798)
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
6e634cbe 1import base64
d92f5d5a 2import calendar
109dd3b2 3import copy
fe93e2c4 4import datetime
a5c56234 5import hashlib
0ca96d48 6import itertools
c5e8d7af 7import json
720c3099 8import math
c4417ddb 9import os.path
d77ab8e2 10import random
c5e8d7af 11import re
46383212 12import sys
f8271158 13import threading
8a784c74 14import time
e0df6211 15import traceback
14f25df2 16import urllib.error
ac668111 17import urllib.parse
c5e8d7af 18
b05654f0 19from .common import InfoExtractor, SearchInfoExtractor
25836db6 20from .openload import PhantomJSwrapper
14f25df2 21from ..compat import functools
545cc85d 22from ..jsinterp import JSInterpreter
4bb4a188 23from ..utils import (
f8271158 24 NO_DEFAULT,
25 ExtractorError,
693f0600 26 UserNotLive,
720c3099 27 bug_reports_message,
82d02080 28 classproperty,
c5e8d7af 29 clean_html,
d92f5d5a 30 datetime_from_str,
11f9be09 31 dict_get,
2d30521a 32 float_or_none,
11f9be09 33 format_field,
ff91cf74 34 get_first,
dd27fd17 35 int_or_none,
641ad5d8 36 is_html,
34921b43 37 join_nonempty,
48416bc4 38 js_to_json,
94278f72 39 mimetype2ext,
9c0d7f49 40 network_exceptions,
11f9be09 41 orderedSet,
6310acf5 42 parse_codecs,
49bd8c66 43 parse_count,
7c80519c 44 parse_duration,
7ea65411 45 parse_iso8601,
4dfbf869 46 parse_qs,
dca3ff4a 47 qualities,
3995d37d 48 remove_start,
cf7e015f 49 smuggle_url,
dbdaaa23 50 str_or_none,
c93d53f5 51 str_to_int,
f3aa3c3f 52 strftime_or_none,
7c365c21 53 traverse_obj,
556dbe7f 54 try_get,
c5e8d7af
PH
55 unescapeHTML,
56 unified_strdate,
f0d785d3 57 unified_timestamp,
cf7e015f 58 unsmuggle_url,
8bdd16b4 59 update_url_query,
21c340b8 60 url_or_none,
fe93e2c4 61 urljoin,
7c365c21 62 variadic,
c5e8d7af
PH
63)
64
# Built-in Innertube client configurations, keyed by client identifier.
# Every entry provides the request context ('INNERTUBE_CONTEXT') and an integer
# 'INNERTUBE_CONTEXT_CLIENT_NAME'; 'INNERTUBE_API_KEY', 'INNERTUBE_HOST' and
# 'REQUIRE_JS_PLAYER' are optional here and get defaults in build_innertube_clients(),
# which also assigns a 'priority' to each entry.
# any clients starting with _ cannot be explicitly requested by the user
INNERTUBE_CLIENTS = {
    'web': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20220801.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1
    },
    'web_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_EMBEDDED_PLAYER',
                'clientVersion': '1.20220731.00.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 56
    },
    'web_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_REMIX',
                'clientVersion': '1.20220727.01.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
    },
    'web_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_CREATOR',
                'clientVersion': '1.20220726.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
    },
    'android': {
        'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID',
                'clientVersion': '17.31.35',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
        'REQUIRE_JS_PLAYER': False
    },
    'android_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_EMBEDDED_PLAYER',
                'clientVersion': '17.31.35',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
        'REQUIRE_JS_PLAYER': False
    },
    'android_music': {
        'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_MUSIC',
                'clientVersion': '5.16.51',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.apps.youtube.music/5.16.51 (Linux; U; Android 11) gzip'
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
        'REQUIRE_JS_PLAYER': False
    },
    'android_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_CREATOR',
                'clientVersion': '22.30.100',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.apps.youtube.creator/22.30.100 (Linux; U; Android 11) gzip'
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
        'REQUIRE_JS_PLAYER': False
    },
    # iOS clients have HLS live streams. Setting device model to get 60fps formats.
    # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
    'ios': {
        'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS',
                'clientVersion': '17.33.2',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
        'REQUIRE_JS_PLAYER': False
    },
    'ios_embedded': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MESSAGES_EXTENSION',
                'clientVersion': '17.33.2',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
        'REQUIRE_JS_PLAYER': False
    },
    'ios_music': {
        'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MUSIC',
                'clientVersion': '5.21',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.youtubemusic/5.21 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
        'REQUIRE_JS_PLAYER': False
    },
    'ios_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_CREATOR',
                'clientVersion': '22.33.101',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.ytcreator/22.33.101 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
        'REQUIRE_JS_PLAYER': False
    },
    # mweb has 'ultralow' formats
    # See: https://github.com/yt-dlp/yt-dlp/pull/557
    'mweb': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'MWEB',
                'clientVersion': '2.20220801.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 2
    },
    # This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
    # See: https://github.com/zerodytrash/YouTube-Internal-Clients
    'tv_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
                'clientVersion': '2.0',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 85
    },
}
237
238
e7870111
D
239def _split_innertube_client(client_name):
240 variant, *base = client_name.rsplit('.', 1)
241 if base:
242 return variant, base[0], variant
243 base, *variant = client_name.split('_', 1)
244 return client_name, base, variant[0] if variant else None
245
246
def build_innertube_clients():
    """Fill in defaults and priorities for every entry of INNERTUBE_CLIENTS.

    Mutates INNERTUBE_CLIENTS in place: sets default API key, host,
    'REQUIRE_JS_PLAYER' and 'hl', computes a 'priority' from the base client,
    and registers an extra '<client>_embedscreen' entry for every client
    whose name has no variant suffix.
    """
    third_party = {
        'embedUrl': 'https://www.youtube.com/',  # Can be any valid URL
    }
    base_clients = ('android', 'web', 'tv', 'ios', 'mweb')
    rank = qualities(base_clients[::-1])

    # Iterate over a snapshot because new entries are inserted inside the loop
    for name, config in list(INNERTUBE_CLIENTS.items()):
        for key, fallback in (
                ('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8'),
                ('INNERTUBE_HOST', 'www.youtube.com'),
                ('REQUIRE_JS_PLAYER', True)):
            config.setdefault(key, fallback)
        config['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')

        base_client, variant = _split_innertube_client(name)[1:]
        config['priority'] = 10 * rank(base_client)

        if variant == 'embedded':
            config['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            config['priority'] -= 2
        elif variant:
            config['priority'] -= 3
        else:
            # Plain client: derive its embed-screen counterpart from a copy
            screen_config = copy.deepcopy(config)
            INNERTUBE_CLIENTS[f'{name}_embedscreen'] = screen_config
            screen_config['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
            screen_config['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            screen_config['priority'] -= 3


build_innertube_clients()
276
277
de7f3446 278class YoutubeBaseInfoExtractor(InfoExtractor):
b2e8bc1b 279 """Provide base functions for Youtube extractors"""
e00eb564 280
# Regex alternation of reserved URL path names
# NOTE(review): presumably used to keep these from being treated as channel/user names - confirm at the usage sites
_RESERVED_NAMES = (
    r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'
    r'shorts|movies|results|search|shared|hashtag|trending|explore|feed|feeds|'
    r'browse|oembed|get_video_info|iframe_api|s/player|'
    r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

# Regex matching a playlist ID: either a known prefix followed by at least
# 10 ID characters, or one of the fixed IDs (RDMM, WL, LL, LM)
_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

# _NETRC_MACHINE = 'youtube'

# If True it will raise an error if no login info is provided
_LOGIN_REQUIRED = False
293
d9190e44
RH
# Host-name regex fragments for alternative front-ends (Invidious and Piped instances).
# Each entry is a raw regex string; dots are escaped and an optional sub-domain prefix is allowed.
_INVIDIOUS_SITES = (
    # invidious-redirect websites
    r'(?:www\.)?redirect\.invidious\.io',
    r'(?:(?:www|dev)\.)?invidio\.us',
    # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md
    r'(?:www\.)?invidious\.pussthecat\.org',
    r'(?:www\.)?invidious\.zee\.li',
    r'(?:www\.)?invidious\.ethibox\.fr',
    r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
    r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
    r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
    # youtube-dl invidious instances list
    r'(?:(?:www|no)\.)?invidiou\.sh',
    r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
    r'(?:www\.)?invidious\.kabi\.tk',
    r'(?:www\.)?invidious\.mastodon\.host',
    r'(?:www\.)?invidious\.zapashcanon\.fr',
    r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
    r'(?:www\.)?invidious\.tinfoil-hat\.net',
    r'(?:www\.)?invidious\.himiko\.cloud',
    r'(?:www\.)?invidious\.reallyancient\.tech',
    r'(?:www\.)?invidious\.tube',
    r'(?:www\.)?invidiou\.site',
    r'(?:www\.)?invidious\.site',
    r'(?:www\.)?invidious\.xyz',
    r'(?:www\.)?invidious\.nixnet\.xyz',
    r'(?:www\.)?invidious\.048596\.xyz',
    r'(?:www\.)?invidious\.drycat\.fr',
    r'(?:www\.)?inv\.skyn3t\.in',
    r'(?:www\.)?tube\.poal\.co',
    r'(?:www\.)?tube\.connect\.cafe',
    r'(?:www\.)?vid\.wxzm\.sx',
    r'(?:www\.)?vid\.mint\.lgbt',
    r'(?:www\.)?vid\.puffyan\.us',
    r'(?:www\.)?yewtu\.be',
    r'(?:www\.)?yt\.elukerio\.org',
    r'(?:www\.)?yt\.lelux\.fi',
    r'(?:www\.)?invidious\.ggc-project\.de',
    r'(?:www\.)?yt\.maisputain\.ovh',
    r'(?:www\.)?ytprivate\.com',
    r'(?:www\.)?invidious\.13ad\.de',
    r'(?:www\.)?invidious\.toot\.koeln',
    r'(?:www\.)?invidious\.fdn\.fr',
    r'(?:www\.)?watch\.nettohikari\.com',
    r'(?:www\.)?invidious\.namazso\.eu',
    r'(?:www\.)?invidious\.silkky\.cloud',
    r'(?:www\.)?invidious\.exonip\.de',
    r'(?:www\.)?invidious\.riverside\.rocks',
    r'(?:www\.)?invidious\.blamefran\.net',
    r'(?:www\.)?invidious\.moomoo\.de',
    r'(?:www\.)?ytb\.trom\.tf',
    r'(?:www\.)?yt\.cyberhost\.uk',
    r'(?:www\.)?kgg2m7yk5aybusll\.onion',
    r'(?:www\.)?qklhadlycap4cnod\.onion',
    r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
    r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
    r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
    r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
    r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
    r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
    r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
    r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
    r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
    r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
    # piped instances from https://github.com/TeamPiped/Piped/wiki/Instances
    r'(?:www\.)?piped\.kavin\.rocks',
    r'(?:www\.)?piped\.silkky\.cloud',
    r'(?:www\.)?piped\.tokhmi\.xyz',
    r'(?:www\.)?piped\.moomoo\.me',
    r'(?:www\.)?il\.ax',
    r'(?:www\.)?piped\.syncpundit\.com',
    r'(?:www\.)?piped\.mha\.fi',
    r'(?:www\.)?piped\.mint\.lgbt',
    r'(?:www\.)?piped\.privacy\.com\.de',
)
369
cce889b9 370 def _initialize_consent(self):
371 cookies = self._get_cookies('https://www.youtube.com/')
372 if cookies.get('__Secure-3PSID'):
373 return
374 consent_id = None
375 consent = cookies.get('CONSENT')
376 if consent:
377 if 'YES' in consent.value:
378 return
379 consent_id = self._search_regex(
380 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
381 if not consent_id:
382 consent_id = random.randint(100, 999)
383 self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
8d81f3e3 384
f3aa3c3f 385 def _initialize_pref(self):
386 cookies = self._get_cookies('https://www.youtube.com/')
387 pref_cookie = cookies.get('PREF')
388 pref = {}
389 if pref_cookie:
390 try:
14f25df2 391 pref = dict(urllib.parse.parse_qsl(pref_cookie.value))
f3aa3c3f 392 except ValueError:
393 self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
396a76f7 394 pref.update({'hl': 'en', 'tz': 'UTC'})
14f25df2 395 self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(pref))
f3aa3c3f 396
b2e8bc1b 397 def _real_initialize(self):
f3aa3c3f 398 self._initialize_pref()
cce889b9 399 self._initialize_consent()
a25bca9f 400 self._check_login_required()
401
402 def _check_login_required(self):
24146491 403 if self._LOGIN_REQUIRED and not self._cookies_passed:
52efa4b3 404 self.raise_login_required('Login details are needed to download this content', method='cookies')
c5e8d7af 405
# Regexes matching the start of the `ytInitialData = ...` (also the
# `window["ytInitialData"] = ...` form) and `ytInitialPlayerResponse = ...`
# assignments; the matched text ends right after the '='
_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*='
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='
a0566bbf 408
def _get_default_ytcfg(self, client='web'):
    """Return a deep copy of the built-in configuration for `client`.

    Raises KeyError if `client` is not a key of INNERTUBE_CLIENTS.
    """
    builtin_config = INNERTUBE_CLIENTS[client]
    return copy.deepcopy(builtin_config)
109dd3b2 411
def _get_innertube_host(self, client='web'):
    """Return the 'INNERTUBE_HOST' configured for `client` in INNERTUBE_CLIENTS."""
    client_config = INNERTUBE_CLIENTS[client]
    return client_config['INNERTUBE_HOST']
109dd3b2 414
def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
    """try_get on `ytcfg`, falling back to the default config of `default_client`.

    The fallback is used whenever the value from `ytcfg` is falsy.
    """
    found = try_get(ytcfg, getter, expected_type)
    if found:
        return found
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)
419
000c15a4 420 def _extract_client_name(self, ytcfg, default_client='web'):
3619f78d 421 return self._ytcfg_get_safe(
422 ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
14f25df2 423 lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), str, default_client)
109dd3b2 424
000c15a4 425 def _extract_client_version(self, ytcfg, default_client='web'):
3619f78d 426 return self._ytcfg_get_safe(
427 ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
14f25df2 428 lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), str, default_client)
109dd3b2 429
def _select_api_hostname(self, req_api_hostname, default_client=None):
    """Pick the API host name.

    Precedence: the 'innertube_host' extractor argument, then
    `req_api_hostname`, then the host configured for `default_client`
    ('web' when not given).
    """
    user_host = self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
    if user_host:
        return user_host
    if req_api_hostname:
        return req_api_hostname
    return self._get_innertube_host(default_client or 'web')
433
000c15a4 434 def _extract_api_key(self, ytcfg=None, default_client='web'):
14f25df2 435 return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], str, default_client)
109dd3b2 436
def _extract_context(self, ytcfg=None, default_client='web'):
    """Return the 'INNERTUBE_CONTEXT' dict from `ytcfg` or the default config.

    The 'client' section of the returned context (when it is a dict) is
    updated in place to force English language and UTC time zone.
    """
    candidates = (ytcfg, self._get_default_ytcfg(default_client))
    context = get_first(candidates, 'INNERTUBE_CONTEXT', expected_type=dict)
    # Enforce language and tz for extraction
    client_section = traverse_obj(context, 'client', expected_type=dict, default={})
    client_section.update(hl='en', timeZone='UTC', utcOffsetMinutes=0)
    return context
444
cf87314d 445 _SAPISID = None
446
109dd3b2 447 def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
a5c56234 448 time_now = round(time.time())
cf87314d 449 if self._SAPISID is None:
450 yt_cookies = self._get_cookies('https://www.youtube.com')
451 # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
452 # See: https://github.com/yt-dlp/yt-dlp/issues/393
453 sapisid_cookie = dict_get(
454 yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
455 if sapisid_cookie and sapisid_cookie.value:
456 self._SAPISID = sapisid_cookie.value
457 self.write_debug('Extracted SAPISID cookie')
458 # SAPISID cookie is required if not already present
459 if not yt_cookies.get('SAPISID'):
460 self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
461 self._set_cookie(
462 '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
463 else:
464 self._SAPISID = False
465 if not self._SAPISID:
466 return None
1974e99f 467 # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
468 sapisidhash = hashlib.sha1(
86e5f3ed 469 f'{time_now} {self._SAPISID} {origin}'.encode()).hexdigest()
1974e99f 470 return f'SAPISIDHASH {time_now}_{sapisidhash}'
a5c56234
M
471
def _call_api(self, ep, query, video_id, fatal=True, headers=None,
              note='Downloading API JSON', errnote='Unable to download API page',
              context=None, api_key=None, api_hostname=None, default_client='web'):
    """POST `query` to the Innertube endpoint `ep` and return the parsed JSON.

    `context` defaults to the extracted context of `default_client`. The
    'innertube_key' extractor argument takes precedence over `api_key`,
    which takes precedence over the extracted key. Entries of `headers`
    override the generated API headers.
    """
    payload = {'context': context or self._extract_context(default_client=default_client)}
    payload.update(query)

    request_headers = self.generate_api_headers(default_client=default_client)
    request_headers['content-type'] = 'application/json'
    if headers:
        request_headers.update(headers)

    user_key = self._configuration_arg(
        'innertube_key', [''], ie_key=YoutubeIE.ie_key(), casesense=True)[0]
    api_key = user_key or api_key or self._extract_api_key(default_client=default_client)

    hostname = self._select_api_hostname(api_hostname, default_client)
    return self._download_json(
        f'https://{hostname}/youtubei/v1/{ep}',
        video_id=video_id, fatal=fatal, note=note, errnote=errnote,
        data=json.dumps(payload).encode('utf8'), headers=request_headers,
        query={'key': api_key, 'prettyPrint': 'false'})
f4f751af 489
def extract_yt_initial_data(self, item_id, webpage, fatal=True):
    """Search `webpage` for the JSON following the ytInitialData assignment and return it."""
    start_pattern = self._YT_INITIAL_DATA_RE
    return self._search_json(start_pattern, webpage, 'yt initial data', item_id, fatal=fatal)
1890fc63 492
@staticmethod
def _extract_session_index(*data):
    """
    Index of current account in account list.
    See: https://github.com/yt-dlp/yt-dlp/pull/519

    Returns the first 'SESSION_INDEX' among `data` that converts to an int,
    or None if there is none.
    """
    indices = (
        int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
        for ytcfg in data)
    return next((index for index in indices if index is not None), None)
503
504 # Deprecated?
505 def _extract_identity_token(self, ytcfg=None, webpage=None):
a1c5d2ca 506 if ytcfg:
14f25df2 507 token = try_get(ytcfg, lambda x: x['ID_TOKEN'], str)
a1c5d2ca
M
508 if token:
509 return token
99e9e001 510 if webpage:
511 return self._search_regex(
512 r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
513 'identity token', default=None, fatal=False)
a1c5d2ca
M
514
@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg
    @returns the first sync id found, or None
    """
    for source in args:
        # ytcfg includes channel_syncid if on secondary channel
        session_id = try_get(source, lambda x: x['DELEGATED_SESSION_ID'], str)
        if session_id:
            return session_id
        datasync_id = try_get(
            source, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
                     lambda x: x['DATASYNC_ID']), str) or ''
        parts = datasync_id.split('||')
        if len(parts) < 2 or not parts[1]:
            continue
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        return parts[0]
a1c5d2ca 533
@staticmethod
def _extract_visitor_data(*args):
    """
    Extracts visitorData from an API response or ytcfg
    Appears to be used to track session state
    """
    # Alternative locations of the value inside each candidate object
    locations = (
        'VISITOR_DATA',
        ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
        ('responseContext', 'visitorData'),
    )
    return get_first(args, [locations], expected_type=str)
ac56cf38 543
@functools.cached_property
def is_authenticated(self):
    """Whether a SAPISIDHASH authorization header can be generated (cached after first access)."""
    auth_header = self._generate_sapisidhash_header()
    return bool(auth_header)
547
def extract_ytcfg(self, video_id, webpage):
    """Parse the argument of the `ytcfg.set({...});` call found in `webpage`.

    Returns an empty dict when `webpage` is empty, nothing is found, or
    the JSON cannot be parsed.
    """
    if not webpage:
        return {}
    raw_config = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg', default='{}')
    parsed = self._parse_json(raw_config, video_id, fatal=False)
    return parsed or {}
555
def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """Build the HTTP headers for an Innertube API request.

    Explicit keyword arguments take precedence over values extracted from
    `ytcfg`. Authorization headers are added when a SAPISIDHASH can be
    generated. Headers whose value is None are left out of the result.
    """
    origin = 'https://' + self._select_api_hostname(api_hostname, default_client)
    client_name = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)
    headers = {
        'X-YouTube-Client-Name': str(client_name),
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
        'Origin': origin,
        'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
        'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
        'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg),
        'User-Agent': self._ytcfg_get_safe(
            ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['userAgent'], default_client=default_client),
    }

    if session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if account_syncid or session_index is not None:
        headers['X-Goog-AuthUser'] = 0 if session_index is None else session_index

    auth = self._generate_sapisidhash_header(origin)
    if auth is not None:
        headers['Authorization'] = auth
        headers['X-Origin'] = origin

    return {name: value for name, value in headers.items() if value is not None}
29f7c58a 581
a25bca9f 582 def _download_ytcfg(self, client, video_id):
583 url = {
584 'web': 'https://www.youtube.com',
585 'web_music': 'https://music.youtube.com',
586 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
587 }.get(client)
588 if not url:
589 return {}
590 webpage = self._download_webpage(
591 url, video_id, fatal=False, note=f'Downloading {client.replace("_", " ").strip()} client config')
592 return self.extract_ytcfg(video_id, webpage) or {}
593
2d6659b9 594 @staticmethod
595 def _build_api_continuation_query(continuation, ctp=None):
596 query = {
597 'continuation': continuation
598 }
599 # TODO: Inconsistency with clickTrackingParams.
600 # Currently we have a fixed ctp contained within context (from ytcfg)
601 # and a ctp in root query for continuation.
602 if ctp:
603 query['clickTracking'] = {'clickTrackingParams': ctp}
604 return query
605
@classmethod
def _extract_next_continuation_data(cls, renderer):
    """Build a continuation query from the renderer's next/reload continuation data.

    Returns None when the renderer carries no such data or no token.
    """
    continuation_data = try_get(
        renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
                   lambda x: x['continuation']['reloadContinuationData']), dict)
    token = continuation_data.get('continuation') if continuation_data else None
    if not token:
        return None
    return cls._build_api_continuation_query(token, continuation_data.get('clickTrackingParams'))
2d6659b9 618
619 @classmethod
620 def _extract_continuation_ep_data(cls, continuation_ep: dict):
621 if isinstance(continuation_ep, dict):
622 continuation = try_get(
14f25df2 623 continuation_ep, lambda x: x['continuationCommand']['token'], str)
2d6659b9 624 if not continuation:
625 return
626 ctp = continuation_ep.get('clickTrackingParams')
fe93e2c4 627 return cls._build_api_continuation_query(continuation, ctp)
2d6659b9 628
629 @classmethod
630 def _extract_continuation(cls, renderer):
631 next_continuation = cls._extract_next_continuation_data(renderer)
632 if next_continuation:
633 return next_continuation
fe93e2c4 634
2d6659b9 635 contents = []
636 for key in ('contents', 'items'):
637 contents.extend(try_get(renderer, lambda x: x[key], list) or [])
fe93e2c4 638
2d6659b9 639 for content in contents:
640 if not isinstance(content, dict):
641 continue
642 continuation_ep = try_get(
643 content, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
644 lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
645 dict)
646 continuation = cls._extract_continuation_ep_data(continuation_ep)
647 if continuation:
648 return continuation
649
@classmethod
def _extract_alerts(cls, data):
    """Yield ``(alert_type, message)`` for every alert found under data['alerts'].

    Entries of the alerts list that are not dicts, values inside them that
    are not dicts, and alerts lacking a type or a text message are skipped.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # Guard against malformed renderers: `.get` below needs a dict
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = cls._get_text(alert, 'text')
            if message:
                yield alert_type, message
662
c0ac49bc 663 def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
109dd3b2 664 errors = []
665 warnings = []
666 for alert_type, alert_message in alerts:
641ad5d8 667 if alert_type.lower() == 'error' and fatal:
109dd3b2 668 errors.append([alert_type, alert_message])
669 else:
670 warnings.append([alert_type, alert_message])
671
672 for alert_type, alert_message in (warnings + errors[:-1]):
86e5f3ed 673 self.report_warning(f'YouTube said: {alert_type} - {alert_message}', only_once=only_once)
109dd3b2 674 if errors:
675 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
676
677 def _extract_and_report_alerts(self, data, *args, **kwargs):
678 return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
679
def _extract_badges(self, renderer: dict):
    """Return the set of lower-cased metadata badge labels found in renderer['badges']."""
    labels = (
        try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], str)
        for badge in try_get(renderer, lambda x: x['badges'], list) or [])
    return {label.lower() for label in labels if label}
687
@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """
    Return the first non-empty text found in `data` at one of the given paths.

    Text is taken from a 'simpleText' string or, failing that, by joining the
    'text' values of the 'runs' list (a bare list is treated as the runs).
    @param path_list: paths to try in order; with no paths, `data` itself is used
    @param max_runs:  if truthy, at most this many runs are joined
    @returns the text, or None if nothing was found
    """
    for path in path_list or [None]:
        if path is None:
            obj = [data]
        else:
            obj = traverse_obj(data, path, default=[])
            # A path without `...` or list/tuple keys is handled as yielding a
            # single object, so wrap it for the loop below
            if not any(key is ... or isinstance(key, (list, tuple)) for key in variadic(path)):
                obj = [obj]
        for item in obj:
            text = try_get(item, lambda x: x['simpleText'], str)
            if text:
                return text
            runs = try_get(item, lambda x: x['runs'], list) or []
            if not runs and isinstance(item, list):
                runs = item

            # Keep all runs unless a (truthy) max_runs limits them
            runs = runs[:min(len(runs), max_runs or len(runs))]
            text = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str, default=[]))
            if text:
                return text
47193e02 709
def _get_count(self, data, *path_list):
    """Parse a count from the text found at `path_list` in `data`.

    parse_count is tried first; if it yields None, the leading run of digits
    and commas (after removing whitespace) is converted instead.
    """
    count_text = self._get_text(data, *path_list) or ''
    parsed = parse_count(count_text)
    if parsed is not None:
        return parsed
    compact_text = re.sub(r'\s', '', count_text)
    return str_to_int(
        self._search_regex(r'^([\d,]+)', compact_text, 'count', default=None))
717
@staticmethod
def _extract_thumbnails(data, *path_list):
    """
    Extract thumbnails from thumbnails dict
    @param path_list: path list to level that contains 'thumbnails' key
    @returns list of dicts with 'url', 'height' and 'width'; entries that are
             not dicts or have no valid URL are skipped
    """
    thumbnails = []
    for path in path_list or [()]:
        for thumbnail in traverse_obj(data, (*variadic(path), 'thumbnails', ...), default=[]):
            # Guard against malformed entries: `.get` below needs a dict
            if not isinstance(thumbnail, dict):
                continue
            thumbnail_url = url_or_none(thumbnail.get('url'))
            if not thumbnail_url:
                continue
            # Sometimes youtube gives a wrong thumbnail URL. See:
            # https://github.com/yt-dlp/yt-dlp/issues/233
            # https://github.com/ytdl-org/youtube-dl/issues/28023
            if 'maxresdefault' in thumbnail_url:
                thumbnail_url = thumbnail_url.split('?')[0]
            thumbnails.append({
                'url': thumbnail_url,
                'height': int_or_none(thumbnail.get('height')),
                'width': int_or_none(thumbnail.get('width')),
            })
    return thumbnails
741
@staticmethod
def extract_relative_time(relative_time_text):
    """
    Extracts a relative time from string and converts to dt object
    e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today'
    Returns None if the text has no relative time or it cannot be converted
    """
    mobj = re.search(
        r'(?P<start>today|yesterday|now)|(?P<time>\d+)\s*(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?\s*ago',
        relative_time_text)
    if not mobj:
        return None
    keyword = mobj.group('start')
    if keyword:
        return datetime_from_str(keyword)
    try:
        return datetime_from_str(f"now-{mobj.group('time')}{mobj.group('unit')}")
    except ValueError:
        return None
757
def _extract_time_text(self, renderer, *path_list):
    """@returns (timestamp, time_text)

    The text is first read as a relative time, then as an absolute date. A
    warning is reported once when a non-empty text cannot be parsed.
    """
    text = self._get_text(renderer, *path_list) or ''
    relative = self.extract_relative_time(text)
    if isinstance(relative, datetime.datetime):
        timestamp = calendar.timegm(relative.timetuple())
    else:
        timestamp = None

    if timestamp is None:
        timestamp = unified_timestamp(text)
        if not timestamp:
            date_part = self._search_regex(
                (r'([a-z]+\s*\d{1,2},?\s*20\d{2})', r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)'),
                text.lower(), 'time text', default=None)
            timestamp = unified_timestamp(date_part)

    if text and timestamp is None:
        self.report_warning(f"Cannot parse localized time text '{text}'" + bug_reports_message(), only_once=True)
    return timestamp, text
776
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """
    Call the API endpoint `ep` with retries and return the response.

    Each attempt is driven by self.RetryManager(); setting `retry.error`
    and continuing requests another attempt.
    @param check_get_keys: path(s) looked up in the response; an attempt
                           whose lookup is falsy is treated as incomplete
                           data and retried
    @param fatal:          passed to self._error_or_warning for errors that
                           are not retried
    @returns the response, or the result of self._error_or_warning
    """
    for retry in self.RetryManager():
        try:
            # The call itself is always fatal so that failures surface here as ExtractorError
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query, note=note,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client)
        except ExtractorError as e:
            if not isinstance(e.cause, network_exceptions):
                # Not a network problem: do not retry
                return self._error_or_warning(e, fatal=fatal)
            elif not isinstance(e.cause, urllib.error.HTTPError):
                # Network error without an HTTP response: retry
                retry.error = e
                continue

            # Peek at the error body; a non-HTML body is parsed as JSON for an error message
            first_bytes = e.cause.read(512)
            if not is_html(first_bytes):
                yt_error = try_get(
                    self._parse_json(
                        self._webpage_read_content(e.cause, None, item_id, prefix=first_bytes) or '{}', item_id, fatal=False),
                    lambda x: x['error']['message'], str)
                if yt_error:
                    self._report_alerts([('ERROR', yt_error)], fatal=False)
            # Downloading page may result in intermittent 5xx HTTP error
            # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
            # We also want to catch all other network exceptions since errors in later pages can be troublesome
            # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
            if e.cause.code not in (403, 429):
                retry.error = e
                continue
            return self._error_or_warning(e, fatal=fatal)

        try:
            self._extract_and_report_alerts(response, only_once=True)
        except ExtractorError as e:
            # YouTube servers may return errors we want to retry on in a 200 OK response
            # See: https://github.com/yt-dlp/yt-dlp/issues/839
            if 'unknown error' in e.msg.lower():
                retry.error = e
                continue
            return self._error_or_warning(e, fatal=fatal)
        # Youtube sometimes sends incomplete data
        # See: https://github.com/ytdl-org/youtube-dl/issues/28194
        if not traverse_obj(response, *variadic(check_get_keys)):
            retry.error = ExtractorError('Incomplete data received', expected=True)
            continue

        return response
109dd3b2 828
9297939e 829 @staticmethod
830 def is_music_url(url):
831 return re.match(r'https?://music\.youtube\.com/', url) is not None
832
def _extract_video(self, renderer):
    """
    Build a 'url' type result for a video renderer, to be handled by YoutubeIE

    The result points to the /shorts/ URL when the thumbnail overlay style is
    'SHORTS' or the navigation URL contains '/shorts/', otherwise to the
    regular watch URL. 'upload_date' is only filled in when the
    'approximate_date' extractor argument (ie_key 'youtubetab') is set.
    """
    video_id = renderer.get('videoId')
    title = self._get_text(renderer, 'title')
    description = self._get_text(renderer, 'descriptionSnippet')

    length_text = self._get_text(
        renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text'))
    duration = parse_duration(length_text)
    if duration is None:
        # Fall back to the duration embedded in the title's accessibility label
        accessibility_label = traverse_obj(
            renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str)
        duration_text = self._search_regex(
            r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$',
            accessibility_label, video_id, default=None, group='duration')
        duration = parse_duration(duration_text)

    view_count = self._get_count(renderer, 'viewCountText')
    uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
    channel_id = traverse_obj(
        renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
        expected_type=str, get_all=False)

    timestamp, time_text = self._extract_time_text(renderer, 'publishedTimeText')
    start_time = traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False)
    scheduled_timestamp = str_to_int(start_time)
    overlay_style = traverse_obj(
        renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
        get_all=False, expected_type=str)
    badges = self._extract_badges(renderer)
    thumbnails = self._extract_thumbnails(renderer, 'thumbnail')

    relative_navigation_url = traverse_obj(
        renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
        expected_type=str)
    navigation_url = urljoin('https://www.youtube.com/', relative_navigation_url) or ''
    if overlay_style == 'SHORTS' or '/shorts/' in navigation_url:
        url = f'https://www.youtube.com/shorts/{video_id}'
    else:
        url = f'https://www.youtube.com/watch?v={video_id}'

    upload_date = None
    if self._configuration_arg('approximate_date', ie_key='youtubetab'):
        upload_date = strftime_or_none(timestamp, '%Y%m%d')

    # First matching condition wins
    if scheduled_timestamp is not None:
        live_status = 'is_upcoming'
    elif 'streamed' in time_text.lower():
        live_status = 'was_live'
    elif overlay_style == 'LIVE' or 'live now' in badges:
        live_status = 'is_live'
    else:
        live_status = None

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': url,
        'title': title,
        'description': description,
        'duration': duration,
        'view_count': view_count,
        'uploader': uploader,
        'channel_id': channel_id,
        'thumbnails': thumbnails,
        'upload_date': upload_date,
        'live_status': live_status,
        'release_timestamp': scheduled_timestamp,
        'availability': self._availability(
            needs_premium='premium' in badges, needs_subscription='members only' in badges),
    }
887
0c148415 888
360e1ca5 889class YoutubeIE(YoutubeBaseInfoExtractor):
96565c7e 890 IE_DESC = 'YouTube'
cb7dfeea 891 _VALID_URL = r"""(?x)^
c5e8d7af 892 (
edb53e2d 893 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 894 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
895 (?:www\.)?deturl\.com/www\.youtube\.com|
896 (?:www\.)?pwnyoutube\.com|
897 (?:www\.)?hooktube\.com|
898 (?:www\.)?yourepeat\.com|
899 tube\.majestyc\.net|
900 %(invidious)s|
901 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
902 (?:.*?\#/)? # handle anchor (#/) redirect urls
903 (?: # the various things that can precede the ID:
b6ce9bb0 904 (?:(?:v|embed|e|shorts)/(?!videoseries|live_stream)) # v/ or embed/ or e/ or shorts/
c5e8d7af 905 |(?: # or the v= param in all its forms
f7000f3a 906 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 907 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 908 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
909 v=
910 )
f4b05232 911 ))
cbaed4bb
S
912 |(?:
913 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
914 vid\.plus| # or vid.plus/xxxx
915 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 916 %(invidious)s
cbaed4bb 917 )/
edb53e2d 918 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 919 )
c5e8d7af 920 )? # all until now is optional -> you can pass the naked ID
201c1459 921 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
c5e8d7af 922 (?(1).+)? # if we found the ID, everything can follow
9297939e 923 (?:\#|$)""" % {
d9190e44 924 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
bc2ca1bb 925 }
bfd973ec 926 _EMBED_REGEX = [r'''(?x)
927 (?:
928 <iframe[^>]+?src=|
929 data-video-url=|
930 <embed[^>]+?src=|
931 embedSWF\(?:\s*|
932 <object[^>]+data=|
933 new\s+SWFObject\(
934 )
935 (["\'])
936 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
937 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
938 \1''']
e40c758c 939 _PLAYER_INFO_RE = (
cc2db878 940 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
941 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 942 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 943 )
2c62dc26 944 _formats = {
c2d3cb4c 945 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
946 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
947 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
948 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
949 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
950 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
951 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
952 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 953 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 954 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
955 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
956 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
957 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
958 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
959 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 960 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 961 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
962 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 963
964
965 # 3D videos
c2d3cb4c 966 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
967 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
968 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
969 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 970 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
971 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
972 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 973
96fb5605 974 # Apple HTTP Live Streaming
11f12195 975 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 976 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
977 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
978 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
979 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
980 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 981 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
982 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
983
984 # DASH mp4 video
d23028a8
S
985 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
986 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
987 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
988 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
989 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 990 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
991 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
992 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
993 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
994 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
995 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
996 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 997
f6f1fc92 998 # Dash mp4 audio
d23028a8
S
999 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
1000 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
1001 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
1002 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1003 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1004 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
1005 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
1006
1007 # Dash webm
d23028a8
S
1008 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1009 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1010 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1011 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1012 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1013 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1014 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
1015 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1016 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1017 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1018 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1019 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1020 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1021 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1022 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 1023 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
1024 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1025 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1026 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1027 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1028 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1029 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
1030
1031 # Dash webm audio
d23028a8
S
1032 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
1033 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 1034
0857baad 1035 # Dash webm audio with opus inside
d23028a8
S
1036 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
1037 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
1038 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 1039
ce6b9a2d
PH
1040 # RTMP (unnamed)
1041 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
1042
1043 # av01 video only formats sometimes served with "unknown" codecs
9b5fa9ee
TOH
1044 '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1045 '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1046 '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
1047 '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
1048 '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
1049 '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
1050 '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
1051 '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
c5e8d7af 1052 }
29f7c58a 1053 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 1054
fd5c4aab
S
1055 _GEO_BYPASS = False
1056
78caa52a 1057 IE_NAME = 'youtube'
2eb88d95
PH
1058 _TESTS = [
1059 {
2d3d2997 1060 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
1061 'info_dict': {
1062 'id': 'BaW_jenozKc',
1063 'ext': 'mp4',
3867038a 1064 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
1065 'uploader': 'Philipp Hagemeister',
1066 'uploader_id': 'phihag',
ec85ded8 1067 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
ff9f925b 1068 'channel': 'Philipp Hagemeister',
dd4c4492
S
1069 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1070 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 1071 'upload_date': '20121002',
ff9f925b 1072 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
4bc3a23e 1073 'categories': ['Science & Technology'],
3867038a 1074 'tags': ['youtube-dl'],
556dbe7f 1075 'duration': 10,
dbdaaa23 1076 'view_count': int,
3e7c1224 1077 'like_count': int,
ff9f925b 1078 'availability': 'public',
1079 'playable_in_embed': True,
1080 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1081 'live_status': 'not_live',
1082 'age_limit': 0,
7c80519c 1083 'start_time': 1,
297a564b 1084 'end_time': 9,
12a1b225 1085 'comment_count': int,
6c73052c 1086 'channel_follower_count': int
2eb88d95 1087 }
0e853ca4 1088 },
fccd3771 1089 {
4bc3a23e
PH
1090 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1091 'note': 'Embed-only video (#1746)',
1092 'info_dict': {
1093 'id': 'yZIXLfi8CZQ',
1094 'ext': 'mp4',
1095 'upload_date': '20120608',
1096 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1097 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1098 'uploader': 'SET India',
94bfcd23 1099 'uploader_id': 'setindia',
ec85ded8 1100 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 1101 'age_limit': 18,
545cc85d 1102 },
1103 'skip': 'Private video',
fccd3771 1104 },
11b56058 1105 {
8bdd16b4 1106 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
1107 'note': 'Use the first video ID in the URL',
1108 'info_dict': {
1109 'id': 'BaW_jenozKc',
1110 'ext': 'mp4',
3867038a 1111 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
1112 'uploader': 'Philipp Hagemeister',
1113 'uploader_id': 'phihag',
ec85ded8 1114 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
976ae3ea 1115 'channel': 'Philipp Hagemeister',
1116 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1117 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
11b56058 1118 'upload_date': '20121002',
976ae3ea 1119 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
11b56058 1120 'categories': ['Science & Technology'],
3867038a 1121 'tags': ['youtube-dl'],
556dbe7f 1122 'duration': 10,
dbdaaa23 1123 'view_count': int,
11b56058 1124 'like_count': int,
976ae3ea 1125 'availability': 'public',
1126 'playable_in_embed': True,
1127 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1128 'live_status': 'not_live',
1129 'age_limit': 0,
12a1b225 1130 'comment_count': int,
6c73052c 1131 'channel_follower_count': int
34a7de29
S
1132 },
1133 'params': {
1134 'skip_download': True,
1135 },
11b56058 1136 },
dd27fd17 1137 {
2d3d2997 1138 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
1139 'note': '256k DASH audio (format 141) via DASH manifest',
1140 'info_dict': {
1141 'id': 'a9LDPn-MO4I',
1142 'ext': 'm4a',
1143 'upload_date': '20121002',
1144 'uploader_id': '8KVIDEO',
ec85ded8 1145 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
1146 'description': '',
1147 'uploader': '8KVIDEO',
1148 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 1149 },
4bc3a23e
PH
1150 'params': {
1151 'youtube_include_dash_manifest': True,
1152 'format': '141',
4919603f 1153 },
de3c7fe0 1154 'skip': 'format 141 not served anymore',
dd27fd17 1155 },
8bdd16b4 1156 # DASH manifest with encrypted signature
1157 {
1158 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1159 'info_dict': {
1160 'id': 'IB3lcPjvWLA',
1161 'ext': 'm4a',
1162 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1163 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1164 'duration': 244,
1165 'uploader': 'AfrojackVEVO',
1166 'uploader_id': 'AfrojackVEVO',
1167 'upload_date': '20131011',
cc2db878 1168 'abr': 129.495,
976ae3ea 1169 'like_count': int,
1170 'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',
1171 'playable_in_embed': True,
1172 'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',
1173 'view_count': int,
1174 'track': 'The Spark',
1175 'live_status': 'not_live',
1176 'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',
1177 'channel': 'Afrojack',
1178 'uploader_url': 'http://www.youtube.com/user/AfrojackVEVO',
1179 'tags': 'count:19',
1180 'availability': 'public',
1181 'categories': ['Music'],
1182 'age_limit': 0,
1183 'alt_title': 'The Spark',
6c73052c 1184 'channel_follower_count': int
8bdd16b4 1185 },
1186 'params': {
1187 'youtube_include_dash_manifest': True,
1188 'format': '141/bestaudio[ext=m4a]',
1189 },
1190 },
65c2fde2 1191 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
c522adb1 1192 {
65c2fde2 1193 'note': 'Embed allowed age-gate video',
2d3d2997 1194 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
1195 'info_dict': {
1196 'id': 'HtVdAasjOgU',
1197 'ext': 'mp4',
1198 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 1199 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 1200 'duration': 142,
c522adb1
JMF
1201 'uploader': 'The Witcher',
1202 'uploader_id': 'WitcherGame',
ec85ded8 1203 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 1204 'upload_date': '20140605',
34952f09 1205 'age_limit': 18,
976ae3ea 1206 'categories': ['Gaming'],
1207 'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',
1208 'availability': 'needs_auth',
1209 'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',
1210 'like_count': int,
1211 'channel': 'The Witcher',
1212 'live_status': 'not_live',
1213 'tags': 'count:17',
1214 'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',
1215 'playable_in_embed': True,
1216 'view_count': int,
6c73052c 1217 'channel_follower_count': int
c522adb1
JMF
1218 },
1219 },
65c2fde2 1220 {
1221 'note': 'Age-gate video with embed allowed in public site',
1222 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1223 'info_dict': {
1224 'id': 'HsUATh_Nc2U',
1225 'ext': 'mp4',
1226 'title': 'Godzilla 2 (Official Video)',
1227 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1228 'upload_date': '20200408',
1229 'uploader_id': 'FlyingKitty900',
1230 'uploader': 'FlyingKitty',
1231 'age_limit': 18,
976ae3ea 1232 'availability': 'needs_auth',
1233 'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',
1234 'uploader_url': 'http://www.youtube.com/user/FlyingKitty900',
1235 'channel': 'FlyingKitty',
1236 'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',
1237 'view_count': int,
1238 'categories': ['Entertainment'],
1239 'live_status': 'not_live',
1240 'tags': ['Flyingkitty', 'godzilla 2'],
1241 'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',
1242 'like_count': int,
1243 'duration': 177,
1244 'playable_in_embed': True,
6c73052c 1245 'channel_follower_count': int
65c2fde2 1246 },
1247 },
1248 {
1249 'note': 'Age-gate video embedable only with clientScreen=EMBED',
1250 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1251 'info_dict': {
1252 'id': 'Tq92D6wQ1mg',
1253 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
3619f78d 1254 'ext': 'mp4',
17322130 1255 'upload_date': '20191228',
65c2fde2 1256 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1257 'uploader': 'Projekt Melody',
1258 'description': 'md5:17eccca93a786d51bc67646756894066',
1259 'age_limit': 18,
976ae3ea 1260 'like_count': int,
1261 'availability': 'needs_auth',
1262 'uploader_url': 'http://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
1263 'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1264 'view_count': int,
1265 'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',
1266 'channel': 'Projekt Melody',
1267 'live_status': 'not_live',
1268 'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],
1269 'playable_in_embed': True,
1270 'categories': ['Entertainment'],
1271 'duration': 106,
1272 'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
12a1b225 1273 'comment_count': int,
6c73052c 1274 'channel_follower_count': int
65c2fde2 1275 },
1276 },
1277 {
1278 'note': 'Non-Agegated non-embeddable video',
1279 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1280 'info_dict': {
1281 'id': 'MeJVWBSsPAY',
1282 'ext': 'mp4',
1283 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1284 'uploader': 'Herr Lurik',
1285 'uploader_id': 'st3in234',
1286 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1287 'upload_date': '20130730',
976ae3ea 1288 'track': 'Such mich find mich',
1289 'age_limit': 0,
1290 'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],
1291 'like_count': int,
1292 'playable_in_embed': False,
1293 'creator': 'OOMPH!',
1294 'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',
1295 'view_count': int,
1296 'alt_title': 'Such mich find mich',
1297 'duration': 210,
1298 'channel': 'Herr Lurik',
1299 'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',
1300 'categories': ['Music'],
1301 'availability': 'public',
1302 'uploader_url': 'http://www.youtube.com/user/st3in234',
1303 'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',
1304 'live_status': 'not_live',
1305 'artist': 'OOMPH!',
6c73052c 1306 'channel_follower_count': int
65c2fde2 1307 },
1308 },
1309 {
1310 'note': 'Non-bypassable age-gated video',
1311 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1312 'only_matching': True,
1313 },
8bdd16b4 1314 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1315 # YouTube Red ad is not captured for creator
1316 {
1317 'url': '__2ABJjxzNo',
1318 'info_dict': {
1319 'id': '__2ABJjxzNo',
1320 'ext': 'mp4',
1321 'duration': 266,
1322 'upload_date': '20100430',
1323 'uploader_id': 'deadmau5',
1324 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 1325 'creator': 'deadmau5',
1326 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 1327 'uploader': 'deadmau5',
1328 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 1329 'alt_title': 'Some Chords',
976ae3ea 1330 'availability': 'public',
1331 'tags': 'count:14',
1332 'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',
1333 'view_count': int,
1334 'live_status': 'not_live',
1335 'channel': 'deadmau5',
1336 'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',
1337 'like_count': int,
1338 'track': 'Some Chords',
1339 'artist': 'deadmau5',
1340 'playable_in_embed': True,
1341 'age_limit': 0,
1342 'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',
1343 'categories': ['Music'],
1344 'album': 'Some Chords',
6c73052c 1345 'channel_follower_count': int
8bdd16b4 1346 },
1347 'expected_warnings': [
1348 'DASH manifest missing',
1349 ]
1350 },
067aa17e 1351 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
1352 {
1353 'url': 'lqQg6PlCWgI',
1354 'info_dict': {
1355 'id': 'lqQg6PlCWgI',
1356 'ext': 'mp4',
556dbe7f 1357 'duration': 6085,
90227264 1358 'upload_date': '20150827',
cbe2bd91 1359 'uploader_id': 'olympic',
ec85ded8 1360 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
12a1b225 1361 'description': 'md5:04bbbf3ccceb6795947572ca36f45904',
11f9be09 1362 'uploader': 'Olympics',
cbe2bd91 1363 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
976ae3ea 1364 'like_count': int,
1365 'release_timestamp': 1343767800,
1366 'playable_in_embed': True,
1367 'categories': ['Sports'],
1368 'release_date': '20120731',
1369 'channel': 'Olympics',
1370 'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],
1371 'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',
1372 'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',
1373 'age_limit': 0,
1374 'availability': 'public',
1375 'live_status': 'was_live',
1376 'view_count': int,
1377 'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',
6c73052c 1378 'channel_follower_count': int
cbe2bd91
PH
1379 },
1380 'params': {
1381 'skip_download': 'requires avconv',
e52a40ab 1382 }
cbe2bd91 1383 },
6271f1ca
PH
1384 # Non-square pixels
1385 {
1386 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1387 'info_dict': {
1388 'id': '_b-2C3KPAM0',
1389 'ext': 'mp4',
1390 'stretched_ratio': 16 / 9.,
556dbe7f 1391 'duration': 85,
6271f1ca
PH
1392 'upload_date': '20110310',
1393 'uploader_id': 'AllenMeow',
ec85ded8 1394 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 1395 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 1396 'uploader': '孫ᄋᄅ',
6271f1ca 1397 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
976ae3ea 1398 'playable_in_embed': True,
1399 'channel': '孫ᄋᄅ',
1400 'age_limit': 0,
1401 'tags': 'count:11',
1402 'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',
1403 'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',
1404 'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',
1405 'view_count': int,
1406 'categories': ['People & Blogs'],
1407 'like_count': int,
1408 'live_status': 'not_live',
1409 'availability': 'unlisted',
12a1b225 1410 'comment_count': int,
6c73052c 1411 'channel_follower_count': int
6271f1ca 1412 },
06b491eb
S
1413 },
1414 # url_encoded_fmt_stream_map is empty string
1415 {
1416 'url': 'qEJwOuvDf7I',
1417 'info_dict': {
1418 'id': 'qEJwOuvDf7I',
f57b7835 1419 'ext': 'webm',
06b491eb
S
1420 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1421 'description': '',
1422 'upload_date': '20150404',
1423 'uploader_id': 'spbelect',
1424 'uploader': 'Наблюдатели Петербурга',
1425 },
1426 'params': {
1427 'skip_download': 'requires avconv',
e323cf3f
S
1428 },
1429 'skip': 'This live event has ended.',
06b491eb 1430 },
067aa17e 1431 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
1432 {
1433 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1434 'info_dict': {
1435 'id': 'FIl7x6_3R5Y',
eb6793ba 1436 'ext': 'webm',
da77d856
S
1437 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1438 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 1439 'duration': 220,
da77d856
S
1440 'upload_date': '20150625',
1441 'uploader_id': 'dorappi2000',
ec85ded8 1442 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 1443 'uploader': 'dorappi2000',
eb6793ba 1444 'formats': 'mincount:31',
da77d856 1445 },
eb6793ba 1446 'skip': 'not actual anymore',
2ee8f5d8 1447 },
8a1a26ce
YCH
1448 # DASH manifest with segment_list
1449 {
1450 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1451 'md5': '8ce563a1d667b599d21064e982ab9e31',
1452 'info_dict': {
1453 'id': 'CsmdDsKjzN8',
1454 'ext': 'mp4',
17ee98e1 1455 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
1456 'uploader': 'Airtek',
1457 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1458 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1459 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1460 },
1461 'params': {
1462 'youtube_include_dash_manifest': True,
1463 'format': '135', # bestvideo
be49068d
S
1464 },
1465 'skip': 'This live event has ended.',
2ee8f5d8 1466 },
cf7e015f
S
1467 {
1468 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 1469 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 1470 'info_dict': {
545cc85d 1471 'id': 'jvGDaLqkpTg',
1472 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1473 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
1474 },
1475 'playlist': [{
1476 'info_dict': {
545cc85d 1477 'id': 'jvGDaLqkpTg',
cf7e015f 1478 'ext': 'mp4',
545cc85d 1479 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1480 'description': 'md5:e03b909557865076822aa169218d6a5d',
1481 'duration': 10643,
1482 'upload_date': '20161111',
1483 'uploader': 'Team PGP',
1484 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1485 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1486 },
1487 }, {
1488 'info_dict': {
545cc85d 1489 'id': '3AKt1R1aDnw',
cf7e015f 1490 'ext': 'mp4',
545cc85d 1491 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1492 'description': 'md5:e03b909557865076822aa169218d6a5d',
1493 'duration': 10991,
1494 'upload_date': '20161111',
1495 'uploader': 'Team PGP',
1496 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1497 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1498 },
1499 }, {
1500 'info_dict': {
545cc85d 1501 'id': 'RtAMM00gpVc',
cf7e015f 1502 'ext': 'mp4',
545cc85d 1503 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1504 'description': 'md5:e03b909557865076822aa169218d6a5d',
1505 'duration': 10995,
1506 'upload_date': '20161111',
1507 'uploader': 'Team PGP',
1508 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1509 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1510 },
1511 }, {
1512 'info_dict': {
545cc85d 1513 'id': '6N2fdlP3C5U',
cf7e015f 1514 'ext': 'mp4',
545cc85d 1515 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1516 'description': 'md5:e03b909557865076822aa169218d6a5d',
1517 'duration': 10990,
1518 'upload_date': '20161111',
1519 'uploader': 'Team PGP',
1520 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1521 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1522 },
1523 }],
1524 'params': {
1525 'skip_download': True,
1526 },
65c2fde2 1527 'skip': 'Not multifeed anymore',
cbaed4bb 1528 },
f9f49d87 1529 {
067aa17e 1530 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
1531 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1532 'info_dict': {
1533 'id': 'gVfLd0zydlo',
1534 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1535 },
1536 'playlist_count': 2,
be49068d 1537 'skip': 'Not multifeed anymore',
f9f49d87 1538 },
cbaed4bb 1539 {
2d3d2997 1540 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 1541 'only_matching': True,
0e49d9a6 1542 },
6d4fc66b 1543 {
2d3d2997 1544 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
1545 'only_matching': True,
1546 },
0e49d9a6 1547 {
067aa17e 1548 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 1549 # Also tests cut-off URL expansion in video description (see
067aa17e
S
1550 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1551 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
1552 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1553 'info_dict': {
1554 'id': 'lsguqyKfVQg',
1555 'ext': 'mp4',
1556 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
11f9be09 1557 'alt_title': 'Dark Walk',
0e49d9a6 1558 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 1559 'duration': 133,
0e49d9a6
LL
1560 'upload_date': '20151119',
1561 'uploader_id': 'IronSoulElf',
ec85ded8 1562 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 1563 'uploader': 'IronSoulElf',
11f9be09 1564 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1565 'track': 'Dark Walk',
1566 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
92bc97d3 1567 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
976ae3ea 1568 'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',
1569 'categories': ['Film & Animation'],
1570 'view_count': int,
1571 'live_status': 'not_live',
1572 'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',
1573 'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',
1574 'tags': 'count:13',
1575 'availability': 'public',
1576 'channel': 'IronSoulElf',
1577 'playable_in_embed': True,
1578 'like_count': int,
1579 'age_limit': 0,
6c73052c 1580 'channel_follower_count': int
0e49d9a6
LL
1581 },
1582 'params': {
1583 'skip_download': True,
1584 },
1585 },
61f92af1 1586 {
067aa17e 1587 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
1588 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1589 'only_matching': True,
1590 },
313dfc45
LL
1591 {
1592 # Video with yt:stretch=17:0
1593 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1594 'info_dict': {
1595 'id': 'Q39EVAstoRM',
1596 'ext': 'mp4',
1597 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1598 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1599 'upload_date': '20151107',
1600 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1601 'uploader': 'CH GAMER DROID',
1602 },
1603 'params': {
1604 'skip_download': True,
1605 },
be49068d 1606 'skip': 'This video does not exist.',
313dfc45 1607 },
201c1459 1608 {
1609 # Video with incomplete 'yt:stretch=16:'
1610 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1611 'only_matching': True,
1612 },
7caf9830
S
1613 {
1614 # Video licensed under Creative Commons
1615 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1616 'info_dict': {
1617 'id': 'M4gD1WSo5mA',
1618 'ext': 'mp4',
1619 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1620 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1621 'duration': 721,
17322130 1622 'upload_date': '20150128',
7caf9830 1623 'uploader_id': 'BerkmanCenter',
ec85ded8 1624 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 1625 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830 1626 'license': 'Creative Commons Attribution license (reuse allowed)',
976ae3ea 1627 'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',
1628 'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',
1629 'like_count': int,
1630 'age_limit': 0,
1631 'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],
1632 'channel': 'The Berkman Klein Center for Internet & Society',
1633 'availability': 'public',
1634 'view_count': int,
1635 'categories': ['Education'],
1636 'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',
1637 'live_status': 'not_live',
1638 'playable_in_embed': True,
12a1b225 1639 'comment_count': int,
6c73052c 1640 'channel_follower_count': int
7caf9830
S
1641 },
1642 'params': {
1643 'skip_download': True,
1644 },
1645 },
fd050249
S
1646 {
1647 # Channel-like uploader_url
1648 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1649 'info_dict': {
1650 'id': 'eQcmzGIKrzg',
1651 'ext': 'mp4',
1652 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 1653 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1654 'duration': 4060,
17322130 1655 'upload_date': '20151120',
eb6793ba 1656 'uploader': 'Bernie Sanders',
fd050249 1657 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1658 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249 1659 'license': 'Creative Commons Attribution license (reuse allowed)',
976ae3ea 1660 'playable_in_embed': True,
1661 'tags': 'count:12',
1662 'like_count': int,
1663 'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1664 'age_limit': 0,
1665 'availability': 'public',
1666 'categories': ['News & Politics'],
1667 'channel': 'Bernie Sanders',
1668 'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',
1669 'view_count': int,
1670 'live_status': 'not_live',
1671 'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
12a1b225 1672 'comment_count': int,
6c73052c 1673 'channel_follower_count': int
fd050249
S
1674 },
1675 'params': {
1676 'skip_download': True,
1677 },
1678 },
040ac686
S
1679 {
1680 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1681 'only_matching': True,
7f29cf54
S
1682 },
1683 {
067aa17e 1684 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1685 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1686 'only_matching': True,
6496ccb4
S
1687 },
1688 {
1689 # Rental video preview
1690 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1691 'info_dict': {
1692 'id': 'uGpuVWrhIzE',
1693 'ext': 'mp4',
1694 'title': 'Piku - Trailer',
1695 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1696 'upload_date': '20150811',
1697 'uploader': 'FlixMatrix',
1698 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1699 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1700 'license': 'Standard YouTube License',
1701 },
1702 'params': {
1703 'skip_download': True,
1704 },
eb6793ba 1705 'skip': 'This video is not available.',
022a5d66 1706 },
12afdc2a
S
1707 {
1708 # YouTube Red video with episode data
1709 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1710 'info_dict': {
1711 'id': 'iqKdEhx-dD4',
1712 'ext': 'mp4',
1713 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1714 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1715 'duration': 2085,
12afdc2a
S
1716 'upload_date': '20170118',
1717 'uploader': 'Vsauce',
1718 'uploader_id': 'Vsauce',
1719 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1720 'series': 'Mind Field',
1721 'season_number': 1,
1722 'episode_number': 1,
976ae3ea 1723 'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',
1724 'tags': 'count:12',
1725 'view_count': int,
1726 'availability': 'public',
1727 'age_limit': 0,
1728 'channel': 'Vsauce',
1729 'episode': 'Episode 1',
1730 'categories': ['Entertainment'],
1731 'season': 'Season 1',
1732 'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',
1733 'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',
1734 'like_count': int,
1735 'playable_in_embed': True,
1736 'live_status': 'not_live',
6c73052c 1737 'channel_follower_count': int
12afdc2a
S
1738 },
1739 'params': {
1740 'skip_download': True,
1741 },
1742 'expected_warnings': [
1743 'Skipping DASH manifest',
1744 ],
1745 },
c7121fa7
S
1746 {
1747 # The following content has been identified by the YouTube community
1748 # as inappropriate or offensive to some audiences.
1749 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1750 'info_dict': {
1751 'id': '6SJNVb0GnPI',
1752 'ext': 'mp4',
1753 'title': 'Race Differences in Intelligence',
1754 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1755 'duration': 965,
1756 'upload_date': '20140124',
1757 'uploader': 'New Century Foundation',
1758 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1759 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1760 },
1761 'params': {
1762 'skip_download': True,
1763 },
545cc85d 1764 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1765 },
022a5d66
S
1766 {
1767 # itag 212
1768 'url': '1t24XAntNCY',
1769 'only_matching': True,
fd5c4aab
S
1770 },
1771 {
1772 # geo restricted to JP
1773 'url': 'sJL6WA-aGkQ',
1774 'only_matching': True,
1775 },
cd5a74a2
S
1776 {
1777 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1778 'only_matching': True,
1779 },
bc2ca1bb 1780 {
1781 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1782 'only_matching': True,
1783 },
1784 {
1785 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1786 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1787 'only_matching': True,
1788 },
825cd268
RA
1789 {
1790 # DRM protected
1791 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1792 'only_matching': True,
4fe54c12
S
1793 },
1794 {
1795 # Video with unsupported adaptive stream type formats
1796 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1797 'info_dict': {
1798 'id': 'Z4Vy8R84T1U',
1799 'ext': 'mp4',
1800 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1801 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1802 'duration': 433,
1803 'upload_date': '20130923',
1804 'uploader': 'Amelia Putri Harwita',
1805 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1806 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1807 'formats': 'maxcount:10',
1808 },
1809 'params': {
1810 'skip_download': True,
1811 'youtube_include_dash_manifest': False,
1812 },
5429d6a9 1813 'skip': 'not actual anymore',
5caabd3c 1814 },
1815 {
822b9d9c 1816 # Youtube Music Auto-generated description
5caabd3c 1817 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1818 'info_dict': {
1819 'id': 'MgNrAu2pzNs',
1820 'ext': 'mp4',
1821 'title': 'Voyeur Girl',
1822 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1823 'upload_date': '20190312',
5429d6a9
S
1824 'uploader': 'Stephen - Topic',
1825 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1826 'artist': 'Stephen',
1827 'track': 'Voyeur Girl',
1828 'album': 'it\'s too much love to know my dear',
1829 'release_date': '20190313',
1830 'release_year': 2019,
976ae3ea 1831 'alt_title': 'Voyeur Girl',
1832 'view_count': int,
1833 'uploader_url': 'http://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
1834 'playable_in_embed': True,
1835 'like_count': int,
1836 'categories': ['Music'],
1837 'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
1838 'channel': 'Stephen',
1839 'availability': 'public',
1840 'creator': 'Stephen',
1841 'duration': 169,
1842 'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',
1843 'age_limit': 0,
1844 'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1845 'tags': 'count:11',
1846 'live_status': 'not_live',
6c73052c 1847 'channel_follower_count': int
5caabd3c 1848 },
1849 'params': {
1850 'skip_download': True,
1851 },
1852 },
66b48727
RA
1853 {
1854 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1855 'only_matching': True,
1856 },
011e75e6
S
1857 {
1858 # invalid -> valid video id redirection
1859 'url': 'DJztXj2GPfl',
1860 'info_dict': {
1861 'id': 'DJztXj2GPfk',
1862 'ext': 'mp4',
1863 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1864 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1865 'upload_date': '20090125',
1866 'uploader': 'Prochorowka',
1867 'uploader_id': 'Prochorowka',
1868 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1869 'artist': 'Panjabi MC',
1870 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1871 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1872 },
1873 'params': {
1874 'skip_download': True,
1875 },
545cc85d 1876 'skip': 'Video unavailable',
ea74e00b
DP
1877 },
1878 {
1879 # empty description results in an empty string
1880 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1881 'info_dict': {
1882 'id': 'x41yOUIvK2k',
1883 'ext': 'mp4',
1884 'title': 'IMG 3456',
1885 'description': '',
1886 'upload_date': '20170613',
1887 'uploader_id': 'ElevageOrVert',
1888 'uploader': 'ElevageOrVert',
976ae3ea 1889 'view_count': int,
1890 'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',
1891 'uploader_url': 'http://www.youtube.com/user/ElevageOrVert',
1892 'like_count': int,
1893 'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',
1894 'tags': [],
1895 'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',
1896 'availability': 'public',
1897 'age_limit': 0,
1898 'categories': ['Pets & Animals'],
1899 'duration': 7,
1900 'playable_in_embed': True,
1901 'live_status': 'not_live',
1902 'channel': 'ElevageOrVert',
6c73052c 1903 'channel_follower_count': int
ea74e00b
DP
1904 },
1905 'params': {
1906 'skip_download': True,
1907 },
1908 },
a0566bbf 1909 {
29f7c58a 1910 # with '};' inside yt initial data (see [1])
1911 # see [2] for an example with '};' inside ytInitialPlayerResponse
1912 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1913 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1914 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1915 'info_dict': {
1916 'id': 'CHqg6qOn4no',
1917 'ext': 'mp4',
1918 'title': 'Part 77 Sort a list of simple types in c#',
1919 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1920 'upload_date': '20130831',
1921 'uploader_id': 'kudvenkat',
1922 'uploader': 'kudvenkat',
976ae3ea 1923 'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',
1924 'like_count': int,
1925 'uploader_url': 'http://www.youtube.com/user/kudvenkat',
1926 'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',
1927 'live_status': 'not_live',
1928 'categories': ['Education'],
1929 'availability': 'public',
1930 'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',
1931 'tags': 'count:12',
1932 'playable_in_embed': True,
1933 'age_limit': 0,
1934 'view_count': int,
1935 'duration': 522,
1936 'channel': 'kudvenkat',
12a1b225 1937 'comment_count': int,
6c73052c 1938 'channel_follower_count': int
a0566bbf 1939 },
1940 'params': {
1941 'skip_download': True,
1942 },
1943 },
29f7c58a 1944 {
1945 # another example of '};' in ytInitialData
1946 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1947 'only_matching': True,
1948 },
1949 {
1950 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1951 'only_matching': True,
1952 },
545cc85d 1953 {
cc2db878 1954 # https://github.com/ytdl-org/youtube-dl/pull/28094
1955 'url': 'OtqTfy26tG0',
1956 'info_dict': {
1957 'id': 'OtqTfy26tG0',
1958 'ext': 'mp4',
1959 'title': 'Burn Out',
1960 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1961 'upload_date': '20141120',
1962 'uploader': 'The Cinematic Orchestra - Topic',
1963 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1964 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1965 'artist': 'The Cinematic Orchestra',
1966 'track': 'Burn Out',
1967 'album': 'Every Day',
976ae3ea 1968 'like_count': int,
1969 'live_status': 'not_live',
1970 'alt_title': 'Burn Out',
1971 'duration': 614,
1972 'age_limit': 0,
1973 'view_count': int,
1974 'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1975 'creator': 'The Cinematic Orchestra',
1976 'channel': 'The Cinematic Orchestra',
1977 'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],
1978 'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1979 'availability': 'public',
1980 'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',
1981 'categories': ['Music'],
1982 'playable_in_embed': True,
6c73052c 1983 'channel_follower_count': int
cc2db878 1984 },
1985 'params': {
1986 'skip_download': True,
1987 },
545cc85d 1988 },
bc2ca1bb 1989 {
1990 # controversial video, only works with bpctr when authenticated with cookies
1991 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1992 'only_matching': True,
1993 },
a1a7907b 1994 {
1995 # controversial video, requires bpctr/contentCheckOk
1996 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
1997 'info_dict': {
1998 'id': 'SZJvDhaSDnc',
1999 'ext': 'mp4',
2000 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
2001 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
976ae3ea 2002 'uploader': 'CBS Mornings',
11f9be09 2003 'uploader_id': 'CBSThisMorning',
a1a7907b 2004 'upload_date': '20140716',
976ae3ea 2005 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',
2006 'duration': 170,
2007 'categories': ['News & Politics'],
2008 'uploader_url': 'http://www.youtube.com/user/CBSThisMorning',
2009 'view_count': int,
2010 'channel': 'CBS Mornings',
2011 'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],
2012 'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',
2013 'age_limit': 18,
2014 'availability': 'needs_auth',
2015 'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',
2016 'like_count': int,
2017 'live_status': 'not_live',
2018 'playable_in_embed': True,
6c73052c 2019 'channel_follower_count': int
a1a7907b 2020 }
2021 },
f7ad7160 2022 {
2023 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
2024 'url': 'cBvYw8_A0vQ',
2025 'info_dict': {
2026 'id': 'cBvYw8_A0vQ',
2027 'ext': 'mp4',
2028 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
2029 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
2030 'upload_date': '20201120',
2031 'uploader': 'Walk around Japan',
2032 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
2033 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
976ae3ea 2034 'duration': 1456,
2035 'categories': ['Travel & Events'],
2036 'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
2037 'view_count': int,
2038 'channel': 'Walk around Japan',
2039 'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],
2040 'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp',
2041 'age_limit': 0,
2042 'availability': 'public',
2043 'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
2044 'live_status': 'not_live',
2045 'playable_in_embed': True,
6c73052c 2046 'channel_follower_count': int
f7ad7160 2047 },
2048 'params': {
2049 'skip_download': True,
2050 },
0fb983f6 2051 }, {
2052 # Has multiple audio streams
2053 'url': 'WaOKSUlf4TM',
2054 'only_matching': True
9297939e 2055 }, {
2056 # Requires Premium: has format 141 when requested using YTM url
2057 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
2058 'only_matching': True
2059 }, {
120916da 2060 # multiple subtitles with same lang_code
2061 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
2062 'only_matching': True,
109dd3b2 2063 }, {
2064 # Force use android client fallback
2065 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
2066 'info_dict': {
2067 'id': 'YOelRv7fMxY',
11f9be09 2068 'title': 'DIGGING A SECRET TUNNEL Part 1',
109dd3b2 2069 'ext': '3gp',
2070 'upload_date': '20210624',
2071 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
2072 'uploader': 'colinfurze',
11f9be09 2073 'uploader_id': 'colinfurze',
109dd3b2 2074 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
976ae3ea 2075 'description': 'md5:5d5991195d599b56cd0c4148907eec50',
2076 'duration': 596,
2077 'categories': ['Entertainment'],
2078 'uploader_url': 'http://www.youtube.com/user/colinfurze',
2079 'view_count': int,
2080 'channel': 'colinfurze',
2081 'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],
2082 'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',
2083 'age_limit': 0,
2084 'availability': 'public',
2085 'like_count': int,
2086 'live_status': 'not_live',
2087 'playable_in_embed': True,
6c73052c 2088 'channel_follower_count': int
109dd3b2 2089 },
2090 'params': {
2091 'format': '17', # 3gp format available on android
2092 'extractor_args': {'youtube': {'player_client': ['android']}},
2093 },
120916da 2094 },
109dd3b2 2095 {
2096 # Skip download of additional client configs (remix client config in this case)
2097 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
2098 'only_matching': True,
2099 'params': {
2100 'extractor_args': {'youtube': {'player_skip': ['configs']}},
2101 },
8fc54b12 2102 }, {
2103 # shorts
2104 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
2105 'only_matching': True,
9222c381 2106 }, {
2107 'note': 'Storyboards',
2108 'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',
2109 'info_dict': {
2110 'id': '5KLPxDtMqe8',
2111 'ext': 'mhtml',
2112 'format_id': 'sb0',
2113 'title': 'Your Brain is Plastic',
2114 'uploader_id': 'scishow',
2115 'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',
2116 'upload_date': '20140324',
2117 'uploader': 'SciShow',
976ae3ea 2118 'like_count': int,
2119 'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',
2120 'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',
2121 'view_count': int,
2122 'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',
2123 'playable_in_embed': True,
2124 'tags': 'count:12',
2125 'uploader_url': 'http://www.youtube.com/user/scishow',
2126 'availability': 'public',
2127 'channel': 'SciShow',
2128 'live_status': 'not_live',
2129 'duration': 248,
2130 'categories': ['Education'],
2131 'age_limit': 0,
6c73052c 2132 'channel_follower_count': int
9222c381 2133 }, 'params': {'format': 'mhtml', 'skip_download': True}
992f9a73 2134 }, {
2135 # Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)
2136 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',
2137 'info_dict': {
2138 'id': '2NUZ8W2llS4',
2139 'ext': 'mp4',
2140 'title': 'The NP that test your phone performance 🙂',
2141 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',
2142 'uploader': 'Leon Nguyen',
2143 'uploader_id': 'VNSXIII',
2144 'uploader_url': 'http://www.youtube.com/user/VNSXIII',
2145 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',
2146 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',
2147 'duration': 21,
2148 'view_count': int,
2149 'age_limit': 0,
2150 'categories': ['Gaming'],
2151 'tags': 'count:23',
2152 'playable_in_embed': True,
2153 'live_status': 'not_live',
2154 'upload_date': '20220103',
2155 'like_count': int,
2156 'availability': 'public',
2157 'channel': 'Leon Nguyen',
2158 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',
12a1b225 2159 'comment_count': int,
992f9a73 2160 'channel_follower_count': int
2161 }
2162 }, {
2163 # date text is premiered video, ensure upload date in UTC (published 1641172509)
2164 'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',
2165 'info_dict': {
2166 'id': 'mzZzzBU6lrM',
2167 'ext': 'mp4',
2168 'title': 'I Met GeorgeNotFound In Real Life...',
2169 'description': 'md5:cca98a355c7184e750f711f3a1b22c84',
2170 'uploader': 'Quackity',
2171 'uploader_id': 'QuackityHQ',
2172 'uploader_url': 'http://www.youtube.com/user/QuackityHQ',
2173 'channel_id': 'UC_8NknAFiyhOUaZqHR3lq3Q',
2174 'channel_url': 'https://www.youtube.com/channel/UC_8NknAFiyhOUaZqHR3lq3Q',
2175 'duration': 955,
2176 'view_count': int,
2177 'age_limit': 0,
2178 'categories': ['Entertainment'],
2179 'tags': 'count:26',
2180 'playable_in_embed': True,
2181 'live_status': 'not_live',
2182 'release_timestamp': 1641172509,
2183 'release_date': '20220103',
2184 'upload_date': '20220103',
2185 'like_count': int,
2186 'availability': 'public',
2187 'channel': 'Quackity',
2188 'thumbnail': 'https://i.ytimg.com/vi/mzZzzBU6lrM/maxresdefault.jpg',
2189 'channel_follower_count': int
2190 }
2191 },
2192 { # continuous livestream. Microformat upload date should be preferred.
2193 # Upload date was 2021-06-19 (not UTC), while stream start is 2021-11-27
2194 'url': 'https://www.youtube.com/watch?v=kgx4WGK0oNU',
2195 'info_dict': {
2196 'id': 'kgx4WGK0oNU',
2197 'title': r're:jazz\/lofi hip hop radio🌱chill beats to relax\/study to \[LIVE 24\/7\] \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
2198 'ext': 'mp4',
2199 'channel_id': 'UC84whx2xxsiA1gXHXXqKGOA',
2200 'availability': 'public',
2201 'age_limit': 0,
2202 'release_timestamp': 1637975704,
2203 'upload_date': '20210619',
2204 'channel_url': 'https://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',
2205 'live_status': 'is_live',
2206 'thumbnail': 'https://i.ytimg.com/vi/kgx4WGK0oNU/maxresdefault.jpg',
2207 'uploader': '阿鲍Abao',
2208 'uploader_url': 'http://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',
2209 'channel': 'Abao in Tokyo',
2210 'channel_follower_count': int,
2211 'release_date': '20211127',
2212 'tags': 'count:39',
2213 'categories': ['People & Blogs'],
2214 'like_count': int,
2215 'uploader_id': 'UC84whx2xxsiA1gXHXXqKGOA',
2216 'view_count': int,
2217 'playable_in_embed': True,
2218 'description': 'md5:2ef1d002cad520f65825346e2084e49d',
2219 },
2220 'params': {'skip_download': True}
6e634cbe 2221 }, {
2222 # Story. Requires specific player params to work.
ee27297f 2223 'url': 'https://www.youtube.com/watch?v=vv8qTUWmulI',
6e634cbe 2224 'info_dict': {
ee27297f 2225 'id': 'vv8qTUWmulI',
6e634cbe 2226 'ext': 'mp4',
ee27297f 2227 'availability': 'unlisted',
2228 'view_count': int,
2229 'channel_id': 'UCzIZ8HrzDgc-pNQDUG6avBA',
2230 'upload_date': '20220526',
2231 'categories': ['Education'],
2232 'title': 'Story',
2233 'channel': 'IT\'S HISTORY',
2234 'description': '',
2235 'uploader_id': 'BlastfromthePast',
2236 'duration': 12,
2237 'uploader': 'IT\'S HISTORY',
6e634cbe 2238 'playable_in_embed': True,
6e634cbe 2239 'age_limit': 0,
6e634cbe 2240 'live_status': 'not_live',
ee27297f 2241 'tags': [],
2242 'thumbnail': 'https://i.ytimg.com/vi_webp/vv8qTUWmulI/maxresdefault.webp',
2243 'uploader_url': 'http://www.youtube.com/user/BlastfromthePast',
2244 'channel_url': 'https://www.youtube.com/channel/UCzIZ8HrzDgc-pNQDUG6avBA',
12a1b225
A
2245 },
2246 'skip': 'stories get removed after some period of time',
ee27297f 2247 }, {
2248 'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA',
2249 'info_dict': {
2250 'id': 'tjjjtzRLHvA',
2251 'ext': 'mp4',
2252 'title': 'ハッシュタグ無し };if window.ytcsi',
2253 'upload_date': '20220323',
2254 'like_count': int,
2255 'availability': 'unlisted',
2256 'channel': 'nao20010128nao',
2257 'thumbnail': 'https://i.ytimg.com/vi_webp/tjjjtzRLHvA/maxresdefault.webp',
2258 'age_limit': 0,
2259 'uploader': 'nao20010128nao',
2260 'uploader_id': 'nao20010128nao',
2261 'categories': ['Music'],
6e634cbe 2262 'view_count': int,
2263 'description': '',
ee27297f 2264 'channel_url': 'https://www.youtube.com/channel/UCdqltm_7iv1Vs6kp6Syke5A',
2265 'channel_id': 'UCdqltm_7iv1Vs6kp6Syke5A',
2266 'live_status': 'not_live',
2267 'playable_in_embed': True,
2268 'channel_follower_count': int,
2269 'duration': 6,
2270 'tags': [],
2271 'uploader_url': 'http://www.youtube.com/user/nao20010128nao',
6e634cbe 2272 }
a4166234 2273 }, {
2274 'note': '6 channel audio',
2275 'url': 'https://www.youtube.com/watch?v=zgdo7-RRjgo',
2276 'only_matching': True,
6e634cbe 2277 }
2eb88d95
PH
2278 ]
2279
# Test data for third-party pages that embed a YouTube video: each entry pairs
# the URL of the embedding page with the metadata expected for the video.
# (Presumably consumed by the project's test runner - confirm.)
_WEBPAGE_TESTS = [
    # YouTube <object> embed
    {
        'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',
        # NOTE(review): this digest is identical to the description hash below;
        # confirm it really is the checksum of the downloaded file
        'md5': '873c81d308b979f0e23ee7e620b312a3',
        'info_dict': {
            'id': 'msN87y-iEx0',
            'ext': 'mp4',
            'title': 'Feynman: Mirrors FUN TO IMAGINE 6',
            'upload_date': '20080526',
            'description': 'md5:873c81d308b979f0e23ee7e620b312a3',
            'uploader': 'Christopher Sykes',
            'uploader_id': 'ChristopherJSykes',
            'age_limit': 0,
            'tags': ['feynman', 'mirror', 'science', 'physics', 'imagination', 'fun', 'cool', 'puzzle'],
            'channel_id': 'UCCeo--lls1vna5YJABWAcVA',
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi/msN87y-iEx0/hqdefault.jpg',
            'like_count': int,
            'comment_count': int,
            'channel': 'Christopher Sykes',
            'live_status': 'not_live',
            'channel_url': 'https://www.youtube.com/channel/UCCeo--lls1vna5YJABWAcVA',
            'availability': 'public',
            'duration': 195,
            'view_count': int,
            'categories': ['Science & Technology'],
            'channel_follower_count': int,
            'uploader_url': 'http://www.youtube.com/user/ChristopherJSykes',
        },
        'params': {
            'skip_download': True,
        }
    },
]
2315
@classmethod
def suitable(cls, url):
    """Reject URLs carrying a non-empty `list` query parameter; otherwise defer to the parent class"""
    # Function-scope import, kept exactly where the original has it
    from ..utils import parse_qs

    list_values = parse_qs(url).get('list', [None])
    return False if list_values[0] else super().suitable(url)
201c1459 2324
def __init__(self, *args, **kwargs):
    """Run the parent initialiser, then create the two (initially empty) per-instance caches"""
    super().__init__(*args, **kwargs)
    # _code_cache is filled by _load_player, _player_cache by _cached
    self._code_cache, self._player_cache = {}, {}
e0df6211 2329
def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data):
    """
    Turn the formats flagged `is_from_start` into generator-driven live DASH formats

    Each kept format dict is modified in place: it is marked live, its protocol
    becomes 'http_dash_segments_generator' and its 'fragments' entry becomes a
    partial of _live_dash_fragments bound to the format id, `live_start_time`
    and a manifest lookup callback.
    """
    manifest_lock = threading.Lock()

    is_live = True
    start_time = time.time()
    formats = [fmt for fmt in formats if fmt.get('is_from_start')]

    def refetch_manifest(format_id, delay):
        # Re-download the player responses once `delay` seconds have passed since the last fetch
        nonlocal formats, start_time, is_live
        if time.time() <= start_time + delay:
            return

        _, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
        video_details = traverse_obj(
            prs, (..., 'videoDetails'), expected_type=dict, default=[])
        microformats = traverse_obj(
            prs, (..., 'microformat', 'playerMicroformatRenderer'),
            expected_type=dict, default=[])
        _, is_live, _, formats, _ = self._list_formats(video_id, microformats, video_details, prs, player_url)
        start_time = time.time()

    def mpd_feed(format_id, delay):
        """
        @returns (manifest_url, manifest_stream_number, is_live) or None
        """
        with manifest_lock:
            refetch_manifest(format_id, delay)

        for fmt in formats:
            if fmt['format_id'] == format_id:
                return fmt['manifest_url'], fmt['manifest_stream_number'], is_live

        # No format with this id in the current list
        if is_live:
            self.report_warning(
                f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}')
        else:
            self.to_screen(f'{video_id}: Video is no longer live')
        return None

    for fmt in formats:
        fmt.update({
            'is_live': True,
            'protocol': 'http_dash_segments_generator',
            'fragments': functools.partial(
                self._live_dash_fragments, fmt['format_id'], live_start_time, mpd_feed),
        })
2373
def _live_dash_fragments(self, format_id, live_start_time, mpd_feed, ctx):
    """
    Generate fragment descriptors for a live DASH format, polling for new segments

    @param format_id        id handed to mpd_feed to look up the manifest
    @param live_start_time  start time of the stream; a falsy value disables the
                            "older than MAX_DURATION" check
    @param mpd_feed         callable(format_id, delay) returning
                            (manifest_url, manifest_stream_number, is_live) or None
    @param ctx              dict; 'start' is read and 'last_error' is popped from it
    Yields dicts with the keys 'url' and 'fragment_count'.
    """
    FETCH_SPAN, MAX_DURATION = 5, 432000

    mpd_url, stream_number, is_live = None, None, True

    begin_index = 0
    download_start_time = ctx.get('start') or time.time()

    lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION
    if lack_early_segments:
        self.report_warning(bug_reports_message(
            'Starting download from the last 120 hours of the live stream since '
            'YouTube does not have data before that. If you think this is wrong,'), only_once=True)

    known_idx, no_fragment_score, last_segment_url = begin_index, 0, None
    fragments, fragment_base_url = None, None
    # The helper below returns this name on its failure paths. Binding it here
    # prevents a NameError when the very first manifest fetch fails, i.e. before
    # the polling loop has assigned it.
    last_seq = 0

    def _extract_sequence_from_mpd(refresh_sequence, immediate):
        """
        @returns (should_continue, last_seq); last_seq is only refreshed from the
                 manifest when the manifest is actually (re-)parsed
        """
        nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
        # Obtain from MPD's maximum seq value
        old_mpd_url = mpd_url
        last_error = ctx.pop('last_error', None)
        expire_fast = immediate or last_error and isinstance(last_error, urllib.error.HTTPError) and last_error.code == 403
        mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
                                           or (mpd_url, stream_number, False))
        if not refresh_sequence:
            if expire_fast and not is_live:
                return False, last_seq
            elif old_mpd_url == mpd_url:
                return True, last_seq
        try:
            fmts, _ = self._extract_mpd_formats_and_subtitles(
                mpd_url, None, note=False, errnote=False, fatal=False)
        except ExtractorError:
            fmts = None
        if not fmts:
            no_fragment_score += 2
            return False, last_seq
        # A bare next() would leak StopIteration into the generator frame, where
        # it turns into RuntimeError; treat a missing stream like a failed fetch
        fmt_info = next((x for x in fmts if x['manifest_stream_number'] == stream_number), None)
        if not fmt_info:
            no_fragment_score += 2
            return False, last_seq
        fragments = fmt_info['fragments']
        fragment_base_url = fmt_info['fragment_base_url']
        assert fragment_base_url

        _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
        return True, _last_seq

    while is_live:
        fetch_time = time.time()
        if no_fragment_score > 30:
            return
        if last_segment_url:
            # Obtain from "X-Head-Seqnum" header value from each segment
            try:
                urlh = self._request_webpage(
                    last_segment_url, None, note=False, errnote=False, fatal=False)
            except ExtractorError:
                urlh = None
            last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))
            if last_seq is None:
                no_fragment_score += 2
                last_segment_url = None
                continue
        else:
            should_continue, last_seq = _extract_sequence_from_mpd(True, no_fragment_score > 15)
            no_fragment_score += 2
            if not should_continue:
                continue

        if known_idx > last_seq:
            last_segment_url = None
            continue

        last_seq += 1

        if begin_index < 0 and known_idx < 0:
            # skip from the start when it's negative value
            known_idx = last_seq + begin_index
        if lack_early_segments:
            known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))
        try:
            for idx in range(known_idx, last_seq):
                # do not update sequence here or you'll get skipped some part of it
                should_continue, _ = _extract_sequence_from_mpd(False, False)
                if not should_continue:
                    known_idx = idx - 1
                    # ExtractorError doubles as the signal to restart the polling loop
                    raise ExtractorError('breaking out of outer loop')
                last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)
                yield {
                    'url': last_segment_url,
                    'fragment_count': last_seq,
                }
            if known_idx == last_seq:
                no_fragment_score += 5
            else:
                no_fragment_score = 0
            known_idx = last_seq
        except ExtractorError:
            continue

        # Keep roughly FETCH_SPAN seconds between the starts of two polls
        time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))
2475
def _extract_player_url(self, *ytcfgs, webpage=None):
    """
    Return the absolute player JS URL found in any of the given ytcfgs, or None

    `webpage` is accepted but not used here.
    """
    candidate = traverse_obj(
        ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
        get_all=False, expected_type=str)
    return urljoin('https://www.youtube.com', candidate) if candidate else None
109dd3b2 2483
b6de707d 2484 def _download_player_url(self, video_id, fatal=False):
2485 res = self._download_webpage(
2486 'https://www.youtube.com/iframe_api',
2487 note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
2488 if res:
2489 player_version = self._search_regex(
2490 r'player\\?/([0-9a-fA-F]{8})\\?/', res, 'player version', fatal=fatal)
2491 if player_version:
2492 return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'
2493
60064c53
PH
2494 def _signature_cache_id(self, example_sig):
2495 """ Return a string representation of a signature """
14f25df2 2496 return '.'.join(str(len(part)) for part in example_sig.split('.'))
60064c53 2497
e40c758c
S
2498 @classmethod
2499 def _extract_player_info(cls, player_url):
2500 for player_re in cls._PLAYER_INFO_RE:
2501 id_m = re.search(player_re, player_url)
2502 if id_m:
2503 break
2504 else:
c081b35c 2505 raise ExtractorError('Cannot identify player %r' % player_url)
545cc85d 2506 return id_m.group('id')
e40c758c 2507
404f611f 2508 def _load_player(self, video_id, player_url, fatal=True):
109dd3b2 2509 player_id = self._extract_player_info(player_url)
2510 if player_id not in self._code_cache:
1276a43a 2511 code = self._download_webpage(
109dd3b2 2512 player_url, video_id, fatal=fatal,
2513 note='Downloading player ' + player_id,
2514 errnote='Download of %s failed' % player_url)
1276a43a 2515 if code:
2516 self._code_cache[player_id] = code
404f611f 2517 return self._code_cache.get(player_id)
109dd3b2 2518
e40c758c 2519 def _extract_signature_function(self, video_id, player_url, example_sig):
545cc85d 2520 player_id = self._extract_player_info(player_url)
e0df6211 2521
c4417ddb 2522 # Read from filesystem cache
86e5f3ed 2523 func_id = f'js_{player_id}_{self._signature_cache_id(example_sig)}'
c4417ddb 2524 assert os.path.basename(func_id) == func_id
a0e07d31 2525
ae61d108 2526 self.write_debug(f'Extracting signature function {func_id}')
580ce007 2527 cache_spec, code = self.cache.load('youtube-sigfuncs', func_id), None
83799698 2528
580ce007 2529 if not cache_spec:
2530 code = self._load_player(video_id, player_url)
404f611f 2531 if code:
109dd3b2 2532 res = self._parse_sig_js(code)
ac668111 2533 test_string = ''.join(map(chr, range(len(example_sig))))
580ce007 2534 cache_spec = [ord(c) for c in res(test_string)]
9809740b 2535 self.cache.store('youtube-sigfuncs', func_id, cache_spec)
580ce007 2536
2537 return lambda s: ''.join(s[i] for i in cache_spec)
83799698 2538
60064c53 2539 def _print_sig_code(self, func, example_sig):
404f611f 2540 if not self.get_param('youtube_print_sig_code'):
2541 return
2542
edf3e38e
PH
2543 def gen_sig_code(idxs):
2544 def _genslice(start, end, step):
78caa52a 2545 starts = '' if start == 0 else str(start)
8bcc8756 2546 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
69ea8ca4 2547 steps = '' if step == 1 else (':%d' % step)
86e5f3ed 2548 return f's[{starts}{ends}{steps}]'
edf3e38e
PH
2549
2550 step = None
7af808a5
PH
2551 # Quelch pyflakes warnings - start will be set when step is set
2552 start = '(Never used)'
edf3e38e
PH
2553 for i, prev in zip(idxs[1:], idxs[:-1]):
2554 if step is not None:
2555 if i - prev == step:
2556 continue
2557 yield _genslice(start, prev, step)
2558 step = None
2559 continue
2560 if i - prev in [-1, 1]:
2561 step = i - prev
2562 start = prev
2563 continue
2564 else:
78caa52a 2565 yield 's[%d]' % prev
edf3e38e 2566 if step is None:
78caa52a 2567 yield 's[%d]' % i
edf3e38e
PH
2568 else:
2569 yield _genslice(start, i, step)
2570
ac668111 2571 test_string = ''.join(map(chr, range(len(example_sig))))
c705320f 2572 cache_res = func(test_string)
edf3e38e 2573 cache_spec = [ord(c) for c in cache_res]
78caa52a 2574 expr_code = ' + '.join(gen_sig_code(cache_spec))
60064c53 2575 signature_id_tuple = '(%s)' % (
14f25df2 2576 ', '.join(str(len(p)) for p in example_sig.split('.')))
69ea8ca4 2577 code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
78caa52a 2578 ' return %s\n') % (signature_id_tuple, expr_code)
69ea8ca4 2579 self.to_screen('Extracted signature function:\n' + code)
edf3e38e 2580
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Find the signature function in the player JS and return a one-argument Python callable wrapping it"""
    # Tried in order; the group named `sig` captures the function name
    sig_name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        sig_name_patterns, jscode, 'Initial JS player signature function name', group='sig')

    initial_function = JSInterpreter(jscode).extract_function(funcname)

    def run_signature(s):
        # The extracted function takes its arguments as a single list
        return initial_function([s])

    return run_signature
2604
580ce007 2605 def _cached(self, func, *cache_id):
2606 def inner(*args, **kwargs):
2607 if cache_id not in self._player_cache:
2608 try:
2609 self._player_cache[cache_id] = func(*args, **kwargs)
2610 except ExtractorError as e:
2611 self._player_cache[cache_id] = e
2612 except Exception as e:
2613 self._player_cache[cache_id] = ExtractorError(traceback.format_exc(), cause=e)
2614
2615 ret = self._player_cache[cache_id]
2616 if isinstance(ret, Exception):
2617 raise ret
2618 return ret
2619 return inner
2620
545cc85d 2621 def _decrypt_signature(self, s, video_id, player_url):
257a2501 2622 """Turn the encrypted s field into a working signature"""
580ce007 2623 extract_sig = self._cached(
2624 self._extract_signature_function, 'sig', player_url, self._signature_cache_id(s))
2625 func = extract_sig(video_id, player_url, s)
2626 self._print_sig_code(func, s)
2627 return func(s)
404f611f 2628
def _decrypt_nsig(self, s, video_id, player_url):
    """
    Turn the encrypted n field into a working signature

    Runs the player's n function through the native JS interpreter first and
    retries with PhantomJS when that raises JSInterpreter.Exception.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')
    player_url = urljoin('https://www.youtube.com', player_url)

    jsi, player_id, func_code = self._extract_n_function_code(video_id, player_url)
    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    try:
        nsig_func = self._cached(
            self._extract_n_function_from_code, 'nsig func', player_url)(jsi, func_code)
        ret = nsig_func(s)
    except JSInterpreter.Exception as native_error:
        try:
            phantom = PhantomJSwrapper(self, timeout=5000)
        except ExtractorError:
            # No usable PhantomJS: surface the native interpreter's error instead
            raise native_error
        self.report_warning(
            f'Native nsig extraction failed: Trying with PhantomJS\n'
            f' n = {s} ; player = {player_url}', video_id)
        self.write_debug(native_error)

        arg_names, func_body = func_code
        script = f'console.log(function({", ".join(arg_names)}) {{ {func_body} }}({s!r}));'
        ret = phantom.execute(
            script, video_id=video_id, note='Executing signature code').strip()

    self.write_debug(f'Decrypted nsig {s} => {ret}')
    return ret
2659
90a1df30 2660 def _extract_n_function_name(self, jscode):
2661 funcname, idx = self._search_regex(
2662 r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)',
2663 jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
2664 if not idx:
2665 return funcname
2666
2667 return json.loads(js_to_json(self._search_regex(
2668 rf'var {re.escape(funcname)}\s*=\s*(\[.+?\]);', jscode,
2669 f'Initial JS player n function list ({funcname}.{idx})')))[int(idx)]
2670
def _extract_n_function_code(self, video_id, player_url):
    """
    Return (interpreter, player_id, func_code) for the player's n function

    func_code comes from the 'youtube-nsig' cache when present; otherwise it is
    extracted from the downloaded player code and stored in that cache.
    """
    player_id = self._extract_player_info(player_url)
    cached_code = self.cache.load('youtube-nsig', player_id, after='2022.08.19.1')
    if cached_code:
        # As before, the interpreter is constructed from the cached value itself
        return JSInterpreter(cached_code), player_id, cached_code

    jscode = self._load_player(video_id, player_url)
    jsi = JSInterpreter(jscode)
    func_code = jsi.extract_function_code(self._extract_n_function_name(jscode))
    self.cache.store('youtube-nsig', player_id, func_code)
    return jsi, player_id, func_code
2683
2684 def _extract_n_function_from_code(self, jsi, func_code):
8f53dc44 2685 func = jsi.extract_function_from_code(*func_code)
f6ca640b 2686
580ce007 2687 def extract_nsig(s):
25836db6 2688 try:
2689 ret = func([s])
2690 except JSInterpreter.Exception:
2691 raise
2692 except Exception as e:
2693 raise JSInterpreter.Exception(traceback.format_exc(), cause=e)
2694
f6ca640b 2695 if ret.startswith('enhanced_except_'):
25836db6 2696 raise JSInterpreter.Exception('Signature function returned an exception')
f6ca640b 2697 return ret
580ce007 2698
2699 return extract_nsig
e0df6211 2700
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    The 'STS' entry of ytcfg is preferred; otherwise the player code is searched.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return None

    code = self._load_player(video_id, player_url, fatal=fatal)
    if code:
        sts = int_or_none(self._search_regex(
            r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
            'JS player signature timestamp', group='sts', fatal=fatal))
    return sts
2724
def _mark_watched(self, video_id, player_responses):
    """
    Request the playback-tracking URLs found in the player responses

    The 'videostatsPlaybackUrl' is requested first, then the
    'videostatsWatchtimeUrl' ("fully" watched). Processing stops with a warning
    at the first URL that is missing.
    """
    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'

    tracking_keys = ('videostatsPlaybackUrl', 'videostatsWatchtimeUrl')
    for is_full, key in enumerate(tracking_keys):
        label = 'fully ' if is_full else ''
        url = get_first(player_responses, ('playbackTracking', key, 'baseUrl'),
                        expected_type=url_or_none)
        if not url:
            self.report_warning(f'Unable to mark {label}watched')
            return
        parsed_url = urllib.parse.urlparse(url)
        qs = urllib.parse.parse_qs(parsed_url.query)

        cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16))

        # more consistent results setting it to right before the end
        reported_len = (qs.get('len') or ['1.5'])[0]
        video_length = [str(float(reported_len) - 1)]

        extra_params = {
            'ver': ['2'],
            'cpn': [cpn],
            'cmt': video_length,
            'el': 'detailpage',  # otherwise defaults to "shorts"
        }
        if is_full:
            # these seem to mark watchtime "history" in the real world
            # they're required, so send in a single value
            extra_params['st'] = video_length
            extra_params['et'] = video_length
        qs.update(extra_params)

        new_query = urllib.parse.urlencode(qs, True)
        url = urllib.parse.urlunparse(parsed_url._replace(query=new_query))

        self._download_webpage(
            url, video_id, f'Marking {label}watched',
            'Unable to mark watched', fatal=False)
d77ab8e2 2765
@classmethod
def _extract_from_webpage(cls, url, webpage):
    """Yield url_results for the YouTube videos referenced or embedded in `webpage`"""
    # Invidious Instances
    # https://github.com/yt-dlp/yt-dlp/issues/195
    # https://github.com/iv-org/invidious/pull/1730
    alternate_link = re.search(
        r'<link rel="alternate" href="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"',
        webpage)
    if alternate_link:
        yield cls.url_result(alternate_link.group('url'), cls)
        raise cls.StopExtraction()

    yield from super()._extract_from_webpage(url, webpage)

    # lazyYT YouTube embed
    for raw_id in re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage):
        yield cls.url_result(unescapeHTML(raw_id), cls, raw_id)

    # Wordpress "YouTube Video Importer" plugin
    # (the unnamed third group holds the video id)
    for player_div in re.finditer(r'''(?x)<div[^>]+
            class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
            data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage):
        embedded_id = player_div.group(3)
        yield cls.url_result(embedded_id, cls, embedded_id)
66c9fa36 2789
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the id that get_temp_id finds in `url`; raise ExtractorError when it finds none"""
    video_id = cls.get_temp_id(url)
    if video_id:
        return video_id
    raise ExtractorError(f'Invalid URL: {url}')
c5e8d7af 2796
def _extract_chapters_from_json(self, data, duration):
    """Extract chapters from the chaptered player bar found in `data`"""
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
    )
    chapter_list = traverse_obj(data, chapters_path, expected_type=list)

    def start_seconds(chapter):
        # The renderer stores the start in milliseconds
        return float_or_none(
            traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000)

    def title_text(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    return self._extract_chapters(
        chapter_list, chapter_time=start_seconds, chapter_title=title_text, duration=duration)
2811
def _extract_chapters_from_engagement_panel(self, data, duration):
    """Return the chapters of the first engagement-panel marker list that yields any, else an empty list"""
    content_list = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    for contents in content_list:
        chapters = self._extract_chapters(
            traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
            chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []
7c365c21 2824
1890fc63 2825 def _extract_chapters_from_description(self, description, duration):
2826 return self._extract_chapters(
2827 re.findall(r'(?m)^((?:\d+:)?\d{1,2}:\d{2})\b\W*\s(.+?)\s*$', description or ''),
2828 chapter_time=lambda x: parse_duration(x[0]), chapter_title=lambda x: x[1],
2829 duration=duration, strict=False)
84213ea8 2830
1890fc63 2831 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration, strict=True):
2832 if not duration:
2833 return
2834 chapter_list = [{
2835 'start_time': chapter_time(chapter),
2836 'title': chapter_title(chapter),
2837 } for chapter in chapter_list or []]
2838 if not strict:
2839 chapter_list.sort(key=lambda c: c['start_time'] or 0)
2840
a3976e07 2841 chapters = [{'start_time': 0}]
1890fc63 2842 for idx, chapter in enumerate(chapter_list):
a3976e07 2843 if chapter['start_time'] is None:
1890fc63 2844 self.report_warning(f'Incomplete chapter {idx}')
2845 elif chapters[-1]['start_time'] <= chapter['start_time'] <= duration:
1890fc63 2846 chapters.append(chapter)
2847 else:
2848 self.report_warning(f'Invalid start time for chapter "{chapter["title"]}"')
a3976e07 2849 return chapters[1:]
84213ea8 2850
a1c5d2ca
M
def _extract_comment(self, comment_renderer, parent=None):
    """Convert a comment renderer into a comment dict; returns None when it has no commentId"""
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return None

    def pick(getter, expected_type):
        return try_get(comment_renderer, getter, expected_type)

    text = self._get_text(comment_renderer, 'contentText')

    # note: timestamp is an estimate calculated from the current time and time_text
    timestamp, time_text = self._extract_time_text(comment_renderer, 'publishedTimeText')
    author = self._get_text(comment_renderer, 'authorText')
    author_id = pick(lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], str)

    vote_getters = (lambda x: x['voteCount']['simpleText'], lambda x: x['likeCount'])
    like_count = parse_count(pick(vote_getters, str)) or 0
    author_thumbnail = pick(lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], str)

    author_is_uploader = pick(lambda x: x['authorIsChannelOwner'], bool)
    action_buttons = pick(lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {}
    is_favorited = 'creatorHeart' in action_buttons

    return {
        'id': comment_id,
        'text': text,
        'timestamp': timestamp,
        'time_text': time_text,
        'like_count': like_count,
        'is_favorited': is_favorited,
        'author': author,
        'author_id': author_id,
        'author_thumbnail': author_thumbnail,
        'author_is_uploader': author_is_uploader,
        'parent': parent or 'root'
    }
2885
def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """
    Generate comment dicts by following continuations from root_continuation_data

    @param root_continuation_data  renderer data the first continuation is extracted from
    @param ytcfg                   passed on to the API header/response helpers
    @param video_id                used to forge a continuation and in the final warning
    @param parent                  id of the parent comment when fetching replies, else None
    @param tracker                 dict of running counters shared across the recursive
                                   calls; created here when not supplied
    """

    get_single_config_arg = lambda c: self._configuration_arg(c, [''])[0]

    def extract_header(contents):
        # Reads the expected comment count and returns the continuation of the
        # requested sort order (None when no header item provides one)
        _continuation = None
        for content in contents:
            comments_header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
            expected_comment_count = self._get_count(
                comments_header_renderer, 'countText', 'commentsCount')

            if expected_comment_count:
                tracker['est_total'] = expected_comment_count
                self.to_screen(f'Downloading ~{expected_comment_count} comments')
            comment_sort_index = int(get_single_config_arg('comment_sort') != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = str_or_none(sort_menu_item.get('title'))
            if not sort_text:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text.lower())
            break
        return _continuation

    def extract_thread(contents):
        # Yields each comment followed by its replies (fetched recursively)
        if not parent:
            tracker['current_page_thread'] = 0
        for content in contents:
            if not parent and tracker['total_parent_comments'] >= max_parents:
                # A bare yield produces None, which the consuming loop below
                # treats as the signal to stop the whole extraction
                yield
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            comment_renderer = get_first(
                (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                expected_type=dict, default={})

            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue

            tracker['running_total'] += 1
            tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
            yield comment

            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                tracker['current_page_thread'] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, ytcfg, video_id,
                    parent=comment.get('id'), tracker=tracker)
                # Cap the replies by both the per-thread limit and what is left
                # of the overall reply budget
                yield from itertools.islice(comment_entries_iter, min(
                    max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments'])))

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = dict(
            running_total=0,
            est_total=0,
            current_page_thread=0,
            total_parent_comments=0,
            total_reply_comments=0)

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
    if max_depth:
        self._downloader.deprecation_warning(
            '[youtube] max_comment_depth extractor argument is deprecated. Set max replies in the max-comments extractor argument instead.')
    if max_depth == 1 and parent:
        return

    # Missing or non-integer limits fall back to sys.maxsize.
    # max_comments itself is not used in this method.
    max_comments, max_parents, max_replies, max_replies_per_thread, *_ = map(
        lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4)

    continuation = self._extract_continuation(root_continuation_data)

    response = None
    is_forced_continuation = False
    is_first_continuation = parent is None
    if is_first_continuation and not continuation:
        # Sometimes you can get comments by generating the continuation yourself,
        # even if YouTube initially reports them being disabled - e.g. stories comments.
        # Note: if the comment section is actually disabled, YouTube may return a response with
        # required check_get_keys missing. So we will disable that check initially in this case.
        continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
        is_forced_continuation = True

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        comment_prog_str = f"({tracker['running_total']}/{tracker['est_total']})"
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                    tracker['current_page_thread'], comment_prog_str)
        else:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)

        response = self._extract_response(
            item_id=None, query=continuation,
            ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
            check_get_keys='onResponseReceivedEndpoints' if not is_forced_continuation else None)
        # The key check is only skipped for the very first forged request
        is_forced_continuation = False
        continuation_contents = traverse_obj(
            response, 'onResponseReceivedEndpoints', expected_type=list, default=[])

        continuation = None
        for continuation_section in continuation_contents:
            continuation_items = traverse_obj(
                continuation_section,
                (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
                get_all=False, expected_type=list) or []
            if is_first_continuation:
                # The first section is treated as the header: it only supplies
                # the continuation for the chosen sort order
                continuation = extract_header(continuation_items)
                is_first_continuation = False
                if continuation:
                    break
                continue

            for entry in extract_thread(continuation_items):
                if not entry:
                    # None sentinel from extract_thread: parent limit reached
                    return
                yield entry
            continuation = self._extract_continuation({'contents': continuation_items})
            if continuation:
                break

    # Surface YouTube's own message only when no comment at all was extracted
    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent and tracker['running_total'] == 0:
        self.report_warning(f'Youtube said: {message}', video_id=video_id, only_once=True)
3031
3032 @staticmethod
3033 def _generate_comment_continuation(video_id):
3034 """
3035 Generates initial comment section continuation token from given video id
3036 """
3037 token = f'\x12\r\x12\x0b{video_id}\x18\x062\'"\x11"\x0b{video_id}0\x00x\x020\x00B\x10comments-section'
3038 return base64.b64encode(token.encode()).decode()
3039
def _get_comments(self, ytcfg, video_id, contents, webpage):
    """
    Entry point for comment extraction.

    Returns a lazy iterator over the comment entries of the item section
    identified as 'comment-item-section', capped by the `max_comments`
    extractor argument (no cap when it is not a valid integer).
    `webpage` is accepted but not used here.
    """
    def iter_comment_entries(section_contents):
        # Pick the first item section that holds the comments (None if absent)
        comment_section = None
        for section in traverse_obj(section_contents, (..., 'itemSectionRenderer'), default={}):
            if section.get('sectionIdentifier') == 'comment-item-section':
                comment_section = section
                break
        yield from self._comment_entries(comment_section, ytcfg, video_id)

    comment_limit = int_or_none(self._configuration_arg('max_comments', [''])[0])
    return itertools.islice(iter_comment_entries(contents), 0, comment_limit)
a1c5d2ca 3050
109dd3b2 3051 @staticmethod
99e9e001 3052 def _get_checkok_params():
3053 return {'contentCheckOk': True, 'racyCheckOk': True}
3054
@classmethod
def _generate_player_context(cls, sts=None):
    """
    Build the playback context part of a player API query.

    @param sts  Signature timestamp; only included when it is not None
    @returns    Dict holding the `playbackContext` together with the
                flags from `_get_checkok_params`
    """
    content_playback_context = {'html5Preference': 'HTML5_PREF_WANTS'}
    if sts is not None:
        content_playback_context['signatureTimestamp'] = sts

    player_context = {
        'playbackContext': {
            'contentPlaybackContext': content_playback_context,
        },
    }
    player_context.update(cls._get_checkok_params())
    return player_context
3068
e7e94f2a
D
@staticmethod
def _is_agegated(player_response):
    """
    Tell whether the player response indicates an age gate.

    True when `playabilityStatus` has a truthy `desktopLegacyAgeGateReason`,
    or when its `status`/`reason` contains one of the known markers
    (the comparison is case-sensitive).
    """
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    age_gate_markers = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    status_texts = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
    for marker in age_gate_markers:
        for text in status_texts:
            if marker in text:
                return True
    return False
3080
@staticmethod
def _is_unplayable(player_response):
    """Tell whether the playability status of the player response is 'UNPLAYABLE'."""
    playability_status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return playability_status == 'UNPLAYABLE'
9275f62c 3084
50ac0e54 3085 _STORY_PLAYER_PARAMS = '8AEB'
3086
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr, smuggled_data):
    """
    Request the player API response of a video using the given client.

    The signature timestamp is only looked up when a `player_url` is given.
    Story videos and the android client additionally send the story player
    params. Returns the response, or None when it is empty.
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
    if player_url:
        sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)
    else:
        sts = None
    api_headers = self.generate_api_headers(
        ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)

    player_query = {'videoId': video_id}
    needs_story_params = smuggled_data.get('is_story') or _split_innertube_client(client)[0] == 'android'
    if needs_story_params:
        player_query['params'] = self._STORY_PLAYER_PARAMS
    player_query.update(self._generate_player_context(sts))

    client_label = client.replace('_', ' ').strip()
    response = self._extract_response(
        item_id=video_id, ep='player', query=player_query,
        ytcfg=player_ytcfg, headers=api_headers, fatal=True,
        default_client=client,
        note='Downloading %s player API JSON' % client_label)
    return response or None
3108
def _get_requested_clients(self, url, smuggled_data):
    """
    Work out which innertube clients to query for the given URL.

    Clients come from the `player_client` extractor argument. Besides the
    names of the allowed clients (those not starting with '_'), the
    keywords 'default' and 'all' are understood; anything else is skipped
    with a warning. When nothing valid was requested, the default clients
    are used. For music URLs the `*_music` variant of each requested client
    is added if such a client exists.

    @returns  The requested clients with duplicates removed, order preserved
    """
    default = ['android', 'web']
    allowed_clients = sorted(
        (client for client in INNERTUBE_CLIENTS if client[:1] != '_'),
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)

    requested_clients = []
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'default':
            requested_clients.extend(default)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        requested_clients = list(default)

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Collect the additions first: extending the list from a generator that
        # iterates over the very same list would also visit the freshly added names
        music_clients = [
            f'{client}_music' for client in requested_clients
            if f'{client}_music' in INNERTUBE_CLIENTS]
        requested_clients.extend(music_clients)

    return orderedSet(requested_clients)
cf7e015f 3132
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data):
    """
    Collect the player responses of all given clients.

    Clients are processed in the given order. Depending on the response of
    a client, fallback clients (creator/embedded variants) may be queued.
    Errors of individual clients are reported as warnings; the last error is
    only raised when no player response could be obtained at all.

    @returns  Tuple of (list of player responses, player URL or None)
    """
    initial_pr = None
    if webpage:
        initial_pr = self._search_json(
            self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)

    seen_clients = set(clients)
    # Reversed so that pop() hands out the clients in their original order
    pending_clients = clients[::-1]
    prs = []

    def append_client(*client_names):
        """ Append the first client name that exists but not already used """
        for candidate in client_names:
            actual_client = _split_innertube_client(candidate)[0]
            if actual_client not in INNERTUBE_CLIENTS or actual_client in seen_clients:
                continue
            pending_clients.append(candidate)
            seen_clients.add(actual_client)
            return

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr:
        stripped_pr = dict(initial_pr)
        stripped_pr['streamingData'] = None
        prs.append(stripped_pr)

    last_error = None
    tried_iframe_fallback = False
    player_url = None
    while pending_clients:
        client, base_client, variant = _split_innertube_client(pending_clients.pop())
        is_web = client == 'web'
        player_ytcfg = master_ytcfg if is_web else {}
        if 'configs' not in self._configuration_arg('player_skip') and not is_web:
            player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg

        if not player_url:
            player_url = self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        # The iframe fallback is attempted at most once per call
        if require_js_player and not (player_url or tried_iframe_fallback):
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            if is_web and initial_pr:
                pr = initial_pr
            else:
                pr = self._extract_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg,
                    player_url if require_js_player else None, initial_pr, smuggled_data)
        except ExtractorError as e:
            # Only the most recent error is kept; earlier ones become warnings
            if last_error:
                self.report_warning(last_error)
            last_error = e
            continue

        if pr:
            # YouTube may return a different video player response than expected.
            # See: https://github.com/TeamNewPipe/NewPipe/issues/8713
            pr_video_id = traverse_obj(pr, ('videoDetails', 'videoId'))
            if pr_video_id and pr_video_id != video_id:
                self.report_warning(
                    f'Skipping player response from {client} client (got player response for video "{pr_video_id}" instead of "{video_id}")' + bug_reports_message())
            else:
                prs.append(pr)

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
            append_client(f'{base_client}_creator')
        elif self._is_agegated(pr):
            if variant == 'tv_embedded':
                append_client(f'{base_client}_embedded')
            elif not variant:
                append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')

    if last_error:
        if not prs:
            raise last_error
        self.report_warning(last_error)
    return prs, player_url
11f9be09 3214
def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, is_live, duration):
    """
    Generate the formats found in the given streaming data, followed by the subtitles.

    Yields one dict per usable format: first those listed directly under
    `formats`/`adaptiveFormats`, then those read from the HLS and DASH
    manifests. The very last item yielded is not a format but the dict of
    subtitles collected from the manifests.

    @param streaming_data  Iterable of `streamingData` dicts
    @param video_id        Video id, used in messages and passed to helpers
    @param player_url      Player URL used for signature/nsig decryption;
                           formats that need a signature are skipped without it
    @param is_live         Whether the video is live; affects which manifests are read
    @param duration        Video duration; a format whose `approxDurationMs`
                           divided by it is below 500 is flagged as damaged
    """
    itags, stream_ids = {}, []
    itag_qualities, res_qualities = {}, {0: None}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            quality = fmt.get('audioQuality', '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            # No plain URL: it has to be rebuilt from the signature cipher
            sc = urllib.parse.parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not all((sc, fmt_url, player_url, encrypted_sig)):
                continue
            try:
                fmt_url += '&%s=%s' % (
                    traverse_obj(sc, ('sp', -1)) or 'signature',
                    self._decrypt_signature(encrypted_sig, video_id, player_url)
                )
            except ExtractorError as e:
                self.report_warning('Signature extraction failed: Some formats may be missing',
                                    video_id=video_id, only_once=True)
                self.write_debug(e, only_once=True)
                continue

        query = parse_qs(fmt_url)
        throttled = False
        if query.get('n'):
            try:
                decrypt_nsig = self._cached(self._decrypt_nsig, 'nsig', query['n'][0])
                fmt_url = update_url_query(fmt_url, {
                    'n': decrypt_nsig(query['n'][0], video_id, player_url)
                })
            except ExtractorError as e:
                phantomjs_hint = ''
                if isinstance(e, JSInterpreter.Exception):
                    phantomjs_hint = f' Install {self._downloader._format_err("PhantomJS", self._downloader.Styles.EMPHASIS)} to workaround the issue\n'
                self.report_warning(
                    f'nsig extraction failed: You may experience throttling for some formats\n{phantomjs_hint}'
                    f' n = {query["n"][0]} ; player = {player_url}', video_id=video_id, only_once=True)
                self.write_debug(e, only_once=True)
                throttled = True

        if itag:
            itags[itag] = 'https'
            stream_ids.append(stream_id)

        tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        language_preference = (
            10 if audio_track.get('audioIsDefault')
            else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower()
            else -1)
        # Some formats may have much smaller duration than others (possibly damaged during encoding)
        # E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
        # Make sure to avoid false positives with small duration differences.
        # E.g. __2ABJjxzNo, ySuUZEjARPY
        is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) / duration < 500)
        if is_damaged:
            self.report_warning(
                f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': join_nonempty(
                '%s%s' % (audio_track.get('displayName') or '',
                          ' (default)' if language_preference > 0 else ''),
                # quality is None when the format has neither `quality` nor `audioQuality`
                fmt.get('qualityLabel') or (quality or '').replace('audio_quality_', ''),
                try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
                try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
                throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '),
            # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
            'source_preference': -10 if throttled else -5 if itag == '22' else -1,
            'fps': int_or_none(fmt.get('fps')) or None,
            'audio_channels': fmt.get('audioChannels'),
            'height': height,
            'quality': q(quality),
            'has_drm': bool(fmt.get('drmFamilies')),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': join_nonempty(audio_track.get('id', '').split('.')[0],
                                      'desc' if language_preference < -1 else ''),
            'language_preference': language_preference,
            # Strictly de-prioritize damaged and 3gp formats
            'preference': -10 if is_damaged else -2 if itag == '17' else None,
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        yield dct

    live_from_start = is_live and self.get_param('live_from_start')
    skip_manifests = self._configuration_arg('skip')
    if not self.get_param('youtube_include_hls_manifest', True):
        skip_manifests.append('hls')
    if not self.get_param('youtube_include_dash_manifest', True):
        skip_manifests.append('dash')
    get_dash = 'dash' not in skip_manifests and (
        not is_live or live_from_start or self._configuration_arg('include_live_dash'))
    get_hls = not live_from_start and 'hls' not in skip_manifests

    def process_manifest_format(f, proto, itag):
        """Assign format id and quality to a manifest format; return False if it duplicates a known one"""
        if itag in itags:
            if itags[itag] == proto or f'{itag}-{proto}' in itags:
                return False
            itag = f'{itag}-{proto}'
        if itag:
            f['format_id'] = itag
            itags[itag] = proto

        f['quality'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1))
        if f['quality'] == -1 and f.get('height'):
            # Fall back to the quality recorded for the closest known height
            f['quality'] = q(res_qualities[min(res_qualities, key=lambda x: abs(x - f['height']))])
        return True

    subtitles = {}
    for sd in streaming_data:
        hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            fmts, subs = self._extract_m3u8_formats_and_subtitles(hls_manifest_url, video_id, 'mp4', fatal=False, live=is_live)
            subtitles = self._merge_subtitles(subs, subtitles)
            for f in fmts:
                if process_manifest_format(f, 'hls', self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)):
                    yield f

        dash_manifest_url = get_dash and sd.get('dashManifestUrl')
        if dash_manifest_url:
            formats, subs = self._extract_mpd_formats_and_subtitles(dash_manifest_url, video_id, fatal=False)
            subtitles = self._merge_subtitles(subs, subtitles)  # Prioritize HLS subs over DASH
            for f in formats:
                if process_manifest_format(f, 'dash', f['format_id']):
                    f['filesize'] = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                    if live_from_start:
                        f['is_from_start'] = True

                    yield f
    yield subtitles
11f9be09 3404
def _extract_storyboard(self, player_responses, duration):
    """
    Generate storyboard formats from the storyboard spec of the player responses.

    The spec is a '|'-separated string: the first item is the base URL and
    each following item describes one storyboard as 8 '#'-separated fields,
    of which the first five are width, height, frame count, columns and
    rows, and the last two are the values for `$N` and the `sigh` parameter.
    Malformed storyboards are skipped with a warning.

    Nothing is generated when there is no usable base URL or when the
    duration is unknown, since fragment durations and fps are derived from it.

    @param player_responses  Player responses to take the spec from
    @param duration          Duration of the video
    """
    spec = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
    base_url = url_or_none(urljoin('https://i.ytimg.com/', spec.pop() or None))
    # Without a duration the divisions below would raise a TypeError
    if not base_url or not duration:
        return
    L = len(spec) - 1
    for i, args in enumerate(spec):
        args = args.split('#')
        counts = list(map(int_or_none, args[:5]))
        if len(args) != 8 or not all(counts):
            self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
            continue
        width, height, frame_count, cols, rows = counts
        N, sigh = args[6:]

        # spec was reversed above, so L - i is the position of this storyboard in the original spec
        url = base_url.replace('$L', str(L - i)).replace('$N', N) + f'&sigh={sigh}'
        fragment_count = frame_count / (cols * rows)
        fragment_duration = duration / fragment_count
        yield {
            'format_id': f'sb{i}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': url,
            'width': width,
            'height': height,
            'fps': frame_count / duration,
            'rows': rows,
            'columns': cols,
            'fragments': [{
                'url': url.replace('$M', str(j)),
                # The last fragment may cover less than a full fragment duration
                'duration': min(fragment_duration, duration - (j * fragment_duration)),
            } for j in range(math.ceil(fragment_count))],
        }
3442
def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
    """
    Download the webpage (unless skipped via `player_skip`) and the player responses.

    @returns  Tuple of (webpage or None, master ytcfg, player responses, player URL)
    """
    webpage = None
    if 'webpage' not in self._configuration_arg('player_skip'):
        webpage_query = {'bpctr': '9999999999', 'has_verified': '1'}
        if smuggled_data.get('is_story'):
            webpage_query['pp'] = self._STORY_PLAYER_PARAMS
        webpage = self._download_webpage(webpage_url, video_id, fatal=False, query=webpage_query)

    # Fall back to the default ytcfg when none could be extracted from the webpage
    master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()

    requested_clients = self._get_requested_clients(url, smuggled_data)
    player_responses, player_url = self._extract_player_responses(
        requested_clients, video_id, webpage, master_ytcfg, smuggled_data)

    return webpage, master_ytcfg, player_responses, player_url
3459
def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
    """
    Determine the live state of the video and extract its formats and subtitles.

    `isLive` from the video details takes precedence; `isLiveNow` from the
    live broadcast details is only consulted when the former is None.

    @returns  Tuple of (live broadcast details, is_live, streaming data, formats, subtitles)
    """
    live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
    is_live = get_first(video_details, 'isLive')
    if is_live is None:
        is_live = get_first(live_broadcast_details, 'isLiveNow')

    streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
    # The generator yields all formats first and the subtitles as its final item
    yielded = self._extract_formats_and_subtitles(
        streaming_data, video_id, player_url, is_live, duration)
    *formats, subtitles = yielded

    return live_broadcast_details, is_live, streaming_data, formats, subtitles
adbc4ec4
THD
3470
3471 def _real_extract(self, url):
3472 url, smuggled_data = unsmuggle_url(url, {})
3473 video_id = self._match_id(url)
3474
3475 base_url = self.http_scheme() + '//www.youtube.com/'
3476 webpage_url = base_url + 'watch?v=' + video_id
3477
3478 webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
3479
11f9be09 3480 playability_statuses = traverse_obj(
3481 player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])
3482
3483 trailer_video_id = get_first(
3484 playability_statuses,
3485 ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
3486 expected_type=str)
3487 if trailer_video_id:
3488 return self.url_result(
3489 trailer_video_id, self.ie_key(), trailer_video_id)
3490
3491 search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
3492 if webpage else (lambda x: None))
3493
3494 video_details = traverse_obj(
3495 player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
3496 microformats = traverse_obj(
3497 player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
3498 expected_type=dict, default=[])
3499 video_title = (
3500 get_first(video_details, 'title')
3501 or self._get_text(microformats, (..., 'title'))
3502 or search_meta(['og:title', 'twitter:title', 'title']))
3503 video_description = get_first(video_details, 'shortDescription')
3504
d89257f3 3505 multifeed_metadata_list = get_first(
3506 player_responses,
3507 ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
3508 expected_type=str)
3509 if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):
3510 if self.get_param('noplaylist'):
11f9be09 3511 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
d89257f3 3512 else:
3513 entries = []
3514 feed_ids = []
3515 for feed in multifeed_metadata_list.split(','):
3516 # Unquote should take place before split on comma (,) since textual
3517 # fields may contain comma as well (see
3518 # https://github.com/ytdl-org/youtube-dl/issues/8536)
14f25df2 3519 feed_data = urllib.parse.parse_qs(
ac668111 3520 urllib.parse.unquote_plus(feed))
d89257f3 3521
3522 def feed_entry(name):
3523 return try_get(
14f25df2 3524 feed_data, lambda x: x[name][0], str)
d89257f3 3525
3526 feed_id = feed_entry('id')
3527 if not feed_id:
3528 continue
3529 feed_title = feed_entry('title')
3530 title = video_title
3531 if feed_title:
3532 title += ' (%s)' % feed_title
3533 entries.append({
3534 '_type': 'url_transparent',
3535 'ie_key': 'Youtube',
3536 'url': smuggle_url(
3537 '%swatch?v=%s' % (base_url, feed_data['id'][0]),
3538 {'force_singlefeed': True}),
3539 'title': title,
3540 })
3541 feed_ids.append(feed_id)
3542 self.to_screen(
3543 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
3544 % (', '.join(feed_ids), video_id))
3545 return self.playlist_result(
3546 entries, video_id, video_title, video_description)
11f9be09 3547
a1b2d843 3548 duration = int_or_none(
3549 get_first(video_details, 'lengthSeconds')
3550 or get_first(microformats, 'lengthSeconds')
3551 or parse_duration(search_meta('duration'))) or None
3552
c646d76f 3553 live_broadcast_details, is_live, streaming_data, formats, automatic_captions = \
3554 self._list_formats(video_id, microformats, video_details, player_responses, player_url)
bf1317d2 3555
545cc85d 3556 if not formats:
11f9be09 3557 if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
88acdbc2 3558 self.report_drm(video_id)
11f9be09 3559 pemr = get_first(
3560 playability_statuses,
3561 ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
3562 reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
3563 subreason = clean_html(self._get_text(pemr, 'subreason') or '')
545cc85d 3564 if subreason:
545cc85d 3565 if subreason == 'The uploader has not made this video available in your country.':
11f9be09 3566 countries = get_first(microformats, 'availableCountries')
545cc85d 3567 if not countries:
3568 regions_allowed = search_meta('regionsAllowed')
3569 countries = regions_allowed.split(',') if regions_allowed else None
b7da73eb 3570 self.raise_geo_restricted(subreason, countries, metadata_available=True)
11f9be09 3571 reason += f'. {subreason}'
545cc85d 3572 if reason:
b7da73eb 3573 self.raise_no_formats(reason, expected=True)
bf1317d2 3574
11f9be09 3575 keywords = get_first(video_details, 'keywords', expected_type=list) or []
545cc85d 3576 if not keywords and webpage:
3577 keywords = [
3578 unescapeHTML(m.group('content'))
3579 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
3580 for keyword in keywords:
3581 if keyword.startswith('yt:stretch='):
201c1459 3582 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
3583 if mobj:
3584 # NB: float is intentional for forcing float division
3585 w, h = (float(v) for v in mobj.groups())
3586 if w > 0 and h > 0:
3587 ratio = w / h
3588 for f in formats:
3589 if f.get('vcodec') != 'none':
3590 f['stretched_ratio'] = ratio
3591 break
a709d873 3592 thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))
ff2751ac 3593 thumbnail_url = search_meta(['og:image', 'twitter:image'])
3594 if thumbnail_url:
3595 thumbnails.append({
3596 'url': thumbnail_url,
ff2751ac 3597 })
fccf5021 3598 original_thumbnails = thumbnails.copy()
3599
0ba692ac 3600 # The best resolution thumbnails sometimes does not appear in the webpage
bfec31be 3601 # See: https://github.com/yt-dlp/yt-dlp/issues/340
cca80fe6 3602 # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
e820fbaa 3603 thumbnail_names = [
962ffcf8 3604 # While the *1,*2,*3 thumbnails are just below their corresponding "*default" variants
bfec31be 3605 # in resolution, these are not the custom thumbnail. So de-prioritize them
3606 'maxresdefault', 'hq720', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default',
3607 'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3'
cca80fe6 3608 ]
cca80fe6 3609 n_thumbnail_names = len(thumbnail_names)
0ba692ac 3610 thumbnails.extend({
3611 'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
3612 video_id=video_id, name=name, ext=ext,
3613 webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
cca80fe6 3614 } for name in thumbnail_names for ext in ('webp', 'jpg'))
0ba692ac 3615 for thumb in thumbnails:
cca80fe6 3616 i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
0ba692ac 3617 thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
ff2751ac 3618 self._remove_duplicate_formats(thumbnails)
fccf5021 3619 self._downloader._sort_thumbnails(original_thumbnails)
545cc85d 3620
7ea65411 3621 category = get_first(microformats, 'category') or search_meta('genre')
3622 channel_id = str_or_none(
3623 get_first(video_details, 'channelId')
3624 or get_first(microformats, 'externalChannelId')
3625 or search_meta('channelId'))
7ea65411 3626 owner_profile_url = get_first(microformats, 'ownerProfileUrl')
3627
3628 live_content = get_first(video_details, 'isLiveContent')
3629 is_upcoming = get_first(video_details, 'isUpcoming')
3630 if is_live is None:
3631 if is_upcoming or live_content is False:
3632 is_live = False
3633 if is_upcoming is None and (live_content or is_live):
3634 is_upcoming = False
adbc4ec4
THD
3635 live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
3636 live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
3637 if not duration and live_end_time and live_start_time:
3638 duration = live_end_time - live_start_time
3639
3640 if is_live and self.get_param('live_from_start'):
3641 self._prepare_live_from_start_formats(formats, video_id, live_start_time, url, webpage_url, smuggled_data)
7ea65411 3642
720c3099 3643 formats.extend(self._extract_storyboard(player_responses, duration))
3644
31b532a1 3645 # source_preference is lower for throttled/potentially damaged formats
7e798d72 3646 self._sort_formats(formats, (
3647 'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang', 'proto'))
720c3099 3648
545cc85d 3649 info = {
3650 'id': video_id,
39ca3b5c 3651 'title': video_title,
545cc85d 3652 'formats': formats,
3653 'thumbnails': thumbnails,
fccf5021 3654 # The best thumbnail that we are sure exists. Prevents unnecessary
3655 # URL checking if user don't care about getting the best possible thumbnail
3656 'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
545cc85d 3657 'description': video_description,
11f9be09 3658 'uploader': get_first(video_details, 'author'),
545cc85d 3659 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
3660 'uploader_url': owner_profile_url,
3661 'channel_id': channel_id,
a70635b8 3662 'channel_url': format_field(channel_id, None, 'https://www.youtube.com/channel/%s'),
545cc85d 3663 'duration': duration,
3664 'view_count': int_or_none(
11f9be09 3665 get_first((video_details, microformats), (..., 'viewCount'))
545cc85d 3666 or search_meta('interactionCount')),
11f9be09 3667 'average_rating': float_or_none(get_first(video_details, 'averageRating')),
545cc85d 3668 'age_limit': 18 if (
11f9be09 3669 get_first(microformats, 'isFamilySafe') is False
545cc85d 3670 or search_meta('isFamilyFriendly') == 'false'
3671 or search_meta('og:restrictions:age') == '18+') else 0,
3672 'webpage_url': webpage_url,
3673 'categories': [category] if category else None,
3674 'tags': keywords,
11f9be09 3675 'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
7ea65411 3676 'is_live': is_live,
3677 'was_live': (False if is_live or is_upcoming or live_content is False
3678 else None if is_live is None or is_upcoming is None
3679 else live_content),
3680 'live_status': 'is_upcoming' if is_upcoming else None, # rest will be set by YoutubeDL
adbc4ec4 3681 'release_timestamp': live_start_time,
545cc85d 3682 }
b477fc13 3683
e325a21a 3684 if get_first(video_details, 'isPostLiveDvr'):
3685 self.write_debug('Video is in Post-Live Manifestless mode')
3686 info['live_status'] = 'post_live'
3687 if (duration or 0) > 4 * 3600:
3688 self.report_warning(
3689 'The livestream has not finished processing. Only 4 hours of the video can be currently downloaded. '
3690 'This is a known issue and patches are welcome')
3691
c646d76f 3692 subtitles = {}
3944e7af 3693 pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
545cc85d 3694 if pctr:
ecdc9049 3695 def get_lang_code(track):
3696 return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
3697 or track.get('languageCode'))
3698
3699 # Converted into dicts to remove duplicates
3700 captions = {
3701 get_lang_code(sub): sub
3702 for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
3703 translation_languages = {
3704 lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
3705 for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}
3706
774d79cc 3707 def process_language(container, base_url, lang_code, sub_name, query):
120916da 3708 lang_subs = container.setdefault(lang_code, [])
545cc85d 3709 for fmt in self._SUBTITLE_FORMATS:
3710 query.update({
3711 'fmt': fmt,
3712 })
3713 lang_subs.append({
3714 'ext': fmt,
60f393e4 3715 'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),
774d79cc 3716 'name': sub_name,
545cc85d 3717 })
7e72694b 3718
07b47084 3719 # NB: Constructing the full subtitle dictionary is slow
3720 get_translated_subs = 'translated_subs' not in self._configuration_arg('skip') and (
3721 self.get_param('writeautomaticsub', False) or self.get_param('listsubtitles'))
ecdc9049 3722 for lang_code, caption_track in captions.items():
3723 base_url = caption_track.get('baseUrl')
1235d333 3724 orig_lang = parse_qs(base_url).get('lang', [None])[-1]
545cc85d 3725 if not base_url:
3726 continue
ecdc9049 3727 lang_name = self._get_text(caption_track, 'name', max_runs=1)
545cc85d 3728 if caption_track.get('kind') != 'asr':
545cc85d 3729 if not lang_code:
3730 continue
3731 process_language(
ecdc9049 3732 subtitles, base_url, lang_code, lang_name, {})
3733 if not caption_track.get('isTranslatable'):
3734 continue
3944e7af 3735 for trans_code, trans_name in translation_languages.items():
3736 if not trans_code:
545cc85d 3737 continue
1235d333 3738 orig_trans_code = trans_code
ecdc9049 3739 if caption_track.get('kind') != 'asr':
07b47084 3740 if not get_translated_subs:
18e49408 3741 continue
ecdc9049 3742 trans_code += f'-{lang_code}'
a70635b8 3743 trans_name += format_field(lang_name, None, ' from %s')
d49669ac 3744 # Add an "-orig" label to the original language so that it can be distinguished.
3745 # The subs are returned without "-orig" as well for compatibility
1235d333 3746 if lang_code == f'a-{orig_trans_code}':
0c8d9e5f 3747 process_language(
d49669ac 3748 automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})
3749 # Setting tlang=lang returns damaged subtitles.
d49669ac 3750 process_language(automatic_captions, base_url, trans_code, trans_name,
1235d333 3751 {} if orig_lang == orig_trans_code else {'tlang': trans_code})
c646d76f 3752
3753 info['automatic_captions'] = automatic_captions
3754 info['subtitles'] = subtitles
7e72694b 3755
14f25df2 3756 parsed_url = urllib.parse.urlparse(url)
545cc85d 3757 for component in [parsed_url.fragment, parsed_url.query]:
14f25df2 3758 query = urllib.parse.parse_qs(component)
545cc85d 3759 for k, v in query.items():
3760 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
3761 d_k += '_time'
3762 if d_k not in info and k in s_ks:
3763 info[d_k] = parse_duration(query[k][0])
822b9d9c
RA
3764
3765 # Youtube Music Auto-generated description
822b9d9c 3766 if video_description:
1890fc63 3767 mobj = re.search(
3768 r'''(?xs)
3769 (?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+
3770 (?P<album>[^\n]+)
3771 (?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?
3772 (?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?
3773 (.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?
3774 .+\nAuto-generated\ by\ YouTube\.\s*$
3775 ''', video_description)
822b9d9c 3776 if mobj:
822b9d9c
RA
3777 release_year = mobj.group('release_year')
3778 release_date = mobj.group('release_date')
3779 if release_date:
3780 release_date = release_date.replace('-', '')
3781 if not release_year:
545cc85d 3782 release_year = release_date[:4]
3783 info.update({
3784 'album': mobj.group('album'.strip()),
3785 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
3786 'track': mobj.group('track').strip(),
3787 'release_date': release_date,
cc2db878 3788 'release_year': int_or_none(release_year),
545cc85d 3789 })
7e72694b 3790
545cc85d 3791 initial_data = None
3792 if webpage:
56ba69e4 3793 initial_data = self.extract_yt_initial_data(video_id, webpage, fatal=False)
545cc85d 3794 if not initial_data:
99e9e001 3795 query = {'videoId': video_id}
3796 query.update(self._get_checkok_params())
109dd3b2 3797 initial_data = self._extract_response(
3798 item_id=video_id, ep='next', fatal=False,
99e9e001 3799 ytcfg=master_ytcfg, query=query,
3800 headers=self.generate_api_headers(ytcfg=master_ytcfg),
109dd3b2 3801 note='Downloading initial data API JSON')
545cc85d 3802
0df111a3 3803 info['comment_count'] = traverse_obj(initial_data, (
3804 'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'itemSectionRenderer',
3805 'contents', ..., 'commentsEntryPointHeaderRenderer', 'commentCount', 'simpleText'
3806 ), (
3807 'engagementPanels', lambda _, v: v['engagementPanelSectionListRenderer']['panelIdentifier'] == 'comment-item-section',
3808 'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer', 'contextualInfo', 'runs', ..., 'text'
3809 ), expected_type=int_or_none, get_all=False)
3810
19a03940 3811 try: # This will error if there is no livechat
c60ee3a2 3812 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
19a03940 3813 except (KeyError, IndexError, TypeError):
3814 pass
3815 else:
ecdc9049 3816 info.setdefault('subtitles', {})['live_chat'] = [{
4ce05f57 3817 # url is needed to set cookies
3818 'url': f'https://www.youtube.com/watch?v={video_id}&bpctr=9999999999&has_verified=1',
c60ee3a2 3819 'video_id': video_id,
3820 'ext': 'json',
f6745c49 3821 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
c60ee3a2 3822 }]
545cc85d 3823
3824 if initial_data:
7c365c21 3825 info['chapters'] = (
3826 self._extract_chapters_from_json(initial_data, duration)
3827 or self._extract_chapters_from_engagement_panel(initial_data, duration)
0fe51254 3828 or self._extract_chapters_from_description(video_description, duration)
7c365c21 3829 or None)
545cc85d 3830
17322130 3831 contents = traverse_obj(
3832 initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),
3833 expected_type=list, default=[])
3834
3835 vpir = get_first(contents, 'videoPrimaryInfoRenderer')
3836 if vpir:
3837 stl = vpir.get('superTitleLink')
3838 if stl:
3839 stl = self._get_text(stl)
3840 if try_get(
3841 vpir,
3842 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
3843 info['location'] = stl
3844 else:
affc4fef 3845 mobj = re.search(r'(.+?)\s*S(\d+)\s*•?\s*E(\d+)', stl)
17322130 3846 if mobj:
545cc85d 3847 info.update({
17322130 3848 'series': mobj.group(1),
3849 'season_number': int(mobj.group(2)),
3850 'episode_number': int(mobj.group(3)),
545cc85d 3851 })
17322130 3852 for tlb in (try_get(
3853 vpir,
3854 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
3855 list) or []):
3856 tbr = tlb.get('toggleButtonRenderer') or {}
3857 for getter, regex in [(
3858 lambda x: x['defaultText']['accessibility']['accessibilityData'],
3859 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
3860 lambda x: x['accessibility'],
3861 lambda x: x['accessibilityData']['accessibilityData'],
3862 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
3863 label = (try_get(tbr, getter, dict) or {}).get('label')
3864 if label:
3865 mobj = re.match(regex, label)
3866 if mobj:
3867 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
545cc85d 3868 break
17322130 3869 sbr_tooltip = try_get(
3870 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
3871 if sbr_tooltip:
3872 like_count, dislike_count = sbr_tooltip.split(' / ')
3873 info.update({
3874 'like_count': str_to_int(like_count),
3875 'dislike_count': str_to_int(dislike_count),
3876 })
3877 vsir = get_first(contents, 'videoSecondaryInfoRenderer')
3878 if vsir:
3879 vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))
3880 info.update({
3881 'channel': self._get_text(vor, 'title'),
3882 'channel_follower_count': self._get_count(vor, 'subscriberCountText')})
3883
3884 rows = try_get(
3885 vsir,
3886 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
3887 list) or []
3888 multiple_songs = False
3889 for row in rows:
3890 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
3891 multiple_songs = True
3892 break
3893 for row in rows:
3894 mrr = row.get('metadataRowRenderer') or {}
3895 mrr_title = mrr.get('title')
3896 if not mrr_title:
3897 continue
3898 mrr_title = self._get_text(mrr, 'title')
3899 mrr_contents_text = self._get_text(mrr, ('contents', 0))
3900 if mrr_title == 'License':
3901 info['license'] = mrr_contents_text
3902 elif not multiple_songs:
3903 if mrr_title == 'Album':
3904 info['album'] = mrr_contents_text
3905 elif mrr_title == 'Artist':
3906 info['artist'] = mrr_contents_text
3907 elif mrr_title == 'Song':
3908 info['track'] = mrr_contents_text
545cc85d 3909
3910 fallbacks = {
3911 'channel': 'uploader',
3912 'channel_id': 'uploader_id',
3913 'channel_url': 'uploader_url',
3914 }
992f9a73 3915
17322130 3916 # The upload date for scheduled, live and past live streams / premieres in microformats
3917 # may be different from the stream date. Although not in UTC, we will prefer it in this case.
992f9a73 3918 # See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139
17322130 3919 upload_date = (
3920 unified_strdate(get_first(microformats, 'uploadDate'))
3921 or unified_strdate(search_meta('uploadDate')))
3922 if not upload_date or (not info.get('is_live') and not info.get('was_live') and info.get('live_status') != 'is_upcoming'):
6e634cbe 3923 upload_date = strftime_or_none(self._extract_time_text(vpir, 'dateText')[0], '%Y%m%d') or upload_date
17322130 3924 info['upload_date'] = upload_date
992f9a73 3925
545cc85d 3926 for to, frm in fallbacks.items():
3927 if not info.get(to):
3928 info[to] = info.get(frm)
3929
3930 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
3931 v = info.get(s_k)
3932 if v:
3933 info[d_k] = v
b84071c0 3934
11f9be09 3935 is_private = get_first(video_details, 'isPrivate', expected_type=bool)
3936 is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
c224251a 3937 is_membersonly = None
b28f8d24 3938 is_premium = None
c224251a
M
3939 if initial_data and is_private is not None:
3940 is_membersonly = False
b28f8d24 3941 is_premium = False
47193e02 3942 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
3943 badge_labels = set()
3944 for content in contents:
3945 if not isinstance(content, dict):
3946 continue
3947 badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
3948 for badge_label in badge_labels:
3949 if badge_label.lower() == 'members only':
3950 is_membersonly = True
3951 elif badge_label.lower() == 'premium':
3952 is_premium = True
3953 elif badge_label.lower() == 'unlisted':
3954 is_unlisted = True
c224251a 3955
c224251a
M
3956 info['availability'] = self._availability(
3957 is_private=is_private,
b28f8d24 3958 needs_premium=is_premium,
c224251a
M
3959 needs_subscription=is_membersonly,
3960 needs_auth=info['age_limit'] >= 18,
3961 is_unlisted=None if is_private is None else is_unlisted)
3962
a2160aa4 3963 info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)
4ea3be0a 3964
11f9be09 3965 self.mark_watched(video_id, player_responses)
d77ab8e2 3966
545cc85d 3967 return info
c5e8d7af 3968
a61fd4cf 3969
a6213a49 3970class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
5f6a1245 3971
@staticmethod
def passthrough_smuggled_data(func):
    """Decorate an extraction method so smuggled URL data reaches its entries.

    The wrapped callable is invoked as ``func(self, url, smuggled_data)`` with
    the unsmuggled URL; when any smuggled data is present, every entry URL of
    the returned info dict is re-smuggled with that same data.
    """
    def _with_smuggled_urls(playlist_entries, extra):
        # Lazily rewrite each entry so the generator nature of `entries` is kept
        for item in playlist_entries:
            # TODO: Convert URL to music.youtube instead.
            # Do we need to passthrough any other smuggled_data?
            item['url'] = smuggle_url(item['url'], extra)
            yield item

    @functools.wraps(func)
    def wrapper(self, url):
        url, smuggled_data = unsmuggle_url(url, {})
        if self.is_music_url(url):
            smuggled_data['is_music_url'] = True
        result = func(self, url, smuggled_data)
        # Only look at the entries when there is something to pass through
        entries = result.get('entries') if smuggled_data else None
        if entries:
            result['entries'] = _with_smuggled_urls(entries, smuggled_data)
        return result
    return wrapper
3991
def _extract_channel_id(self, webpage):
    """Return the channel id found in `webpage`.

    The `channelId` meta tag is preferred. Otherwise the id is parsed out of
    the first available channel URL meta tag (og:url, al:*, twitter:*).
    Both lookups for the fallback are made without a default, so the
    underlying helpers decide how a missing value is reported.
    """
    channel_id = self._html_search_meta(
        'channelId', webpage, 'channel id', default=None)
    if channel_id:
        return channel_id
    channel_url = self._html_search_meta(
        ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
         'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
         'twitter:app:url:googleplay'), webpage, 'channel url')
    # The quantifier must be inside the group: `([^/?#&])+` would capture
    # only the last character of the id instead of the whole id
    return self._search_regex(
        r'https?://(?:www\.)?youtube\.com/channel/([^/?#&]+)',
        channel_url, 'channel id')
15f6397c 4004
8bdd16b4 4005 @staticmethod
cd7c66cf 4006 def _extract_basic_item_renderer(item):
4007 # Modified from _extract_grid_item_renderer
201c1459 4008 known_basic_renderers = (
a17526e4 4009 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer'
cd7c66cf 4010 )
4011 for key, renderer in item.items():
201c1459 4012 if not isinstance(renderer, dict):
cd7c66cf 4013 continue
201c1459 4014 elif key in known_basic_renderers:
4015 return renderer
4016 elif key.startswith('grid') and key.endswith('Renderer'):
4017 return renderer
8bdd16b4 4018
8bdd16b4 4019 def _grid_entries(self, grid_renderer):
4020 for item in grid_renderer['items']:
4021 if not isinstance(item, dict):
39b62db1 4022 continue
cd7c66cf 4023 renderer = self._extract_basic_item_renderer(item)
8bdd16b4 4024 if not isinstance(renderer, dict):
4025 continue
052e1350 4026 title = self._get_text(renderer, 'title')
fe93e2c4 4027
8bdd16b4 4028 # playlist
4029 playlist_id = renderer.get('playlistId')
4030 if playlist_id:
4031 yield self.url_result(
4032 'https://www.youtube.com/playlist?list=%s' % playlist_id,
4033 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
4034 video_title=title)
201c1459 4035 continue
8bdd16b4 4036 # video
4037 video_id = renderer.get('videoId')
4038 if video_id:
4039 yield self._extract_video(renderer)
201c1459 4040 continue
8bdd16b4 4041 # channel
4042 channel_id = renderer.get('channelId')
4043 if channel_id:
8bdd16b4 4044 yield self.url_result(
4045 'https://www.youtube.com/channel/%s' % channel_id,
4046 ie=YoutubeTabIE.ie_key(), video_title=title)
201c1459 4047 continue
4048 # generic endpoint URL support
4049 ep_url = urljoin('https://www.youtube.com/', try_get(
4050 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
14f25df2 4051 str))
201c1459 4052 if ep_url:
4053 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
4054 if ie.suitable(ep_url):
4055 yield self.url_result(
4056 ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
4057 break
8bdd16b4 4058
def _music_reponsive_list_entry(self, renderer):
    """Turn a music list item renderer into a url result, or None if it has no usable id.

    Lookup order: a direct video id, then a watch endpoint carrying a
    playlist id (with or without a video id), then a browse endpoint.
    """
    video_id = traverse_obj(renderer, ('playlistItemData', 'videoId'))
    if video_id:
        return self.url_result(f'https://music.youtube.com/watch?v={video_id}',
                               ie=YoutubeIE.ie_key(), video_id=video_id)

    def _tab_result(target_url, item_id):
        return self.url_result(target_url, ie=YoutubeTabIE.ie_key(), video_id=item_id)

    playlist_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'playlistId'))
    if playlist_id:
        video_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'videoId'))
        if video_id:
            target = f'https://music.youtube.com/watch?v={video_id}&list={playlist_id}'
        else:
            target = f'https://music.youtube.com/playlist?list={playlist_id}'
        return _tab_result(target, playlist_id)

    browse_id = traverse_obj(renderer, ('navigationEndpoint', 'browseEndpoint', 'browseId'))
    if browse_id:
        return _tab_result(f'https://music.youtube.com/browse/{browse_id}', browse_id)
    return None
4076
3d3dddc9 4077 def _shelf_entries_from_content(self, shelf_renderer):
4078 content = shelf_renderer.get('content')
4079 if not isinstance(content, dict):
8bdd16b4 4080 return
cd7c66cf 4081 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3d3dddc9 4082 if renderer:
4083 # TODO: add support for nested playlists so each shelf is processed
4084 # as separate playlist
4085 # TODO: this includes only first N items
86e5f3ed 4086 yield from self._grid_entries(renderer)
3d3dddc9 4087 renderer = content.get('horizontalListRenderer')
4088 if renderer:
4089 # TODO
4090 pass
8bdd16b4 4091
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield a url result for the shelf's own endpoint URL, then its inline content entries.

    When `skip_channels` is set and the shelf URL contains '/channels?',
    nothing at all is yielded for this shelf.
    """
    shelf_url = urljoin('https://www.youtube.com', try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        str))
    # Skipping links to another channels, note that checking for
    # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
    # will not work
    if shelf_url and skip_channels and '/channels?' in shelf_url:
        return
    if shelf_url:
        yield self.url_result(shelf_url, video_title=self._get_text(shelf_renderer, 'title'))
    # Shelf may not contain shelf URL; entries embedded in the content are yielded as well
    yield from self._shelf_entries_from_content(shelf_renderer)
c5e8d7af 4107
8bdd16b4 4108 def _playlist_entries(self, video_list_renderer):
4109 for content in video_list_renderer['contents']:
4110 if not isinstance(content, dict):
4111 continue
4112 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
4113 if not isinstance(renderer, dict):
4114 continue
4115 video_id = renderer.get('videoId')
4116 if not video_id:
4117 continue
4118 yield self._extract_video(renderer)
07aeced6 4119
def _rich_entries(self, rich_grid_renderer):
    """Yield the video wrapped by a rich item renderer, if it carries a videoId."""
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict)
    if video_renderer and video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
4127
8bdd16b4 4128 def _video_entry(self, video_renderer):
4129 video_id = video_renderer.get('videoId')
4130 if video_id:
4131 return self._extract_video(video_renderer)
dacb3a86 4132
def _hashtag_tile_entry(self, hashtag_tile_renderer):
    """Return a url result for a hashtag tile's tap target, or None without a usable URL."""
    tap_url = traverse_obj(
        hashtag_tile_renderer, ('onTapCommand', 'commandMetadata', 'webCommandMetadata', 'url'))
    url = urljoin('https://youtube.com', tap_url)
    if not url:
        return None
    return self.url_result(
        url, ie=YoutubeTabIE.ie_key(), title=self._get_text(hashtag_tile_renderer, 'hashtag'))
4139
def _post_thread_entries(self, post_thread_renderer):
    """Yield the media referenced by a community post.

    In order: the attached video (if any), the attached playlist (if any),
    then every video linked inline in the post text that is not the
    attached video itself.
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    attached_entry = self._extract_video(attached_video) if video_id else None
    if attached_entry:
        yield attached_entry

    # playlist attachment
    playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    for run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(run, dict):
            continue
        ep_url = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], str)
        if not (ep_url and YoutubeIE.suitable(ep_url)):
            continue
        ep_video_id = YoutubeIE._match_id(ep_url)
        # The attached video was already yielded above
        if ep_video_id != video_id:
            yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
dacb3a86 4175
8bdd16b4 4176 def _post_thread_continuation_entries(self, post_thread_continuation):
4177 contents = post_thread_continuation.get('contents')
4178 if not isinstance(contents, list):
4179 return
4180 for content in contents:
4181 renderer = content.get('backstagePostThreadRenderer')
6b0b0a28 4182 if isinstance(renderer, dict):
4183 yield from self._post_thread_entries(renderer)
8bdd16b4 4184 continue
6b0b0a28 4185 renderer = content.get('videoRenderer')
4186 if isinstance(renderer, dict):
4187 yield self._video_entry(renderer)
07aeced6 4188
39ed931e 4189 r''' # unused
4190 def _rich_grid_entries(self, contents):
4191 for content in contents:
4192 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
4193 if video_renderer:
4194 entry = self._video_entry(video_renderer)
4195 if entry:
4196 yield entry
4197 '''
52efa4b3 4198
def _extract_entries(self, parent_renderer, continuation_list):
    """Yield entries found under ``parent_renderer['contents']``.

    `continuation_list` is an output parameter: it is reset to ``[None]`` and
    its single slot receives whatever `self._extract_continuation` returns
    for the most relevant renderer, so the caller can request the next page.
    """
    # continuation_list is modified in-place with continuation_list = [continuation_token]
    continuation_list[:] = [None]
    contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
    for content in contents:
        if not isinstance(content, dict):
            continue
        # First of these section-like renderers that is present as a dict
        is_renderer = traverse_obj(
            content, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation',
            expected_type=dict)
        if not is_renderer:
            # Not a section: the only other shape handled here is a rich item
            renderer = content.get('richItemRenderer')
            if renderer:
                for entry in self._rich_entries(renderer):
                    yield entry
                continuation_list[0] = self._extract_continuation(parent_renderer)
            continue
        isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
        for isr_content in isr_contents:
            if not isinstance(isr_content, dict):
                continue

            # Dispatch table: renderer key -> callable returning an iterable of entries
            known_renderers = {
                'playlistVideoListRenderer': self._playlist_entries,
                'gridRenderer': self._grid_entries,
                'reelShelfRenderer': self._grid_entries,
                'shelfRenderer': self._shelf_entries,
                'musicResponsiveListItemRenderer': lambda x: [self._music_reponsive_list_entry(x)],
                'backstagePostThreadRenderer': self._post_thread_entries,
                'videoRenderer': lambda x: [self._video_entry(x)],
                'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
                'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
                'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)]
            }
            for key, renderer in isr_content.items():
                if key not in known_renderers:
                    continue
                for entry in known_renderers[key](renderer):
                    # Some handlers produce None for unusable items
                    if entry:
                        yield entry
                continuation_list[0] = self._extract_continuation(renderer)
                # Only the first recognised key of each item is processed
                break

        # Fall back to the section renderer when no inner renderer gave a continuation
        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(is_renderer)

    # Last resort: the continuation of the parent renderer itself
    if not continuation_list[0]:
        continuation_list[0] = self._extract_continuation(parent_renderer)
4247
4248 def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
4249 continuation_list = [None]
4250 extract_entries = lambda x: self._extract_entries(x, continuation_list)
29f7c58a 4251 tab_content = try_get(tab, lambda x: x['content'], dict)
4252 if not tab_content:
4253 return
3462ffa8 4254 parent_renderer = (
29f7c58a 4255 try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
4256 or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
86e5f3ed 4257 yield from extract_entries(parent_renderer)
3462ffa8 4258 continuation = continuation_list[0]
d069eca7 4259
8bdd16b4 4260 for page_num in itertools.count(1):
4261 if not continuation:
4262 break
99e9e001 4263 headers = self.generate_api_headers(
4264 ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
79360d99 4265 response = self._extract_response(
86e5f3ed 4266 item_id=f'{item_id} page {page_num}',
fe93e2c4 4267 query=continuation, headers=headers, ytcfg=ytcfg,
79360d99 4268 check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
a5c56234
M
4269
4270 if not response:
8bdd16b4 4271 break
ac56cf38 4272 # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
4273 # See: https://github.com/ytdl-org/youtube-dl/issues/28702
4274 visitor_data = self._extract_visitor_data(response) or visitor_data
ebf1b291 4275
69184e41 4276 known_continuation_renderers = {
4277 'playlistVideoListContinuation': self._playlist_entries,
4278 'gridContinuation': self._grid_entries,
4279 'itemSectionContinuation': self._post_thread_continuation_entries,
4280 'sectionListContinuation': extract_entries, # for feeds
4281 }
8bdd16b4 4282 continuation_contents = try_get(
69184e41 4283 response, lambda x: x['continuationContents'], dict) or {}
4284 continuation_renderer = None
4285 for key, value in continuation_contents.items():
4286 if key not in known_continuation_renderers:
3462ffa8 4287 continue
69184e41 4288 continuation_renderer = value
4289 continuation_list = [None]
86e5f3ed 4290 yield from known_continuation_renderers[key](continuation_renderer)
69184e41 4291 continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
4292 break
4293 if continuation_renderer:
4294 continue
c5e8d7af 4295
a1b535bd 4296 known_renderers = {
e4b98809 4297 'videoRenderer': (self._grid_entries, 'items'), # for membership tab
a1b535bd 4298 'gridPlaylistRenderer': (self._grid_entries, 'items'),
4299 'gridVideoRenderer': (self._grid_entries, 'items'),
d61fc646 4300 'gridChannelRenderer': (self._grid_entries, 'items'),
a1b535bd 4301 'playlistVideoRenderer': (self._playlist_entries, 'contents'),
cd7c66cf 4302 'itemSectionRenderer': (extract_entries, 'contents'), # for feeds
9ba5705a 4303 'richItemRenderer': (extract_entries, 'contents'), # for hashtag
26fe8ffe 4304 'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
a1b535bd 4305 }
cce889b9 4306 on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
8bdd16b4 4307 continuation_items = try_get(
cce889b9 4308 on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
a1b535bd 4309 continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
4310 video_items_renderer = None
4311 for key, value in continuation_item.items():
4312 if key not in known_renderers:
8bdd16b4 4313 continue
a1b535bd 4314 video_items_renderer = {known_renderers[key][1]: continuation_items}
9ba5705a 4315 continuation_list = [None]
86e5f3ed 4316 yield from known_renderers[key][0](video_items_renderer)
9ba5705a 4317 continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
a1b535bd 4318 break
4319 if video_items_renderer:
4320 continue
8bdd16b4 4321 break
9558dcec 4322
8bdd16b4 4323 @staticmethod
7c219ea6 4324 def _extract_selected_tab(tabs, fatal=True):
8bdd16b4 4325 for tab in tabs:
cd684175 4326 renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
4327 if renderer.get('selected') is True:
4328 return renderer
2b3c2546 4329 else:
7c219ea6 4330 if fatal:
4331 raise ExtractorError('Unable to find selected tab')
b82f815f 4332
def _extract_uploader(self, data):
    """Return the uploader name/id/url found in the playlist's secondary sidebar.

    Only keys whose value is not None are included; an empty dict is
    returned when the sidebar names no owner.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner = try_get(
        sidebar, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if not owner:
        return {}

    owner_text = owner.get('text')
    fields = {
        # Text of the form "by <name> and N others" is reduced to <name>
        'uploader': self._search_regex(
            r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text),
        'uploader_id': try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], str),
        'uploader_url': urljoin(
            'https://www.youtube.com/',
            try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], str)),
    }
    return {key: value for key, value in fields.items() if value is not None}
8bdd16b4 4348
def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """Build the playlist result for the selected tab of a tabbed page.

    Metadata is read from `data` (channel or playlist metadata renderer,
    header, primary sidebar), and the entries are produced lazily by
    `self._entries` for the selected tab. Returns the value of
    `self.playlist_result`.
    """
    playlist_id = title = description = channel_url = channel_name = channel_id = None
    tags = []

    selected_tab = self._extract_selected_tab(tabs)
    primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    # Channel metadata takes precedence; playlist metadata is the fallback
    renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if renderer:
        channel_name = renderer.get('title')
        channel_url = renderer.get('channelUrl')
        channel_id = renderer.get('externalId')
    else:
        renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    if renderer:
        title = renderer.get('title')
        description = renderer.get('description', '')
        # channel_id is still None here when the playlist renderer was used
        playlist_id = channel_id
        tags = renderer.get('keywords', '').split()

    # We can get the uncropped banner/avatar by replacing the crop params with '=s0'
    # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
    def _get_uncropped(url):
        return url_or_none((url or '').split('=')[0] + '=s0')

    avatar_thumbnails = self._extract_thumbnails(renderer, 'avatar')
    if avatar_thumbnails:
        uncropped_avatar = _get_uncropped(avatar_thumbnails[0]['url'])
        if uncropped_avatar:
            avatar_thumbnails.append({
                'url': uncropped_avatar,
                'id': 'avatar_uncropped',
                'preference': 1
            })

    channel_banners = self._extract_thumbnails(
        data, ('header', ..., ['banner', 'mobileBanner', 'tvBanner']))
    for banner in channel_banners:
        banner['preference'] = -10

    if channel_banners:
        uncropped_banner = _get_uncropped(channel_banners[0]['url'])
        if uncropped_banner:
            channel_banners.append({
                'url': uncropped_banner,
                'id': 'banner_uncropped',
                'preference': -5
            })

    primary_thumbnails = self._extract_thumbnails(
        primary_sidebar_renderer, ('thumbnailRenderer', ('playlistVideoThumbnailRenderer', 'playlistCustomThumbnailRenderer'), 'thumbnail'))

    if playlist_id is None:
        playlist_id = item_id

    # NOTE(review): the stats indices used below (0 for playlist_count, 1 for
    # view_count, 2 for the time text) presumably follow the sidebar layout - confirm
    playlist_stats = traverse_obj(primary_sidebar_renderer, 'stats')
    last_updated_unix, _ = self._extract_time_text(playlist_stats, 2)
    if title is None:
        title = self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag')) or playlist_id
    title += format_field(selected_tab, 'title', ' - %s')
    title += format_field(selected_tab, 'expandedText', ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': primary_thumbnails + avatar_thumbnails + channel_banners,
        'tags': tags,
        'view_count': self._get_count(playlist_stats, 1),
        'availability': self._extract_availability(data),
        'modified_date': strftime_or_none(last_updated_unix, '%Y%m%d'),
        'playlist_count': self._get_count(playlist_stats, 0),
        'channel_follower_count': self._get_count(data, ('header', ..., 'subscriberCountText')),
    }
    # Without channel metadata, take the uploader from the secondary sidebar
    if not channel_id:
        metadata.update(self._extract_uploader(data))
    # The channel_* fields mirror the (possibly just updated) uploader_* fields
    metadata.update({
        'channel': metadata['uploader'],
        'channel_id': metadata['uploader_id'],
        'channel_url': metadata['uploader_url']})
    return self.playlist_result(
        self._entries(
            selected_tab, playlist_id, ytcfg,
            self._extract_account_syncid(ytcfg, data),
            self._extract_visitor_data(data, ytcfg)),
        **metadata)
73c4ac2c 4440
def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg):
    """Yield the entries of a playlist panel, requesting further pages from the 'next' endpoint.

    @param playlist     playlist panel renderer; its entries come from self._playlist_entries
    @param playlist_id  id sent as 'playlistId' and used in the progress note
    @param data         initial response, used for the account sync id and visitor data
    @param ytcfg        ytcfg passed on to the API request helpers

    Iteration stops when a page has no entries, or when it has nothing
    after the last entry that was already yielded.
    """
    last_id = response = None
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # A page may repeat entries that were already yielded; resume right after
        # the last yielded id (or from the beginning if that id is not on the page)
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        yield from videos[start:]
        last_id = videos[-1]['id']

        # The last item is not guaranteed to be a playlistPanelVideoRenderer with a
        # watch endpoint; fall back to an empty dict so the defaults below are used
        # instead of failing with AttributeError on None
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if playlist is None:
            # The response carries no playlist panel, so there is nothing more to page through
            return
cd7c66cf 4471
def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """Build the result for a playlist panel renderer.

    When the panel links to a playlist page that differs from `url` and the
    playlist is not known to be unviewable, extraction is delegated to
    YoutubeTabIE through a url_result. Otherwise (e.g. mix playlists) the
    entries are extracted inline from the panel.
    """
    playlist_title = playlist.get('title')
    if not playlist_title:
        playlist_title = try_get(data, lambda x: x['titleText']['simpleText'], str)
    list_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    page_path = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'], str)
    page_url = urljoin(url, page_path)

    # Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1]
    # [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg
    known_unviewable = re.fullmatch(r'MLCT|RLTD[\w-]{22}', list_id)

    if not page_url or page_url == url or known_unviewable:
        return self.playlist_result(
            self._extract_inline_playlist(playlist, list_id, data, ytcfg),
            playlist_id=list_id, playlist_title=playlist_title)

    return self.url_result(
        page_url, ie=YoutubeTabIE.ie_key(), video_id=list_id,
        video_title=playlist_title)
c5e8d7af 4494
def _extract_availability(self, data):
    """
    Determine the availability of a playlist/tab from its primary sidebar renderer.
    Note: Nothing is assumed to be public unless YouTube says so explicitly
    @param data: response
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    labels = self._extract_badges(sidebar)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    dropdown_entries = try_get(
        sidebar, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'],
        list) or []
    for entry in dropdown_entries:
        if not try_get(entry, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool):
            continue
        selected_label = self._get_text(entry, ('privacyDropdownItemRenderer', 'label'))
        if selected_label:
            # Treat the selected dropdown option like a badge
            labels.add(selected_label.lower())
            break

    is_private = is_unlisted = None
    for label in labels:
        if label == 'public':
            is_unlisted = is_private = False
        elif label == 'private':
            is_private = True
        elif label == 'unlisted':
            is_unlisted = True
    return self._availability(is_private, False, False, False, is_unlisted)
4526
@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """Return the first truthy `info_renderer` value of type `expected_type`
    among the playlist sidebar items of `data`, or None if there is none."""
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    candidates = (
        try_get(item, lambda x: x[info_renderer], expected_type)
        for item in sidebar_items)
    return next(filter(None, candidates), None)
4535
def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Request the playlist again so that unavailable videos are included.

    The browse id and params are taken from the 'show unavailable videos' menu item
    of the primary sidebar renderer; when that item (or one of its values) is missing,
    'VL<item_id>' and a built-in params value are used instead.
    Returns None without making a request if the sidebar renderer is missing.
    The request itself is non-fatal.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not sidebar:
        return

    browse_endpoint = {}
    for entry in try_get(sidebar, lambda x: x['menu']['menuRenderer']['items'], list) or []:
        if not isinstance(entry, dict):
            continue
        nav_item = entry.get('menuNavigationItemRenderer')
        label = try_get(nav_item, lambda x: x['text']['simpleText'], str)
        if label and label.lower() == 'show unavailable videos':
            browse_endpoint = try_get(
                nav_item, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            break

    headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        visitor_data=self._extract_visitor_data(data, ytcfg))
    query = {
        'params': browse_endpoint.get('params') or 'wgYCCAA=',
        'browseId': browse_endpoint.get('browseId') or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')
358de58c 4571
@functools.cached_property
def skip_webpage(self):
    """Whether 'webpage' is listed in the 'skip' extractor argument of YoutubeTabIE"""
    skipped = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    return 'webpage' in skipped
4575
def _extract_webpage(self, url, item_id, fatal=True):
    """Download the webpage and extract its initial data, retrying on transient failures.

    @param url      page to download
    @param item_id  id used for progress/error reporting
    @param fatal    passed to the retry manager, the initial data extraction
                    and the error reporting
    @returns        (webpage, data); data is None when no complete initial data
                    could be obtained, so that callers checking `if not data`
                    can fall back to another source
    """
    webpage, data = None, None
    for retry in self.RetryManager(fatal=fatal):
        try:
            webpage = self._download_webpage(url, item_id, note='Downloading webpage')
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as e:
            # Network errors are retried, except for HTTP 403/429 responses
            if isinstance(e.cause, network_exceptions) and (
                    not isinstance(e.cause, urllib.error.HTTPError) or e.cause.code not in (403, 429)):
                retry.error = e
                continue
            self._error_or_warning(e, fatal=fatal)
            break

        try:
            self._extract_and_report_alerts(data)
        except ExtractorError as e:
            self._error_or_warning(e, fatal=fatal)
            break

        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if not traverse_obj(data, 'contents', 'currentVideoEndpoint', 'onResponseReceivedActions'):
            retry.error = ExtractorError('Incomplete yt initial data received')
            # Do not hand the incomplete (but truthy) data to the caller if the
            # retries run out non-fatally; it would suppress the caller's fallback
            data = None
            continue

    return webpage, data
4603
def _report_playlist_authcheck(self, ytcfg, fatal=True):
    """Use if failed to extract ytcfg (and data) from initial webpage

    Does nothing when ytcfg is available or the session is not authenticated.
    Otherwise raises ExtractorError when `fatal` is set and 'authcheck' is not in
    the 'skip' extractor argument of YoutubeTabIE; in all other cases only a
    one-time warning is reported.
    """
    if ytcfg or not self.is_authenticated:
        return

    msg = 'Playlists that require authentication may not extract correctly without a successful webpage download'
    skipped_checks = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    if fatal and 'authcheck' not in skipped_checks:
        raise ExtractorError(
            f'{msg}. If you are not downloading private content, or '
            'your cookies are only for the first account and channel,'
            ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
            expected=True)
    self.report_warning(msg, only_once=True)
4615
def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """Get the initial data (and ytcfg) for `url`.

    Unless the webpage is skipped, the data is taken from the webpage first;
    if that gives nothing, the tab endpoint API is used instead.

    @param ytcfg          used as-is when given, otherwise extracted from the webpage
    @param fatal          whether the home page redirect, the auth check and the
                          API request may raise
    @param webpage_fatal  whether a failed webpage extraction may raise
    @returns              (data, ytcfg)
    """
    data = None
    if not self.skip_webpage:
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        if not ytcfg:
            ytcfg = self.extract_ytcfg(item_id, webpage)

        # Reject webpage data if redirected to home page without explicitly requesting
        tabs = traverse_obj(
            data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list, default=[])
        selected_tab = self._extract_selected_tab(tabs, fatal=False) or {}
        landed_on_home = (
            url != 'https://www.youtube.com/feed/recommended'
            and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch')  # Home page
        if landed_on_home and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', []):
            msg = 'The channel/playlist does not exist and the URL redirected to youtube.com home page'
            if fatal:
                raise ExtractorError(msg, expected=True)
            self.report_warning(msg, only_once=True)

    if data:
        return data, ytcfg

    self._report_playlist_authcheck(ytcfg, fatal=fatal)
    data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return data, ytcfg
4635
def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """Resolve `url` through the 'navigation/resolve_url' API and request the endpoint it maps to.

    A resolved 'browseEndpoint' is requested from 'browse' and a 'watchEndpoint'
    from 'next', checked in that order. If neither is present, ExtractorError is
    raised when `fatal` is set; otherwise a warning is reported and None is returned.
    """
    headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolved = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)

    for endpoint_key, api_ep in (('browseEndpoint', 'browse'), ('watchEndpoint', 'next')):
        endpoint_params = try_get(resolved, lambda x: x['endpoint'][endpoint_key], dict)
        if not endpoint_params:
            continue
        return self._extract_response(
            item_id=item_id, query=endpoint_params, ep=api_ep, headers=headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint', 'onResponseReceivedActions'))

    err_note = 'Failed to resolve url (does the playlist exist?)'
    if not fatal:
        self.report_warning(err_note, item_id)
        return
    raise ExtractorError(err_note, expected=True)
4653
a6213a49 4654 _SEARCH_PARAMS = None
4655
def _search_results(self, query, params=NO_DEFAULT, default_client='web'):
    """Yield the entries of a search, requesting the 'search' endpoint page by page.

    @param query   search terms
    @param params  'params' value for the request; defaults to self._SEARCH_PARAMS
                   and is left out of the request when falsy
    """
    if params is NO_DEFAULT:
        params = self._SEARCH_PARAMS
    data = {'query': query}
    if params:
        data['params'] = params

    # Alternative locations of the result list within a response
    content_keys = (
        ('contents', 'twoColumnSearchResultsRenderer', 'primaryContents', 'sectionListRenderer', 'contents'),
        ('onResponseReceivedCommands', 0, 'appendContinuationItemsAction', 'continuationItems'),
        # ytmusic search
        ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
        ('continuationContents', ),
    )
    display_id = f'query "{query}"'
    # Only the distinct top-level keys are needed to validate a response
    check_get_keys = tuple(set(path[0] for path in content_keys))
    if self.skip_webpage:
        ytcfg = {}
    else:
        ytcfg = self._download_ytcfg(default_client, display_id)
    self._report_playlist_authcheck(ytcfg, fatal=False)

    # Single-item list handed to _extract_entries; its first item is read back after
    # each page as the next continuation (presumably set in place by _extract_entries)
    continuation_list = [None]
    search = None
    page_num = 0
    while True:
        page_num += 1
        data.update(continuation_list[0] or {})
        headers = self.generate_api_headers(
            ytcfg=ytcfg, visitor_data=self._extract_visitor_data(search), default_client=default_client)
        search = self._extract_response(
            item_id=f'{display_id} page {page_num}', ep='search', query=data,
            default_client=default_client, check_get_keys=check_get_keys, ytcfg=ytcfg, headers=headers)
        slr_contents = traverse_obj(search, *content_keys)
        yield from self._extract_entries({'contents': list(variadic(slr_contents))}, continuation_list)
        if not continuation_list[0]:
            return
4688
4689
4690class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
4691 IE_DESC = 'YouTube Tabs'
4692 _VALID_URL = r'''(?x:
4693 https?://
4694 (?:\w+\.)?
4695 (?:
4696 youtube(?:kids)?\.com|
4697 %(invidious)s
4698 )/
4699 (?:
4700 (?P<channel_type>channel|c|user|browse)/|
4701 (?P<not_channel>
4702 feed/|hashtag/|
4703 (?:playlist|watch)\?.*?\blist=
4704 )|
4705 (?!(?:%(reserved_names)s)\b) # Direct URLs
4706 )
4707 (?P<id>[^/?\#&]+)
4708 )''' % {
4709 'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,
4710 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
4711 }
4712 IE_NAME = 'youtube:tab'
4713
4714 _TESTS = [{
4715 'note': 'playlists, multipage',
4716 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
4717 'playlist_mincount': 94,
4718 'info_dict': {
4719 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
976ae3ea 4720 'title': 'Igor Kleiner - Playlists',
a6213a49 4721 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
976ae3ea 4722 'uploader': 'Igor Kleiner',
a6213a49 4723 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
976ae3ea 4724 'channel': 'Igor Kleiner',
4725 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
4726 'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],
4727 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
4728 'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
6c73052c 4729 'channel_follower_count': int
a6213a49 4730 },
4731 }, {
4732 'note': 'playlists, multipage, different order',
4733 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
4734 'playlist_mincount': 94,
4735 'info_dict': {
4736 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
976ae3ea 4737 'title': 'Igor Kleiner - Playlists',
a6213a49 4738 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
4739 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
976ae3ea 4740 'uploader': 'Igor Kleiner',
4741 'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
4742 'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],
4743 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
4744 'channel': 'Igor Kleiner',
4745 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
6c73052c 4746 'channel_follower_count': int
a6213a49 4747 },
4748 }, {
4749 'note': 'playlists, series',
4750 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
4751 'playlist_mincount': 5,
4752 'info_dict': {
4753 'id': 'UCYO_jab_esuFRV4b17AJtAw',
4754 'title': '3Blue1Brown - Playlists',
4755 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
4756 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
4757 'uploader': '3Blue1Brown',
976ae3ea 4758 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
4759 'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
4760 'channel': '3Blue1Brown',
4761 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
4762 'tags': ['Mathematics'],
6c73052c 4763 'channel_follower_count': int
a6213a49 4764 },
4765 }, {
4766 'note': 'playlists, singlepage',
4767 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
4768 'playlist_mincount': 4,
4769 'info_dict': {
4770 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
4771 'title': 'ThirstForScience - Playlists',
4772 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
4773 'uploader': 'ThirstForScience',
4774 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
976ae3ea 4775 'uploader_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
4776 'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
4777 'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
4778 'tags': 'count:13',
4779 'channel': 'ThirstForScience',
6c73052c 4780 'channel_follower_count': int
a6213a49 4781 }
4782 }, {
4783 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
4784 'only_matching': True,
4785 }, {
4786 'note': 'basic, single video playlist',
4787 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
4788 'info_dict': {
4789 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
4790 'uploader': 'Sergey M.',
4791 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
4792 'title': 'youtube-dl public playlist',
976ae3ea 4793 'description': '',
4794 'tags': [],
4795 'view_count': int,
4796 'modified_date': '20201130',
4797 'channel': 'Sergey M.',
4798 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
4799 'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4800 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
a6213a49 4801 },
4802 'playlist_count': 1,
4803 }, {
4804 'note': 'empty playlist',
4805 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
4806 'info_dict': {
4807 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
4808 'uploader': 'Sergey M.',
4809 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
4810 'title': 'youtube-dl empty playlist',
976ae3ea 4811 'tags': [],
4812 'channel': 'Sergey M.',
4813 'description': '',
4814 'modified_date': '20160902',
4815 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
4816 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4817 'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
a6213a49 4818 },
4819 'playlist_count': 0,
4820 }, {
4821 'note': 'Home tab',
4822 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
4823 'info_dict': {
4824 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4825 'title': 'lex will - Home',
4826 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4827 'uploader': 'lex will',
4828 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 4829 'channel': 'lex will',
4830 'tags': ['bible', 'history', 'prophesy'],
4831 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4832 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4833 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
6c73052c 4834 'channel_follower_count': int
a6213a49 4835 },
4836 'playlist_mincount': 2,
4837 }, {
4838 'note': 'Videos tab',
4839 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
4840 'info_dict': {
4841 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4842 'title': 'lex will - Videos',
4843 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4844 'uploader': 'lex will',
4845 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 4846 'tags': ['bible', 'history', 'prophesy'],
4847 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4848 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4849 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4850 'channel': 'lex will',
6c73052c 4851 'channel_follower_count': int
a6213a49 4852 },
4853 'playlist_mincount': 975,
4854 }, {
4855 'note': 'Videos tab, sorted by popular',
4856 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
4857 'info_dict': {
4858 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4859 'title': 'lex will - Videos',
4860 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4861 'uploader': 'lex will',
4862 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 4863 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4864 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4865 'channel': 'lex will',
4866 'tags': ['bible', 'history', 'prophesy'],
4867 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
6c73052c 4868 'channel_follower_count': int
a6213a49 4869 },
4870 'playlist_mincount': 199,
4871 }, {
4872 'note': 'Playlists tab',
4873 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
4874 'info_dict': {
4875 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4876 'title': 'lex will - Playlists',
4877 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4878 'uploader': 'lex will',
4879 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 4880 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4881 'channel': 'lex will',
4882 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4883 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4884 'tags': ['bible', 'history', 'prophesy'],
6c73052c 4885 'channel_follower_count': int
a6213a49 4886 },
4887 'playlist_mincount': 17,
4888 }, {
4889 'note': 'Community tab',
4890 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
4891 'info_dict': {
4892 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4893 'title': 'lex will - Community',
4894 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4895 'uploader': 'lex will',
4896 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 4897 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4898 'channel': 'lex will',
4899 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4900 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4901 'tags': ['bible', 'history', 'prophesy'],
6c73052c 4902 'channel_follower_count': int
a6213a49 4903 },
4904 'playlist_mincount': 18,
4905 }, {
4906 'note': 'Channels tab',
4907 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
4908 'info_dict': {
4909 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4910 'title': 'lex will - Channels',
4911 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4912 'uploader': 'lex will',
4913 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 4914 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4915 'channel': 'lex will',
4916 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4917 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4918 'tags': ['bible', 'history', 'prophesy'],
6c73052c 4919 'channel_follower_count': int
a6213a49 4920 },
4921 'playlist_mincount': 12,
4922 }, {
4923 'note': 'Search tab',
4924 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
4925 'playlist_mincount': 40,
4926 'info_dict': {
4927 'id': 'UCYO_jab_esuFRV4b17AJtAw',
4928 'title': '3Blue1Brown - Search - linear algebra',
4929 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
4930 'uploader': '3Blue1Brown',
4931 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
976ae3ea 4932 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
4933 'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
4934 'tags': ['Mathematics'],
4935 'channel': '3Blue1Brown',
4936 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
6c73052c 4937 'channel_follower_count': int
a6213a49 4938 },
4939 }, {
4940 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4941 'only_matching': True,
4942 }, {
4943 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4944 'only_matching': True,
4945 }, {
4946 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4947 'only_matching': True,
4948 }, {
4949 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
4950 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
4951 'info_dict': {
4952 'title': '29C3: Not my department',
4953 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
4954 'uploader': 'Christiaan008',
4955 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
4956 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
976ae3ea 4957 'tags': [],
4958 'uploader_url': 'https://www.youtube.com/c/ChRiStIaAn008',
4959 'view_count': int,
4960 'modified_date': '20150605',
4961 'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
4962 'channel_url': 'https://www.youtube.com/c/ChRiStIaAn008',
4963 'channel': 'Christiaan008',
a6213a49 4964 },
4965 'playlist_count': 96,
4966 }, {
4967 'note': 'Large playlist',
4968 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
4969 'info_dict': {
4970 'title': 'Uploads from Cauchemar',
4971 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
4972 'uploader': 'Cauchemar',
4973 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
976ae3ea 4974 'channel_url': 'https://www.youtube.com/c/Cauchemar89',
4975 'tags': [],
4976 'modified_date': r're:\d{8}',
4977 'channel': 'Cauchemar',
4978 'uploader_url': 'https://www.youtube.com/c/Cauchemar89',
4979 'view_count': int,
4980 'description': '',
4981 'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
a6213a49 4982 },
4983 'playlist_mincount': 1123,
976ae3ea 4984 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 4985 }, {
4986 'note': 'even larger playlist, 8832 videos',
4987 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
4988 'only_matching': True,
4989 }, {
4990 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
4991 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
4992 'info_dict': {
4993 'title': 'Uploads from Interstellar Movie',
4994 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
4995 'uploader': 'Interstellar Movie',
4996 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
976ae3ea 4997 'uploader_url': 'https://www.youtube.com/c/InterstellarMovie',
4998 'tags': [],
4999 'view_count': int,
5000 'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
5001 'channel_url': 'https://www.youtube.com/c/InterstellarMovie',
5002 'channel': 'Interstellar Movie',
5003 'description': '',
5004 'modified_date': r're:\d{8}',
a6213a49 5005 },
5006 'playlist_mincount': 21,
5007 }, {
5008 'note': 'Playlist with "show unavailable videos" button',
5009 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
5010 'info_dict': {
5011 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
5012 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
5013 'uploader': 'Phim Siêu Nhân Nhật Bản',
5014 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
976ae3ea 5015 'view_count': int,
5016 'channel': 'Phim Siêu Nhân Nhật Bản',
5017 'tags': [],
5018 'uploader_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
5019 'description': '',
5020 'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
5021 'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
5022 'modified_date': r're:\d{8}',
a6213a49 5023 },
5024 'playlist_mincount': 200,
976ae3ea 5025 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 5026 }, {
5027 'note': 'Playlist with unavailable videos in page 7',
5028 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
5029 'info_dict': {
5030 'title': 'Uploads from BlankTV',
5031 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
5032 'uploader': 'BlankTV',
5033 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
976ae3ea 5034 'channel': 'BlankTV',
5035 'channel_url': 'https://www.youtube.com/c/blanktv',
5036 'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',
5037 'view_count': int,
5038 'tags': [],
5039 'uploader_url': 'https://www.youtube.com/c/blanktv',
5040 'modified_date': r're:\d{8}',
5041 'description': '',
a6213a49 5042 },
5043 'playlist_mincount': 1000,
976ae3ea 5044 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 5045 }, {
5046 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
5047 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
5048 'info_dict': {
5049 'title': 'Data Analysis with Dr Mike Pound',
5050 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
5051 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
5052 'uploader': 'Computerphile',
5053 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
976ae3ea 5054 'uploader_url': 'https://www.youtube.com/user/Computerphile',
5055 'tags': [],
5056 'view_count': int,
5057 'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',
5058 'channel_url': 'https://www.youtube.com/user/Computerphile',
5059 'channel': 'Computerphile',
a6213a49 5060 },
5061 'playlist_mincount': 11,
5062 }, {
5063 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
5064 'only_matching': True,
5065 }, {
5066 'note': 'Playlist URL that does not actually serve a playlist',
5067 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
5068 'info_dict': {
5069 'id': 'FqZTN594JQw',
5070 'ext': 'webm',
5071 'title': "Smiley's People 01 detective, Adventure Series, Action",
5072 'uploader': 'STREEM',
5073 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
5074 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
5075 'upload_date': '20150526',
5076 'license': 'Standard YouTube License',
5077 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
5078 'categories': ['People & Blogs'],
5079 'tags': list,
5080 'view_count': int,
5081 'like_count': int,
a6213a49 5082 },
5083 'params': {
5084 'skip_download': True,
5085 },
5086 'skip': 'This video is not available.',
5087 'add_ie': [YoutubeIE.ie_key()],
5088 }, {
5089 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
5090 'only_matching': True,
5091 }, {
5092 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
5093 'only_matching': True,
5094 }, {
5095 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
5096 'info_dict': {
12a1b225 5097 'id': 'Wq15eF5vCbI', # This will keep changing
a6213a49 5098 'ext': 'mp4',
976ae3ea 5099 'title': str,
a6213a49 5100 'uploader': 'Sky News',
5101 'uploader_id': 'skynews',
5102 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
5103 'upload_date': r're:\d{8}',
976ae3ea 5104 'description': str,
a6213a49 5105 'categories': ['News & Politics'],
5106 'tags': list,
5107 'like_count': int,
6c73052c 5108 'release_timestamp': 1642502819,
976ae3ea 5109 'channel': 'Sky News',
5110 'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',
5111 'age_limit': 0,
5112 'view_count': int,
6c73052c 5113 'thumbnail': 'https://i.ytimg.com/vi/GgL890LIznQ/maxresdefault_live.jpg',
976ae3ea 5114 'playable_in_embed': True,
6c73052c 5115 'release_date': '20220118',
976ae3ea 5116 'availability': 'public',
5117 'live_status': 'is_live',
5118 'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',
6c73052c 5119 'channel_follower_count': int
a6213a49 5120 },
5121 'params': {
5122 'skip_download': True,
5123 },
976ae3ea 5124 'expected_warnings': ['Ignoring subtitle tracks found in '],
a6213a49 5125 }, {
5126 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
5127 'info_dict': {
5128 'id': 'a48o2S1cPoo',
5129 'ext': 'mp4',
5130 'title': 'The Young Turks - Live Main Show',
5131 'uploader': 'The Young Turks',
5132 'uploader_id': 'TheYoungTurks',
5133 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
5134 'upload_date': '20150715',
5135 'license': 'Standard YouTube License',
5136 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
5137 'categories': ['News & Politics'],
5138 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
5139 'like_count': int,
a6213a49 5140 },
5141 'params': {
5142 'skip_download': True,
5143 },
5144 'only_matching': True,
5145 }, {
5146 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
5147 'only_matching': True,
5148 }, {
5149 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
5150 'only_matching': True,
5151 }, {
5152 'note': 'A channel that is not live. Should raise error',
5153 'url': 'https://www.youtube.com/user/numberphile/live',
5154 'only_matching': True,
5155 }, {
5156 'url': 'https://www.youtube.com/feed/trending',
5157 'only_matching': True,
5158 }, {
5159 'url': 'https://www.youtube.com/feed/library',
5160 'only_matching': True,
5161 }, {
5162 'url': 'https://www.youtube.com/feed/history',
5163 'only_matching': True,
5164 }, {
5165 'url': 'https://www.youtube.com/feed/subscriptions',
5166 'only_matching': True,
5167 }, {
5168 'url': 'https://www.youtube.com/feed/watch_later',
5169 'only_matching': True,
5170 }, {
5171 'note': 'Recommended - redirects to home page.',
5172 'url': 'https://www.youtube.com/feed/recommended',
5173 'only_matching': True,
5174 }, {
5175 'note': 'inline playlist with not always working continuations',
5176 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
5177 'only_matching': True,
5178 }, {
5179 'url': 'https://www.youtube.com/course',
5180 'only_matching': True,
5181 }, {
5182 'url': 'https://www.youtube.com/zsecurity',
5183 'only_matching': True,
5184 }, {
5185 'url': 'http://www.youtube.com/NASAgovVideo/videos',
5186 'only_matching': True,
5187 }, {
5188 'url': 'https://www.youtube.com/TheYoungTurks/live',
5189 'only_matching': True,
5190 }, {
5191 'url': 'https://www.youtube.com/hashtag/cctv9',
5192 'info_dict': {
5193 'id': 'cctv9',
5194 'title': '#cctv9',
976ae3ea 5195 'tags': [],
a6213a49 5196 },
5197 'playlist_mincount': 350,
5198 }, {
5199 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
5200 'only_matching': True,
5201 }, {
5202 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
5203 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5204 'only_matching': True
5205 }, {
5206 'note': '/browse/ should redirect to /channel/',
5207 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
5208 'only_matching': True
5209 }, {
5210 'note': 'VLPL, should redirect to playlist?list=PL...',
5211 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5212 'info_dict': {
5213 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5214 'uploader': 'NoCopyrightSounds',
5215 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
5216 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
12a1b225 5217 'title': 'NCS : All Releases 💿',
976ae3ea 5218 'uploader_url': 'https://www.youtube.com/c/NoCopyrightSounds',
5219 'channel_url': 'https://www.youtube.com/c/NoCopyrightSounds',
5220 'modified_date': r're:\d{8}',
5221 'view_count': int,
5222 'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
5223 'tags': [],
5224 'channel': 'NoCopyrightSounds',
a6213a49 5225 },
5226 'playlist_mincount': 166,
976ae3ea 5227 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 5228 }, {
5229 'note': 'Topic, should redirect to playlist?list=UU...',
5230 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
5231 'info_dict': {
5232 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
5233 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5234 'title': 'Uploads from Royalty Free Music - Topic',
5235 'uploader': 'Royalty Free Music - Topic',
976ae3ea 5236 'tags': [],
5237 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5238 'channel': 'Royalty Free Music - Topic',
5239 'view_count': int,
5240 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5241 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5242 'modified_date': r're:\d{8}',
5243 'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5244 'description': '',
a6213a49 5245 },
5246 'expected_warnings': [
a6213a49 5247 'The URL does not have a videos tab',
976ae3ea 5248 r'[Uu]navailable videos (are|will be) hidden',
a6213a49 5249 ],
5250 'playlist_mincount': 101,
5251 }, {
5252 'note': 'Topic without a UU playlist',
5253 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
5254 'info_dict': {
5255 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
5256 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
976ae3ea 5257 'tags': [],
a6213a49 5258 },
5259 'expected_warnings': [
976ae3ea 5260 'the playlist redirect gave error',
a6213a49 5261 ],
5262 'playlist_mincount': 9,
5263 }, {
5264 'note': 'Youtube music Album',
5265 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
5266 'info_dict': {
5267 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
5268 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
976ae3ea 5269 'tags': [],
5270 'view_count': int,
5271 'description': '',
5272 'availability': 'unlisted',
5273 'modified_date': r're:\d{8}',
a6213a49 5274 },
5275 'playlist_count': 50,
5276 }, {
5277 'note': 'unlisted single video playlist',
5278 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
5279 'info_dict': {
5280 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
5281 'uploader': 'colethedj',
5282 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
5283 'title': 'yt-dlp unlisted playlist test',
976ae3ea 5284 'availability': 'unlisted',
5285 'tags': [],
12a1b225 5286 'modified_date': '20220418',
976ae3ea 5287 'channel': 'colethedj',
5288 'view_count': int,
5289 'description': '',
5290 'uploader_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
5291 'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
5292 'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
a6213a49 5293 },
5294 'playlist_count': 1,
5295 }, {
5296 'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
5297 'url': 'https://www.youtube.com/feed/recommended',
5298 'info_dict': {
5299 'id': 'recommended',
5300 'title': 'recommended',
6c73052c 5301 'tags': [],
a6213a49 5302 },
5303 'playlist_mincount': 50,
5304 'params': {
5305 'skip_download': True,
5306 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
5307 },
5308 }, {
5309 'note': 'API Fallback: /videos tab, sorted by oldest first',
5310 'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',
5311 'info_dict': {
5312 'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
5313 'title': 'Cody\'sLab - Videos',
5314 'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',
5315 'uploader': 'Cody\'sLab',
5316 'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
976ae3ea 5317 'channel': 'Cody\'sLab',
5318 'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
5319 'tags': [],
5320 'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
5321 'uploader_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
6c73052c 5322 'channel_follower_count': int
a6213a49 5323 },
5324 'playlist_mincount': 650,
5325 'params': {
5326 'skip_download': True,
5327 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
5328 },
5329 }, {
5330 'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
5331 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
5332 'info_dict': {
5333 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
5334 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5335 'title': 'Uploads from Royalty Free Music - Topic',
5336 'uploader': 'Royalty Free Music - Topic',
976ae3ea 5337 'modified_date': r're:\d{8}',
5338 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5339 'description': '',
5340 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5341 'tags': [],
5342 'channel': 'Royalty Free Music - Topic',
5343 'view_count': int,
5344 'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
a6213a49 5345 },
5346 'expected_warnings': [
976ae3ea 5347 'does not have a videos tab',
5348 r'[Uu]navailable videos (are|will be) hidden',
a6213a49 5349 ],
5350 'playlist_mincount': 101,
5351 'params': {
5352 'skip_download': True,
5353 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
5354 },
7c219ea6 5355 }, {
5356 'note': 'non-standard redirect to regional channel',
5357 'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',
5358 'only_matching': True
61d3665d 5359 }, {
5360 'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',
5361 'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
5362 'info_dict': {
5363 'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
5364 'modified_date': '20220407',
5365 'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
5366 'tags': [],
5367 'uploader_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
5368 'uploader': 'pukkandan',
5369 'availability': 'unlisted',
5370 'channel_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
5371 'channel': 'pukkandan',
5372 'description': 'Test for collaborative playlist',
5373 'title': 'yt-dlp test - collaborative playlist',
12a1b225 5374 'view_count': int,
61d3665d 5375 'uploader_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
5376 },
5377 'playlist_mincount': 2
a6213a49 5378 }]
5379
@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle *url*.

    Any URL that YoutubeIE accepts is rejected here; every other URL is
    decided by the parent class's ``suitable``.
    """
    if YoutubeIE.suitable(url):
        return False
    return super().suitable(url)
9297939e 5383
# Splits a tab URL into named parts:
#   pre  - everything matched by _VALID_URL
#   tab  - an optional "/<word>" path segment; the conditional group only
#          tries to match it when the `not_channel` group did not participate
#   post - whatever remains up to the end of the string
_URL_RE = re.compile(
    rf'(?P<pre>{_VALID_URL})'
    r'(?(not_channel)|(?P<tab>/\w+))?'
    r'(?P<post>.*)$')
fe03a6cd 5385
@YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
def _real_extract(self, url, smuggled_data):
    """Resolve a tab, channel, playlist or watch-with-list URL.

    Steps, in order:
      1. Force the host to www.youtube.com and split the URL with _URL_RE.
      2. For URLs smuggled as YouTube Music, rewrite VL/MP/browse ids.
      3. Unless the 'no-youtube-channel-redirect' compat option is set,
         point bare channel URLs at the /videos tab.
      4. Handle watch URLs (missing video id, --no-playlist).
      5. Download the page data, follow a navigateAction redirect if one is
         present, and reconcile the requested tab with the selected one.
      6. Dispatch to tab extraction, playlist extraction, or a single video
         result; raise ExtractorError if none of them applies.
    """
    item_id = self._match_id(url)
    parsed_url = urllib.parse.urlparse(url)
    url = urllib.parse.urlunparse(parsed_url._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])
    follow_redirects = 'no-youtube-channel-redirect' not in compat_opts
    tabs_path = ('contents', 'twoColumnBrowseResultsRenderer', 'tabs')

    def get_mobj(target_url):
        # Groups that did not participate come back as None; map them to ''
        groups = self._URL_RE.match(target_url).groupdict()
        return {name: '' if value is None else value for name, value in groups.items()}

    mobj = get_mobj(url)
    redirect_warning = None
    # Youtube returns incomplete data if tabname is not lower case
    pre, tab, post = mobj['pre'], mobj['tab'].lower(), mobj['post']
    is_channel = not mobj['not_channel']
    if is_channel and smuggled_data.get('is_music_url'):
        id_prefix = item_id[:2]
        if id_prefix == 'VL':  # Youtube music VL channels have an equivalent playlist
            item_id = item_id[2:]
            pre, tab, post, is_channel = f'https://www.youtube.com/playlist?list={item_id}', '', '', False
        elif id_prefix == 'MP':  # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
            mdata = self._extract_tab_endpoint(
                f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
            murl = traverse_obj(
                mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                get_all=False, expected_type=str)
            if not murl:
                raise ExtractorError('Failed to resolve album to playlist')
            return self.url_result(murl, ie=YoutubeTabIE.ie_key())
        elif mobj['channel_type'] == 'browse':  # Youtube music /browse/ should be changed to /channel/
            pre = f'https://www.youtube.com/channel/{item_id}'

    original_tab_name = tab
    if is_channel and not tab and follow_redirects:
        # Home URLs should redirect to /videos/
        redirect_warning = ('A channel/user page was given. All the channel\'s videos will be downloaded. '
                            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    url = ''.join((pre, tab, post))
    mobj = get_mobj(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
        url = f'https://www.youtube.com/playlist?list={playlist_id}'
        mobj = get_mobj(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen(f'Downloading just video {video_id} because of --no-playlist')
            return self.url_result(
                f'https://www.youtube.com/watch?v={video_id}',
                ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen(f'Downloading playlist {playlist_id}; add --no-playlist to just download video {video_id}')

    data, ytcfg = self._extract_data(url, item_id)

    # YouTube may provide a non-standard redirect to the regional channel
    # See: https://github.com/yt-dlp/yt-dlp/issues/2694
    redirect_url = traverse_obj(
        data,
        ('onResponseReceivedActions', ..., 'navigateAction', 'endpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
        get_all=False)
    if redirect_url and follow_redirects:
        redirect_url = ''.join((
            urljoin('https://www.youtube.com', redirect_url), mobj['tab'], mobj['post']))
        self.to_screen(f'This playlist is likely not available in your region. Following redirect to regional playlist {redirect_url}')
        return self.url_result(redirect_url, ie=YoutubeTabIE.ie_key())

    tabs = traverse_obj(data, tabs_path, expected_type=list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        selected_tab_name = selected_tab.get('title', '').lower()
        if selected_tab_name == 'home':
            selected_tab_name = 'featured'
        requested_tab_name = mobj['tab'][1:]
        if follow_redirects:
            if requested_tab_name == 'live':  # Live tab should have redirected to the video
                raise UserNotLive(video_id=mobj['id'])
            if requested_tab_name not in ('', selected_tab_name):
                redirect_warning = f'The channel does not have a {requested_tab_name} tab'
                if original_tab_name:
                    # The tab was named explicitly in the URL, so this is an error
                    raise ExtractorError(redirect_warning, expected=True)
                if item_id[:2] == 'UC':
                    # Topic channels don't have /videos. Use the equivalent playlist instead
                    pl_id = f'UU{item_id[2:]}'
                    pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
                    try:
                        data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
                    except ExtractorError:
                        redirect_warning += ' and the playlist redirect gave error'
                    else:
                        item_id, url, selected_tab_name = pl_id, pl_url, requested_tab_name
                        redirect_warning += f'. Redirecting to playlist {pl_id} instead'
                if selected_tab_name and selected_tab_name != requested_tab_name:
                    redirect_warning += f'. {selected_tab_name} tab is being downloaded instead'

    if redirect_warning:
        self.to_screen(redirect_warning)
    self.write_debug(f'Final URL: {url}')

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)
    tabs = traverse_obj(data, tabs_path, expected_type=list)
    if tabs:
        return self._extract_from_tabs(item_id, ytcfg, data, tabs)

    playlist = traverse_obj(
        data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

    video_id = traverse_obj(
        data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
    if video_id:
        if mobj['tab'] != '/live':  # live tab is expected to redirect to video
            self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
        return self.url_result(
            f'https://www.youtube.com/watch?v={video_id}',
            ie=YoutubeIE.ie_key(), video_id=video_id)

    raise ExtractorError('Unable to recognize tab page')
c5e8d7af 5514
c5e8d7af 5515
class YoutubePlaylistIE(InfoExtractor):
    """Match bare playlist ids and ``list=`` URLs and hand them to YoutubeTabIE."""

    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                %(invidious)s
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickman',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
            'view_count': int,
            'uploader_url': 'https://www.youtube.com/user/Wickydoo',
            'modified_date': r're:\d{8}',
            'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'channel': 'Wickman',
            'tags': [],
            'channel_url': 'https://www.youtube.com/user/Wickydoo',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'description': '',
            'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'tags': [],
            'modified_date': '20140919',
            'view_count': int,
            'channel': 'milan',
            'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'uploader_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 455,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
            'channel': 'LBK',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/c/愛低音的國王',
            'tags': [],
            'uploader_url': 'https://www.youtube.com/c/愛低音的國王',
            'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'modified_date': r're:\d{8}',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs owned by YoutubeTabIE or carrying a ``v`` query value."""
        if YoutubeTabIE.suitable(url):
            return False
        # NOTE(review): parse_qs is imported at function scope here instead of
        # relying on a module-level name - presumably intentional; confirm
        # before hoisting it.
        from ..utils import parse_qs
        has_video_id = bool(parse_qs(url).get('v', [None])[0])
        if has_video_id:
            return False
        return super().suitable(url)

    def _real_extract(self, url):
        """Rewrite *url* as a www.youtube.com/playlist URL for YoutubeTabIE."""
        playlist_id = self._match_id(url)
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        # Keep the original query string when there is one; otherwise fall
        # back to just the matched playlist id
        query = parse_qs(url) or {'list': playlist_id}
        tab_url = update_url_query('https://www.youtube.com/playlist', query)
        if is_music_url:
            tab_url = smuggle_url(tab_url, {'is_music_url': True})
        return self.url_result(tab_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
29f7c58a 5625
5626
class YoutubeYtBeIE(InfoExtractor):
    """Turn youtu.be short links that carry a ``list=`` id into watch URLs."""

    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'age_limit': 0,
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi_webp/yeWKywCrFtk/maxresdefault.webp',
            'channel': 'Backus-Page House Museum',
            'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
            'live_status': 'not_live',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
            'availability': 'public',
            'duration': 59,
            'comment_count': int,
            'channel_follower_count': int
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the equivalent watch?v=...&list=... URL and defer to YoutubeTabIE."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        watch_query = {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        }
        watch_url = update_url_query('https://www.youtube.com/watch', watch_query)
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 5676
5677
class YoutubeLivestreamEmbedIE(InfoExtractor):
    """Map ``/embed/live_stream?channel=<id>`` URLs to the channel's /live URL."""

    IE_DESC = 'YouTube livestream embeds'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
    _TESTS = [{
        'url': 'https://www.youtube.com/embed/live_stream?channel=UC2_KI6RB__jGdlnK6dvFEZA',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url_result for ``/channel/<id>/live``, handled by YoutubeTabIE."""
        channel_id = self._match_id(url)
        live_url = f'https://www.youtube.com/channel/{channel_id}/live'
        return self.url_result(live_url, ie=YoutubeTabIE.ie_key(), video_id=channel_id)
5691
5692
class YoutubeYtUserIE(InfoExtractor):
    """Expand the ``ytuser:<name>`` shorthand to the user's /videos URL."""

    IE_NAME = 'youtube:user'
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url_result for ``/user/<name>/videos``, handled by YoutubeTabIE."""
        user_id = self._match_id(url)
        videos_url = f'https://www.youtube.com/user/{user_id}/videos'
        return self.url_result(videos_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 5707
b05654f0 5708
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Map the ``:ytfav`` keyword (and its spelling variants) to the ``LL`` playlist."""

    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url_result for ``playlist?list=LL``, handled by YoutubeTabIE."""
        liked_playlist_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_playlist_url, ie=YoutubeTabIE.ie_key())
5726
5727
class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor):
    """Expose the notification menu as a playlist via the ``:ytnotif`` keyword."""

    IE_NAME = 'youtube:notif'
    IE_DESC = 'YouTube notifications; ":ytnotif" keyword (requires cookies)'
    _VALID_URL = r':ytnotif(?:ication)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytnotif',
        'only_matching': True,
    }, {
        'url': ':ytnotifications',
        'only_matching': True,
    }]

    def _extract_notification_menu(self, response, continuation_list):
        """Yield one entry per usable notification found in *response*.

        ``continuation_list`` is a one-element list used as an out-parameter:
        slot 0 is reset to None and then set to the last
        ``continuationItemRenderer`` seen, if any.
        """
        menu_items = traverse_obj(
            response,
            ('actions', 0, 'openPopupAction', 'popup', 'multiPageMenuRenderer', 'sections', 0, 'multiPageMenuNotificationSectionRenderer', 'items'),
            ('actions', 0, 'appendContinuationItemsAction', 'continuationItems'),
            expected_type=list) or []
        continuation_list[0] = None
        for menu_item in menu_items:
            entry = self._extract_notification_renderer(menu_item.get('notificationRenderer'))
            if entry:
                yield entry
            next_page = menu_item.get('continuationItemRenderer')
            if next_page:
                continuation_list[0] = next_page

    def _extract_notification_renderer(self, notification):
        """Convert one notification renderer into a ``url`` result dict.

        Returns None when the notification has no video id and no
        channel id + post id pair to build a community-post URL from.
        """
        video_id = traverse_obj(
            notification, ('navigationEndpoint', 'watchEndpoint', 'videoId'), expected_type=str)
        channel_id = None
        if video_id:
            url = f'https://www.youtube.com/watch?v={video_id}'
        else:
            browse_ep = traverse_obj(
                notification, ('navigationEndpoint', 'browseEndpoint'), expected_type=dict)
            channel_id = traverse_obj(browse_ep, 'browseId', expected_type=str)
            post_id = self._search_regex(
                r'/post/(.+)', traverse_obj(browse_ep, 'canonicalBaseUrl', expected_type=str),
                'post id', default=None)
            if not (channel_id and post_id):
                return None
            # The direct /post url redirects to this in the browser
            url = f'https://www.youtube.com/channel/{channel_id}/community?lb={post_id}'

        channel = traverse_obj(
            notification, ('contextualMenu', 'menuRenderer', 'items', 1, 'menuServiceItemRenderer', 'text', 'runs', 1, 'text'),
            expected_type=str)
        notification_title = self._get_text(notification, 'shortMessage')
        if notification_title:
            notification_title = notification_title.replace('\xad', '')  # remove soft hyphens
        # TODO: handle recommended videos
        title = self._search_regex(
            rf'{re.escape(channel or "")}[^:]+: (.+)', notification_title,
            'video title', default=None)

        # The sent-time text is only converted to a date when the
        # 'approximate_date' extractor argument is given
        upload_date = None
        if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE.ie_key()):
            sent_time = self._extract_time_text(notification, 'sentTimeText')[0]
            upload_date = strftime_or_none(sent_time, '%Y%m%d')

        target_ie = YoutubeIE if video_id else YoutubeTabIE
        return {
            '_type': 'url',
            'url': url,
            'ie_key': target_ie.ie_key(),
            'video_id': video_id,
            'title': title,
            'channel_id': channel_id,
            'channel': channel,
            'thumbnails': self._extract_thumbnails(notification, 'videoThumbnail'),
            'upload_date': upload_date,
        }

    def _notification_menu_entries(self, ytcfg):
        """Yield entries page by page until a page sets no continuation."""
        continuation_list = [None]
        response = None
        for page in itertools.count(1):
            ctoken = traverse_obj(
                continuation_list, (0, 'continuationEndpoint', 'getNotificationMenuEndpoint', 'ctoken'), expected_type=str)
            query = {'ctoken': ctoken} if ctoken else {}
            # Visitor data is taken from the previous page's response (None on page 1)
            headers = self.generate_api_headers(
                ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
            response = self._extract_response(
                item_id=f'page {page}', query=query, ytcfg=ytcfg,
                ep='notification/get_notification_menu', check_get_keys='actions',
                headers=headers)
            yield from self._extract_notification_menu(response, continuation_list)
            if not continuation_list[0]:
                break

    def _real_extract(self, url):
        """Return a playlist named 'notifications' backed by the lazy entry generator."""
        display_id = 'notifications'
        ytcfg = {} if self.skip_webpage else self._download_ytcfg('web', display_id)
        self._report_playlist_authcheck(ytcfg)
        entries = self._notification_menu_entries(ytcfg)
        return self.playlist_result(entries, display_id, display_id)
5817
5818
class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor registered under the ``ytsearch`` key."""

    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'  # Videos only
    _TESTS = [{
        'url': 'ytsearch5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }]
b05654f0 5832
a61fd4cf 5833
class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor registered under the ``ytsearchdate`` key."""

    # Derived from the plain search extractor's name with a ':date' suffix
    IE_NAME = f'{YoutubeSearchIE.IE_NAME}:date'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAISAhAB'  # Videos only, sorted by date
    _TESTS = [{
        'url': 'ytsearchdate5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }]
75dff0ee 5847
c9ae7b95 5848
class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Extract results from ``/results`` or ``/search`` URLs on www.youtube.com."""

    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=%23cats',
        'playlist_mincount': 1,
        'info_dict': {
            'id': '#cats',
            'title': '#cats',
            # The test suite does not have support for nested playlists
            # 'entries': [{
            #     'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
            #     'title': '#cats',
            # }],
        },
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Search for the URL's query, forwarding its ``sp`` value (or None) as params."""
        qs = parse_qs(url)
        # `search_query` takes precedence over `q` when both are present
        query = (qs.get('search_query') or qs.get('q'))[0]
        search_params = qs.get('sp', (None,))[0]
        entries = self._search_results(query, search_params)
        return self.playlist_result(entries, query, query)
3462ffa8 5888
5889
class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Extract results from music.youtube.com ``/search`` URLs.

    A result section can be chosen with the ``sp`` query parameter or with a
    URL fragment naming one of the keys of ``_SECTIONS``.
    """

    IE_NAME = 'youtube:music:search_url'
    IE_DESC = 'YouTube music search URLs with selectable sections, e.g. #songs'
    _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://music.youtube.com/search?q=royalty+free+music',
        'playlist_count': 16,
        'info_dict': {
            'id': 'royalty free music',
            'title': 'royalty free music',
        }
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - songs',
            'title': 'royalty free music - songs',
        },
        'params': {'extract_flat': 'in_playlist'}
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - community playlists',
            'title': 'royalty free music - community playlists',
        },
        'params': {'extract_flat': 'in_playlist'}
    }]

    # Section name (as written in the URL fragment) -> `sp` search params value
    _SECTIONS = {
        'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
        'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
        'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
        'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
        'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
        'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
    }

    def _real_extract(self, url):
        """Search with the ``web_music`` client; the title is "<query> - <section>"."""
        qs = parse_qs(url)
        query = (qs.get('search_query') or qs.get('q'))[0]
        params = qs.get('sp', (None,))[0]
        if params:
            # Reverse lookup: show the section name when `sp` is a known value,
            # otherwise fall back to the raw `sp` string
            section = params
            for section_name, section_params in self._SECTIONS.items():
                if section_params == params:
                    section = section_name
                    break
        else:
            # No `sp` given: take the text after the first '#' as the section name
            fragment = (url.split('#') + [''])[1]
            section = urllib.parse.unquote_plus(fragment).lower()
            params = self._SECTIONS.get(section)
            if not params:
                section = None
        title = join_nonempty(query, section, delim=' - ')
        entries = self._search_results(query, params, default_client='web_music')
        return self.playlist_result(entries, title, title)
16aa9ea4 5941
5942
class YoutubeFeedsInfoExtractor(InfoExtractor):
    """
    Common base for the feed extractors.
    Each subclass must override _FEED_NAME; it determines both IE_NAME
    and the /feed/ URL that extraction is delegated to.
    """
    _FEED_NAME = 'feeds'
    _LOGIN_REQUIRED = True

    @classproperty
    def IE_NAME(self):
        feed = self._FEED_NAME
        return f'youtube:{feed}'

    def _real_initialize(self):
        # Reuse the login check of YoutubeBaseInfoExtractor on this instance
        YoutubeBaseInfoExtractor._check_login_required(self)

    def _real_extract(self, url):
        # Hand the corresponding feed page over to the tab extractor
        feed_url = f'https://www.youtube.com/feed/{self._FEED_NAME}'
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
5961
5962
class YoutubeWatchLaterIE(InfoExtractor):
    # Keyword extractor: maps ":ytwatchlater" onto the "WL" playlist URL
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # The input carries no information beyond the keyword itself, so
        # extraction is simply delegated to the tab extractor.
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 5975
5976
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    # Feed extractor for the "recommended" feed. Matches the bare
    # youtube.com front page as well as the ":ytrec" / ":ytrecommended"
    # keywords, and overrides the base class's login requirement.
    _FEED_NAME = 'recommended'
    _LOGIN_REQUIRED = False
    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
1ed5b5c9 5992
1ed5b5c9 5993
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    # Feed extractor for the "subscriptions" feed, reached through the
    # ":ytsubs" keyword and the longer spellings the pattern below accepts.
    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
1ed5b5c9 6005
1ed5b5c9 6006
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    # Feed extractor for the "history" feed, reached through the
    # ":ythis" / ":ythistory" keywords.
    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
1ed5b5c9
JMF
6015
6016
class YoutubeStoriesIE(InfoExtractor):
    # Pseudo-URL extractor: "ytstories:<channel id>" is rewritten into a
    # playlist URL and delegated to the tab extractor.
    IE_NAME = 'youtube:stories'
    IE_DESC = 'YouTube channel stories; "ytstories:" prefix'
    _VALID_URL = r'ytstories:UC(?P<id>[A-Za-z0-9_-]{21}[AQgw])$'
    _TESTS = [{
        'url': 'ytstories:UCwFCb4jeqaKWnciAYM-ZVHg',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # The matched id is the channel ID without its leading "UC";
        # the playlist ID is that id prefixed with "RLTD".
        channel_suffix = self._match_id(url)
        playlist_id = f'RLTD{channel_suffix}'
        # Smuggle a marker along with the URL so the receiving extractor
        # can tell this playlist was requested as a story.
        story_url = smuggle_url(
            f'https://www.youtube.com/playlist?list={playlist_id}&playnext=1',
            {'is_story': True})
        return self.url_result(story_url, ie=YoutubeTabIE, video_id=playlist_id)
6031
6032
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    # Catches watch/attribution URLs that carry no video ID. The error raised
    # below attributes this to an unquoted "&" having cut the URL short in the
    # shell, and tells the user how to quote it.
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always fail with a hint about quoting the URL.

        Raises:
            ExtractorError: unconditionally, flagged ``expected=True``.
        """
        # The example commands name this program (yt-dlp); the previous text
        # told users to run "youtube-dl", which is a different executable.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)
772fd5cc
PH
6080
6081
class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
    # Resolves a /clip/<id> page to its underlying video and annotates the
    # result with the clip's start and end offsets.
    IE_NAME = 'youtube:clip'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/(?P<id>[^/?#]+)'
    _TESTS = [{
        # FIXME: Other metadata should be extracted from the clip, not from the base video
        'url': 'https://www.youtube.com/clip/UgytZKpehg-hEMBSn3F4AaABCQ',
        'info_dict': {
            'id': 'UgytZKpehg-hEMBSn3F4AaABCQ',
            'ext': 'mp4',
            'section_start': 29.0,
            'section_end': 39.7,
            'duration': 10.7,
            'age_limit': 0,
            'availability': 'public',
            'categories': ['Gaming'],
            'channel': 'Scott The Woz',
            'channel_id': 'UC4rqhyiTs7XyuODcECvuiiQ',
            'channel_url': 'https://www.youtube.com/channel/UC4rqhyiTs7XyuODcECvuiiQ',
            'description': 'md5:7a4517a17ea9b4bd98996399d8bb36e7',
            'like_count': int,
            'playable_in_embed': True,
            'tags': 'count:17',
            'thumbnail': 'https://i.ytimg.com/vi_webp/ScPX26pdQik/maxresdefault.webp',
            'title': 'Mobile Games on Console - Scott The Woz',
            'upload_date': '20210920',
            'uploader': 'Scott The Woz',
            'uploader_id': 'scottthewoz',
            'uploader_url': 'http://www.youtube.com/user/scottthewoz',
            'view_count': int,
            'live_status': 'not_live',
            'channel_follower_count': int
        }
    }]

    def _real_extract(self, url):
        """Return a ``url_transparent`` result pointing at the clip's base video.

        The result keeps the clip ID as ``id`` and carries ``section_start`` /
        ``section_end`` in seconds, converted from the millisecond values in
        the page data.

        Raises:
            ExtractorError: if the page data contains neither a video ID nor
                the clip's loop command.
        """
        clip_id = self._match_id(url)
        _, data = self._extract_webpage(url, clip_id)

        video_id = traverse_obj(data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'))
        if not video_id:
            raise ExtractorError('Unable to find video ID')

        clip_data = traverse_obj(data, (
            'engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'clipSectionRenderer',
            'contents', ..., 'clipAttributionRenderer', 'onScrubExit', 'commandExecutorCommand', 'commands', ...,
            'openPopupAction', 'popup', 'notificationActionRenderer', 'actionButton', 'buttonRenderer', 'command',
            'commandExecutorCommand', 'commands', ..., 'loopCommand'), get_all=False)
        if not clip_data:
            # Without this guard a missing loop command surfaces as an opaque
            # TypeError from subscripting None; fail the same way as the
            # video ID lookup above instead.
            raise ExtractorError('Unable to find clip data')

        return {
            '_type': 'url_transparent',
            'url': f'https://www.youtube.com/watch?v={video_id}',
            'ie_key': YoutubeIE.ie_key(),
            'id': clip_id,
            # Page data stores the offsets in milliseconds
            'section_start': int(clip_data['startTimeMs']) / 1000,
            'section_end': int(clip_data['endTimeMs']) / 1000,
        }
3cd786db 6138
6139
772fd5cc
PH
class YoutubeTruncatedIDIE(InfoExtractor):
    # Catches watch URLs whose "v" value is only 1-10 characters long and
    # reports them as truncated instead of attempting extraction.
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always fail, reporting the incomplete ID and the offending URL.

        Raises:
            ExtractorError: unconditionally, flagged ``expected=True``.
        """
        partial_id = self._match_id(url)
        message = f'Incomplete YouTube ID {partial_id}. URL {url} looks truncated.'
        raise ExtractorError(message, expected=True)