jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
Fix bug in `--alias`
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
6e634cbe 1import base64
d92f5d5a 2import calendar
109dd3b2 3import copy
fe93e2c4 4import datetime
a5c56234 5import hashlib
0ca96d48 6import itertools
c5e8d7af 7import json
720c3099 8import math
c4417ddb 9import os.path
d77ab8e2 10import random
c5e8d7af 11import re
46383212 12import sys
f8271158 13import threading
8a784c74 14import time
e0df6211 15import traceback
14f25df2 16import urllib.error
ac668111 17import urllib.parse
c5e8d7af 18
b05654f0 19from .common import InfoExtractor, SearchInfoExtractor
25836db6 20from .openload import PhantomJSwrapper
14f25df2 21from ..compat import functools
545cc85d 22from ..jsinterp import JSInterpreter
4bb4a188 23from ..utils import (
f8271158 24 NO_DEFAULT,
25 ExtractorError,
693f0600 26 UserNotLive,
720c3099 27 bug_reports_message,
82d02080 28 classproperty,
c5e8d7af 29 clean_html,
d92f5d5a 30 datetime_from_str,
11f9be09 31 dict_get,
2d30521a 32 float_or_none,
11f9be09 33 format_field,
ff91cf74 34 get_first,
dd27fd17 35 int_or_none,
641ad5d8 36 is_html,
34921b43 37 join_nonempty,
48416bc4 38 js_to_json,
94278f72 39 mimetype2ext,
9c0d7f49 40 network_exceptions,
11f9be09 41 orderedSet,
6310acf5 42 parse_codecs,
49bd8c66 43 parse_count,
7c80519c 44 parse_duration,
7ea65411 45 parse_iso8601,
4dfbf869 46 parse_qs,
dca3ff4a 47 qualities,
3995d37d 48 remove_start,
cf7e015f 49 smuggle_url,
dbdaaa23 50 str_or_none,
c93d53f5 51 str_to_int,
f3aa3c3f 52 strftime_or_none,
7c365c21 53 traverse_obj,
556dbe7f 54 try_get,
c5e8d7af
PH
55 unescapeHTML,
56 unified_strdate,
f0d785d3 57 unified_timestamp,
cf7e015f 58 unsmuggle_url,
8bdd16b4 59 update_url_query,
21c340b8 60 url_or_none,
fe93e2c4 61 urljoin,
7c365c21 62 variadic,
c5e8d7af
PH
63)
64
962ffcf8 65# any clients starting with _ cannot be explicitly requested by the user
000c15a4 66INNERTUBE_CLIENTS = {
67 'web': {
68 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
69 'INNERTUBE_CONTEXT': {
70 'client': {
71 'clientName': 'WEB',
a0c830f4 72 'clientVersion': '2.20220801.00.00',
000c15a4 73 }
74 },
75 'INNERTUBE_CONTEXT_CLIENT_NAME': 1
76 },
77 'web_embedded': {
78 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
79 'INNERTUBE_CONTEXT': {
80 'client': {
81 'clientName': 'WEB_EMBEDDED_PLAYER',
a0c830f4 82 'clientVersion': '1.20220731.00.00',
000c15a4 83 },
84 },
85 'INNERTUBE_CONTEXT_CLIENT_NAME': 56
86 },
87 'web_music': {
88 'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
89 'INNERTUBE_HOST': 'music.youtube.com',
90 'INNERTUBE_CONTEXT': {
91 'client': {
92 'clientName': 'WEB_REMIX',
a0c830f4 93 'clientVersion': '1.20220727.01.00',
000c15a4 94 }
95 },
96 'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
97 },
e7e94f2a 98 'web_creator': {
18c7683d 99 'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
e7e94f2a
D
100 'INNERTUBE_CONTEXT': {
101 'client': {
102 'clientName': 'WEB_CREATOR',
a0c830f4 103 'clientVersion': '1.20220726.00.00',
e7e94f2a
D
104 }
105 },
106 'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
107 },
000c15a4 108 'android': {
18c7683d 109 'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
000c15a4 110 'INNERTUBE_CONTEXT': {
111 'client': {
112 'clientName': 'ANDROID',
c7dcf0b3 113 'clientVersion': '17.29.34',
114 'androidSdkVersion': 30
000c15a4 115 }
116 },
117 'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
b6de707d 118 'REQUIRE_JS_PLAYER': False
000c15a4 119 },
120 'android_embedded': {
18c7683d 121 'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
000c15a4 122 'INNERTUBE_CONTEXT': {
123 'client': {
124 'clientName': 'ANDROID_EMBEDDED_PLAYER',
c7dcf0b3 125 'clientVersion': '17.29.34',
126 'androidSdkVersion': 30
000c15a4 127 },
128 },
b6de707d 129 'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
130 'REQUIRE_JS_PLAYER': False
000c15a4 131 },
132 'android_music': {
18c7683d 133 'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
000c15a4 134 'INNERTUBE_CONTEXT': {
135 'client': {
136 'clientName': 'ANDROID_MUSIC',
a0c830f4 137 'clientVersion': '5.16.51',
c7dcf0b3 138 'androidSdkVersion': 30
000c15a4 139 }
140 },
141 'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
b6de707d 142 'REQUIRE_JS_PLAYER': False
000c15a4 143 },
e7e94f2a 144 'android_creator': {
18c7683d 145 'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
e7e94f2a
D
146 'INNERTUBE_CONTEXT': {
147 'client': {
148 'clientName': 'ANDROID_CREATOR',
a0c830f4 149 'clientVersion': '22.28.100',
c7dcf0b3 150 'androidSdkVersion': 30
e7e94f2a
D
151 },
152 },
b6de707d 153 'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
154 'REQUIRE_JS_PLAYER': False
e7e94f2a 155 },
18c7683d 156 # iOS clients have HLS live streams. Setting device model to get 60fps formats.
157 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
000c15a4 158 'ios': {
18c7683d 159 'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
000c15a4 160 'INNERTUBE_CONTEXT': {
161 'client': {
162 'clientName': 'IOS',
a0c830f4 163 'clientVersion': '17.30.1',
18c7683d 164 'deviceModel': 'iPhone14,3',
000c15a4 165 }
166 },
b6de707d 167 'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
168 'REQUIRE_JS_PLAYER': False
000c15a4 169 },
170 'ios_embedded': {
000c15a4 171 'INNERTUBE_CONTEXT': {
172 'client': {
173 'clientName': 'IOS_MESSAGES_EXTENSION',
a0c830f4 174 'clientVersion': '17.30.1',
18c7683d 175 'deviceModel': 'iPhone14,3',
000c15a4 176 },
177 },
b6de707d 178 'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
179 'REQUIRE_JS_PLAYER': False
000c15a4 180 },
181 'ios_music': {
18c7683d 182 'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
000c15a4 183 'INNERTUBE_CONTEXT': {
184 'client': {
185 'clientName': 'IOS_MUSIC',
a0c830f4 186 'clientVersion': '5.18',
000c15a4 187 },
188 },
b6de707d 189 'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
190 'REQUIRE_JS_PLAYER': False
000c15a4 191 },
e7e94f2a
D
192 'ios_creator': {
193 'INNERTUBE_CONTEXT': {
194 'client': {
195 'clientName': 'IOS_CREATOR',
a0c830f4 196 'clientVersion': '22.29.101',
e7e94f2a
D
197 },
198 },
b6de707d 199 'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
200 'REQUIRE_JS_PLAYER': False
e7e94f2a 201 },
3619f78d 202 # mweb has 'ultralow' formats
203 # See: https://github.com/yt-dlp/yt-dlp/pull/557
000c15a4 204 'mweb': {
18c7683d 205 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
000c15a4 206 'INNERTUBE_CONTEXT': {
207 'client': {
208 'clientName': 'MWEB',
a0c830f4 209 'clientVersion': '2.20220801.00.00',
000c15a4 210 }
211 },
212 'INNERTUBE_CONTEXT_CLIENT_NAME': 2
e7870111
D
213 },
214 # This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
215 # See: https://github.com/zerodytrash/YouTube-Internal-Clients
216 'tv_embedded': {
217 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
218 'INNERTUBE_CONTEXT': {
219 'client': {
220 'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
221 'clientVersion': '2.0',
222 },
223 },
224 'INNERTUBE_CONTEXT_CLIENT_NAME': 85
225 },
000c15a4 226}
227
228
e7870111
D
229def _split_innertube_client(client_name):
230 variant, *base = client_name.rsplit('.', 1)
231 if base:
232 return variant, base[0], variant
233 base, *variant = client_name.split('_', 1)
234 return client_name, base, variant[0] if variant else None
235
236
def build_innertube_clients():
    """
    Fill in defaults and a 'priority' for every entry of INNERTUBE_CLIENTS (in place).

    Clients without a variant additionally get a deep-copied '<client>_embedscreen'
    entry; 'embedded' variants get the third-party embed context.
    """
    third_party = {
        'embedUrl': 'https://www.youtube.com/',  # Can be any valid URL
    }
    base_clients = ('android', 'web', 'tv', 'ios', 'mweb')
    rank = qualities(base_clients[::-1])
    fallback_values = (
        ('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8'),
        ('INNERTUBE_HOST', 'www.youtube.com'),
        ('REQUIRE_JS_PLAYER', True),
    )

    # Snapshot the items: new '*_embedscreen' entries are added while looping
    for name, cfg in list(INNERTUBE_CLIENTS.items()):
        for key, value in fallback_values:
            cfg.setdefault(key, value)
        cfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')

        base_name, variant = _split_innertube_client(name)[1:]
        cfg['priority'] = 10 * rank(base_name)

        if variant == 'embedded':
            cfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            cfg['priority'] -= 2
        elif variant:
            cfg['priority'] -= 3
        else:
            screen_cfg = copy.deepcopy(cfg)
            screen_cfg['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
            screen_cfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            screen_cfg['priority'] -= 3
            INNERTUBE_CLIENTS[f'{name}_embedscreen'] = screen_cfg
263
264
265build_innertube_clients()
266
267
de7f3446 268class YoutubeBaseInfoExtractor(InfoExtractor):
b2e8bc1b 269 """Provide base functions for Youtube extractors"""
e00eb564 270
3462ffa8 271 _RESERVED_NAMES = (
3cd786db 272 r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'
182bda88 273 r'shorts|movies|results|search|shared|hashtag|trending|explore|feed|feeds|'
3619f78d 274 r'browse|oembed|get_video_info|iframe_api|s/player|'
cd7c66cf 275 r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')
3462ffa8 276
3619f78d 277 _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
278
52efa4b3 279 # _NETRC_MACHINE = 'youtube'
3619f78d 280
b2e8bc1b
JMF
281 # If True it will raise an error if no login info is provided
282 _LOGIN_REQUIRED = False
283
d9190e44
RH
284 _INVIDIOUS_SITES = (
285 # invidious-redirect websites
286 r'(?:www\.)?redirect\.invidious\.io',
287 r'(?:(?:www|dev)\.)?invidio\.us',
0a41f331 288 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md
d9190e44
RH
289 r'(?:www\.)?invidious\.pussthecat\.org',
290 r'(?:www\.)?invidious\.zee\.li',
291 r'(?:www\.)?invidious\.ethibox\.fr',
292 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
4c968755
U
293 r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
294 r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
d9190e44
RH
295 # youtube-dl invidious instances list
296 r'(?:(?:www|no)\.)?invidiou\.sh',
297 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
298 r'(?:www\.)?invidious\.kabi\.tk',
299 r'(?:www\.)?invidious\.mastodon\.host',
300 r'(?:www\.)?invidious\.zapashcanon\.fr',
301 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
302 r'(?:www\.)?invidious\.tinfoil-hat\.net',
303 r'(?:www\.)?invidious\.himiko\.cloud',
304 r'(?:www\.)?invidious\.reallyancient\.tech',
305 r'(?:www\.)?invidious\.tube',
306 r'(?:www\.)?invidiou\.site',
307 r'(?:www\.)?invidious\.site',
308 r'(?:www\.)?invidious\.xyz',
309 r'(?:www\.)?invidious\.nixnet\.xyz',
310 r'(?:www\.)?invidious\.048596\.xyz',
311 r'(?:www\.)?invidious\.drycat\.fr',
312 r'(?:www\.)?inv\.skyn3t\.in',
313 r'(?:www\.)?tube\.poal\.co',
314 r'(?:www\.)?tube\.connect\.cafe',
315 r'(?:www\.)?vid\.wxzm\.sx',
316 r'(?:www\.)?vid\.mint\.lgbt',
317 r'(?:www\.)?vid\.puffyan\.us',
318 r'(?:www\.)?yewtu\.be',
319 r'(?:www\.)?yt\.elukerio\.org',
320 r'(?:www\.)?yt\.lelux\.fi',
321 r'(?:www\.)?invidious\.ggc-project\.de',
322 r'(?:www\.)?yt\.maisputain\.ovh',
323 r'(?:www\.)?ytprivate\.com',
324 r'(?:www\.)?invidious\.13ad\.de',
325 r'(?:www\.)?invidious\.toot\.koeln',
326 r'(?:www\.)?invidious\.fdn\.fr',
327 r'(?:www\.)?watch\.nettohikari\.com',
328 r'(?:www\.)?invidious\.namazso\.eu',
329 r'(?:www\.)?invidious\.silkky\.cloud',
330 r'(?:www\.)?invidious\.exonip\.de',
331 r'(?:www\.)?invidious\.riverside\.rocks',
332 r'(?:www\.)?invidious\.blamefran\.net',
333 r'(?:www\.)?invidious\.moomoo\.de',
334 r'(?:www\.)?ytb\.trom\.tf',
335 r'(?:www\.)?yt\.cyberhost\.uk',
336 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
337 r'(?:www\.)?qklhadlycap4cnod\.onion',
338 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
339 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
340 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
341 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
342 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
343 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
344 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
345 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
346 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
347 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
d1c4f6d4
JW
348 # piped instances from https://github.com/TeamPiped/Piped/wiki/Instances
349 r'(?:www\.)?piped\.kavin\.rocks',
350 r'(?:www\.)?piped\.silkky\.cloud',
351 r'(?:www\.)?piped\.tokhmi\.xyz',
352 r'(?:www\.)?piped\.moomoo\.me',
353 r'(?:www\.)?il\.ax',
354 r'(?:www\.)?piped\.syncpundit\.com',
355 r'(?:www\.)?piped\.mha\.fi',
356 r'(?:www\.)?piped\.mint\.lgbt',
357 r'(?:www\.)?piped\.privacy\.com\.de',
d9190e44
RH
358 )
359
cce889b9 360 def _initialize_consent(self):
361 cookies = self._get_cookies('https://www.youtube.com/')
362 if cookies.get('__Secure-3PSID'):
363 return
364 consent_id = None
365 consent = cookies.get('CONSENT')
366 if consent:
367 if 'YES' in consent.value:
368 return
369 consent_id = self._search_regex(
370 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
371 if not consent_id:
372 consent_id = random.randint(100, 999)
373 self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
8d81f3e3 374
f3aa3c3f 375 def _initialize_pref(self):
376 cookies = self._get_cookies('https://www.youtube.com/')
377 pref_cookie = cookies.get('PREF')
378 pref = {}
379 if pref_cookie:
380 try:
14f25df2 381 pref = dict(urllib.parse.parse_qsl(pref_cookie.value))
f3aa3c3f 382 except ValueError:
383 self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
396a76f7 384 pref.update({'hl': 'en', 'tz': 'UTC'})
14f25df2 385 self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(pref))
f3aa3c3f 386
b2e8bc1b 387 def _real_initialize(self):
f3aa3c3f 388 self._initialize_pref()
cce889b9 389 self._initialize_consent()
a25bca9f 390 self._check_login_required()
391
392 def _check_login_required(self):
24146491 393 if self._LOGIN_REQUIRED and not self._cookies_passed:
52efa4b3 394 self.raise_login_required('Login details are needed to download this content', method='cookies')
c5e8d7af 395
b7c47b74 396 _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*='
397 _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='
a0566bbf 398
000c15a4 399 def _get_default_ytcfg(self, client='web'):
400 return copy.deepcopy(INNERTUBE_CLIENTS[client])
109dd3b2 401
000c15a4 402 def _get_innertube_host(self, client='web'):
403 return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']
109dd3b2 404
000c15a4 405 def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
109dd3b2 406 # try_get but with fallback to default ytcfg client values when present
407 _func = lambda y: try_get(y, getter, expected_type)
408 return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
409
000c15a4 410 def _extract_client_name(self, ytcfg, default_client='web'):
3619f78d 411 return self._ytcfg_get_safe(
412 ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
14f25df2 413 lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), str, default_client)
109dd3b2 414
000c15a4 415 def _extract_client_version(self, ytcfg, default_client='web'):
3619f78d 416 return self._ytcfg_get_safe(
417 ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
14f25df2 418 lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), str, default_client)
109dd3b2 419
2ae778b8 420 def _select_api_hostname(self, req_api_hostname, default_client=None):
421 return (self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
422 or req_api_hostname or self._get_innertube_host(default_client or 'web'))
423
000c15a4 424 def _extract_api_key(self, ytcfg=None, default_client='web'):
14f25df2 425 return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], str, default_client)
109dd3b2 426
000c15a4 427 def _extract_context(self, ytcfg=None, default_client='web'):
f3aa3c3f 428 context = get_first(
429 (ytcfg, self._get_default_ytcfg(default_client)), 'INNERTUBE_CONTEXT', expected_type=dict)
396a76f7 430 # Enforce language and tz for extraction
431 client_context = traverse_obj(context, 'client', expected_type=dict, default={})
432 client_context.update({'hl': 'en', 'timeZone': 'UTC', 'utcOffsetMinutes': 0})
109dd3b2 433 return context
434
cf87314d 435 _SAPISID = None
436
109dd3b2 437 def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
a5c56234 438 time_now = round(time.time())
cf87314d 439 if self._SAPISID is None:
440 yt_cookies = self._get_cookies('https://www.youtube.com')
441 # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
442 # See: https://github.com/yt-dlp/yt-dlp/issues/393
443 sapisid_cookie = dict_get(
444 yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
445 if sapisid_cookie and sapisid_cookie.value:
446 self._SAPISID = sapisid_cookie.value
447 self.write_debug('Extracted SAPISID cookie')
448 # SAPISID cookie is required if not already present
449 if not yt_cookies.get('SAPISID'):
450 self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
451 self._set_cookie(
452 '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
453 else:
454 self._SAPISID = False
455 if not self._SAPISID:
456 return None
1974e99f 457 # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
458 sapisidhash = hashlib.sha1(
86e5f3ed 459 f'{time_now} {self._SAPISID} {origin}'.encode()).hexdigest()
1974e99f 460 return f'SAPISIDHASH {time_now}_{sapisidhash}'
a5c56234
M
461
462 def _call_api(self, ep, query, video_id, fatal=True, headers=None,
f4f751af 463 note='Downloading API JSON', errnote='Unable to download API page',
000c15a4 464 context=None, api_key=None, api_hostname=None, default_client='web'):
f4f751af 465
109dd3b2 466 data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
8bdd16b4 467 data.update(query)
11f9be09 468 real_headers = self.generate_api_headers(default_client=default_client)
f4f751af 469 real_headers.update({'content-type': 'application/json'})
470 if headers:
471 real_headers.update(headers)
2ae778b8 472 api_key = (self._configuration_arg('innertube_key', [''], ie_key=YoutubeIE.ie_key(), casesense=True)[0]
473 or api_key or self._extract_api_key(default_client=default_client))
545cc85d 474 return self._download_json(
2ae778b8 475 f'https://{self._select_api_hostname(api_hostname, default_client)}/youtubei/v1/{ep}',
a5c56234 476 video_id=video_id, fatal=fatal, note=note, errnote=errnote,
f4f751af 477 data=json.dumps(data).encode('utf8'), headers=real_headers,
2ae778b8 478 query={'key': api_key, 'prettyPrint': 'false'})
f4f751af 479
65141660 480 def extract_yt_initial_data(self, item_id, webpage, fatal=True):
481 return self._search_json(self._YT_INITIAL_DATA_RE, webpage, 'yt initial data', item_id, fatal=fatal)
1890fc63 482
99e9e001 483 @staticmethod
484 def _extract_session_index(*data):
485 """
486 Index of current account in account list.
487 See: https://github.com/yt-dlp/yt-dlp/pull/519
488 """
489 for ytcfg in data:
490 session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
491 if session_index is not None:
492 return session_index
493
494 # Deprecated?
495 def _extract_identity_token(self, ytcfg=None, webpage=None):
a1c5d2ca 496 if ytcfg:
14f25df2 497 token = try_get(ytcfg, lambda x: x['ID_TOKEN'], str)
a1c5d2ca
M
498 if token:
499 return token
99e9e001 500 if webpage:
501 return self._search_regex(
502 r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
503 'identity token', default=None, fatal=False)
a1c5d2ca
M
504
505 @staticmethod
fe93e2c4 506 def _extract_account_syncid(*args):
8ea3f7b9 507 """
508 Extract syncId required to download private playlists of secondary channels
fe93e2c4 509 @params response and/or ytcfg
8ea3f7b9 510 """
fe93e2c4 511 for data in args:
512 # ytcfg includes channel_syncid if on secondary channel
14f25df2 513 delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], str)
fe93e2c4 514 if delegated_sid:
515 return delegated_sid
516 sync_ids = (try_get(
517 data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
14f25df2 518 lambda x: x['DATASYNC_ID']), str) or '').split('||')
fe93e2c4 519 if len(sync_ids) >= 2 and sync_ids[1]:
520 # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
521 # and just "user_syncid||" for primary channel. We only want the channel_syncid
522 return sync_ids[0]
a1c5d2ca 523
ac56cf38 524 @staticmethod
525 def _extract_visitor_data(*args):
526 """
527 Extracts visitorData from an API response or ytcfg
528 Appears to be used to track session state
529 """
9222c381 530 return get_first(
6c73052c 531 args, [('VISITOR_DATA', ('INNERTUBE_CONTEXT', 'client', 'visitorData'), ('responseContext', 'visitorData'))],
9222c381 532 expected_type=str)
ac56cf38 533
2762dbb1 534 @functools.cached_property
99e9e001 535 def is_authenticated(self):
536 return bool(self._generate_sapisidhash_header())
537
11f9be09 538 def extract_ytcfg(self, video_id, webpage):
8c54a305 539 if not webpage:
540 return {}
29f7c58a 541 return self._parse_json(
542 self._search_regex(
543 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
f4f751af 544 default='{}'), video_id, fatal=False) or {}
545
11f9be09 546 def generate_api_headers(
99e9e001 547 self, *, ytcfg=None, account_syncid=None, session_index=None,
548 visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
549
2ae778b8 550 origin = 'https://' + (self._select_api_hostname(api_hostname, default_client))
f4f751af 551 headers = {
14f25df2 552 'X-YouTube-Client-Name': str(
11f9be09 553 self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)),
554 'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
99e9e001 555 'Origin': origin,
556 'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
557 'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
ac56cf38 558 'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg)
99e9e001 559 }
560 if session_index is None:
314ee305 561 session_index = self._extract_session_index(ytcfg)
562 if account_syncid or session_index is not None:
563 headers['X-Goog-AuthUser'] = session_index if session_index is not None else 0
99e9e001 564
109dd3b2 565 auth = self._generate_sapisidhash_header(origin)
f4f751af 566 if auth is not None:
567 headers['Authorization'] = auth
109dd3b2 568 headers['X-Origin'] = origin
99e9e001 569 return {h: v for h, v in headers.items() if v is not None}
29f7c58a 570
a25bca9f 571 def _download_ytcfg(self, client, video_id):
572 url = {
573 'web': 'https://www.youtube.com',
574 'web_music': 'https://music.youtube.com',
575 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
576 }.get(client)
577 if not url:
578 return {}
579 webpage = self._download_webpage(
580 url, video_id, fatal=False, note=f'Downloading {client.replace("_", " ").strip()} client config')
581 return self.extract_ytcfg(video_id, webpage) or {}
582
2d6659b9 583 @staticmethod
584 def _build_api_continuation_query(continuation, ctp=None):
585 query = {
586 'continuation': continuation
587 }
588 # TODO: Inconsistency with clickTrackingParams.
589 # Currently we have a fixed ctp contained within context (from ytcfg)
590 # and a ctp in root query for continuation.
591 if ctp:
592 query['clickTracking'] = {'clickTrackingParams': ctp}
593 return query
594
2d6659b9 595 @classmethod
596 def _extract_next_continuation_data(cls, renderer):
597 next_continuation = try_get(
598 renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
599 lambda x: x['continuation']['reloadContinuationData']), dict)
600 if not next_continuation:
601 return
602 continuation = next_continuation.get('continuation')
603 if not continuation:
604 return
605 ctp = next_continuation.get('clickTrackingParams')
fe93e2c4 606 return cls._build_api_continuation_query(continuation, ctp)
2d6659b9 607
608 @classmethod
609 def _extract_continuation_ep_data(cls, continuation_ep: dict):
610 if isinstance(continuation_ep, dict):
611 continuation = try_get(
14f25df2 612 continuation_ep, lambda x: x['continuationCommand']['token'], str)
2d6659b9 613 if not continuation:
614 return
615 ctp = continuation_ep.get('clickTrackingParams')
fe93e2c4 616 return cls._build_api_continuation_query(continuation, ctp)
2d6659b9 617
618 @classmethod
619 def _extract_continuation(cls, renderer):
620 next_continuation = cls._extract_next_continuation_data(renderer)
621 if next_continuation:
622 return next_continuation
fe93e2c4 623
2d6659b9 624 contents = []
625 for key in ('contents', 'items'):
626 contents.extend(try_get(renderer, lambda x: x[key], list) or [])
fe93e2c4 627
2d6659b9 628 for content in contents:
629 if not isinstance(content, dict):
630 continue
631 continuation_ep = try_get(
632 content, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
633 lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
634 dict)
635 continuation = cls._extract_continuation_ep_data(continuation_ep)
636 if continuation:
637 return continuation
638
fe93e2c4 639 @classmethod
640 def _extract_alerts(cls, data):
109dd3b2 641 for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
642 if not isinstance(alert_dict, dict):
643 continue
644 for alert in alert_dict.values():
645 alert_type = alert.get('type')
646 if not alert_type:
647 continue
052e1350 648 message = cls._get_text(alert, 'text')
109dd3b2 649 if message:
650 yield alert_type, message
651
c0ac49bc 652 def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
109dd3b2 653 errors = []
654 warnings = []
655 for alert_type, alert_message in alerts:
641ad5d8 656 if alert_type.lower() == 'error' and fatal:
109dd3b2 657 errors.append([alert_type, alert_message])
658 else:
659 warnings.append([alert_type, alert_message])
660
661 for alert_type, alert_message in (warnings + errors[:-1]):
86e5f3ed 662 self.report_warning(f'YouTube said: {alert_type} - {alert_message}', only_once=only_once)
109dd3b2 663 if errors:
664 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
665
666 def _extract_and_report_alerts(self, data, *args, **kwargs):
667 return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
668
47193e02 669 def _extract_badges(self, renderer: dict):
670 badges = set()
671 for badge in try_get(renderer, lambda x: x['badges'], list) or []:
14f25df2 672 label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], str)
47193e02 673 if label:
674 badges.add(label.lower())
675 return badges
676
677 @staticmethod
052e1350 678 def _get_text(data, *path_list, max_runs=None):
679 for path in path_list or [None]:
680 if path is None:
681 obj = [data]
682 else:
683 obj = traverse_obj(data, path, default=[])
684 if not any(key is ... or isinstance(key, (list, tuple)) for key in variadic(path)):
685 obj = [obj]
686 for item in obj:
14f25df2 687 text = try_get(item, lambda x: x['simpleText'], str)
052e1350 688 if text:
689 return text
690 runs = try_get(item, lambda x: x['runs'], list) or []
691 if not runs and isinstance(item, list):
692 runs = item
693
694 runs = runs[:min(len(runs), max_runs or len(runs))]
695 text = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str, default=[]))
696 if text:
697 return text
47193e02 698
f0d785d3 699 def _get_count(self, data, *path_list):
700 count_text = self._get_text(data, *path_list) or ''
701 count = parse_count(count_text)
702 if count is None:
703 count = str_to_int(
704 self._search_regex(r'^([\d,]+)', re.sub(r'\s', '', count_text), 'count', default=None))
705 return count
706
a709d873 707 @staticmethod
708 def _extract_thumbnails(data, *path_list):
709 """
710 Extract thumbnails from thumbnails dict
711 @param path_list: path list to level that contains 'thumbnails' key
712 """
713 thumbnails = []
714 for path in path_list or [()]:
715 for thumbnail in traverse_obj(data, (*variadic(path), 'thumbnails', ...), default=[]):
716 thumbnail_url = url_or_none(thumbnail.get('url'))
717 if not thumbnail_url:
718 continue
719 # Sometimes youtube gives a wrong thumbnail URL. See:
720 # https://github.com/yt-dlp/yt-dlp/issues/233
721 # https://github.com/ytdl-org/youtube-dl/issues/28023
722 if 'maxresdefault' in thumbnail_url:
723 thumbnail_url = thumbnail_url.split('?')[0]
724 thumbnails.append({
725 'url': thumbnail_url,
726 'height': int_or_none(thumbnail.get('height')),
727 'width': int_or_none(thumbnail.get('width')),
728 })
729 return thumbnails
730
f3aa3c3f 731 @staticmethod
732 def extract_relative_time(relative_time_text):
733 """
734 Extracts a relative time from string and converts to dt object
f0d785d3 735 e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today'
f3aa3c3f 736 """
f0d785d3 737 mobj = re.search(r'(?P<start>today|yesterday|now)|(?P<time>\d+)\s*(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?\s*ago', relative_time_text)
f3aa3c3f 738 if mobj:
f0d785d3 739 start = mobj.group('start')
740 if start:
741 return datetime_from_str(start)
f3aa3c3f 742 try:
f0d785d3 743 return datetime_from_str('now-%s%s' % (mobj.group('time'), mobj.group('unit')))
f3aa3c3f 744 except ValueError:
745 return None
746
747 def _extract_time_text(self, renderer, *path_list):
a25bca9f 748 """@returns (timestamp, time_text)"""
f3aa3c3f 749 text = self._get_text(renderer, *path_list) or ''
750 dt = self.extract_relative_time(text)
751 timestamp = None
752 if isinstance(dt, datetime.datetime):
753 timestamp = calendar.timegm(dt.timetuple())
f0d785d3 754
755 if timestamp is None:
756 timestamp = (
757 unified_timestamp(text) or unified_timestamp(
758 self._search_regex(
17322130 759 (r'([a-z]+\s*\d{1,2},?\s*20\d{2})', r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)'),
396a76f7 760 text.lower(), 'time text', default=None)))
f0d785d3 761
f3aa3c3f 762 if text and timestamp is None:
17322130 763 self.report_warning(f"Cannot parse localized time text '{text}'" + bug_reports_message(), only_once=True)
f3aa3c3f 764 return timestamp, text
765
    def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                          ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                          default_client='web'):
        """
        Call the innertube API endpoint `ep`, retrying through self.RetryManager().

        A retry is requested on non-HTTP network errors, on HTTP errors other
        than 403/429, on 'unknown error' alerts and when the response lacks
        `check_get_keys`. Other failures go to self._error_or_warning, which
        receives `fatal`.
        @returns the API response
        """
        for retry in self.RetryManager():
            try:
                # The call itself is always fatal; `fatal` is applied in the handlers below
                response = self._call_api(
                    ep=ep, fatal=True, headers=headers,
                    video_id=item_id, query=query, note=note,
                    context=self._extract_context(ytcfg, default_client),
                    api_key=self._extract_api_key(ytcfg, default_client),
                    api_hostname=api_hostname, default_client=default_client)
            except ExtractorError as e:
                if not isinstance(e.cause, network_exceptions):
                    return self._error_or_warning(e, fatal=fatal)
                elif not isinstance(e.cause, urllib.error.HTTPError):
                    retry.error = e
                    continue

                # Peek at the error body: a non-HTML body may be a JSON error from YouTube
                first_bytes = e.cause.read(512)
                if not is_html(first_bytes):
                    yt_error = try_get(
                        self._parse_json(
                            self._webpage_read_content(e.cause, None, item_id, prefix=first_bytes) or '{}', item_id, fatal=False),
                        lambda x: x['error']['message'], str)
                    if yt_error:
                        self._report_alerts([('ERROR', yt_error)], fatal=False)
                # Downloading page may result in intermittent 5xx HTTP error
                # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                # We also want to catch all other network exceptions since errors in later pages can be troublesome
                # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
                if e.cause.code not in (403, 429):
                    retry.error = e
                    continue
                return self._error_or_warning(e, fatal=fatal)

            try:
                self._extract_and_report_alerts(response, only_once=True)
            except ExtractorError as e:
                # YouTube servers may return errors we want to retry on in a 200 OK response
                # See: https://github.com/yt-dlp/yt-dlp/issues/839
                if 'unknown error' in e.msg.lower():
                    retry.error = e
                    continue
                return self._error_or_warning(e, fatal=fatal)
            # Youtube sometimes sends incomplete data
            # See: https://github.com/ytdl-org/youtube-dl/issues/28194
            if not traverse_obj(response, *variadic(check_get_keys)):
                retry.error = ExtractorError('Incomplete data received', expected=True)
                continue

            return response
109dd3b2 817
9297939e 818 @staticmethod
819 def is_music_url(url):
820 return re.match(r'https?://music\.youtube\.com/', url) is not None
821
def _extract_video(self, renderer):
    """Build a `url`-type result dict (handled by YoutubeIE) from a video renderer.

    @param renderer  Renderer dict; the keys read here include videoId, title,
                     descriptionSnippet, lengthText, thumbnailOverlays,
                     viewCountText, ownerText, shortBylineText,
                     publishedTimeText, upcomingEventData, navigationEndpoint
                     and thumbnail.
    @returns         dict with '_type': 'url' pointing at the watch page, or
                     at the shorts page when the renderer looks like a short.
    """
    video_id = renderer.get('videoId')
    title = self._get_text(renderer, 'title')
    description = self._get_text(renderer, 'descriptionSnippet')

    length_text = self._get_text(
        renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text'))
    duration = parse_duration(length_text)
    if duration is None:
        # Fall back to the duration embedded in the title's accessibility label
        a11y_label = traverse_obj(
            renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str)
        duration = parse_duration(self._search_regex(
            r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$',
            a11y_label, video_id, default=None, group='duration'))

    view_count = self._get_count(renderer, 'viewCountText')

    uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
    channel_id = traverse_obj(
        renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
        expected_type=str, get_all=False)
    timestamp, time_text = self._extract_time_text(renderer, 'publishedTimeText')
    scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))
    overlay_style = traverse_obj(
        renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
        get_all=False, expected_type=str)
    badges = self._extract_badges(renderer)
    thumbnails = self._extract_thumbnails(renderer, 'thumbnail')

    web_path = traverse_obj(
        renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
        expected_type=str)
    navigation_url = urljoin('https://www.youtube.com/', web_path) or ''
    # Either the overlay style or the navigation URL marks the item as a short
    is_short = overlay_style == 'SHORTS' or '/shorts/' in navigation_url
    url = (f'https://www.youtube.com/shorts/{video_id}' if is_short
           else f'https://www.youtube.com/watch?v={video_id}')

    # upload_date is only filled in when the `approximate_date` extractor arg is set
    upload_date = None
    if self._configuration_arg('approximate_date', ie_key='youtubetab'):
        upload_date = strftime_or_none(timestamp, '%Y%m%d')

    if scheduled_timestamp is not None:
        live_status = 'is_upcoming'
    elif 'streamed' in time_text.lower():
        live_status = 'was_live'
    elif overlay_style == 'LIVE' or 'live now' in badges:
        live_status = 'is_live'
    else:
        live_status = None

    availability = self._availability(
        needs_premium='premium' in badges, needs_subscription='members only' in badges)

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': url,
        'title': title,
        'description': description,
        'duration': duration,
        'view_count': view_count,
        'uploader': uploader,
        'channel_id': channel_id,
        'thumbnails': thumbnails,
        'upload_date': upload_date,
        'live_status': live_status,
        'release_timestamp': scheduled_timestamp,
        'availability': availability,
    }
876
0c148415 877
360e1ca5 878class YoutubeIE(YoutubeBaseInfoExtractor):
96565c7e 879 IE_DESC = 'YouTube'
cb7dfeea 880 _VALID_URL = r"""(?x)^
c5e8d7af 881 (
edb53e2d 882 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 883 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
884 (?:www\.)?deturl\.com/www\.youtube\.com|
885 (?:www\.)?pwnyoutube\.com|
886 (?:www\.)?hooktube\.com|
887 (?:www\.)?yourepeat\.com|
888 tube\.majestyc\.net|
889 %(invidious)s|
890 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
891 (?:.*?\#/)? # handle anchor (#/) redirect urls
892 (?: # the various things that can precede the ID:
b6ce9bb0 893 (?:(?:v|embed|e|shorts)/(?!videoseries|live_stream)) # v/ or embed/ or e/ or shorts/
c5e8d7af 894 |(?: # or the v= param in all its forms
f7000f3a 895 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 896 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 897 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
898 v=
899 )
f4b05232 900 ))
cbaed4bb
S
901 |(?:
902 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
903 vid\.plus| # or vid.plus/xxxx
904 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 905 %(invidious)s
cbaed4bb 906 )/
edb53e2d 907 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 908 )
c5e8d7af 909 )? # all until now is optional -> you can pass the naked ID
201c1459 910 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
c5e8d7af 911 (?(1).+)? # if we found the ID, everything can follow
9297939e 912 (?:\#|$)""" % {
d9190e44 913 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
bc2ca1bb 914 }
bfd973ec 915 _EMBED_REGEX = [r'''(?x)
916 (?:
917 <iframe[^>]+?src=|
918 data-video-url=|
919 <embed[^>]+?src=|
920 embedSWF\(?:\s*|
921 <object[^>]+data=|
922 new\s+SWFObject\(
923 )
924 (["\'])
925 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
926 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
927 \1''']
e40c758c 928 _PLAYER_INFO_RE = (
cc2db878 929 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
930 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 931 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 932 )
2c62dc26 933 _formats = {
c2d3cb4c 934 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
935 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
936 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
937 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
938 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
939 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
940 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
941 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 942 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 943 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
944 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
945 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
946 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
947 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
948 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 949 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 950 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
951 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 952
953
954 # 3D videos
c2d3cb4c 955 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
956 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
957 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
958 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 959 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
960 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
961 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 962
96fb5605 963 # Apple HTTP Live Streaming
11f12195 964 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 965 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
966 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
967 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
968 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
969 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 970 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
971 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
972
973 # DASH mp4 video
d23028a8
S
974 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
975 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
976 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
977 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
978 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 979 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
980 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
981 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
982 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
983 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
984 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
985 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 986
f6f1fc92 987 # Dash mp4 audio
d23028a8
S
988 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
989 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
990 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
991 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
992 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
993 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
994 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
995
996 # Dash webm
d23028a8
S
997 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
998 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
999 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1000 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1001 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1002 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1003 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
1004 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1005 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1006 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1007 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1008 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1009 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1010 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1011 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 1012 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
1013 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1014 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1015 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1016 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1017 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1018 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
1019
1020 # Dash webm audio
d23028a8
S
1021 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
1022 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 1023
0857baad 1024 # Dash webm audio with opus inside
d23028a8
S
1025 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
1026 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
1027 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 1028
ce6b9a2d
PH
1029 # RTMP (unnamed)
1030 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
1031
1032 # av01 video only formats sometimes served with "unknown" codecs
9b5fa9ee
TOH
1033 '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1034 '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1035 '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
1036 '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
1037 '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
1038 '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
1039 '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
1040 '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
c5e8d7af 1041 }
29f7c58a 1042 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 1043
fd5c4aab
S
1044 _GEO_BYPASS = False
1045
78caa52a 1046 IE_NAME = 'youtube'
2eb88d95
PH
1047 _TESTS = [
1048 {
2d3d2997 1049 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
1050 'info_dict': {
1051 'id': 'BaW_jenozKc',
1052 'ext': 'mp4',
3867038a 1053 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
1054 'uploader': 'Philipp Hagemeister',
1055 'uploader_id': 'phihag',
ec85ded8 1056 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
ff9f925b 1057 'channel': 'Philipp Hagemeister',
dd4c4492
S
1058 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1059 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 1060 'upload_date': '20121002',
ff9f925b 1061 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
4bc3a23e 1062 'categories': ['Science & Technology'],
3867038a 1063 'tags': ['youtube-dl'],
556dbe7f 1064 'duration': 10,
dbdaaa23 1065 'view_count': int,
3e7c1224 1066 'like_count': int,
ff9f925b 1067 'availability': 'public',
1068 'playable_in_embed': True,
1069 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1070 'live_status': 'not_live',
1071 'age_limit': 0,
7c80519c 1072 'start_time': 1,
297a564b 1073 'end_time': 9,
12a1b225 1074 'comment_count': int,
6c73052c 1075 'channel_follower_count': int
2eb88d95 1076 }
0e853ca4 1077 },
fccd3771 1078 {
4bc3a23e
PH
1079 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1080 'note': 'Embed-only video (#1746)',
1081 'info_dict': {
1082 'id': 'yZIXLfi8CZQ',
1083 'ext': 'mp4',
1084 'upload_date': '20120608',
1085 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1086 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1087 'uploader': 'SET India',
94bfcd23 1088 'uploader_id': 'setindia',
ec85ded8 1089 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 1090 'age_limit': 18,
545cc85d 1091 },
1092 'skip': 'Private video',
fccd3771 1093 },
11b56058 1094 {
8bdd16b4 1095 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
1096 'note': 'Use the first video ID in the URL',
1097 'info_dict': {
1098 'id': 'BaW_jenozKc',
1099 'ext': 'mp4',
3867038a 1100 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
1101 'uploader': 'Philipp Hagemeister',
1102 'uploader_id': 'phihag',
ec85ded8 1103 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
976ae3ea 1104 'channel': 'Philipp Hagemeister',
1105 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1106 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
11b56058 1107 'upload_date': '20121002',
976ae3ea 1108 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
11b56058 1109 'categories': ['Science & Technology'],
3867038a 1110 'tags': ['youtube-dl'],
556dbe7f 1111 'duration': 10,
dbdaaa23 1112 'view_count': int,
11b56058 1113 'like_count': int,
976ae3ea 1114 'availability': 'public',
1115 'playable_in_embed': True,
1116 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1117 'live_status': 'not_live',
1118 'age_limit': 0,
12a1b225 1119 'comment_count': int,
6c73052c 1120 'channel_follower_count': int
34a7de29
S
1121 },
1122 'params': {
1123 'skip_download': True,
1124 },
11b56058 1125 },
dd27fd17 1126 {
2d3d2997 1127 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
1128 'note': '256k DASH audio (format 141) via DASH manifest',
1129 'info_dict': {
1130 'id': 'a9LDPn-MO4I',
1131 'ext': 'm4a',
1132 'upload_date': '20121002',
1133 'uploader_id': '8KVIDEO',
ec85ded8 1134 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
1135 'description': '',
1136 'uploader': '8KVIDEO',
1137 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 1138 },
4bc3a23e
PH
1139 'params': {
1140 'youtube_include_dash_manifest': True,
1141 'format': '141',
4919603f 1142 },
de3c7fe0 1143 'skip': 'format 141 not served anymore',
dd27fd17 1144 },
8bdd16b4 1145 # DASH manifest with encrypted signature
1146 {
1147 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1148 'info_dict': {
1149 'id': 'IB3lcPjvWLA',
1150 'ext': 'm4a',
1151 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1152 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1153 'duration': 244,
1154 'uploader': 'AfrojackVEVO',
1155 'uploader_id': 'AfrojackVEVO',
1156 'upload_date': '20131011',
cc2db878 1157 'abr': 129.495,
976ae3ea 1158 'like_count': int,
1159 'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',
1160 'playable_in_embed': True,
1161 'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',
1162 'view_count': int,
1163 'track': 'The Spark',
1164 'live_status': 'not_live',
1165 'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',
1166 'channel': 'Afrojack',
1167 'uploader_url': 'http://www.youtube.com/user/AfrojackVEVO',
1168 'tags': 'count:19',
1169 'availability': 'public',
1170 'categories': ['Music'],
1171 'age_limit': 0,
1172 'alt_title': 'The Spark',
6c73052c 1173 'channel_follower_count': int
8bdd16b4 1174 },
1175 'params': {
1176 'youtube_include_dash_manifest': True,
1177 'format': '141/bestaudio[ext=m4a]',
1178 },
1179 },
65c2fde2 1180 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
c522adb1 1181 {
65c2fde2 1182 'note': 'Embed allowed age-gate video',
2d3d2997 1183 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
1184 'info_dict': {
1185 'id': 'HtVdAasjOgU',
1186 'ext': 'mp4',
1187 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 1188 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 1189 'duration': 142,
c522adb1
JMF
1190 'uploader': 'The Witcher',
1191 'uploader_id': 'WitcherGame',
ec85ded8 1192 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 1193 'upload_date': '20140605',
34952f09 1194 'age_limit': 18,
976ae3ea 1195 'categories': ['Gaming'],
1196 'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',
1197 'availability': 'needs_auth',
1198 'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',
1199 'like_count': int,
1200 'channel': 'The Witcher',
1201 'live_status': 'not_live',
1202 'tags': 'count:17',
1203 'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',
1204 'playable_in_embed': True,
1205 'view_count': int,
6c73052c 1206 'channel_follower_count': int
c522adb1
JMF
1207 },
1208 },
65c2fde2 1209 {
1210 'note': 'Age-gate video with embed allowed in public site',
1211 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1212 'info_dict': {
1213 'id': 'HsUATh_Nc2U',
1214 'ext': 'mp4',
1215 'title': 'Godzilla 2 (Official Video)',
1216 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1217 'upload_date': '20200408',
1218 'uploader_id': 'FlyingKitty900',
1219 'uploader': 'FlyingKitty',
1220 'age_limit': 18,
976ae3ea 1221 'availability': 'needs_auth',
1222 'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',
1223 'uploader_url': 'http://www.youtube.com/user/FlyingKitty900',
1224 'channel': 'FlyingKitty',
1225 'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',
1226 'view_count': int,
1227 'categories': ['Entertainment'],
1228 'live_status': 'not_live',
1229 'tags': ['Flyingkitty', 'godzilla 2'],
1230 'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',
1231 'like_count': int,
1232 'duration': 177,
1233 'playable_in_embed': True,
6c73052c 1234 'channel_follower_count': int
65c2fde2 1235 },
1236 },
1237 {
1238 'note': 'Age-gate video embedable only with clientScreen=EMBED',
1239 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1240 'info_dict': {
1241 'id': 'Tq92D6wQ1mg',
1242 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
3619f78d 1243 'ext': 'mp4',
17322130 1244 'upload_date': '20191228',
65c2fde2 1245 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1246 'uploader': 'Projekt Melody',
1247 'description': 'md5:17eccca93a786d51bc67646756894066',
1248 'age_limit': 18,
976ae3ea 1249 'like_count': int,
1250 'availability': 'needs_auth',
1251 'uploader_url': 'http://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
1252 'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1253 'view_count': int,
1254 'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',
1255 'channel': 'Projekt Melody',
1256 'live_status': 'not_live',
1257 'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],
1258 'playable_in_embed': True,
1259 'categories': ['Entertainment'],
1260 'duration': 106,
1261 'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
12a1b225 1262 'comment_count': int,
6c73052c 1263 'channel_follower_count': int
65c2fde2 1264 },
1265 },
1266 {
1267 'note': 'Non-Agegated non-embeddable video',
1268 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1269 'info_dict': {
1270 'id': 'MeJVWBSsPAY',
1271 'ext': 'mp4',
1272 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1273 'uploader': 'Herr Lurik',
1274 'uploader_id': 'st3in234',
1275 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1276 'upload_date': '20130730',
976ae3ea 1277 'track': 'Such mich find mich',
1278 'age_limit': 0,
1279 'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],
1280 'like_count': int,
1281 'playable_in_embed': False,
1282 'creator': 'OOMPH!',
1283 'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',
1284 'view_count': int,
1285 'alt_title': 'Such mich find mich',
1286 'duration': 210,
1287 'channel': 'Herr Lurik',
1288 'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',
1289 'categories': ['Music'],
1290 'availability': 'public',
1291 'uploader_url': 'http://www.youtube.com/user/st3in234',
1292 'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',
1293 'live_status': 'not_live',
1294 'artist': 'OOMPH!',
6c73052c 1295 'channel_follower_count': int
65c2fde2 1296 },
1297 },
1298 {
1299 'note': 'Non-bypassable age-gated video',
1300 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1301 'only_matching': True,
1302 },
8bdd16b4 1303 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1304 # YouTube Red ad is not captured for creator
1305 {
1306 'url': '__2ABJjxzNo',
1307 'info_dict': {
1308 'id': '__2ABJjxzNo',
1309 'ext': 'mp4',
1310 'duration': 266,
1311 'upload_date': '20100430',
1312 'uploader_id': 'deadmau5',
1313 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 1314 'creator': 'deadmau5',
1315 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 1316 'uploader': 'deadmau5',
1317 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 1318 'alt_title': 'Some Chords',
976ae3ea 1319 'availability': 'public',
1320 'tags': 'count:14',
1321 'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',
1322 'view_count': int,
1323 'live_status': 'not_live',
1324 'channel': 'deadmau5',
1325 'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',
1326 'like_count': int,
1327 'track': 'Some Chords',
1328 'artist': 'deadmau5',
1329 'playable_in_embed': True,
1330 'age_limit': 0,
1331 'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',
1332 'categories': ['Music'],
1333 'album': 'Some Chords',
6c73052c 1334 'channel_follower_count': int
8bdd16b4 1335 },
1336 'expected_warnings': [
1337 'DASH manifest missing',
1338 ]
1339 },
067aa17e 1340 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
1341 {
1342 'url': 'lqQg6PlCWgI',
1343 'info_dict': {
1344 'id': 'lqQg6PlCWgI',
1345 'ext': 'mp4',
556dbe7f 1346 'duration': 6085,
90227264 1347 'upload_date': '20150827',
cbe2bd91 1348 'uploader_id': 'olympic',
ec85ded8 1349 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
12a1b225 1350 'description': 'md5:04bbbf3ccceb6795947572ca36f45904',
11f9be09 1351 'uploader': 'Olympics',
cbe2bd91 1352 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
976ae3ea 1353 'like_count': int,
1354 'release_timestamp': 1343767800,
1355 'playable_in_embed': True,
1356 'categories': ['Sports'],
1357 'release_date': '20120731',
1358 'channel': 'Olympics',
1359 'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],
1360 'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',
1361 'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',
1362 'age_limit': 0,
1363 'availability': 'public',
1364 'live_status': 'was_live',
1365 'view_count': int,
1366 'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',
6c73052c 1367 'channel_follower_count': int
cbe2bd91
PH
1368 },
1369 'params': {
1370 'skip_download': 'requires avconv',
e52a40ab 1371 }
cbe2bd91 1372 },
6271f1ca
PH
1373 # Non-square pixels
1374 {
1375 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1376 'info_dict': {
1377 'id': '_b-2C3KPAM0',
1378 'ext': 'mp4',
1379 'stretched_ratio': 16 / 9.,
556dbe7f 1380 'duration': 85,
6271f1ca
PH
1381 'upload_date': '20110310',
1382 'uploader_id': 'AllenMeow',
ec85ded8 1383 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 1384 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 1385 'uploader': '孫ᄋᄅ',
6271f1ca 1386 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
976ae3ea 1387 'playable_in_embed': True,
1388 'channel': '孫ᄋᄅ',
1389 'age_limit': 0,
1390 'tags': 'count:11',
1391 'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',
1392 'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',
1393 'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',
1394 'view_count': int,
1395 'categories': ['People & Blogs'],
1396 'like_count': int,
1397 'live_status': 'not_live',
1398 'availability': 'unlisted',
12a1b225 1399 'comment_count': int,
6c73052c 1400 'channel_follower_count': int
6271f1ca 1401 },
06b491eb
S
1402 },
1403 # url_encoded_fmt_stream_map is empty string
1404 {
1405 'url': 'qEJwOuvDf7I',
1406 'info_dict': {
1407 'id': 'qEJwOuvDf7I',
f57b7835 1408 'ext': 'webm',
06b491eb
S
1409 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1410 'description': '',
1411 'upload_date': '20150404',
1412 'uploader_id': 'spbelect',
1413 'uploader': 'Наблюдатели Петербурга',
1414 },
1415 'params': {
1416 'skip_download': 'requires avconv',
e323cf3f
S
1417 },
1418 'skip': 'This live event has ended.',
06b491eb 1419 },
067aa17e 1420 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
1421 {
1422 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1423 'info_dict': {
1424 'id': 'FIl7x6_3R5Y',
eb6793ba 1425 'ext': 'webm',
da77d856
S
1426 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1427 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 1428 'duration': 220,
da77d856
S
1429 'upload_date': '20150625',
1430 'uploader_id': 'dorappi2000',
ec85ded8 1431 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 1432 'uploader': 'dorappi2000',
eb6793ba 1433 'formats': 'mincount:31',
da77d856 1434 },
eb6793ba 1435 'skip': 'not actual anymore',
2ee8f5d8 1436 },
8a1a26ce
YCH
1437 # DASH manifest with segment_list
1438 {
1439 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1440 'md5': '8ce563a1d667b599d21064e982ab9e31',
1441 'info_dict': {
1442 'id': 'CsmdDsKjzN8',
1443 'ext': 'mp4',
17ee98e1 1444 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
1445 'uploader': 'Airtek',
1446 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1447 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1448 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1449 },
1450 'params': {
1451 'youtube_include_dash_manifest': True,
1452 'format': '135', # bestvideo
be49068d
S
1453 },
1454 'skip': 'This live event has ended.',
2ee8f5d8 1455 },
cf7e015f
S
1456 {
1457 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 1458 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 1459 'info_dict': {
545cc85d 1460 'id': 'jvGDaLqkpTg',
1461 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1462 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
1463 },
1464 'playlist': [{
1465 'info_dict': {
545cc85d 1466 'id': 'jvGDaLqkpTg',
cf7e015f 1467 'ext': 'mp4',
545cc85d 1468 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1469 'description': 'md5:e03b909557865076822aa169218d6a5d',
1470 'duration': 10643,
1471 'upload_date': '20161111',
1472 'uploader': 'Team PGP',
1473 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1474 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1475 },
1476 }, {
1477 'info_dict': {
545cc85d 1478 'id': '3AKt1R1aDnw',
cf7e015f 1479 'ext': 'mp4',
545cc85d 1480 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1481 'description': 'md5:e03b909557865076822aa169218d6a5d',
1482 'duration': 10991,
1483 'upload_date': '20161111',
1484 'uploader': 'Team PGP',
1485 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1486 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1487 },
1488 }, {
1489 'info_dict': {
545cc85d 1490 'id': 'RtAMM00gpVc',
cf7e015f 1491 'ext': 'mp4',
545cc85d 1492 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1493 'description': 'md5:e03b909557865076822aa169218d6a5d',
1494 'duration': 10995,
1495 'upload_date': '20161111',
1496 'uploader': 'Team PGP',
1497 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1498 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1499 },
1500 }, {
1501 'info_dict': {
545cc85d 1502 'id': '6N2fdlP3C5U',
cf7e015f 1503 'ext': 'mp4',
545cc85d 1504 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1505 'description': 'md5:e03b909557865076822aa169218d6a5d',
1506 'duration': 10990,
1507 'upload_date': '20161111',
1508 'uploader': 'Team PGP',
1509 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1510 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1511 },
1512 }],
1513 'params': {
1514 'skip_download': True,
1515 },
65c2fde2 1516 'skip': 'Not multifeed anymore',
cbaed4bb 1517 },
f9f49d87 1518 {
067aa17e 1519 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
1520 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1521 'info_dict': {
1522 'id': 'gVfLd0zydlo',
1523 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1524 },
1525 'playlist_count': 2,
be49068d 1526 'skip': 'Not multifeed anymore',
f9f49d87 1527 },
cbaed4bb 1528 {
2d3d2997 1529 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 1530 'only_matching': True,
0e49d9a6 1531 },
6d4fc66b 1532 {
2d3d2997 1533 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
1534 'only_matching': True,
1535 },
0e49d9a6 1536 {
067aa17e 1537 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 1538 # Also tests cut-off URL expansion in video description (see
067aa17e
S
1539 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1540 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
1541 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1542 'info_dict': {
1543 'id': 'lsguqyKfVQg',
1544 'ext': 'mp4',
1545 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
11f9be09 1546 'alt_title': 'Dark Walk',
0e49d9a6 1547 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 1548 'duration': 133,
0e49d9a6
LL
1549 'upload_date': '20151119',
1550 'uploader_id': 'IronSoulElf',
ec85ded8 1551 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 1552 'uploader': 'IronSoulElf',
11f9be09 1553 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1554 'track': 'Dark Walk',
1555 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
92bc97d3 1556 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
976ae3ea 1557 'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',
1558 'categories': ['Film & Animation'],
1559 'view_count': int,
1560 'live_status': 'not_live',
1561 'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',
1562 'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',
1563 'tags': 'count:13',
1564 'availability': 'public',
1565 'channel': 'IronSoulElf',
1566 'playable_in_embed': True,
1567 'like_count': int,
1568 'age_limit': 0,
6c73052c 1569 'channel_follower_count': int
0e49d9a6
LL
1570 },
1571 'params': {
1572 'skip_download': True,
1573 },
1574 },
61f92af1 1575 {
067aa17e 1576 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
1577 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1578 'only_matching': True,
1579 },
313dfc45
LL
1580 {
1581 # Video with yt:stretch=17:0
1582 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1583 'info_dict': {
1584 'id': 'Q39EVAstoRM',
1585 'ext': 'mp4',
1586 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1587 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1588 'upload_date': '20151107',
1589 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1590 'uploader': 'CH GAMER DROID',
1591 },
1592 'params': {
1593 'skip_download': True,
1594 },
be49068d 1595 'skip': 'This video does not exist.',
313dfc45 1596 },
201c1459 1597 {
1598 # Video with incomplete 'yt:stretch=16:'
1599 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1600 'only_matching': True,
1601 },
7caf9830
S
1602 {
1603 # Video licensed under Creative Commons
1604 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1605 'info_dict': {
1606 'id': 'M4gD1WSo5mA',
1607 'ext': 'mp4',
1608 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1609 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1610 'duration': 721,
17322130 1611 'upload_date': '20150128',
7caf9830 1612 'uploader_id': 'BerkmanCenter',
ec85ded8 1613 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 1614 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830 1615 'license': 'Creative Commons Attribution license (reuse allowed)',
976ae3ea 1616 'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',
1617 'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',
1618 'like_count': int,
1619 'age_limit': 0,
1620 'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],
1621 'channel': 'The Berkman Klein Center for Internet & Society',
1622 'availability': 'public',
1623 'view_count': int,
1624 'categories': ['Education'],
1625 'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',
1626 'live_status': 'not_live',
1627 'playable_in_embed': True,
12a1b225 1628 'comment_count': int,
6c73052c 1629 'channel_follower_count': int
7caf9830
S
1630 },
1631 'params': {
1632 'skip_download': True,
1633 },
1634 },
fd050249
S
1635 {
1636 # Channel-like uploader_url
1637 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1638 'info_dict': {
1639 'id': 'eQcmzGIKrzg',
1640 'ext': 'mp4',
1641 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 1642 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1643 'duration': 4060,
17322130 1644 'upload_date': '20151120',
eb6793ba 1645 'uploader': 'Bernie Sanders',
fd050249 1646 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1647 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249 1648 'license': 'Creative Commons Attribution license (reuse allowed)',
976ae3ea 1649 'playable_in_embed': True,
1650 'tags': 'count:12',
1651 'like_count': int,
1652 'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1653 'age_limit': 0,
1654 'availability': 'public',
1655 'categories': ['News & Politics'],
1656 'channel': 'Bernie Sanders',
1657 'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',
1658 'view_count': int,
1659 'live_status': 'not_live',
1660 'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
12a1b225 1661 'comment_count': int,
6c73052c 1662 'channel_follower_count': int
fd050249
S
1663 },
1664 'params': {
1665 'skip_download': True,
1666 },
1667 },
040ac686
S
1668 {
1669 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1670 'only_matching': True,
7f29cf54
S
1671 },
1672 {
067aa17e 1673 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1674 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1675 'only_matching': True,
6496ccb4
S
1676 },
1677 {
1678 # Rental video preview
1679 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1680 'info_dict': {
1681 'id': 'uGpuVWrhIzE',
1682 'ext': 'mp4',
1683 'title': 'Piku - Trailer',
1684 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1685 'upload_date': '20150811',
1686 'uploader': 'FlixMatrix',
1687 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1688 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1689 'license': 'Standard YouTube License',
1690 },
1691 'params': {
1692 'skip_download': True,
1693 },
eb6793ba 1694 'skip': 'This video is not available.',
022a5d66 1695 },
12afdc2a
S
1696 {
1697 # YouTube Red video with episode data
1698 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1699 'info_dict': {
1700 'id': 'iqKdEhx-dD4',
1701 'ext': 'mp4',
1702 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1703 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1704 'duration': 2085,
12afdc2a
S
1705 'upload_date': '20170118',
1706 'uploader': 'Vsauce',
1707 'uploader_id': 'Vsauce',
1708 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1709 'series': 'Mind Field',
1710 'season_number': 1,
1711 'episode_number': 1,
976ae3ea 1712 'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',
1713 'tags': 'count:12',
1714 'view_count': int,
1715 'availability': 'public',
1716 'age_limit': 0,
1717 'channel': 'Vsauce',
1718 'episode': 'Episode 1',
1719 'categories': ['Entertainment'],
1720 'season': 'Season 1',
1721 'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',
1722 'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',
1723 'like_count': int,
1724 'playable_in_embed': True,
1725 'live_status': 'not_live',
6c73052c 1726 'channel_follower_count': int
12afdc2a
S
1727 },
1728 'params': {
1729 'skip_download': True,
1730 },
1731 'expected_warnings': [
1732 'Skipping DASH manifest',
1733 ],
1734 },
c7121fa7
S
1735 {
1736 # The following content has been identified by the YouTube community
1737 # as inappropriate or offensive to some audiences.
1738 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1739 'info_dict': {
1740 'id': '6SJNVb0GnPI',
1741 'ext': 'mp4',
1742 'title': 'Race Differences in Intelligence',
1743 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1744 'duration': 965,
1745 'upload_date': '20140124',
1746 'uploader': 'New Century Foundation',
1747 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1748 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1749 },
1750 'params': {
1751 'skip_download': True,
1752 },
545cc85d 1753 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1754 },
022a5d66
S
1755 {
1756 # itag 212
1757 'url': '1t24XAntNCY',
1758 'only_matching': True,
fd5c4aab
S
1759 },
1760 {
1761 # geo restricted to JP
1762 'url': 'sJL6WA-aGkQ',
1763 'only_matching': True,
1764 },
cd5a74a2
S
1765 {
1766 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1767 'only_matching': True,
1768 },
bc2ca1bb 1769 {
1770 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1771 'only_matching': True,
1772 },
1773 {
1774 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1775 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1776 'only_matching': True,
1777 },
825cd268
RA
1778 {
1779 # DRM protected
1780 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1781 'only_matching': True,
4fe54c12
S
1782 },
1783 {
1784 # Video with unsupported adaptive stream type formats
1785 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1786 'info_dict': {
1787 'id': 'Z4Vy8R84T1U',
1788 'ext': 'mp4',
1789 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1790 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1791 'duration': 433,
1792 'upload_date': '20130923',
1793 'uploader': 'Amelia Putri Harwita',
1794 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1795 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1796 'formats': 'maxcount:10',
1797 },
1798 'params': {
1799 'skip_download': True,
1800 'youtube_include_dash_manifest': False,
1801 },
5429d6a9 1802 'skip': 'not actual anymore',
5caabd3c 1803 },
1804 {
822b9d9c 1805 # Youtube Music Auto-generated description
5caabd3c 1806 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1807 'info_dict': {
1808 'id': 'MgNrAu2pzNs',
1809 'ext': 'mp4',
1810 'title': 'Voyeur Girl',
1811 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1812 'upload_date': '20190312',
5429d6a9
S
1813 'uploader': 'Stephen - Topic',
1814 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1815 'artist': 'Stephen',
1816 'track': 'Voyeur Girl',
1817 'album': 'it\'s too much love to know my dear',
1818 'release_date': '20190313',
1819 'release_year': 2019,
976ae3ea 1820 'alt_title': 'Voyeur Girl',
1821 'view_count': int,
1822 'uploader_url': 'http://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
1823 'playable_in_embed': True,
1824 'like_count': int,
1825 'categories': ['Music'],
1826 'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
1827 'channel': 'Stephen',
1828 'availability': 'public',
1829 'creator': 'Stephen',
1830 'duration': 169,
1831 'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',
1832 'age_limit': 0,
1833 'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1834 'tags': 'count:11',
1835 'live_status': 'not_live',
6c73052c 1836 'channel_follower_count': int
5caabd3c 1837 },
1838 'params': {
1839 'skip_download': True,
1840 },
1841 },
66b48727
RA
1842 {
1843 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1844 'only_matching': True,
1845 },
011e75e6
S
1846 {
1847 # invalid -> valid video id redirection
1848 'url': 'DJztXj2GPfl',
1849 'info_dict': {
1850 'id': 'DJztXj2GPfk',
1851 'ext': 'mp4',
1852 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1853 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1854 'upload_date': '20090125',
1855 'uploader': 'Prochorowka',
1856 'uploader_id': 'Prochorowka',
1857 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1858 'artist': 'Panjabi MC',
1859 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1860 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1861 },
1862 'params': {
1863 'skip_download': True,
1864 },
545cc85d 1865 'skip': 'Video unavailable',
ea74e00b
DP
1866 },
1867 {
1868 # empty description results in an empty string
1869 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1870 'info_dict': {
1871 'id': 'x41yOUIvK2k',
1872 'ext': 'mp4',
1873 'title': 'IMG 3456',
1874 'description': '',
1875 'upload_date': '20170613',
1876 'uploader_id': 'ElevageOrVert',
1877 'uploader': 'ElevageOrVert',
976ae3ea 1878 'view_count': int,
1879 'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',
1880 'uploader_url': 'http://www.youtube.com/user/ElevageOrVert',
1881 'like_count': int,
1882 'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',
1883 'tags': [],
1884 'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',
1885 'availability': 'public',
1886 'age_limit': 0,
1887 'categories': ['Pets & Animals'],
1888 'duration': 7,
1889 'playable_in_embed': True,
1890 'live_status': 'not_live',
1891 'channel': 'ElevageOrVert',
6c73052c 1892 'channel_follower_count': int
ea74e00b
DP
1893 },
1894 'params': {
1895 'skip_download': True,
1896 },
1897 },
a0566bbf 1898 {
29f7c58a 1899 # with '};' inside yt initial data (see [1])
1900 # see [2] for an example with '};' inside ytInitialPlayerResponse
1901 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1902 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1903 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1904 'info_dict': {
1905 'id': 'CHqg6qOn4no',
1906 'ext': 'mp4',
1907 'title': 'Part 77 Sort a list of simple types in c#',
1908 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1909 'upload_date': '20130831',
1910 'uploader_id': 'kudvenkat',
1911 'uploader': 'kudvenkat',
976ae3ea 1912 'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',
1913 'like_count': int,
1914 'uploader_url': 'http://www.youtube.com/user/kudvenkat',
1915 'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',
1916 'live_status': 'not_live',
1917 'categories': ['Education'],
1918 'availability': 'public',
1919 'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',
1920 'tags': 'count:12',
1921 'playable_in_embed': True,
1922 'age_limit': 0,
1923 'view_count': int,
1924 'duration': 522,
1925 'channel': 'kudvenkat',
12a1b225 1926 'comment_count': int,
6c73052c 1927 'channel_follower_count': int
a0566bbf 1928 },
1929 'params': {
1930 'skip_download': True,
1931 },
1932 },
29f7c58a 1933 {
1934 # another example of '};' in ytInitialData
1935 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1936 'only_matching': True,
1937 },
1938 {
1939 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1940 'only_matching': True,
1941 },
545cc85d 1942 {
cc2db878 1943 # https://github.com/ytdl-org/youtube-dl/pull/28094
1944 'url': 'OtqTfy26tG0',
1945 'info_dict': {
1946 'id': 'OtqTfy26tG0',
1947 'ext': 'mp4',
1948 'title': 'Burn Out',
1949 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1950 'upload_date': '20141120',
1951 'uploader': 'The Cinematic Orchestra - Topic',
1952 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1953 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1954 'artist': 'The Cinematic Orchestra',
1955 'track': 'Burn Out',
1956 'album': 'Every Day',
976ae3ea 1957 'like_count': int,
1958 'live_status': 'not_live',
1959 'alt_title': 'Burn Out',
1960 'duration': 614,
1961 'age_limit': 0,
1962 'view_count': int,
1963 'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1964 'creator': 'The Cinematic Orchestra',
1965 'channel': 'The Cinematic Orchestra',
1966 'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],
1967 'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1968 'availability': 'public',
1969 'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',
1970 'categories': ['Music'],
1971 'playable_in_embed': True,
6c73052c 1972 'channel_follower_count': int
cc2db878 1973 },
1974 'params': {
1975 'skip_download': True,
1976 },
545cc85d 1977 },
bc2ca1bb 1978 {
1979 # controversial video, only works with bpctr when authenticated with cookies
1980 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1981 'only_matching': True,
1982 },
a1a7907b 1983 {
1984 # controversial video, requires bpctr/contentCheckOk
1985 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
1986 'info_dict': {
1987 'id': 'SZJvDhaSDnc',
1988 'ext': 'mp4',
1989 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
1990 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
976ae3ea 1991 'uploader': 'CBS Mornings',
11f9be09 1992 'uploader_id': 'CBSThisMorning',
a1a7907b 1993 'upload_date': '20140716',
976ae3ea 1994 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',
1995 'duration': 170,
1996 'categories': ['News & Politics'],
1997 'uploader_url': 'http://www.youtube.com/user/CBSThisMorning',
1998 'view_count': int,
1999 'channel': 'CBS Mornings',
2000 'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],
2001 'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',
2002 'age_limit': 18,
2003 'availability': 'needs_auth',
2004 'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',
2005 'like_count': int,
2006 'live_status': 'not_live',
2007 'playable_in_embed': True,
6c73052c 2008 'channel_follower_count': int
a1a7907b 2009 }
2010 },
f7ad7160 2011 {
2012 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
2013 'url': 'cBvYw8_A0vQ',
2014 'info_dict': {
2015 'id': 'cBvYw8_A0vQ',
2016 'ext': 'mp4',
2017 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
2018 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
2019 'upload_date': '20201120',
2020 'uploader': 'Walk around Japan',
2021 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
2022 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
976ae3ea 2023 'duration': 1456,
2024 'categories': ['Travel & Events'],
2025 'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
2026 'view_count': int,
2027 'channel': 'Walk around Japan',
2028 'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],
2029 'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp',
2030 'age_limit': 0,
2031 'availability': 'public',
2032 'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
2033 'live_status': 'not_live',
2034 'playable_in_embed': True,
6c73052c 2035 'channel_follower_count': int
f7ad7160 2036 },
2037 'params': {
2038 'skip_download': True,
2039 },
0fb983f6 2040 }, {
2041 # Has multiple audio streams
2042 'url': 'WaOKSUlf4TM',
2043 'only_matching': True
9297939e 2044 }, {
2045 # Requires Premium: has format 141 when requested using YTM url
2046 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
2047 'only_matching': True
2048 }, {
120916da 2049 # multiple subtitles with same lang_code
2050 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
2051 'only_matching': True,
109dd3b2 2052 }, {
2053 # Force use android client fallback
2054 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
2055 'info_dict': {
2056 'id': 'YOelRv7fMxY',
11f9be09 2057 'title': 'DIGGING A SECRET TUNNEL Part 1',
109dd3b2 2058 'ext': '3gp',
2059 'upload_date': '20210624',
2060 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
2061 'uploader': 'colinfurze',
11f9be09 2062 'uploader_id': 'colinfurze',
109dd3b2 2063 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
976ae3ea 2064 'description': 'md5:5d5991195d599b56cd0c4148907eec50',
2065 'duration': 596,
2066 'categories': ['Entertainment'],
2067 'uploader_url': 'http://www.youtube.com/user/colinfurze',
2068 'view_count': int,
2069 'channel': 'colinfurze',
2070 'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],
2071 'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',
2072 'age_limit': 0,
2073 'availability': 'public',
2074 'like_count': int,
2075 'live_status': 'not_live',
2076 'playable_in_embed': True,
6c73052c 2077 'channel_follower_count': int
109dd3b2 2078 },
2079 'params': {
2080 'format': '17', # 3gp format available on android
2081 'extractor_args': {'youtube': {'player_client': ['android']}},
2082 },
120916da 2083 },
109dd3b2 2084 {
2085 # Skip download of additional client configs (remix client config in this case)
2086 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
2087 'only_matching': True,
2088 'params': {
2089 'extractor_args': {'youtube': {'player_skip': ['configs']}},
2090 },
8fc54b12 2091 }, {
2092 # shorts
2093 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
2094 'only_matching': True,
9222c381 2095 }, {
2096 'note': 'Storyboards',
2097 'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',
2098 'info_dict': {
2099 'id': '5KLPxDtMqe8',
2100 'ext': 'mhtml',
2101 'format_id': 'sb0',
2102 'title': 'Your Brain is Plastic',
2103 'uploader_id': 'scishow',
2104 'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',
2105 'upload_date': '20140324',
2106 'uploader': 'SciShow',
976ae3ea 2107 'like_count': int,
2108 'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',
2109 'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',
2110 'view_count': int,
2111 'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',
2112 'playable_in_embed': True,
2113 'tags': 'count:12',
2114 'uploader_url': 'http://www.youtube.com/user/scishow',
2115 'availability': 'public',
2116 'channel': 'SciShow',
2117 'live_status': 'not_live',
2118 'duration': 248,
2119 'categories': ['Education'],
2120 'age_limit': 0,
6c73052c 2121 'channel_follower_count': int
9222c381 2122 }, 'params': {'format': 'mhtml', 'skip_download': True}
992f9a73 2123 }, {
2124 # Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)
2125 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',
2126 'info_dict': {
2127 'id': '2NUZ8W2llS4',
2128 'ext': 'mp4',
2129 'title': 'The NP that test your phone performance 🙂',
2130 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',
2131 'uploader': 'Leon Nguyen',
2132 'uploader_id': 'VNSXIII',
2133 'uploader_url': 'http://www.youtube.com/user/VNSXIII',
2134 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',
2135 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',
2136 'duration': 21,
2137 'view_count': int,
2138 'age_limit': 0,
2139 'categories': ['Gaming'],
2140 'tags': 'count:23',
2141 'playable_in_embed': True,
2142 'live_status': 'not_live',
2143 'upload_date': '20220103',
2144 'like_count': int,
2145 'availability': 'public',
2146 'channel': 'Leon Nguyen',
2147 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',
12a1b225 2148 'comment_count': int,
992f9a73 2149 'channel_follower_count': int
2150 }
2151 }, {
2152 # date text is premiered video, ensure upload date in UTC (published 1641172509)
2153 'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',
2154 'info_dict': {
2155 'id': 'mzZzzBU6lrM',
2156 'ext': 'mp4',
2157 'title': 'I Met GeorgeNotFound In Real Life...',
2158 'description': 'md5:cca98a355c7184e750f711f3a1b22c84',
2159 'uploader': 'Quackity',
2160 'uploader_id': 'QuackityHQ',
2161 'uploader_url': 'http://www.youtube.com/user/QuackityHQ',
2162 'channel_id': 'UC_8NknAFiyhOUaZqHR3lq3Q',
2163 'channel_url': 'https://www.youtube.com/channel/UC_8NknAFiyhOUaZqHR3lq3Q',
2164 'duration': 955,
2165 'view_count': int,
2166 'age_limit': 0,
2167 'categories': ['Entertainment'],
2168 'tags': 'count:26',
2169 'playable_in_embed': True,
2170 'live_status': 'not_live',
2171 'release_timestamp': 1641172509,
2172 'release_date': '20220103',
2173 'upload_date': '20220103',
2174 'like_count': int,
2175 'availability': 'public',
2176 'channel': 'Quackity',
2177 'thumbnail': 'https://i.ytimg.com/vi/mzZzzBU6lrM/maxresdefault.jpg',
2178 'channel_follower_count': int
2179 }
2180 },
2181 { # continuous livestream. Microformat upload date should be preferred.
2182 # Upload date was 2021-06-19 (not UTC), while stream start is 2021-11-27
2183 'url': 'https://www.youtube.com/watch?v=kgx4WGK0oNU',
2184 'info_dict': {
2185 'id': 'kgx4WGK0oNU',
2186 'title': r're:jazz\/lofi hip hop radio🌱chill beats to relax\/study to \[LIVE 24\/7\] \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
2187 'ext': 'mp4',
2188 'channel_id': 'UC84whx2xxsiA1gXHXXqKGOA',
2189 'availability': 'public',
2190 'age_limit': 0,
2191 'release_timestamp': 1637975704,
2192 'upload_date': '20210619',
2193 'channel_url': 'https://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',
2194 'live_status': 'is_live',
2195 'thumbnail': 'https://i.ytimg.com/vi/kgx4WGK0oNU/maxresdefault.jpg',
2196 'uploader': '阿鲍Abao',
2197 'uploader_url': 'http://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',
2198 'channel': 'Abao in Tokyo',
2199 'channel_follower_count': int,
2200 'release_date': '20211127',
2201 'tags': 'count:39',
2202 'categories': ['People & Blogs'],
2203 'like_count': int,
2204 'uploader_id': 'UC84whx2xxsiA1gXHXXqKGOA',
2205 'view_count': int,
2206 'playable_in_embed': True,
2207 'description': 'md5:2ef1d002cad520f65825346e2084e49d',
2208 },
2209 'params': {'skip_download': True}
6e634cbe 2210 }, {
2211 # Story. Requires specific player params to work.
ee27297f 2212 'url': 'https://www.youtube.com/watch?v=vv8qTUWmulI',
6e634cbe 2213 'info_dict': {
ee27297f 2214 'id': 'vv8qTUWmulI',
6e634cbe 2215 'ext': 'mp4',
ee27297f 2216 'availability': 'unlisted',
2217 'view_count': int,
2218 'channel_id': 'UCzIZ8HrzDgc-pNQDUG6avBA',
2219 'upload_date': '20220526',
2220 'categories': ['Education'],
2221 'title': 'Story',
2222 'channel': 'IT\'S HISTORY',
2223 'description': '',
2224 'uploader_id': 'BlastfromthePast',
2225 'duration': 12,
2226 'uploader': 'IT\'S HISTORY',
6e634cbe 2227 'playable_in_embed': True,
6e634cbe 2228 'age_limit': 0,
6e634cbe 2229 'live_status': 'not_live',
ee27297f 2230 'tags': [],
2231 'thumbnail': 'https://i.ytimg.com/vi_webp/vv8qTUWmulI/maxresdefault.webp',
2232 'uploader_url': 'http://www.youtube.com/user/BlastfromthePast',
2233 'channel_url': 'https://www.youtube.com/channel/UCzIZ8HrzDgc-pNQDUG6avBA',
12a1b225
A
2234 },
2235 'skip': 'stories get removed after some period of time',
ee27297f 2236 }, {
2237 'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA',
2238 'info_dict': {
2239 'id': 'tjjjtzRLHvA',
2240 'ext': 'mp4',
2241 'title': 'ハッシュタグ無し };if window.ytcsi',
2242 'upload_date': '20220323',
2243 'like_count': int,
2244 'availability': 'unlisted',
2245 'channel': 'nao20010128nao',
2246 'thumbnail': 'https://i.ytimg.com/vi_webp/tjjjtzRLHvA/maxresdefault.webp',
2247 'age_limit': 0,
2248 'uploader': 'nao20010128nao',
2249 'uploader_id': 'nao20010128nao',
2250 'categories': ['Music'],
6e634cbe 2251 'view_count': int,
2252 'description': '',
ee27297f 2253 'channel_url': 'https://www.youtube.com/channel/UCdqltm_7iv1Vs6kp6Syke5A',
2254 'channel_id': 'UCdqltm_7iv1Vs6kp6Syke5A',
2255 'live_status': 'not_live',
2256 'playable_in_embed': True,
2257 'channel_follower_count': int,
2258 'duration': 6,
2259 'tags': [],
2260 'uploader_url': 'http://www.youtube.com/user/nao20010128nao',
6e634cbe 2261 }
a4166234 2262 }, {
2263 'note': '6 channel audio',
2264 'url': 'https://www.youtube.com/watch?v=zgdo7-RRjgo',
2265 'only_matching': True,
6e634cbe 2266 }
2eb88d95
PH
2267 ]
2268
# Test fixtures keyed on third-party page URLs (not YouTube watch URLs).
# presumably consumed by the embed-detection tests - confirm against the test harness
_WEBPAGE_TESTS = [
    # YouTube <object> embed
    {
        'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',
        # NOTE(review): this value is identical to the hash in 'description' below,
        # which looks like a copy/paste; it is not exercised while skip_download is set
        'md5': '873c81d308b979f0e23ee7e620b312a3',
        'info_dict': {
            'id': 'msN87y-iEx0',
            'ext': 'mp4',
            'title': 'Feynman: Mirrors FUN TO IMAGINE 6',
            'upload_date': '20080526',
            'description': 'md5:873c81d308b979f0e23ee7e620b312a3',
            'uploader': 'Christopher Sykes',
            'uploader_id': 'ChristopherJSykes',
            'age_limit': 0,
            'tags': ['feynman', 'mirror', 'science', 'physics', 'imagination', 'fun', 'cool', 'puzzle'],
            'channel_id': 'UCCeo--lls1vna5YJABWAcVA',
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi/msN87y-iEx0/hqdefault.jpg',
            'like_count': int,
            'comment_count': int,
            'channel': 'Christopher Sykes',
            'live_status': 'not_live',
            'channel_url': 'https://www.youtube.com/channel/UCCeo--lls1vna5YJABWAcVA',
            'availability': 'public',
            'duration': 195,
            'view_count': int,
            'categories': ['Science & Technology'],
            'channel_follower_count': int,
            'uploader_url': 'http://www.youtube.com/user/ChristopherJSykes',
        },
        'params': {
            'skip_download': True,
        }
    },
]
2304
@classmethod
def suitable(cls, url):
    """
    Tell whether this extractor should handle ``url``.

    A URL whose query string carries a non-empty ``list`` parameter is
    rejected here; every other URL is judged by the parent class.
    """
    from ..utils import parse_qs

    list_values = parse_qs(url).get('list', [None])
    return False if list_values[0] else super().suitable(url)
201c1459 2313
def __init__(self, *args, **kwargs):
    """Initialise the parent class and create the empty per-instance caches."""
    super().__init__(*args, **kwargs)
    # player id -> player JS source (filled by _load_player)
    self._code_cache = dict()
    # cache-id tuple -> computed value or stored exception (filled by _cached)
    self._player_cache = dict()
e0df6211 2318
adbc4ec4 2319 def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data):
adbc4ec4
THD
2320 lock = threading.Lock()
2321
2322 is_live = True
185bf310 2323 start_time = time.time()
adbc4ec4
THD
2324 formats = [f for f in formats if f.get('is_from_start')]
2325
185bf310 2326 def refetch_manifest(format_id, delay):
2327 nonlocal formats, start_time, is_live
2328 if time.time() <= start_time + delay:
adbc4ec4
THD
2329 return
2330
2331 _, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
2332 video_details = traverse_obj(
2333 prs, (..., 'videoDetails'), expected_type=dict, default=[])
2334 microformats = traverse_obj(
2335 prs, (..., 'microformat', 'playerMicroformatRenderer'),
2336 expected_type=dict, default=[])
c646d76f 2337 _, is_live, _, formats, _ = self._list_formats(video_id, microformats, video_details, prs, player_url)
185bf310 2338 start_time = time.time()
adbc4ec4 2339
185bf310 2340 def mpd_feed(format_id, delay):
adbc4ec4
THD
2341 """
2342 @returns (manifest_url, manifest_stream_number, is_live) or None
2343 """
2344 with lock:
185bf310 2345 refetch_manifest(format_id, delay)
adbc4ec4
THD
2346
2347 f = next((f for f in formats if f['format_id'] == format_id), None)
2348 if not f:
185bf310 2349 if not is_live:
2350 self.to_screen(f'{video_id}: Video is no longer live')
2351 else:
2352 self.report_warning(
2353 f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}')
adbc4ec4
THD
2354 return None
2355 return f['manifest_url'], f['manifest_stream_number'], is_live
2356
2357 for f in formats:
a539f065 2358 f['is_live'] = True
adbc4ec4
THD
2359 f['protocol'] = 'http_dash_segments_generator'
2360 f['fragments'] = functools.partial(
2361 self._live_dash_fragments, f['format_id'], live_start_time, mpd_feed)
2362
2363 def _live_dash_fragments(self, format_id, live_start_time, mpd_feed, ctx):
2364 FETCH_SPAN, MAX_DURATION = 5, 432000
2365
2366 mpd_url, stream_number, is_live = None, None, True
2367
2368 begin_index = 0
2369 download_start_time = ctx.get('start') or time.time()
2370
2371 lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION
2372 if lack_early_segments:
2373 self.report_warning(bug_reports_message(
2374 'Starting download from the last 120 hours of the live stream since '
2375 'YouTube does not have data before that. If you think this is wrong,'), only_once=True)
2376 lack_early_segments = True
2377
2378 known_idx, no_fragment_score, last_segment_url = begin_index, 0, None
2379 fragments, fragment_base_url = None, None
2380
a539f065 2381 def _extract_sequence_from_mpd(refresh_sequence, immediate):
adbc4ec4
THD
2382 nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
2383 # Obtain from MPD's maximum seq value
2384 old_mpd_url = mpd_url
185bf310 2385 last_error = ctx.pop('last_error', None)
14f25df2 2386 expire_fast = immediate or last_error and isinstance(last_error, urllib.error.HTTPError) and last_error.code == 403
185bf310 2387 mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
2388 or (mpd_url, stream_number, False))
2389 if not refresh_sequence:
2390 if expire_fast and not is_live:
2391 return False, last_seq
2392 elif old_mpd_url == mpd_url:
2393 return True, last_seq
adbc4ec4
THD
2394 try:
2395 fmts, _ = self._extract_mpd_formats_and_subtitles(
2396 mpd_url, None, note=False, errnote=False, fatal=False)
2397 except ExtractorError:
2398 fmts = None
2399 if not fmts:
a539f065 2400 no_fragment_score += 2
adbc4ec4
THD
2401 return False, last_seq
2402 fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)
2403 fragments = fmt_info['fragments']
2404 fragment_base_url = fmt_info['fragment_base_url']
2405 assert fragment_base_url
2406
2407 _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
2408 return True, _last_seq
2409
2410 while is_live:
2411 fetch_time = time.time()
2412 if no_fragment_score > 30:
2413 return
2414 if last_segment_url:
2415 # Obtain from "X-Head-Seqnum" header value from each segment
2416 try:
2417 urlh = self._request_webpage(
2418 last_segment_url, None, note=False, errnote=False, fatal=False)
2419 except ExtractorError:
2420 urlh = None
2421 last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))
2422 if last_seq is None:
a539f065 2423 no_fragment_score += 2
adbc4ec4
THD
2424 last_segment_url = None
2425 continue
2426 else:
a539f065
LNO
2427 should_continue, last_seq = _extract_sequence_from_mpd(True, no_fragment_score > 15)
2428 no_fragment_score += 2
185bf310 2429 if not should_continue:
adbc4ec4
THD
2430 continue
2431
2432 if known_idx > last_seq:
2433 last_segment_url = None
2434 continue
2435
2436 last_seq += 1
2437
2438 if begin_index < 0 and known_idx < 0:
2439 # skip from the start when it's negative value
2440 known_idx = last_seq + begin_index
2441 if lack_early_segments:
2442 known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))
2443 try:
2444 for idx in range(known_idx, last_seq):
2445 # do not update sequence here or you'll get skipped some part of it
a539f065 2446 should_continue, _ = _extract_sequence_from_mpd(False, False)
185bf310 2447 if not should_continue:
adbc4ec4
THD
2448 known_idx = idx - 1
2449 raise ExtractorError('breaking out of outer loop')
2450 last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)
2451 yield {
2452 'url': last_segment_url,
36195c44 2453 'fragment_count': last_seq,
adbc4ec4
THD
2454 }
2455 if known_idx == last_seq:
2456 no_fragment_score += 5
2457 else:
2458 no_fragment_score = 0
2459 known_idx = last_seq
2460 except ExtractorError:
2461 continue
2462
2463 time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))
2464
def _extract_player_url(self, *ytcfgs, webpage=None):
    """
    Return the absolute player JS URL found in any of ``ytcfgs``, or None.

    ``webpage`` is accepted but not consulted.
    """
    lookup_paths = (
        (..., 'PLAYER_JS_URL'),
        (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
    )
    found = traverse_obj(ytcfgs, *lookup_paths, get_all=False, expected_type=str)
    return urljoin('https://www.youtube.com', found) if found else None
109dd3b2 2472
b6de707d 2473 def _download_player_url(self, video_id, fatal=False):
2474 res = self._download_webpage(
2475 'https://www.youtube.com/iframe_api',
2476 note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
2477 if res:
2478 player_version = self._search_regex(
2479 r'player\\?/([0-9a-fA-F]{8})\\?/', res, 'player version', fatal=fatal)
2480 if player_version:
2481 return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'
2482
60064c53
PH
2483 def _signature_cache_id(self, example_sig):
2484 """ Return a string representation of a signature """
14f25df2 2485 return '.'.join(str(len(part)) for part in example_sig.split('.'))
60064c53 2486
e40c758c
S
2487 @classmethod
2488 def _extract_player_info(cls, player_url):
2489 for player_re in cls._PLAYER_INFO_RE:
2490 id_m = re.search(player_re, player_url)
2491 if id_m:
2492 break
2493 else:
c081b35c 2494 raise ExtractorError('Cannot identify player %r' % player_url)
545cc85d 2495 return id_m.group('id')
e40c758c 2496
404f611f 2497 def _load_player(self, video_id, player_url, fatal=True):
109dd3b2 2498 player_id = self._extract_player_info(player_url)
2499 if player_id not in self._code_cache:
1276a43a 2500 code = self._download_webpage(
109dd3b2 2501 player_url, video_id, fatal=fatal,
2502 note='Downloading player ' + player_id,
2503 errnote='Download of %s failed' % player_url)
1276a43a 2504 if code:
2505 self._code_cache[player_id] = code
404f611f 2506 return self._code_cache.get(player_id)
109dd3b2 2507
e40c758c 2508 def _extract_signature_function(self, video_id, player_url, example_sig):
545cc85d 2509 player_id = self._extract_player_info(player_url)
e0df6211 2510
c4417ddb 2511 # Read from filesystem cache
86e5f3ed 2512 func_id = f'js_{player_id}_{self._signature_cache_id(example_sig)}'
c4417ddb 2513 assert os.path.basename(func_id) == func_id
a0e07d31 2514
ae61d108 2515 self.write_debug(f'Extracting signature function {func_id}')
580ce007 2516 cache_spec, code = self.cache.load('youtube-sigfuncs', func_id), None
83799698 2517
580ce007 2518 if not cache_spec:
2519 code = self._load_player(video_id, player_url)
404f611f 2520 if code:
109dd3b2 2521 res = self._parse_sig_js(code)
ac668111 2522 test_string = ''.join(map(chr, range(len(example_sig))))
580ce007 2523 cache_spec = [ord(c) for c in res(test_string)]
9809740b 2524 self.cache.store('youtube-sigfuncs', func_id, cache_spec)
580ce007 2525
2526 return lambda s: ''.join(s[i] for i in cache_spec)
83799698 2527
60064c53 2528 def _print_sig_code(self, func, example_sig):
404f611f 2529 if not self.get_param('youtube_print_sig_code'):
2530 return
2531
edf3e38e
PH
2532 def gen_sig_code(idxs):
2533 def _genslice(start, end, step):
78caa52a 2534 starts = '' if start == 0 else str(start)
8bcc8756 2535 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
69ea8ca4 2536 steps = '' if step == 1 else (':%d' % step)
86e5f3ed 2537 return f's[{starts}{ends}{steps}]'
edf3e38e
PH
2538
2539 step = None
7af808a5
PH
2540 # Quelch pyflakes warnings - start will be set when step is set
2541 start = '(Never used)'
edf3e38e
PH
2542 for i, prev in zip(idxs[1:], idxs[:-1]):
2543 if step is not None:
2544 if i - prev == step:
2545 continue
2546 yield _genslice(start, prev, step)
2547 step = None
2548 continue
2549 if i - prev in [-1, 1]:
2550 step = i - prev
2551 start = prev
2552 continue
2553 else:
78caa52a 2554 yield 's[%d]' % prev
edf3e38e 2555 if step is None:
78caa52a 2556 yield 's[%d]' % i
edf3e38e
PH
2557 else:
2558 yield _genslice(start, i, step)
2559
ac668111 2560 test_string = ''.join(map(chr, range(len(example_sig))))
c705320f 2561 cache_res = func(test_string)
edf3e38e 2562 cache_spec = [ord(c) for c in cache_res]
78caa52a 2563 expr_code = ' + '.join(gen_sig_code(cache_spec))
60064c53 2564 signature_id_tuple = '(%s)' % (
14f25df2 2565 ', '.join(str(len(p)) for p in example_sig.split('.')))
69ea8ca4 2566 code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
78caa52a 2567 ' return %s\n') % (signature_id_tuple, expr_code)
69ea8ca4 2568 self.to_screen('Extracted signature function:\n' + code)
edf3e38e 2569
def _parse_sig_js(self, jscode):
    """
    Locate the signature function in the player JS and wrap it for Python use.

    Returns a callable taking the scrambled signature string.
    """
    name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        name_patterns, jscode, 'Initial JS player signature function name', group='sig')

    initial_function = JSInterpreter(jscode).extract_function(funcname)

    def run_signature_function(s):
        # The extracted function takes its arguments as a list
        return initial_function([s])

    return run_signature_function
2593
580ce007 2594 def _cached(self, func, *cache_id):
2595 def inner(*args, **kwargs):
2596 if cache_id not in self._player_cache:
2597 try:
2598 self._player_cache[cache_id] = func(*args, **kwargs)
2599 except ExtractorError as e:
2600 self._player_cache[cache_id] = e
2601 except Exception as e:
2602 self._player_cache[cache_id] = ExtractorError(traceback.format_exc(), cause=e)
2603
2604 ret = self._player_cache[cache_id]
2605 if isinstance(ret, Exception):
2606 raise ret
2607 return ret
2608 return inner
2609
545cc85d 2610 def _decrypt_signature(self, s, video_id, player_url):
257a2501 2611 """Turn the encrypted s field into a working signature"""
580ce007 2612 extract_sig = self._cached(
2613 self._extract_signature_function, 'sig', player_url, self._signature_cache_id(s))
2614 func = extract_sig(video_id, player_url, s)
2615 self._print_sig_code(func, s)
2616 return func(s)
404f611f 2617
def _decrypt_nsig(self, s, video_id, player_url):
    """
    Transform the n field with the player's n function.

    The native JS interpreter is tried first. If it raises
    JSInterpreter.Exception, the function code is run through PhantomJS
    instead; when PhantomJS cannot be set up, the interpreter error is raised.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')
    player_url = urljoin('https://www.youtube.com', player_url)

    jsi, player_id, func_code = self._extract_n_function_code(video_id, player_url)
    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    try:
        load_nsig_func = self._cached(self._extract_n_function_from_code, 'nsig func', player_url)
        result = load_nsig_func(jsi, func_code)(s)
    except JSInterpreter.Exception as native_error:
        try:
            jsi = PhantomJSwrapper(self)
        except ExtractorError:
            raise native_error
        self.report_warning(
            f'Native nsig extraction failed: Trying with PhantomJS\n'
            f' n = {s} ; player = {player_url}', video_id)
        self.write_debug(native_error)

        arg_names, func_body = func_code
        script = f'console.log(function({", ".join(arg_names)}) {{ {func_body} }}({s!r}));'
        result = jsi.execute(
            script, video_id=video_id, note='Executing signature code').strip()

    self.write_debug(f'Decrypted nsig {s} => {result}')
    return result
2648
90a1df30 2649 def _extract_n_function_name(self, jscode):
2650 funcname, idx = self._search_regex(
2651 r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)',
2652 jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
2653 if not idx:
2654 return funcname
2655
2656 return json.loads(js_to_json(self._search_regex(
2657 rf'var {re.escape(funcname)}\s*=\s*(\[.+?\]);', jscode,
2658 f'Initial JS player n function list ({funcname}.{idx})')))[int(idx)]
2659
def _extract_n_function_code(self, video_id, player_url):
    """
    Return (interpreter, player_id, func_code) for the player's n function.

    ``func_code`` comes from the 'youtube-nsig' cache when present; otherwise
    it is extracted from the downloaded player JS and stored in that cache.
    """
    player_id = self._extract_player_info(player_url)

    cached_code = self.cache.load('youtube-nsig', player_id)
    if cached_code:
        # NOTE(review): on a cache hit the interpreter is built from the cached
        # value itself, not from the player source
        return JSInterpreter(cached_code), player_id, cached_code

    jscode = self._load_player(video_id, player_url)
    jsi = JSInterpreter(jscode)
    func_code = jsi.extract_function_code(self._extract_n_function_name(jscode))
    self.cache.store('youtube-nsig', player_id, func_code)
    return jsi, player_id, func_code
2672
2673 def _extract_n_function_from_code(self, jsi, func_code):
8f53dc44 2674 func = jsi.extract_function_from_code(*func_code)
f6ca640b 2675
580ce007 2676 def extract_nsig(s):
25836db6 2677 try:
2678 ret = func([s])
2679 except JSInterpreter.Exception:
2680 raise
2681 except Exception as e:
2682 raise JSInterpreter.Exception(traceback.format_exc(), cause=e)
2683
f6ca640b 2684 if ret.startswith('enhanced_except_'):
25836db6 2685 raise JSInterpreter.Exception('Signature function returned an exception')
f6ca640b 2686 return ret
580ce007 2687
2688 return extract_nsig
e0df6211 2689
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    ytcfg['STS'] is preferred; otherwise the value is searched for in the player JS.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return

    code = self._load_player(video_id, player_url, fatal=fatal)
    if not code:
        return sts
    return int_or_none(self._search_regex(
        r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
        'JS player signature timestamp', group='sts', fatal=fatal))
2713
def _mark_watched(self, video_id, player_responses):
    """
    Request the playback and watch-time tracking URLs found in the player responses.

    Stops with a warning at the first tracking URL that is missing.
    Failures of the requests themselves are not fatal.
    """
    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'

    tracking_keys = ('videostatsPlaybackUrl', 'videostatsWatchtimeUrl')
    for is_full, key in enumerate(tracking_keys):
        label = 'fully ' if is_full else ''
        url = get_first(player_responses, ('playbackTracking', key, 'baseUrl'),
                        expected_type=url_or_none)
        if not url:
            self.report_warning(f'Unable to mark {label}watched')
            return
        parsed_url = urllib.parse.urlparse(url)
        qs = urllib.parse.parse_qs(parsed_url.query)

        cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16))

        # more consistent results setting it to right before the end
        reported_length = (qs.get('len') or ['1.5'])[0]
        video_length = [str(float(reported_length) - 1)]

        tracking_params = {
            'ver': ['2'],
            'cpn': [cpn],
            'cmt': video_length,
            'el': 'detailpage',  # otherwise defaults to "shorts"
        }
        if is_full:
            # these seem to mark watchtime "history" in the real world
            # they're required, so send in a single value
            tracking_params.update(st=video_length, et=video_length)
        qs.update(tracking_params)

        new_query = urllib.parse.urlencode(qs, True)
        url = urllib.parse.urlunparse(parsed_url._replace(query=new_query))

        self._download_webpage(
            url, video_id, f'Marking {label}watched',
            'Unable to mark watched', fatal=False)
d77ab8e2 2754
@classmethod
def _extract_from_webpage(cls, url, webpage):
    """
    Yield url results for YouTube videos referenced by ``webpage``.

    An Invidious-style ``<link rel="alternate">`` pointing at a watch URL is
    yielded alone, after which StopExtraction is raised. Otherwise the parent
    class results come first, followed by lazyYT embeds and embeds of the
    Wordpress "YouTube Video Importer" plugin.
    """
    # Invidious Instances
    # https://github.com/yt-dlp/yt-dlp/issues/195
    # https://github.com/iv-org/invidious/pull/1730
    alternate = re.search(
        r'<link rel="alternate" href="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"',
        webpage)
    if alternate:
        yield cls.url_result(alternate.group('url'), cls)
        raise cls.StopExtraction()

    yield from super()._extract_from_webpage(url, webpage)

    # lazyYT YouTube embed
    lazy_ids = re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
    for id_ in lazy_ids:
        yield cls.url_result(unescapeHTML(id_), cls, id_)

    # Wordpress "YouTube Video Importer" plugin
    importer_matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    for groups in importer_matches:
        # the video id is the last captured group
        embedded_id = groups[-1]
        yield cls.url_result(embedded_id, cls, embedded_id)
66c9fa36 2778
@classmethod
def extract_id(cls, url):
    """Return the video id for ``url``; raise ExtractorError when none can be determined"""
    video_id = cls.get_temp_id(url)
    if video_id:
        return video_id
    raise ExtractorError(f'Invalid URL: {url}')
c5e8d7af 2785
def _extract_chapters_from_json(self, data, duration):
    """Extract chapters from the chaptered player bar in ``data``"""
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
    )
    chapter_list = traverse_obj(data, chapters_path, expected_type=list)

    def chapter_time(chapter):
        # timeRangeStartMillis is scaled down by 1000
        start_ms = traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis'))
        return float_or_none(start_ms, scale=1000)

    def chapter_title(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    return self._extract_chapters(
        chapter_list, chapter_time=chapter_time, chapter_title=chapter_title, duration=duration)
2800
def _extract_chapters_from_engagement_panel(self, data, duration):
    """
    Extract chapters from the macro-marker lists of the engagement panels.

    Returns the chapters of the first panel that produces any, else an empty list.
    """
    content_list = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    for contents in content_list:
        markers = traverse_obj(contents, (..., 'macroMarkersListItemRenderer'))
        chapters = self._extract_chapters(markers, chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []
7c365c21 2813
def _extract_chapters_from_description(self, description, duration):
    """Extract chapters from description lines that start with a timestamp"""
    # Each match is a (timestamp, title) pair
    timestamp_lines = re.findall(
        r'(?m)^((?:\d+:)?\d{1,2}:\d{2})\b\W*\s(.+?)\s*$', description or '')

    def chapter_time(match):
        return parse_duration(match[0])

    def chapter_title(match):
        return match[1]

    return self._extract_chapters(
        timestamp_lines, chapter_time=chapter_time, chapter_title=chapter_title,
        duration=duration, strict=False)
84213ea8 2819
1890fc63 2820 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration, strict=True):
2821 if not duration:
2822 return
2823 chapter_list = [{
2824 'start_time': chapter_time(chapter),
2825 'title': chapter_title(chapter),
2826 } for chapter in chapter_list or []]
2827 if not strict:
2828 chapter_list.sort(key=lambda c: c['start_time'] or 0)
2829
a3976e07 2830 chapters = [{'start_time': 0}]
1890fc63 2831 for idx, chapter in enumerate(chapter_list):
a3976e07 2832 if chapter['start_time'] is None:
1890fc63 2833 self.report_warning(f'Incomplete chapter {idx}')
2834 elif chapters[-1]['start_time'] <= chapter['start_time'] <= duration:
1890fc63 2835 chapters.append(chapter)
2836 else:
2837 self.report_warning(f'Invalid start time for chapter "{chapter["title"]}"')
a3976e07 2838 return chapters[1:]
84213ea8 2839
a1c5d2ca
M
2840 def _extract_comment(self, comment_renderer, parent=None):
2841 comment_id = comment_renderer.get('commentId')
2842 if not comment_id:
2843 return
fe93e2c4 2844
052e1350 2845 text = self._get_text(comment_renderer, 'contentText')
fe93e2c4 2846
49bd8c66 2847 # note: timestamp is an estimate calculated from the current time and time_text
f3aa3c3f 2848 timestamp, time_text = self._extract_time_text(comment_renderer, 'publishedTimeText')
052e1350 2849 author = self._get_text(comment_renderer, 'authorText')
a1c5d2ca 2850 author_id = try_get(comment_renderer,
14f25df2 2851 lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], str)
fe93e2c4 2852
49bd8c66 2853 votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
14f25df2 2854 lambda x: x['likeCount']), str)) or 0
a1c5d2ca 2855 author_thumbnail = try_get(comment_renderer,
14f25df2 2856 lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], str)
a1c5d2ca
M
2857
2858 author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
97524332 2859 is_favorited = 'creatorHeart' in (try_get(
2860 comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
a1c5d2ca
M
2861 return {
2862 'id': comment_id,
2863 'text': text,
d92f5d5a 2864 'timestamp': timestamp,
a1c5d2ca
M
2865 'time_text': time_text,
2866 'like_count': votes,
97524332 2867 'is_favorited': is_favorited,
a1c5d2ca
M
2868 'author': author,
2869 'author_id': author_id,
2870 'author_thumbnail': author_thumbnail,
2871 'author_is_uploader': author_is_uploader,
2872 'parent': parent or 'root'
2873 }
2874
def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """
    Generate comment dicts by following continuations from ``root_continuation_data``.

    Called without ``parent`` for the comment section itself, and recursively
    with ``parent`` set to a comment id for that comment's replies.
    ``tracker`` is the dict of running counters shared between those calls.
    """

    def get_single_config_arg(name):
        return self._configuration_arg(name, [''])[0]

    def extract_header(contents):
        """Return the continuation for the requested sort order, announcing count and order"""
        _continuation = None
        for content in contents:
            header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
            expected_comment_count = self._get_count(
                header_renderer, 'countText', 'commentsCount')

            if expected_comment_count:
                tracker['est_total'] = expected_comment_count
                self.to_screen(f'Downloading ~{expected_comment_count} comments')
            comment_sort_index = int(get_single_config_arg('comment_sort') != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = (
                self._extract_continuation_ep_data(sort_continuation_ep)
                or self._extract_continuation(sort_menu_item))
            if not _continuation:
                continue

            sort_text = str_or_none(sort_menu_item.get('title'))
            if not sort_text:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text.lower())
            break
        return _continuation

    def extract_thread(contents):
        """Yield the comments in ``contents``, each followed by its replies; None signals the parent limit"""
        if not parent:
            tracker['current_page_thread'] = 0
        for content in contents:
            if not parent and tracker['total_parent_comments'] >= max_parents:
                # The consumer below stops the whole generator on a falsy entry
                yield
            thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            comment_renderer = get_first(
                (thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                expected_type=dict, default={})

            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue

            tracker['running_total'] += 1
            counter_key = 'total_reply_comments' if parent else 'total_parent_comments'
            tracker[counter_key] += 1
            yield comment

            # Attempt to get the replies
            replies_renderer = try_get(
                thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if replies_renderer:
                tracker['current_page_thread'] += 1
                reply_entries = self._comment_entries(
                    replies_renderer, ytcfg, video_id,
                    parent=comment.get('id'), tracker=tracker)
                remaining_replies = max(0, max_replies - tracker['total_reply_comments'])
                yield from itertools.islice(
                    reply_entries, min(max_replies_per_thread, remaining_replies))

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = {
            'running_total': 0,
            'est_total': 0,
            'current_page_thread': 0,
            'total_parent_comments': 0,
            'total_reply_comments': 0,
        }

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
    if max_depth:
        self._downloader.deprecation_warning(
            '[youtube] max_comment_depth extractor argument is deprecated. Set max replies in the max-comments extractor argument instead.')
    if max_depth == 1 and parent:
        return

    # Missing or invalid entries of the max_comments argument count as "no limit"
    limit_args = self._configuration_arg('max_comments') + [''] * 4
    max_comments, max_parents, max_replies, max_replies_per_thread, *_ = (
        int_or_none(arg, default=sys.maxsize) for arg in limit_args)

    continuation = self._extract_continuation(root_continuation_data)

    response = None
    is_forced_continuation = False
    is_first_continuation = parent is None
    if is_first_continuation and not continuation:
        # Sometimes you can get comments by generating the continuation yourself,
        # even if YouTube initially reports them being disabled - e.g. stories comments.
        # Note: if the comment section is actually disabled, YouTube may return a response with
        # required check_get_keys missing. So we will disable that check initially in this case.
        continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
        is_forced_continuation = True

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        comment_prog_str = f"({tracker['running_total']}/{tracker['est_total']})"
        if page_num != 0:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)
        elif is_first_continuation:
            note_prefix = 'Downloading comment section API JSON'
        else:
            note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                tracker['current_page_thread'], comment_prog_str)

        response = self._extract_response(
            item_id=None, query=continuation,
            ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
            check_get_keys='onResponseReceivedEndpoints' if not is_forced_continuation else None)
        is_forced_continuation = False
        continuation_contents = traverse_obj(
            response, 'onResponseReceivedEndpoints', expected_type=list, default=[])

        continuation = None
        for continuation_section in continuation_contents:
            continuation_items = traverse_obj(
                continuation_section,
                (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
                get_all=False, expected_type=list) or []
            if is_first_continuation:
                # The first section only carries the header; it yields no comments
                continuation = extract_header(continuation_items)
                is_first_continuation = False
                if continuation:
                    break
                continue

            for entry in extract_thread(continuation_items):
                if not entry:
                    return
                yield entry
            continuation = self._extract_continuation({'contents': continuation_items})
            if continuation:
                break

    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent and tracker['running_total'] == 0:
        self.report_warning(f'Youtube said: {message}', video_id=video_id, only_once=True)
3020
3021 @staticmethod
3022 def _generate_comment_continuation(video_id):
3023 """
3024 Generates initial comment section continuation token from given video id
3025 """
3026 token = f'\x12\r\x12\x0b{video_id}\x18\x062\'"\x11"\x0b{video_id}0\x00x\x020\x00B\x10comments-section'
3027 return base64.b64encode(token.encode()).decode()
3028
def _get_comments(self, ytcfg, video_id, contents, webpage):
    """
    Entry for comment extraction

    Returns an iterator over the comments of the 'comment-item-section'
    renderer, cut off after the first value of the max_comments argument.
    ``webpage`` is accepted but not used.
    """
    def _real_comment_extract(contents):
        section_renderers = traverse_obj(contents, (..., 'itemSectionRenderer'), default={})
        renderer = None
        for item in section_renderers:
            if item.get('sectionIdentifier') == 'comment-item-section':
                renderer = item
                break
        yield from self._comment_entries(renderer, ytcfg, video_id)

    max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0])
    all_comments = _real_comment_extract(contents)
    return itertools.islice(all_comments, 0, max_comments)
a1c5d2ca 3039
109dd3b2 3040 @staticmethod
99e9e001 3041 def _get_checkok_params():
3042 return {'contentCheckOk': True, 'racyCheckOk': True}
3043
@classmethod
def _generate_player_context(cls, sts=None):
    """
    Build the playback-context portion of a player request.

    @param sts  Signature timestamp; only included (as `signatureTimestamp`)
                when it is not None.
    @returns    Dict with a `playbackContext` entry, merged with the
                values from `_get_checkok_params()`.
    """
    playback = {'html5Preference': 'HTML5_PREF_WANTS'}
    if sts is not None:
        playback['signatureTimestamp'] = sts

    result = {'playbackContext': {'contentPlaybackContext': playback}}
    result.update(cls._get_checkok_params())
    return result
3057
e7e94f2a
D
@staticmethod
def _is_agegated(player_response):
    """
    Tell whether a player response indicates that the video is age-gated.

    @param player_response  Player response dict (may lack `playabilityStatus`).
    @returns                True when `desktopLegacyAgeGateReason` is set, or when
                            the playability `status`/`reason` contains one of the
                            known age-gate markers; False otherwise.
    """
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    # Status codes are upper-case (e.g. 'UNPLAYABLE', 'AGE_VERIFICATION_REQUIRED'),
    # so the lower-case markers above must be matched case-insensitively;
    # otherwise the status markers can never match.
    return any(
        expected in reason.lower()
        for reason in reasons if isinstance(reason, str)
        for expected in AGE_GATE_REASONS)
3069
@staticmethod
def _is_unplayable(player_response):
    """Return True if the response's playability status is exactly 'UNPLAYABLE'"""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'
9275f62c 3073
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr):
    """
    Request the player API JSON for `video_id` using the given client.

    The signature timestamp is only looked up when a `player_url` is given.
    Returns the response, or None if the response is falsy.
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
    sts = None
    if player_url:
        sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)
    api_headers = self.generate_api_headers(
        ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)

    query = {
        'videoId': video_id,
        'params': '8AEB',  # enable stories
        **self._generate_player_context(sts),
    }
    readable_client = client.replace('_', ' ').strip()
    response = self._extract_response(
        item_id=video_id, ep='player', query=query,
        ytcfg=player_ytcfg, headers=api_headers, fatal=True,
        default_client=client,
        note='Downloading %s player API JSON' % readable_client)
    return response or None
3093
def _get_requested_clients(self, url, smuggled_data):
    """
    Resolve the `player_client` extractor argument into an ordered,
    de-duplicated collection of client names.

    'default' expands to android + web, 'all' expands to every client whose
    name does not start with '_' (highest priority first). Unknown names
    are skipped with a warning. For music URLs, the `_music` variant of each
    selected client is added when such a client exists.
    """
    fallback = ['android', 'web']
    public_clients = [name for name in INNERTUBE_CLIENTS if not name.startswith('_')]
    public_clients.sort(key=lambda name: INNERTUBE_CLIENTS[name]['priority'], reverse=True)

    selected = []
    for name in self._configuration_arg('player_client'):
        if name in public_clients:
            selected.append(name)
            continue
        if name == 'default':
            selected.extend(fallback)
            continue
        if name == 'all':
            selected.extend(public_clients)
            continue
        self.report_warning(f'Skipping unsupported client {name}')

    if not selected:
        selected = fallback

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # NB: the generator walks the very list it is extending
        selected.extend(
            f'{name}_music' for name in selected if f'{name}_music' in INNERTUBE_CLIENTS)

    return orderedSet(selected)
cf7e015f 3117
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
    """
    Collect player responses for `video_id` from each requested client.

    @param clients       Sequence of client names to try, in order
    @param video_id      Id of the video being extracted
    @param webpage       Watch page contents, or a falsy value if not downloaded
    @param master_ytcfg  ytcfg used for the 'web' client and as fallback
    @returns             Tuple (list of player responses, player_url)
    @raises              The last ExtractorError encountered, if no player
                         response could be collected at all
    """
    initial_pr = None
    if webpage:
        initial_pr = self._search_json(
            self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)

    all_clients = set(clients)
    # Reversed copy used as a stack: pop() yields the clients in their original
    # order, and any fallback client appended below is tried immediately next
    clients = clients[::-1]
    prs = []

    def append_client(*client_names):
        """ Append the first client name that exists but not already used """
        for client_name in client_names:
            actual_client = _split_innertube_client(client_name)[0]
            if actual_client in INNERTUBE_CLIENTS:
                if actual_client not in all_clients:
                    clients.append(client_name)
                    all_clients.add(actual_client)
                # Stop at the first known client, whether or not it was newly added
                return

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr:
        pr = dict(initial_pr)
        pr['streamingData'] = None
        prs.append(pr)

    last_error = None
    tried_iframe_fallback = False
    player_url = None
    while clients:
        client, base_client, variant = _split_innertube_client(clients.pop())
        player_ytcfg = master_ytcfg if client == 'web' else {}
        if 'configs' not in self._configuration_arg('player_skip') and client != 'web':
            player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg

        # Once a player URL has been found it is reused for the remaining clients
        player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        # The dedicated player URL download is attempted at most once per call
        if not player_url and not tried_iframe_fallback and require_js_player:
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            # The web client reuses the response embedded in the webpage when available
            pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
                client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr)
        except ExtractorError as e:
            # Only the most recent error is kept; earlier ones are demoted to warnings
            if last_error:
                self.report_warning(last_error)
            last_error = e
            continue

        if pr:
            # YouTube may return a different video player response than expected.
            # See: https://github.com/TeamNewPipe/NewPipe/issues/8713
            pr_video_id = traverse_obj(pr, ('videoDetails', 'videoId'))
            if pr_video_id and pr_video_id != video_id:
                self.report_warning(
                    f'Skipping player response from {client} client (got player response for video "{pr_video_id}" instead of "{video_id}")' + bug_reports_message())
            else:
                prs.append(pr)

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
            append_client(f'{base_client}_creator')
        elif self._is_agegated(pr):
            if variant == 'tv_embedded':
                append_client(f'{base_client}_embedded')
            elif not variant:
                append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')

    if last_error:
        # The remaining error is fatal only when nothing at all was collected
        if not len(prs):
            raise last_error
        self.report_warning(last_error)
    return prs, player_url
11f9be09 3199
def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, is_live, duration):
    """
    Generator over all formats found in `streaming_data`.

    Yields one format dict per direct (https) stream, then per HLS/DASH
    manifest format that is not a duplicate, and finally - as the very last
    item - the merged subtitles dict gathered from the manifests.

    @param streaming_data  Iterable of `streamingData` dicts
    @param video_id        Id of the video being extracted
    @param player_url      Player URL passed to the signature/nsig decryption helpers
    @param is_live         Whether the video is currently live
    @param duration        Video duration, compared against each format's
                           `approxDurationMs` to detect damaged formats
    """
    # itags: format id -> protocol it was first seen with; stream_ids: "itag.audio_track_id" already yielded
    itags, stream_ids = {}, []
    # Quality names remembered per itag and per height, reused for manifest formats below
    itag_qualities, res_qualities = {}, {0: None}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            quality = fmt.get('audioQuality', '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragments that would subsequently be requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            # No plain URL: rebuild it from the signature cipher, which needs the player
            sc = urllib.parse.parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not all((sc, fmt_url, player_url, encrypted_sig)):
                continue
            try:
                fmt_url += '&%s=%s' % (
                    traverse_obj(sc, ('sp', -1)) or 'signature',
                    self._decrypt_signature(encrypted_sig, video_id, player_url)
                )
            except ExtractorError as e:
                self.report_warning('Signature extraction failed: Some formats may be missing',
                                    video_id=video_id, only_once=True)
                self.write_debug(e, only_once=True)
                continue

        query = parse_qs(fmt_url)
        throttled = False
        if query.get('n'):
            try:
                decrypt_nsig = self._cached(self._decrypt_nsig, 'nsig', query['n'][0])
                fmt_url = update_url_query(fmt_url, {
                    'n': decrypt_nsig(query['n'][0], video_id, player_url)
                })
            except ExtractorError as e:
                phantomjs_hint = ''
                if isinstance(e, JSInterpreter.Exception):
                    phantomjs_hint = f' Install {self._downloader._format_err("PhantomJS", self._downloader.Styles.EMPHASIS)} to workaround the issue\n'
                self.report_warning(
                    f'nsig extraction failed: You may experience throttling for some formats\n{phantomjs_hint}'
                    f' n = {query["n"][0]} ; player = {player_url}', video_id=video_id, only_once=True)
                self.write_debug(e, only_once=True)
                # The format is still yielded, but marked and deprioritized below
                throttled = True

        if itag:
            itags[itag] = 'https'
            stream_ids.append(stream_id)

        tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        # 10 for the default audio track, -10 for a "descriptive" track, -1 otherwise
        language_preference = (
            10 if audio_track.get('audioIsDefault') and 10
            else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower() and -10
            else -1)
        # Some formats may have much smaller duration than others (possibly damaged during encoding)
        # E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
        # Make sure to avoid false positives with small duration differences.
        # E.g. __2ABJjxzNo, ySuUZEjARPY
        is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) / duration < 500)
        if is_damaged:
            self.report_warning(
                f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
        # NOTE(review): `quality` can still be None at this point when the format has
        # neither `quality` nor `audioQuality`; `quality.replace` below would then
        # fail if `qualityLabel` is also missing - confirm whether that can occur
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': join_nonempty(
                '%s%s' % (audio_track.get('displayName') or '',
                          ' (default)' if language_preference > 0 else ''),
                fmt.get('qualityLabel') or quality.replace('audio_quality_', ''),
                try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
                try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
                throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '),
            # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
            'source_preference': -10 if throttled else -5 if itag == '22' else -1,
            'fps': int_or_none(fmt.get('fps')) or None,
            'audio_channels': fmt.get('audioChannels'),
            'height': height,
            'quality': q(quality),
            'has_drm': bool(fmt.get('drmFamilies')),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': join_nonempty(audio_track.get('id', '').split('.')[0],
                                      'desc' if language_preference < -1 else ''),
            'language_preference': language_preference,
            # Strictly de-prioritize damaged and 3gp formats
            'preference': -10 if is_damaged else -2 if itag == '17' else None,
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        # For single-stream formats the total bitrate is that stream's bitrate
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        yield dct

    live_from_start = is_live and self.get_param('live_from_start')
    skip_manifests = self._configuration_arg('skip')
    if not self.get_param('youtube_include_hls_manifest', True):
        skip_manifests.append('hls')
    if not self.get_param('youtube_include_dash_manifest', True):
        skip_manifests.append('dash')
    get_dash = 'dash' not in skip_manifests and (
        not is_live or live_from_start or self._configuration_arg('include_live_dash'))
    get_hls = not live_from_start and 'hls' not in skip_manifests

    def process_manifest_format(f, proto, itag):
        """Assign format id and quality to manifest format `f`; return False if it is a duplicate"""
        if itag in itags:
            if itags[itag] == proto or f'{itag}-{proto}' in itags:
                return False
            # Same itag already seen with another protocol: disambiguate the id
            itag = f'{itag}-{proto}'
        if itag:
            f['format_id'] = itag
            itags[itag] = proto

        f['quality'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1))
        # Unknown itag: borrow the quality recorded for the closest known height
        if f['quality'] == -1 and f.get('height'):
            f['quality'] = q(res_qualities[min(res_qualities, key=lambda x: abs(x - f['height']))])
        return True

    subtitles = {}
    for sd in streaming_data:
        hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            fmts, subs = self._extract_m3u8_formats_and_subtitles(hls_manifest_url, video_id, 'mp4', fatal=False, live=is_live)
            subtitles = self._merge_subtitles(subs, subtitles)
            for f in fmts:
                if process_manifest_format(f, 'hls', self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)):
                    yield f

        dash_manifest_url = get_dash and sd.get('dashManifestUrl')
        if dash_manifest_url:
            formats, subs = self._extract_mpd_formats_and_subtitles(dash_manifest_url, video_id, fatal=False)
            subtitles = self._merge_subtitles(subs, subtitles)  # Prioritize HLS subs over DASH
            for f in formats:
                if process_manifest_format(f, 'dash', f['format_id']):
                    f['filesize'] = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                    if live_from_start:
                        f['is_from_start'] = True

                    yield f
    # The subtitles dict is always the final item; callers split it off from the formats
    yield subtitles
11f9be09 3389
def _extract_storyboard(self, player_responses, duration):
    """
    Generator yielding one 'mhtml' storyboard format per storyboard level.

    The spec string is '|'-separated: its first part is the URL template,
    the remaining parts are '#'-separated level descriptions, which are
    processed in reverse order. Malformed levels are skipped with a warning.
    """
    raw_spec = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='')
    template, *levels = raw_spec.split('|')
    levels.reverse()

    base_url = url_or_none(urljoin('https://i.ytimg.com/', template or None))
    if not base_url:
        return

    top_level = len(levels) - 1
    for i, level in enumerate(levels):
        fields = level.split('#')
        counts = [int_or_none(value) for value in fields[:5]]
        if len(fields) != 8 or not all(counts):
            self.report_warning(f'Malformed storyboard {i}: {"#".join(fields)}{bug_reports_message()}')
            continue
        width, height, frame_count, cols, rows = counts
        N, sigh = fields[6:]

        url = base_url.replace('$L', str(top_level - i)).replace('$N', N) + f'&sigh={sigh}'
        fragment_count = frame_count / (cols * rows)
        fragment_duration = duration / fragment_count
        fps = frame_count / duration

        fragments = []
        for j in range(math.ceil(fragment_count)):
            fragments.append({
                'url': url.replace('$M', str(j)),
                # The last fragment may be shorter than the others
                'duration': min(fragment_duration, duration - (j * fragment_duration)),
            })

        yield {
            'format_id': f'sb{i}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': url,
            'width': width,
            'height': height,
            'fps': fps,
            'rows': rows,
            'columns': cols,
            'fragments': fragments,
        }
3427
def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
    """
    Download the watch page (unless 'webpage' is in the `player_skip`
    extractor argument) and the player responses of all requested clients.

    @returns  Tuple (webpage, master_ytcfg, player_responses, player_url)
    """
    skip_webpage = 'webpage' in self._configuration_arg('player_skip')
    webpage = None if skip_webpage else self._download_webpage(
        webpage_url + '&bpctr=9999999999&has_verified=1&pp=8AEB', video_id, fatal=False)

    master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()

    requested_clients = self._get_requested_clients(url, smuggled_data)
    player_responses, player_url = self._extract_player_responses(
        requested_clients, video_id, webpage, master_ytcfg)

    return webpage, master_ytcfg, player_responses, player_url
3441
def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
    """
    Determine the live state of the video and extract its formats.

    `isLive` from the video details takes precedence; `isLiveNow` from the
    live broadcast details is consulted only when the former is None.

    @returns  Tuple (live_broadcast_details, is_live, streaming_data, formats, subtitles)
    """
    broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))

    live_now = get_first(video_details, 'isLive')
    if live_now is None:
        live_now = get_first(broadcast_details, 'isLiveNow')

    stream_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
    # The generator yields every format first and the subtitles dict last
    extracted = self._extract_formats_and_subtitles(
        stream_data, video_id, player_url, live_now, duration)
    *formats, subtitles = extracted

    return broadcast_details, live_now, stream_data, formats, subtitles
adbc4ec4
THD
3452
3453 def _real_extract(self, url):
3454 url, smuggled_data = unsmuggle_url(url, {})
3455 video_id = self._match_id(url)
3456
3457 base_url = self.http_scheme() + '//www.youtube.com/'
3458 webpage_url = base_url + 'watch?v=' + video_id
3459
3460 webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
3461
11f9be09 3462 playability_statuses = traverse_obj(
3463 player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])
3464
3465 trailer_video_id = get_first(
3466 playability_statuses,
3467 ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
3468 expected_type=str)
3469 if trailer_video_id:
3470 return self.url_result(
3471 trailer_video_id, self.ie_key(), trailer_video_id)
3472
3473 search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
3474 if webpage else (lambda x: None))
3475
3476 video_details = traverse_obj(
3477 player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
3478 microformats = traverse_obj(
3479 player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
3480 expected_type=dict, default=[])
3481 video_title = (
3482 get_first(video_details, 'title')
3483 or self._get_text(microformats, (..., 'title'))
3484 or search_meta(['og:title', 'twitter:title', 'title']))
3485 video_description = get_first(video_details, 'shortDescription')
3486
d89257f3 3487 multifeed_metadata_list = get_first(
3488 player_responses,
3489 ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
3490 expected_type=str)
3491 if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):
3492 if self.get_param('noplaylist'):
11f9be09 3493 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
d89257f3 3494 else:
3495 entries = []
3496 feed_ids = []
3497 for feed in multifeed_metadata_list.split(','):
3498 # Unquote should take place before split on comma (,) since textual
3499 # fields may contain comma as well (see
3500 # https://github.com/ytdl-org/youtube-dl/issues/8536)
14f25df2 3501 feed_data = urllib.parse.parse_qs(
ac668111 3502 urllib.parse.unquote_plus(feed))
d89257f3 3503
3504 def feed_entry(name):
3505 return try_get(
14f25df2 3506 feed_data, lambda x: x[name][0], str)
d89257f3 3507
3508 feed_id = feed_entry('id')
3509 if not feed_id:
3510 continue
3511 feed_title = feed_entry('title')
3512 title = video_title
3513 if feed_title:
3514 title += ' (%s)' % feed_title
3515 entries.append({
3516 '_type': 'url_transparent',
3517 'ie_key': 'Youtube',
3518 'url': smuggle_url(
3519 '%swatch?v=%s' % (base_url, feed_data['id'][0]),
3520 {'force_singlefeed': True}),
3521 'title': title,
3522 })
3523 feed_ids.append(feed_id)
3524 self.to_screen(
3525 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
3526 % (', '.join(feed_ids), video_id))
3527 return self.playlist_result(
3528 entries, video_id, video_title, video_description)
11f9be09 3529
a1b2d843 3530 duration = int_or_none(
3531 get_first(video_details, 'lengthSeconds')
3532 or get_first(microformats, 'lengthSeconds')
3533 or parse_duration(search_meta('duration'))) or None
3534
c646d76f 3535 live_broadcast_details, is_live, streaming_data, formats, automatic_captions = \
3536 self._list_formats(video_id, microformats, video_details, player_responses, player_url)
bf1317d2 3537
545cc85d 3538 if not formats:
11f9be09 3539 if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
88acdbc2 3540 self.report_drm(video_id)
11f9be09 3541 pemr = get_first(
3542 playability_statuses,
3543 ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
3544 reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
3545 subreason = clean_html(self._get_text(pemr, 'subreason') or '')
545cc85d 3546 if subreason:
545cc85d 3547 if subreason == 'The uploader has not made this video available in your country.':
11f9be09 3548 countries = get_first(microformats, 'availableCountries')
545cc85d 3549 if not countries:
3550 regions_allowed = search_meta('regionsAllowed')
3551 countries = regions_allowed.split(',') if regions_allowed else None
b7da73eb 3552 self.raise_geo_restricted(subreason, countries, metadata_available=True)
11f9be09 3553 reason += f'. {subreason}'
545cc85d 3554 if reason:
b7da73eb 3555 self.raise_no_formats(reason, expected=True)
bf1317d2 3556
11f9be09 3557 keywords = get_first(video_details, 'keywords', expected_type=list) or []
545cc85d 3558 if not keywords and webpage:
3559 keywords = [
3560 unescapeHTML(m.group('content'))
3561 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
3562 for keyword in keywords:
3563 if keyword.startswith('yt:stretch='):
201c1459 3564 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
3565 if mobj:
3566 # NB: float is intentional for forcing float division
3567 w, h = (float(v) for v in mobj.groups())
3568 if w > 0 and h > 0:
3569 ratio = w / h
3570 for f in formats:
3571 if f.get('vcodec') != 'none':
3572 f['stretched_ratio'] = ratio
3573 break
a709d873 3574 thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))
ff2751ac 3575 thumbnail_url = search_meta(['og:image', 'twitter:image'])
3576 if thumbnail_url:
3577 thumbnails.append({
3578 'url': thumbnail_url,
ff2751ac 3579 })
fccf5021 3580 original_thumbnails = thumbnails.copy()
3581
0ba692ac 3582 # The best resolution thumbnails sometimes does not appear in the webpage
bfec31be 3583 # See: https://github.com/yt-dlp/yt-dlp/issues/340
cca80fe6 3584 # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
e820fbaa 3585 thumbnail_names = [
962ffcf8 3586 # While the *1,*2,*3 thumbnails are just below their corresponding "*default" variants
bfec31be 3587 # in resolution, these are not the custom thumbnail. So de-prioritize them
3588 'maxresdefault', 'hq720', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default',
3589 'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3'
cca80fe6 3590 ]
cca80fe6 3591 n_thumbnail_names = len(thumbnail_names)
0ba692ac 3592 thumbnails.extend({
3593 'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
3594 video_id=video_id, name=name, ext=ext,
3595 webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
cca80fe6 3596 } for name in thumbnail_names for ext in ('webp', 'jpg'))
0ba692ac 3597 for thumb in thumbnails:
cca80fe6 3598 i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
0ba692ac 3599 thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
ff2751ac 3600 self._remove_duplicate_formats(thumbnails)
fccf5021 3601 self._downloader._sort_thumbnails(original_thumbnails)
545cc85d 3602
7ea65411 3603 category = get_first(microformats, 'category') or search_meta('genre')
3604 channel_id = str_or_none(
3605 get_first(video_details, 'channelId')
3606 or get_first(microformats, 'externalChannelId')
3607 or search_meta('channelId'))
7ea65411 3608 owner_profile_url = get_first(microformats, 'ownerProfileUrl')
3609
3610 live_content = get_first(video_details, 'isLiveContent')
3611 is_upcoming = get_first(video_details, 'isUpcoming')
3612 if is_live is None:
3613 if is_upcoming or live_content is False:
3614 is_live = False
3615 if is_upcoming is None and (live_content or is_live):
3616 is_upcoming = False
adbc4ec4
THD
3617 live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
3618 live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
3619 if not duration and live_end_time and live_start_time:
3620 duration = live_end_time - live_start_time
3621
3622 if is_live and self.get_param('live_from_start'):
3623 self._prepare_live_from_start_formats(formats, video_id, live_start_time, url, webpage_url, smuggled_data)
7ea65411 3624
720c3099 3625 formats.extend(self._extract_storyboard(player_responses, duration))
3626
31b532a1 3627 # source_preference is lower for throttled/potentially damaged formats
7e798d72 3628 self._sort_formats(formats, (
3629 'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang', 'proto'))
720c3099 3630
545cc85d 3631 info = {
3632 'id': video_id,
39ca3b5c 3633 'title': video_title,
545cc85d 3634 'formats': formats,
3635 'thumbnails': thumbnails,
fccf5021 3636 # The best thumbnail that we are sure exists. Prevents unnecessary
3637 # URL checking if user don't care about getting the best possible thumbnail
3638 'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
545cc85d 3639 'description': video_description,
11f9be09 3640 'uploader': get_first(video_details, 'author'),
545cc85d 3641 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
3642 'uploader_url': owner_profile_url,
3643 'channel_id': channel_id,
a70635b8 3644 'channel_url': format_field(channel_id, None, 'https://www.youtube.com/channel/%s'),
545cc85d 3645 'duration': duration,
3646 'view_count': int_or_none(
11f9be09 3647 get_first((video_details, microformats), (..., 'viewCount'))
545cc85d 3648 or search_meta('interactionCount')),
11f9be09 3649 'average_rating': float_or_none(get_first(video_details, 'averageRating')),
545cc85d 3650 'age_limit': 18 if (
11f9be09 3651 get_first(microformats, 'isFamilySafe') is False
545cc85d 3652 or search_meta('isFamilyFriendly') == 'false'
3653 or search_meta('og:restrictions:age') == '18+') else 0,
3654 'webpage_url': webpage_url,
3655 'categories': [category] if category else None,
3656 'tags': keywords,
11f9be09 3657 'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
7ea65411 3658 'is_live': is_live,
3659 'was_live': (False if is_live or is_upcoming or live_content is False
3660 else None if is_live is None or is_upcoming is None
3661 else live_content),
3662 'live_status': 'is_upcoming' if is_upcoming else None, # rest will be set by YoutubeDL
adbc4ec4 3663 'release_timestamp': live_start_time,
545cc85d 3664 }
b477fc13 3665
e325a21a 3666 if get_first(video_details, 'isPostLiveDvr'):
3667 self.write_debug('Video is in Post-Live Manifestless mode')
3668 info['live_status'] = 'post_live'
3669 if (duration or 0) > 4 * 3600:
3670 self.report_warning(
3671 'The livestream has not finished processing. Only 4 hours of the video can be currently downloaded. '
3672 'This is a known issue and patches are welcome')
3673
c646d76f 3674 subtitles = {}
3944e7af 3675 pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
545cc85d 3676 if pctr:
ecdc9049 3677 def get_lang_code(track):
3678 return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
3679 or track.get('languageCode'))
3680
3681 # Converted into dicts to remove duplicates
3682 captions = {
3683 get_lang_code(sub): sub
3684 for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
3685 translation_languages = {
3686 lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
3687 for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}
3688
774d79cc 3689 def process_language(container, base_url, lang_code, sub_name, query):
120916da 3690 lang_subs = container.setdefault(lang_code, [])
545cc85d 3691 for fmt in self._SUBTITLE_FORMATS:
3692 query.update({
3693 'fmt': fmt,
3694 })
3695 lang_subs.append({
3696 'ext': fmt,
60f393e4 3697 'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),
774d79cc 3698 'name': sub_name,
545cc85d 3699 })
7e72694b 3700
07b47084 3701 # NB: Constructing the full subtitle dictionary is slow
3702 get_translated_subs = 'translated_subs' not in self._configuration_arg('skip') and (
3703 self.get_param('writeautomaticsub', False) or self.get_param('listsubtitles'))
ecdc9049 3704 for lang_code, caption_track in captions.items():
3705 base_url = caption_track.get('baseUrl')
1235d333 3706 orig_lang = parse_qs(base_url).get('lang', [None])[-1]
545cc85d 3707 if not base_url:
3708 continue
ecdc9049 3709 lang_name = self._get_text(caption_track, 'name', max_runs=1)
545cc85d 3710 if caption_track.get('kind') != 'asr':
545cc85d 3711 if not lang_code:
3712 continue
3713 process_language(
ecdc9049 3714 subtitles, base_url, lang_code, lang_name, {})
3715 if not caption_track.get('isTranslatable'):
3716 continue
3944e7af 3717 for trans_code, trans_name in translation_languages.items():
3718 if not trans_code:
545cc85d 3719 continue
1235d333 3720 orig_trans_code = trans_code
ecdc9049 3721 if caption_track.get('kind') != 'asr':
07b47084 3722 if not get_translated_subs:
18e49408 3723 continue
ecdc9049 3724 trans_code += f'-{lang_code}'
a70635b8 3725 trans_name += format_field(lang_name, None, ' from %s')
d49669ac 3726 # Add an "-orig" label to the original language so that it can be distinguished.
3727 # The subs are returned without "-orig" as well for compatibility
1235d333 3728 if lang_code == f'a-{orig_trans_code}':
0c8d9e5f 3729 process_language(
d49669ac 3730 automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})
3731 # Setting tlang=lang returns damaged subtitles.
d49669ac 3732 process_language(automatic_captions, base_url, trans_code, trans_name,
1235d333 3733 {} if orig_lang == orig_trans_code else {'tlang': trans_code})
c646d76f 3734
3735 info['automatic_captions'] = automatic_captions
3736 info['subtitles'] = subtitles
7e72694b 3737
14f25df2 3738 parsed_url = urllib.parse.urlparse(url)
545cc85d 3739 for component in [parsed_url.fragment, parsed_url.query]:
14f25df2 3740 query = urllib.parse.parse_qs(component)
545cc85d 3741 for k, v in query.items():
3742 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
3743 d_k += '_time'
3744 if d_k not in info and k in s_ks:
3745 info[d_k] = parse_duration(query[k][0])
822b9d9c
RA
3746
3747 # Youtube Music Auto-generated description
822b9d9c 3748 if video_description:
1890fc63 3749 mobj = re.search(
3750 r'''(?xs)
3751 (?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+
3752 (?P<album>[^\n]+)
3753 (?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?
3754 (?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?
3755 (.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?
3756 .+\nAuto-generated\ by\ YouTube\.\s*$
3757 ''', video_description)
822b9d9c 3758 if mobj:
822b9d9c
RA
3759 release_year = mobj.group('release_year')
3760 release_date = mobj.group('release_date')
3761 if release_date:
3762 release_date = release_date.replace('-', '')
3763 if not release_year:
545cc85d 3764 release_year = release_date[:4]
3765 info.update({
3766 'album': mobj.group('album'.strip()),
3767 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
3768 'track': mobj.group('track').strip(),
3769 'release_date': release_date,
cc2db878 3770 'release_year': int_or_none(release_year),
545cc85d 3771 })
7e72694b 3772
545cc85d 3773 initial_data = None
3774 if webpage:
56ba69e4 3775 initial_data = self.extract_yt_initial_data(video_id, webpage, fatal=False)
545cc85d 3776 if not initial_data:
99e9e001 3777 query = {'videoId': video_id}
3778 query.update(self._get_checkok_params())
109dd3b2 3779 initial_data = self._extract_response(
3780 item_id=video_id, ep='next', fatal=False,
99e9e001 3781 ytcfg=master_ytcfg, query=query,
3782 headers=self.generate_api_headers(ytcfg=master_ytcfg),
109dd3b2 3783 note='Downloading initial data API JSON')
545cc85d 3784
0df111a3 3785 info['comment_count'] = traverse_obj(initial_data, (
3786 'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'itemSectionRenderer',
3787 'contents', ..., 'commentsEntryPointHeaderRenderer', 'commentCount', 'simpleText'
3788 ), (
3789 'engagementPanels', lambda _, v: v['engagementPanelSectionListRenderer']['panelIdentifier'] == 'comment-item-section',
3790 'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer', 'contextualInfo', 'runs', ..., 'text'
3791 ), expected_type=int_or_none, get_all=False)
3792
19a03940 3793 try: # This will error if there is no livechat
c60ee3a2 3794 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
19a03940 3795 except (KeyError, IndexError, TypeError):
3796 pass
3797 else:
ecdc9049 3798 info.setdefault('subtitles', {})['live_chat'] = [{
4ce05f57 3799 # url is needed to set cookies
3800 'url': f'https://www.youtube.com/watch?v={video_id}&bpctr=9999999999&has_verified=1',
c60ee3a2 3801 'video_id': video_id,
3802 'ext': 'json',
f6745c49 3803 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
c60ee3a2 3804 }]
545cc85d 3805
3806 if initial_data:
7c365c21 3807 info['chapters'] = (
3808 self._extract_chapters_from_json(initial_data, duration)
3809 or self._extract_chapters_from_engagement_panel(initial_data, duration)
0fe51254 3810 or self._extract_chapters_from_description(video_description, duration)
7c365c21 3811 or None)
545cc85d 3812
17322130 3813 contents = traverse_obj(
3814 initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),
3815 expected_type=list, default=[])
3816
3817 vpir = get_first(contents, 'videoPrimaryInfoRenderer')
3818 if vpir:
3819 stl = vpir.get('superTitleLink')
3820 if stl:
3821 stl = self._get_text(stl)
3822 if try_get(
3823 vpir,
3824 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
3825 info['location'] = stl
3826 else:
affc4fef 3827 mobj = re.search(r'(.+?)\s*S(\d+)\s*•?\s*E(\d+)', stl)
17322130 3828 if mobj:
545cc85d 3829 info.update({
17322130 3830 'series': mobj.group(1),
3831 'season_number': int(mobj.group(2)),
3832 'episode_number': int(mobj.group(3)),
545cc85d 3833 })
17322130 3834 for tlb in (try_get(
3835 vpir,
3836 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
3837 list) or []):
3838 tbr = tlb.get('toggleButtonRenderer') or {}
3839 for getter, regex in [(
3840 lambda x: x['defaultText']['accessibility']['accessibilityData'],
3841 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
3842 lambda x: x['accessibility'],
3843 lambda x: x['accessibilityData']['accessibilityData'],
3844 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
3845 label = (try_get(tbr, getter, dict) or {}).get('label')
3846 if label:
3847 mobj = re.match(regex, label)
3848 if mobj:
3849 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
545cc85d 3850 break
17322130 3851 sbr_tooltip = try_get(
3852 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
3853 if sbr_tooltip:
3854 like_count, dislike_count = sbr_tooltip.split(' / ')
3855 info.update({
3856 'like_count': str_to_int(like_count),
3857 'dislike_count': str_to_int(dislike_count),
3858 })
3859 vsir = get_first(contents, 'videoSecondaryInfoRenderer')
3860 if vsir:
3861 vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))
3862 info.update({
3863 'channel': self._get_text(vor, 'title'),
3864 'channel_follower_count': self._get_count(vor, 'subscriberCountText')})
3865
3866 rows = try_get(
3867 vsir,
3868 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
3869 list) or []
3870 multiple_songs = False
3871 for row in rows:
3872 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
3873 multiple_songs = True
3874 break
3875 for row in rows:
3876 mrr = row.get('metadataRowRenderer') or {}
3877 mrr_title = mrr.get('title')
3878 if not mrr_title:
3879 continue
3880 mrr_title = self._get_text(mrr, 'title')
3881 mrr_contents_text = self._get_text(mrr, ('contents', 0))
3882 if mrr_title == 'License':
3883 info['license'] = mrr_contents_text
3884 elif not multiple_songs:
3885 if mrr_title == 'Album':
3886 info['album'] = mrr_contents_text
3887 elif mrr_title == 'Artist':
3888 info['artist'] = mrr_contents_text
3889 elif mrr_title == 'Song':
3890 info['track'] = mrr_contents_text
545cc85d 3891
3892 fallbacks = {
3893 'channel': 'uploader',
3894 'channel_id': 'uploader_id',
3895 'channel_url': 'uploader_url',
3896 }
992f9a73 3897
17322130 3898 # The upload date for scheduled, live and past live streams / premieres in microformats
3899 # may be different from the stream date. Although not in UTC, we will prefer it in this case.
992f9a73 3900 # See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139
17322130 3901 upload_date = (
3902 unified_strdate(get_first(microformats, 'uploadDate'))
3903 or unified_strdate(search_meta('uploadDate')))
3904 if not upload_date or (not info.get('is_live') and not info.get('was_live') and info.get('live_status') != 'is_upcoming'):
6e634cbe 3905 upload_date = strftime_or_none(self._extract_time_text(vpir, 'dateText')[0], '%Y%m%d') or upload_date
17322130 3906 info['upload_date'] = upload_date
992f9a73 3907
545cc85d 3908 for to, frm in fallbacks.items():
3909 if not info.get(to):
3910 info[to] = info.get(frm)
3911
3912 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
3913 v = info.get(s_k)
3914 if v:
3915 info[d_k] = v
b84071c0 3916
11f9be09 3917 is_private = get_first(video_details, 'isPrivate', expected_type=bool)
3918 is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
c224251a 3919 is_membersonly = None
b28f8d24 3920 is_premium = None
c224251a
M
3921 if initial_data and is_private is not None:
3922 is_membersonly = False
b28f8d24 3923 is_premium = False
47193e02 3924 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
3925 badge_labels = set()
3926 for content in contents:
3927 if not isinstance(content, dict):
3928 continue
3929 badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
3930 for badge_label in badge_labels:
3931 if badge_label.lower() == 'members only':
3932 is_membersonly = True
3933 elif badge_label.lower() == 'premium':
3934 is_premium = True
3935 elif badge_label.lower() == 'unlisted':
3936 is_unlisted = True
c224251a 3937
c224251a
M
3938 info['availability'] = self._availability(
3939 is_private=is_private,
b28f8d24 3940 needs_premium=is_premium,
c224251a
M
3941 needs_subscription=is_membersonly,
3942 needs_auth=info['age_limit'] >= 18,
3943 is_unlisted=None if is_private is None else is_unlisted)
3944
a2160aa4 3945 info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)
4ea3be0a 3946
11f9be09 3947 self.mark_watched(video_id, player_responses)
d77ab8e2 3948
545cc85d 3949 return info
c5e8d7af 3950
a61fd4cf 3951
a6213a49 3952class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
5f6a1245 3953
@staticmethod
def passthrough_smuggled_data(func):
    """Decorator that forwards smuggled URL data to the wrapped extractor method.

    The wrapped `func` is called as `func(self, url, smuggled_data)` with the
    un-smuggled URL. When any smuggled data is present, every entry of the
    returned playlist gets the same data smuggled back into its `url`.
    """

    def _resmuggle(entries, data):
        # Lazily re-attach the smuggled data to each entry's URL.
        # TODO: Convert URL to music.youtube instead.
        # Do we need to passthrough any other smuggled_data?
        for item in entries:
            item['url'] = smuggle_url(item['url'], data)
            yield item

    @functools.wraps(func)
    def wrapper(self, url):
        url, data = unsmuggle_url(url, {})
        if self.is_music_url(url):
            data['is_music_url'] = True
        result = func(self, url, data)
        if data and result.get('entries'):
            result['entries'] = _resmuggle(result['entries'], data)
        return result

    return wrapper
3973
a6213a49 3974 def _extract_channel_id(self, webpage):
3975 channel_id = self._html_search_meta(
3976 'channelId', webpage, 'channel id', default=None)
3977 if channel_id:
3978 return channel_id
3979 channel_url = self._html_search_meta(
3980 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3981 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3982 'twitter:app:url:googleplay'), webpage, 'channel url')
3983 return self._search_regex(
3984 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
3985 channel_url, 'channel id')
15f6397c 3986
8bdd16b4 3987 @staticmethod
cd7c66cf 3988 def _extract_basic_item_renderer(item):
3989 # Modified from _extract_grid_item_renderer
201c1459 3990 known_basic_renderers = (
a17526e4 3991 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer'
cd7c66cf 3992 )
3993 for key, renderer in item.items():
201c1459 3994 if not isinstance(renderer, dict):
cd7c66cf 3995 continue
201c1459 3996 elif key in known_basic_renderers:
3997 return renderer
3998 elif key.startswith('grid') and key.endswith('Renderer'):
3999 return renderer
8bdd16b4 4000
def _grid_entries(self, grid_renderer):
    """Yield one result per item in `grid_renderer['items']`.

    Each item is resolved, in order of precedence, as a playlist, a video,
    a channel, or a generic navigation endpoint URL that one of the
    YouTube extractors can handle. Unrecognised items are skipped.
    """
    for grid_item in grid_renderer['items']:
        if not isinstance(grid_item, dict):
            continue
        renderer = self._extract_basic_item_renderer(grid_item)
        if not isinstance(renderer, dict):
            continue
        title = self._get_text(renderer, 'title')

        playlist_id = renderer.get('playlistId')
        if playlist_id:
            # playlist
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=title)
        elif renderer.get('videoId'):
            # video
            yield self._extract_video(renderer)
        elif renderer.get('channelId'):
            # channel
            yield self.url_result(
                'https://www.youtube.com/channel/%s' % renderer.get('channelId'),
                ie=YoutubeTabIE.ie_key(), video_title=title)
        else:
            # generic endpoint URL support
            endpoint_path = try_get(
                renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
                str)
            ep_url = urljoin('https://www.youtube.com/', endpoint_path)
            if not ep_url:
                continue
            # Only the first extractor that accepts the URL is used
            for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
                if ie.suitable(ep_url):
                    yield self.url_result(
                        ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
                    break
8bdd16b4 4040
def _music_reponsive_list_entry(self, renderer):
    """Build a music.youtube.com url_result for a music list item.

    Precedence: a direct video, then a watch endpoint with a playlist
    (with or without a starting video), then a browse endpoint.
    Returns None when the item matches none of these.
    """
    video_id = traverse_obj(renderer, ('playlistItemData', 'videoId'))
    if video_id:
        return self.url_result(
            f'https://music.youtube.com/watch?v={video_id}',
            ie=YoutubeIE.ie_key(), video_id=video_id)

    playlist_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'playlistId'))
    if playlist_id:
        video_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'videoId'))
        if video_id:
            playlist_url = f'https://music.youtube.com/watch?v={video_id}&list={playlist_id}'
        else:
            playlist_url = f'https://music.youtube.com/playlist?list={playlist_id}'
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    browse_id = traverse_obj(renderer, ('navigationEndpoint', 'browseEndpoint', 'browseId'))
    if not browse_id:
        return None
    return self.url_result(
        f'https://music.youtube.com/browse/{browse_id}',
        ie=YoutubeTabIE.ie_key(), video_id=browse_id)
4058
3d3dddc9 4059 def _shelf_entries_from_content(self, shelf_renderer):
4060 content = shelf_renderer.get('content')
4061 if not isinstance(content, dict):
8bdd16b4 4062 return
cd7c66cf 4063 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3d3dddc9 4064 if renderer:
4065 # TODO: add support for nested playlists so each shelf is processed
4066 # as separate playlist
4067 # TODO: this includes only first N items
86e5f3ed 4068 yield from self._grid_entries(renderer)
3d3dddc9 4069 renderer = content.get('horizontalListRenderer')
4070 if renderer:
4071 # TODO
4072 pass
8bdd16b4 4073
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield a url_result for the shelf's own URL (if any), then its inline entries.

    With `skip_channels` set, a shelf linking to a `/channels?` page yields
    nothing at all.
    """
    endpoint_path = try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Skipping links to another channels, note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        yield self.url_result(
            shelf_url, video_title=self._get_text(shelf_renderer, 'title'))
    # Shelf may not contain shelf URL, fallback to extraction from content
    yield from self._shelf_entries_from_content(shelf_renderer)
c5e8d7af 4089
8bdd16b4 4090 def _playlist_entries(self, video_list_renderer):
4091 for content in video_list_renderer['contents']:
4092 if not isinstance(content, dict):
4093 continue
4094 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
4095 if not isinstance(renderer, dict):
4096 continue
4097 video_id = renderer.get('videoId')
4098 if not video_id:
4099 continue
4100 yield self._extract_video(renderer)
07aeced6 4101
def _rich_entries(self, rich_grid_renderer):
    """Yield the single video wrapped by a rich item renderer, if it has a video id."""
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
4109
8bdd16b4 4110 def _video_entry(self, video_renderer):
4111 video_id = video_renderer.get('videoId')
4112 if video_id:
4113 return self._extract_video(video_renderer)
dacb3a86 4114
def _hashtag_tile_entry(self, hashtag_tile_renderer):
    """Return a tab url_result for a hashtag tile, or None if it has no tap URL."""
    tap_path = traverse_obj(
        hashtag_tile_renderer, ('onTapCommand', 'commandMetadata', 'webCommandMetadata', 'url'))
    url = urljoin('https://youtube.com', tap_path)
    if not url:
        return None
    return self.url_result(
        url, ie=YoutubeTabIE.ie_key(), title=self._get_text(hashtag_tile_renderer, 'hashtag'))
4121
def _post_thread_entries(self, post_thread_renderer):
    """Yield the media referenced by a community post.

    In order: the attached video, the attached playlist, then every video
    link in the post text that differs from the attached video.
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        entry = self._extract_video(attached_video)
        if entry:
            yield entry

    # playlist attachment
    playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    for run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(run, dict):
            continue
        ep_url = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], str)
        if not ep_url or not YoutubeIE.suitable(ep_url):
            continue
        ep_video_id = YoutubeIE._match_id(ep_url)
        # Do not repeat the video already yielded as the attachment
        if ep_video_id != video_id:
            yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
dacb3a86 4157
8bdd16b4 4158 def _post_thread_continuation_entries(self, post_thread_continuation):
4159 contents = post_thread_continuation.get('contents')
4160 if not isinstance(contents, list):
4161 return
4162 for content in contents:
4163 renderer = content.get('backstagePostThreadRenderer')
6b0b0a28 4164 if isinstance(renderer, dict):
4165 yield from self._post_thread_entries(renderer)
8bdd16b4 4166 continue
6b0b0a28 4167 renderer = content.get('videoRenderer')
4168 if isinstance(renderer, dict):
4169 yield self._video_entry(renderer)
07aeced6 4170
39ed931e 4171 r''' # unused
4172 def _rich_grid_entries(self, contents):
4173 for content in contents:
4174 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
4175 if video_renderer:
4176 entry = self._video_entry(video_renderer)
4177 if entry:
4178 yield entry
4179 '''
52efa4b3 4180
def _extract_entries(self, parent_renderer, continuation_list):
    """Yield the entries found under `parent_renderer['contents']`.

    `continuation_list` is an output parameter: it is reset to `[None]` on
    entry and its single element is overwritten with whatever
    `_extract_continuation` returns for the renderers visited, so the
    caller can read it after the generator has been consumed.
    """
    # continuation_list is modified in-place with continuation_list = [continuation_token]
    continuation_list[:] = [None]
    contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
    for content in contents:
        if not isinstance(content, dict):
            continue
        # First of the section/shelf style renderers present in this item (if any)
        is_renderer = traverse_obj(
            content, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation',
            expected_type=dict)
        if not is_renderer:
            # Not a section: the only other item type handled here is a rich item
            renderer = content.get('richItemRenderer')
            if renderer:
                for entry in self._rich_entries(renderer):
                    yield entry
                continuation_list[0] = self._extract_continuation(parent_renderer)
            continue
        isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
        for isr_content in isr_contents:
            if not isinstance(isr_content, dict):
                continue

            # Renderer key -> callable returning an iterable of entries
            known_renderers = {
                'playlistVideoListRenderer': self._playlist_entries,
                'gridRenderer': self._grid_entries,
                'reelShelfRenderer': self._grid_entries,
                'shelfRenderer': self._shelf_entries,
                'musicResponsiveListItemRenderer': lambda x: [self._music_reponsive_list_entry(x)],
                'backstagePostThreadRenderer': self._post_thread_entries,
                'videoRenderer': lambda x: [self._video_entry(x)],
                'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
                'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
                'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)]
            }
            # Only the first known renderer key of each item is processed
            for key, renderer in isr_content.items():
                if key not in known_renderers:
                    continue
                for entry in known_renderers[key](renderer):
                    # Single-entry helpers may return None; drop those
                    if entry:
                        yield entry
                continuation_list[0] = self._extract_continuation(renderer)
                break

        # Fall back to the section's own continuation ...
        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(is_renderer)

    # ... and finally to the parent's
    if not continuation_list[0]:
        continuation_list[0] = self._extract_continuation(parent_renderer)
4229
4230 def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
4231 continuation_list = [None]
4232 extract_entries = lambda x: self._extract_entries(x, continuation_list)
29f7c58a 4233 tab_content = try_get(tab, lambda x: x['content'], dict)
4234 if not tab_content:
4235 return
3462ffa8 4236 parent_renderer = (
29f7c58a 4237 try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
4238 or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
86e5f3ed 4239 yield from extract_entries(parent_renderer)
3462ffa8 4240 continuation = continuation_list[0]
d069eca7 4241
8bdd16b4 4242 for page_num in itertools.count(1):
4243 if not continuation:
4244 break
99e9e001 4245 headers = self.generate_api_headers(
4246 ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
79360d99 4247 response = self._extract_response(
86e5f3ed 4248 item_id=f'{item_id} page {page_num}',
fe93e2c4 4249 query=continuation, headers=headers, ytcfg=ytcfg,
79360d99 4250 check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
a5c56234
M
4251
4252 if not response:
8bdd16b4 4253 break
ac56cf38 4254 # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
4255 # See: https://github.com/ytdl-org/youtube-dl/issues/28702
4256 visitor_data = self._extract_visitor_data(response) or visitor_data
ebf1b291 4257
69184e41 4258 known_continuation_renderers = {
4259 'playlistVideoListContinuation': self._playlist_entries,
4260 'gridContinuation': self._grid_entries,
4261 'itemSectionContinuation': self._post_thread_continuation_entries,
4262 'sectionListContinuation': extract_entries, # for feeds
4263 }
8bdd16b4 4264 continuation_contents = try_get(
69184e41 4265 response, lambda x: x['continuationContents'], dict) or {}
4266 continuation_renderer = None
4267 for key, value in continuation_contents.items():
4268 if key not in known_continuation_renderers:
3462ffa8 4269 continue
69184e41 4270 continuation_renderer = value
4271 continuation_list = [None]
86e5f3ed 4272 yield from known_continuation_renderers[key](continuation_renderer)
69184e41 4273 continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
4274 break
4275 if continuation_renderer:
4276 continue
c5e8d7af 4277
a1b535bd 4278 known_renderers = {
e4b98809 4279 'videoRenderer': (self._grid_entries, 'items'), # for membership tab
a1b535bd 4280 'gridPlaylistRenderer': (self._grid_entries, 'items'),
4281 'gridVideoRenderer': (self._grid_entries, 'items'),
d61fc646 4282 'gridChannelRenderer': (self._grid_entries, 'items'),
a1b535bd 4283 'playlistVideoRenderer': (self._playlist_entries, 'contents'),
cd7c66cf 4284 'itemSectionRenderer': (extract_entries, 'contents'), # for feeds
9ba5705a 4285 'richItemRenderer': (extract_entries, 'contents'), # for hashtag
26fe8ffe 4286 'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
a1b535bd 4287 }
cce889b9 4288 on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
8bdd16b4 4289 continuation_items = try_get(
cce889b9 4290 on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
a1b535bd 4291 continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
4292 video_items_renderer = None
4293 for key, value in continuation_item.items():
4294 if key not in known_renderers:
8bdd16b4 4295 continue
a1b535bd 4296 video_items_renderer = {known_renderers[key][1]: continuation_items}
9ba5705a 4297 continuation_list = [None]
86e5f3ed 4298 yield from known_renderers[key][0](video_items_renderer)
9ba5705a 4299 continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
a1b535bd 4300 break
4301 if video_items_renderer:
4302 continue
8bdd16b4 4303 break
9558dcec 4304
@staticmethod
def _extract_selected_tab(tabs, fatal=True):
    """Return the renderer of the tab flagged `selected`.

    When no tab is selected, raise ExtractorError if `fatal` is true and
    return None otherwise.
    """
    for tab in tabs:
        tab_renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
        if tab_renderer.get('selected') is True:
            return tab_renderer
    if fatal:
        raise ExtractorError('Unable to find selected tab')
    return None
b82f815f 4314
def _extract_uploader(self, data):
    """Return the uploader name/id/url taken from the playlist sidebar owner.

    Only the fields that could be determined are present in the result;
    an empty dict is returned when the sidebar lists no owner.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner = try_get(
        sidebar, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if not owner:
        return {}

    owner_text = owner.get('text')
    fields = {
        # Collaborative playlists read "by <name> and N others"; keep only the name
        'uploader': self._search_regex(
            r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text),
        'uploader_id': try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], str),
        'uploader_url': urljoin(
            'https://www.youtube.com/',
            try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], str)),
    }
    return {name: value for name, value in fields.items() if value is not None}
8bdd16b4 4330
def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """Build the playlist result for a tabbed page (channel, playlist, hashtag, ...).

    Metadata is read from the channel or playlist metadata renderer and the
    primary sidebar in `data`; the entries come lazily from the selected tab.
    """
    playlist_id = title = description = channel_url = channel_name = channel_id = None
    tags = []

    selected_tab = self._extract_selected_tab(tabs)
    primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    # Channel metadata takes precedence; playlist metadata is only the fallback
    renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if renderer:
        channel_name = renderer.get('title')
        channel_url = renderer.get('channelUrl')
        channel_id = renderer.get('externalId')
    else:
        renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    if renderer:
        title = renderer.get('title')
        description = renderer.get('description', '')
        # Stays None for playlist metadata, in which case item_id is used below
        playlist_id = channel_id
        tags = renderer.get('keywords', '').split()

    # We can get the uncropped banner/avatar by replacing the crop params with '=s0'
    # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
    def _get_uncropped(url):
        return url_or_none((url or '').split('=')[0] + '=s0')

    avatar_thumbnails = self._extract_thumbnails(renderer, 'avatar')
    if avatar_thumbnails:
        uncropped_avatar = _get_uncropped(avatar_thumbnails[0]['url'])
        if uncropped_avatar:
            avatar_thumbnails.append({
                'url': uncropped_avatar,
                'id': 'avatar_uncropped',
                'preference': 1
            })

    channel_banners = self._extract_thumbnails(
        data, ('header', ..., ['banner', 'mobileBanner', 'tvBanner']))
    for banner in channel_banners:
        banner['preference'] = -10

    if channel_banners:
        uncropped_banner = _get_uncropped(channel_banners[0]['url'])
        if uncropped_banner:
            channel_banners.append({
                'url': uncropped_banner,
                'id': 'banner_uncropped',
                'preference': -5
            })

    primary_thumbnails = self._extract_thumbnails(
        primary_sidebar_renderer, ('thumbnailRenderer', ('playlistVideoThumbnailRenderer', 'playlistCustomThumbnailRenderer'), 'thumbnail'))

    if playlist_id is None:
        playlist_id = item_id

    # The sidebar stats are read by position: 0 -> playlist_count, 1 -> view_count, 2 -> modified date
    playlist_stats = traverse_obj(primary_sidebar_renderer, 'stats')
    last_updated_unix, _ = self._extract_time_text(playlist_stats, 2)
    if title is None:
        title = self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag')) or playlist_id
    # Append the selected tab's title/expandedText when present
    title += format_field(selected_tab, 'title', ' - %s')
    title += format_field(selected_tab, 'expandedText', ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': primary_thumbnails + avatar_thumbnails + channel_banners,
        'tags': tags,
        'view_count': self._get_count(playlist_stats, 1),
        'availability': self._extract_availability(data),
        'modified_date': strftime_or_none(last_updated_unix, '%Y%m%d'),
        'playlist_count': self._get_count(playlist_stats, 0),
        'channel_follower_count': self._get_count(data, ('header', ..., 'subscriberCountText')),
    }
    # Without a channel id, take the uploader from the playlist sidebar instead
    if not channel_id:
        metadata.update(self._extract_uploader(data))
    # Mirror the (possibly updated) uploader_* fields into channel_*
    metadata.update({
        'channel': metadata['uploader'],
        'channel_id': metadata['uploader_id'],
        'channel_url': metadata['uploader_url']})
    return self.playlist_result(
        self._entries(
            selected_tab, playlist_id, ytcfg,
            self._extract_account_syncid(ytcfg, data),
            self._extract_visitor_data(data, ytcfg)),
        **metadata)
73c4ac2c 4422
def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg):
    """Yield the videos of an inline (watch-page) playlist, page by page.

    Each page is the playlist panel of a `next` API response requested for
    the last video seen so far. Iteration stops when a page has no
    videos, brings no videos after the last one already yielded, or the
    response carries no playlist panel at all.
    """
    last_id = response = None
    for page_num in itertools.count(1):
        # The follow-up response may lack the playlist panel entirely
        if not playlist:
            return
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Pages overlap: resume right after the last video already yielded
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        yield from videos[start:]
        last_id = videos[-1]['id']
        # Fall back to an empty dict so the defaults below apply when the
        # last item has no watch endpoint
        watch_endpoint = try_get(
            playlist,
            lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
cd7c66cf 4453
def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """
    Build the result for a playlist renderer found in a watch page response.

    When the renderer links to a playlist page other than `url` (and the id is
    not a known-unviewable one), a url_result for YoutubeTabIE is returned.
    Otherwise the entries are extracted inline from the renderer.
    @param item_id: fallback playlist id when the renderer has no 'playlistId'
    @param url: the URL being extracted; base for resolving the playlist link
    @param data: response; used as fallback source of the title
    @param playlist: playlist renderer dict
    @param ytcfg: passed on to the inline playlist extraction
    """
    playlist_id = playlist.get('playlistId') or item_id
    title = playlist.get('title')
    if not title:
        title = try_get(data, lambda x: x['titleText']['simpleText'], str)

    # Delegating everything except mix playlists to regular tab-based playlist URL
    relative_url = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'], str)
    playlist_url = urljoin(url, relative_url)

    # Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1]
    # [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg
    is_known_unviewable = re.fullmatch(r'MLCT|RLTD[\w-]{22}', playlist_id)

    if not playlist_url or playlist_url == url or is_known_unviewable:
        # Nothing to delegate to: read the entries from the renderer itself
        return self.playlist_result(
            self._extract_inline_playlist(playlist, playlist_id, data, ytcfg),
            playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
c5e8d7af 4476
def _extract_availability(self, data):
    """
    Gets the availability of a given playlist/tab.
    Note: Unless YouTube tells us explicitly, we do not assume it is public
    @param data: response
    @returns the result of self._availability for the detected private/unlisted flags
    """
    sidebar_info = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    labels = self._extract_badges(sidebar_info)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    dropdown_entries = try_get(
        sidebar_info,
        lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'],
        list) or []
    for entry in dropdown_entries:
        if try_get(entry, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool):
            selected_label = self._get_text(entry, ('privacyDropdownItemRenderer', 'label'))
            if selected_label:
                # The selected visibility option is handled like a badge label
                labels.add(selected_label.lower())
                break

    # Both flags stay None (unknown) unless a label says otherwise
    is_private = is_unlisted = None
    for label in labels:
        if label == 'public':
            is_unlisted = is_private = False
        elif label == 'private':
            is_private = True
        elif label == 'unlisted':
            is_unlisted = True
    return self._availability(is_private, False, False, False, is_unlisted)
4508
@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """
    Return the first non-empty `info_renderer` found among the playlist sidebar items.
    @param data: response
    @param info_renderer: key to look up in each sidebar item
    @param expected_type: type the renderer must have to be accepted
    @returns the renderer, or None when no sidebar item has a usable one
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    # Lazy, so the lookup stops at the first item that yields a truthy renderer
    candidates = (
        try_get(sidebar_item, lambda x: x[info_renderer], expected_type)
        for sidebar_item in sidebar_items)
    return next(filter(None, candidates), None)
4517
def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Request the playlist again so that unavailable videos are included.

    The browse id and params are taken from the 'show unavailable videos' menu
    item when it exists; otherwise defaults derived from `item_id` are used.
    Returns None without any request when the response has no primary sidebar info.
    @param item_id: playlist id; used for the default browse id and for messages
    @param data: response
    @param ytcfg: passed on to header generation and to the API request
    """
    primary_info = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not primary_info:
        return

    browse_endpoint = {}
    menu_items = try_get(primary_info, lambda x: x['menu']['menuRenderer']['items'], list) or []
    for menu_entry in menu_items:
        if not isinstance(menu_entry, dict):
            continue
        nav_renderer = menu_entry.get('menuNavigationItemRenderer')
        label = try_get(nav_renderer, lambda x: x['text']['simpleText'], str) or ''
        if label.lower() == 'show unavailable videos':
            # Only the first matching menu item is considered
            browse_endpoint = try_get(
                nav_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            break

    api_headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        visitor_data=self._extract_visitor_data(data, ytcfg))
    return self._extract_response(
        item_id=item_id, headers=api_headers,
        query={
            'params': browse_endpoint.get('params') or 'wgYCCAA=',
            'browseId': browse_endpoint.get('browseId') or 'VL%s' % item_id
        },
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')
358de58c 4553
@functools.cached_property
def skip_webpage(self):
    """Whether 'webpage' is listed in the 'skip' extractor argument of YoutubeTabIE"""
    skipped_steps = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    return 'webpage' in skipped_steps
4557
def _extract_webpage(self, url, item_id, fatal=True):
    """
    Download the webpage and its initial data, retrying on transient failures.

    Network errors other than HTTP 403/429 and incomplete initial data are
    handed to the retry manager; any other error is reported through
    self._error_or_warning and ends the attempts.
    @param url: page to download
    @param item_id: id used for messages
    @param fatal: passed to the retry manager, the initial data extraction
                  and self._error_or_warning
    @returns (webpage, data); either may be None when nothing was extracted
    """
    webpage = data = None
    for retry in self.RetryManager(fatal=fatal):
        try:
            webpage = self._download_webpage(url, item_id, note='Downloading webpage')
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as err:
            cause = err.cause
            # HTTP 403 and 429 are not retried
            is_not_retried_status = isinstance(cause, urllib.error.HTTPError) and cause.code in (403, 429)
            if isinstance(cause, network_exceptions) and not is_not_retried_status:
                retry.error = err
                continue
            self._error_or_warning(err, fatal=fatal)
            break

        try:
            self._extract_and_report_alerts(data)
        except ExtractorError as err:
            self._error_or_warning(err, fatal=fatal)
            break

        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        has_content = traverse_obj(data, 'contents', 'currentVideoEndpoint', 'onResponseReceivedActions')
        if not has_content:
            # Last statement of the loop body: setting the error is what requests another attempt
            retry.error = ExtractorError('Incomplete yt initial data received')

    return webpage, data
4585
def _report_playlist_authcheck(self, ytcfg, fatal=True):
    """
    Use if failed to extract ytcfg (and data) from initial webpage

    Does nothing when a ytcfg is available or the session is not authenticated.
    Otherwise raises ExtractorError when `fatal` is set and 'authcheck' is not in
    the 'skip' extractor argument, and only warns (once) in the remaining cases.
    """
    if ytcfg or not self.is_authenticated:
        return

    msg = 'Playlists that require authentication may not extract correctly without a successful webpage download'
    skipped_steps = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    if 'authcheck' not in skipped_steps and fatal:
        raise ExtractorError(
            f'{msg}. If you are not downloading private content, or '
            'your cookies are only for the first account and channel,'
            ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
            expected=True)
    self.report_warning(msg, only_once=True)
4597
def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """
    Get the initial data for `url`, from the webpage when allowed, else from the API.

    Unless the webpage is skipped, the page is downloaded, ytcfg is filled in from
    it when not given, and a redirect to the home page is reported. When no data
    came out of the webpage, the tab endpoint is queried instead.
    @param ytcfg: ytcfg to use; extracted from the webpage when falsy
    @param fatal: whether the home page redirect, the auth check and the
                  API fallback raise instead of warning
    @param webpage_fatal: `fatal` value for the webpage download itself
    @param default_client: passed on to the API fallback
    @returns (data, ytcfg)
    """
    data = None
    if not self.skip_webpage:
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        if not ytcfg:
            ytcfg = self.extract_ytcfg(item_id, webpage)

        # Reject webpage data if redirected to home page without explicitly requesting
        tabs = traverse_obj(
            data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list, default=[])
        selected_tab = self._extract_selected_tab(tabs, fatal=False) or {}
        redirected_to_home = (
            url != 'https://www.youtube.com/feed/recommended'
            and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch'  # Home page
            and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', []))
        if redirected_to_home:
            msg = 'The channel/playlist does not exist and the URL redirected to youtube.com home page'
            if fatal:
                raise ExtractorError(msg, expected=True)
            self.report_warning(msg, only_once=True)

    if not data:
        self._report_playlist_authcheck(ytcfg, fatal=fatal)
        data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return data, ytcfg
4617
def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """
    Resolve `url` through the 'navigation/resolve_url' API and fetch the endpoint it maps to.

    A resolved browseEndpoint is requested from 'browse', a watchEndpoint from
    'next'; the browse endpoint is tried first.
    @returns the API response of the resolved endpoint; None when the URL could
             not be resolved and `fatal` is false
    @raises ExtractorError when the URL could not be resolved and `fatal` is true
    """
    api_headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolved = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=api_headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)

    for endpoint_key, api_path in (('browseEndpoint', 'browse'), ('watchEndpoint', 'next')):
        endpoint_params = try_get(resolved, lambda x: x['endpoint'][endpoint_key], dict)
        if not endpoint_params:
            continue
        return self._extract_response(
            item_id=item_id, query=endpoint_params, ep=api_path, headers=api_headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint', 'onResponseReceivedActions'))

    failure_note = 'Failed to resolve url (does the playlist exist?)'
    if fatal:
        raise ExtractorError(failure_note, expected=True)
    self.report_warning(failure_note, item_id)
4635
# Search params used by _search_results when the caller does not pass any
_SEARCH_PARAMS = None

def _search_results(self, query, params=NO_DEFAULT, default_client='web'):
    """
    Yield the entries of a search, following continuations page by page.
    @param query: the search query
    @param params: search params to send; self._SEARCH_PARAMS when not given.
                   A falsy value sends no params
    @param default_client: client used for the ytcfg, the headers and the API requests
    """
    request_body = {'query': query}
    search_params = self._SEARCH_PARAMS if params is NO_DEFAULT else params
    if search_params:
        request_body['params'] = search_params

    # Paths at which the result contents may be found in a response
    content_keys = (
        ('contents', 'twoColumnSearchResultsRenderer', 'primaryContents', 'sectionListRenderer', 'contents'),
        ('onResponseReceivedCommands', 0, 'appendContinuationItemsAction', 'continuationItems'),
        # ytmusic search
        ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
        ('continuationContents', ),
    )
    # The distinct top-level keys of the paths above
    check_get_keys = tuple({path[0] for path in content_keys})
    display_id = f'query "{query}"'
    ytcfg = {} if self.skip_webpage else self._download_ytcfg(default_client, display_id)
    self._report_playlist_authcheck(ytcfg, fatal=False)

    # One-element list passed to _extract_entries; its item is read back below
    # as the request data of the next page
    continuation_list = [None]
    search = None
    for page_num in itertools.count(1):
        request_body.update(continuation_list[0] or {})
        api_headers = self.generate_api_headers(
            ytcfg=ytcfg, visitor_data=self._extract_visitor_data(search), default_client=default_client)
        search = self._extract_response(
            item_id=f'{display_id} page {page_num}', ep='search', query=request_body,
            default_client=default_client, check_get_keys=check_get_keys, ytcfg=ytcfg, headers=api_headers)
        page_contents = traverse_obj(search, *content_keys)
        yield from self._extract_entries({'contents': list(variadic(page_contents))}, continuation_list)
        if not continuation_list[0]:
            break
4670
4671
4672class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
4673 IE_DESC = 'YouTube Tabs'
4674 _VALID_URL = r'''(?x:
4675 https?://
4676 (?:\w+\.)?
4677 (?:
4678 youtube(?:kids)?\.com|
4679 %(invidious)s
4680 )/
4681 (?:
4682 (?P<channel_type>channel|c|user|browse)/|
4683 (?P<not_channel>
4684 feed/|hashtag/|
4685 (?:playlist|watch)\?.*?\blist=
4686 )|
4687 (?!(?:%(reserved_names)s)\b) # Direct URLs
4688 )
4689 (?P<id>[^/?\#&]+)
4690 )''' % {
4691 'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,
4692 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
4693 }
4694 IE_NAME = 'youtube:tab'
4695
4696 _TESTS = [{
4697 'note': 'playlists, multipage',
4698 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
4699 'playlist_mincount': 94,
4700 'info_dict': {
4701 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
976ae3ea 4702 'title': 'Igor Kleiner - Playlists',
a6213a49 4703 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
976ae3ea 4704 'uploader': 'Igor Kleiner',
a6213a49 4705 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
976ae3ea 4706 'channel': 'Igor Kleiner',
4707 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
4708 'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],
4709 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
4710 'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
6c73052c 4711 'channel_follower_count': int
a6213a49 4712 },
4713 }, {
4714 'note': 'playlists, multipage, different order',
4715 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
4716 'playlist_mincount': 94,
4717 'info_dict': {
4718 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
976ae3ea 4719 'title': 'Igor Kleiner - Playlists',
a6213a49 4720 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
4721 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
976ae3ea 4722 'uploader': 'Igor Kleiner',
4723 'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
4724 'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],
4725 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
4726 'channel': 'Igor Kleiner',
4727 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
6c73052c 4728 'channel_follower_count': int
a6213a49 4729 },
4730 }, {
4731 'note': 'playlists, series',
4732 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
4733 'playlist_mincount': 5,
4734 'info_dict': {
4735 'id': 'UCYO_jab_esuFRV4b17AJtAw',
4736 'title': '3Blue1Brown - Playlists',
4737 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
4738 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
4739 'uploader': '3Blue1Brown',
976ae3ea 4740 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
4741 'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
4742 'channel': '3Blue1Brown',
4743 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
4744 'tags': ['Mathematics'],
6c73052c 4745 'channel_follower_count': int
a6213a49 4746 },
4747 }, {
4748 'note': 'playlists, singlepage',
4749 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
4750 'playlist_mincount': 4,
4751 'info_dict': {
4752 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
4753 'title': 'ThirstForScience - Playlists',
4754 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
4755 'uploader': 'ThirstForScience',
4756 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
976ae3ea 4757 'uploader_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
4758 'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
4759 'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
4760 'tags': 'count:13',
4761 'channel': 'ThirstForScience',
6c73052c 4762 'channel_follower_count': int
a6213a49 4763 }
4764 }, {
4765 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
4766 'only_matching': True,
4767 }, {
4768 'note': 'basic, single video playlist',
4769 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
4770 'info_dict': {
4771 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
4772 'uploader': 'Sergey M.',
4773 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
4774 'title': 'youtube-dl public playlist',
976ae3ea 4775 'description': '',
4776 'tags': [],
4777 'view_count': int,
4778 'modified_date': '20201130',
4779 'channel': 'Sergey M.',
4780 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
4781 'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4782 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
a6213a49 4783 },
4784 'playlist_count': 1,
4785 }, {
4786 'note': 'empty playlist',
4787 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
4788 'info_dict': {
4789 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
4790 'uploader': 'Sergey M.',
4791 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
4792 'title': 'youtube-dl empty playlist',
976ae3ea 4793 'tags': [],
4794 'channel': 'Sergey M.',
4795 'description': '',
4796 'modified_date': '20160902',
4797 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
4798 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4799 'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
a6213a49 4800 },
4801 'playlist_count': 0,
4802 }, {
4803 'note': 'Home tab',
4804 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
4805 'info_dict': {
4806 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4807 'title': 'lex will - Home',
4808 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4809 'uploader': 'lex will',
4810 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 4811 'channel': 'lex will',
4812 'tags': ['bible', 'history', 'prophesy'],
4813 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4814 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4815 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
6c73052c 4816 'channel_follower_count': int
a6213a49 4817 },
4818 'playlist_mincount': 2,
4819 }, {
4820 'note': 'Videos tab',
4821 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
4822 'info_dict': {
4823 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4824 'title': 'lex will - Videos',
4825 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4826 'uploader': 'lex will',
4827 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 4828 'tags': ['bible', 'history', 'prophesy'],
4829 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4830 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4831 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4832 'channel': 'lex will',
6c73052c 4833 'channel_follower_count': int
a6213a49 4834 },
4835 'playlist_mincount': 975,
4836 }, {
4837 'note': 'Videos tab, sorted by popular',
4838 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
4839 'info_dict': {
4840 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4841 'title': 'lex will - Videos',
4842 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4843 'uploader': 'lex will',
4844 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 4845 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4846 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4847 'channel': 'lex will',
4848 'tags': ['bible', 'history', 'prophesy'],
4849 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
6c73052c 4850 'channel_follower_count': int
a6213a49 4851 },
4852 'playlist_mincount': 199,
4853 }, {
4854 'note': 'Playlists tab',
4855 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
4856 'info_dict': {
4857 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4858 'title': 'lex will - Playlists',
4859 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4860 'uploader': 'lex will',
4861 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 4862 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4863 'channel': 'lex will',
4864 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4865 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4866 'tags': ['bible', 'history', 'prophesy'],
6c73052c 4867 'channel_follower_count': int
a6213a49 4868 },
4869 'playlist_mincount': 17,
4870 }, {
4871 'note': 'Community tab',
4872 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
4873 'info_dict': {
4874 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4875 'title': 'lex will - Community',
4876 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4877 'uploader': 'lex will',
4878 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 4879 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4880 'channel': 'lex will',
4881 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4882 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4883 'tags': ['bible', 'history', 'prophesy'],
6c73052c 4884 'channel_follower_count': int
a6213a49 4885 },
4886 'playlist_mincount': 18,
4887 }, {
4888 'note': 'Channels tab',
4889 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
4890 'info_dict': {
4891 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4892 'title': 'lex will - Channels',
4893 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4894 'uploader': 'lex will',
4895 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 4896 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4897 'channel': 'lex will',
4898 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4899 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4900 'tags': ['bible', 'history', 'prophesy'],
6c73052c 4901 'channel_follower_count': int
a6213a49 4902 },
4903 'playlist_mincount': 12,
4904 }, {
4905 'note': 'Search tab',
4906 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
4907 'playlist_mincount': 40,
4908 'info_dict': {
4909 'id': 'UCYO_jab_esuFRV4b17AJtAw',
4910 'title': '3Blue1Brown - Search - linear algebra',
4911 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
4912 'uploader': '3Blue1Brown',
4913 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
976ae3ea 4914 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
4915 'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
4916 'tags': ['Mathematics'],
4917 'channel': '3Blue1Brown',
4918 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
6c73052c 4919 'channel_follower_count': int
a6213a49 4920 },
4921 }, {
4922 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4923 'only_matching': True,
4924 }, {
4925 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4926 'only_matching': True,
4927 }, {
4928 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4929 'only_matching': True,
4930 }, {
4931 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
4932 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
4933 'info_dict': {
4934 'title': '29C3: Not my department',
4935 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
4936 'uploader': 'Christiaan008',
4937 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
4938 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
976ae3ea 4939 'tags': [],
4940 'uploader_url': 'https://www.youtube.com/c/ChRiStIaAn008',
4941 'view_count': int,
4942 'modified_date': '20150605',
4943 'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
4944 'channel_url': 'https://www.youtube.com/c/ChRiStIaAn008',
4945 'channel': 'Christiaan008',
a6213a49 4946 },
4947 'playlist_count': 96,
4948 }, {
4949 'note': 'Large playlist',
4950 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
4951 'info_dict': {
4952 'title': 'Uploads from Cauchemar',
4953 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
4954 'uploader': 'Cauchemar',
4955 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
976ae3ea 4956 'channel_url': 'https://www.youtube.com/c/Cauchemar89',
4957 'tags': [],
4958 'modified_date': r're:\d{8}',
4959 'channel': 'Cauchemar',
4960 'uploader_url': 'https://www.youtube.com/c/Cauchemar89',
4961 'view_count': int,
4962 'description': '',
4963 'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
a6213a49 4964 },
4965 'playlist_mincount': 1123,
976ae3ea 4966 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 4967 }, {
4968 'note': 'even larger playlist, 8832 videos',
4969 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
4970 'only_matching': True,
4971 }, {
4972 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
4973 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
4974 'info_dict': {
4975 'title': 'Uploads from Interstellar Movie',
4976 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
4977 'uploader': 'Interstellar Movie',
4978 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
976ae3ea 4979 'uploader_url': 'https://www.youtube.com/c/InterstellarMovie',
4980 'tags': [],
4981 'view_count': int,
4982 'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4983 'channel_url': 'https://www.youtube.com/c/InterstellarMovie',
4984 'channel': 'Interstellar Movie',
4985 'description': '',
4986 'modified_date': r're:\d{8}',
a6213a49 4987 },
4988 'playlist_mincount': 21,
4989 }, {
4990 'note': 'Playlist with "show unavailable videos" button',
4991 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
4992 'info_dict': {
4993 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
4994 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
4995 'uploader': 'Phim Siêu Nhân Nhật Bản',
4996 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
976ae3ea 4997 'view_count': int,
4998 'channel': 'Phim Siêu Nhân Nhật Bản',
4999 'tags': [],
5000 'uploader_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
5001 'description': '',
5002 'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
5003 'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
5004 'modified_date': r're:\d{8}',
a6213a49 5005 },
5006 'playlist_mincount': 200,
976ae3ea 5007 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 5008 }, {
5009 'note': 'Playlist with unavailable videos in page 7',
5010 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
5011 'info_dict': {
5012 'title': 'Uploads from BlankTV',
5013 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
5014 'uploader': 'BlankTV',
5015 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
976ae3ea 5016 'channel': 'BlankTV',
5017 'channel_url': 'https://www.youtube.com/c/blanktv',
5018 'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',
5019 'view_count': int,
5020 'tags': [],
5021 'uploader_url': 'https://www.youtube.com/c/blanktv',
5022 'modified_date': r're:\d{8}',
5023 'description': '',
a6213a49 5024 },
5025 'playlist_mincount': 1000,
976ae3ea 5026 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 5027 }, {
5028 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
5029 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
5030 'info_dict': {
5031 'title': 'Data Analysis with Dr Mike Pound',
5032 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
5033 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
5034 'uploader': 'Computerphile',
5035 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
976ae3ea 5036 'uploader_url': 'https://www.youtube.com/user/Computerphile',
5037 'tags': [],
5038 'view_count': int,
5039 'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',
5040 'channel_url': 'https://www.youtube.com/user/Computerphile',
5041 'channel': 'Computerphile',
a6213a49 5042 },
5043 'playlist_mincount': 11,
5044 }, {
5045 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
5046 'only_matching': True,
5047 }, {
5048 'note': 'Playlist URL that does not actually serve a playlist',
5049 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
5050 'info_dict': {
5051 'id': 'FqZTN594JQw',
5052 'ext': 'webm',
5053 'title': "Smiley's People 01 detective, Adventure Series, Action",
5054 'uploader': 'STREEM',
5055 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
5056 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
5057 'upload_date': '20150526',
5058 'license': 'Standard YouTube License',
5059 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
5060 'categories': ['People & Blogs'],
5061 'tags': list,
5062 'view_count': int,
5063 'like_count': int,
a6213a49 5064 },
5065 'params': {
5066 'skip_download': True,
5067 },
5068 'skip': 'This video is not available.',
5069 'add_ie': [YoutubeIE.ie_key()],
5070 }, {
5071 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
5072 'only_matching': True,
5073 }, {
5074 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
5075 'only_matching': True,
5076 }, {
5077 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
5078 'info_dict': {
12a1b225 5079 'id': 'Wq15eF5vCbI', # This will keep changing
a6213a49 5080 'ext': 'mp4',
976ae3ea 5081 'title': str,
a6213a49 5082 'uploader': 'Sky News',
5083 'uploader_id': 'skynews',
5084 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
5085 'upload_date': r're:\d{8}',
976ae3ea 5086 'description': str,
a6213a49 5087 'categories': ['News & Politics'],
5088 'tags': list,
5089 'like_count': int,
6c73052c 5090 'release_timestamp': 1642502819,
976ae3ea 5091 'channel': 'Sky News',
5092 'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',
5093 'age_limit': 0,
5094 'view_count': int,
6c73052c 5095 'thumbnail': 'https://i.ytimg.com/vi/GgL890LIznQ/maxresdefault_live.jpg',
976ae3ea 5096 'playable_in_embed': True,
6c73052c 5097 'release_date': '20220118',
976ae3ea 5098 'availability': 'public',
5099 'live_status': 'is_live',
5100 'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',
6c73052c 5101 'channel_follower_count': int
a6213a49 5102 },
5103 'params': {
5104 'skip_download': True,
5105 },
976ae3ea 5106 'expected_warnings': ['Ignoring subtitle tracks found in '],
a6213a49 5107 }, {
5108 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
5109 'info_dict': {
5110 'id': 'a48o2S1cPoo',
5111 'ext': 'mp4',
5112 'title': 'The Young Turks - Live Main Show',
5113 'uploader': 'The Young Turks',
5114 'uploader_id': 'TheYoungTurks',
5115 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
5116 'upload_date': '20150715',
5117 'license': 'Standard YouTube License',
5118 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
5119 'categories': ['News & Politics'],
5120 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
5121 'like_count': int,
a6213a49 5122 },
5123 'params': {
5124 'skip_download': True,
5125 },
5126 'only_matching': True,
5127 }, {
5128 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
5129 'only_matching': True,
5130 }, {
5131 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
5132 'only_matching': True,
5133 }, {
5134 'note': 'A channel that is not live. Should raise error',
5135 'url': 'https://www.youtube.com/user/numberphile/live',
5136 'only_matching': True,
5137 }, {
5138 'url': 'https://www.youtube.com/feed/trending',
5139 'only_matching': True,
5140 }, {
5141 'url': 'https://www.youtube.com/feed/library',
5142 'only_matching': True,
5143 }, {
5144 'url': 'https://www.youtube.com/feed/history',
5145 'only_matching': True,
5146 }, {
5147 'url': 'https://www.youtube.com/feed/subscriptions',
5148 'only_matching': True,
5149 }, {
5150 'url': 'https://www.youtube.com/feed/watch_later',
5151 'only_matching': True,
5152 }, {
5153 'note': 'Recommended - redirects to home page.',
5154 'url': 'https://www.youtube.com/feed/recommended',
5155 'only_matching': True,
5156 }, {
5157 'note': 'inline playlist with not always working continuations',
5158 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
5159 'only_matching': True,
5160 }, {
5161 'url': 'https://www.youtube.com/course',
5162 'only_matching': True,
5163 }, {
5164 'url': 'https://www.youtube.com/zsecurity',
5165 'only_matching': True,
5166 }, {
5167 'url': 'http://www.youtube.com/NASAgovVideo/videos',
5168 'only_matching': True,
5169 }, {
5170 'url': 'https://www.youtube.com/TheYoungTurks/live',
5171 'only_matching': True,
5172 }, {
5173 'url': 'https://www.youtube.com/hashtag/cctv9',
5174 'info_dict': {
5175 'id': 'cctv9',
5176 'title': '#cctv9',
976ae3ea 5177 'tags': [],
a6213a49 5178 },
5179 'playlist_mincount': 350,
5180 }, {
5181 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
5182 'only_matching': True,
5183 }, {
5184 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
5185 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5186 'only_matching': True
5187 }, {
5188 'note': '/browse/ should redirect to /channel/',
5189 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
5190 'only_matching': True
5191 }, {
5192 'note': 'VLPL, should redirect to playlist?list=PL...',
5193 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5194 'info_dict': {
5195 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5196 'uploader': 'NoCopyrightSounds',
5197 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
5198 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
12a1b225 5199 'title': 'NCS : All Releases 💿',
976ae3ea 5200 'uploader_url': 'https://www.youtube.com/c/NoCopyrightSounds',
5201 'channel_url': 'https://www.youtube.com/c/NoCopyrightSounds',
5202 'modified_date': r're:\d{8}',
5203 'view_count': int,
5204 'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
5205 'tags': [],
5206 'channel': 'NoCopyrightSounds',
a6213a49 5207 },
5208 'playlist_mincount': 166,
976ae3ea 5209 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 5210 }, {
5211 'note': 'Topic, should redirect to playlist?list=UU...',
5212 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
5213 'info_dict': {
5214 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
5215 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5216 'title': 'Uploads from Royalty Free Music - Topic',
5217 'uploader': 'Royalty Free Music - Topic',
976ae3ea 5218 'tags': [],
5219 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5220 'channel': 'Royalty Free Music - Topic',
5221 'view_count': int,
5222 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5223 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5224 'modified_date': r're:\d{8}',
5225 'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5226 'description': '',
a6213a49 5227 },
5228 'expected_warnings': [
a6213a49 5229 'The URL does not have a videos tab',
976ae3ea 5230 r'[Uu]navailable videos (are|will be) hidden',
a6213a49 5231 ],
5232 'playlist_mincount': 101,
5233 }, {
5234 'note': 'Topic without a UU playlist',
5235 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
5236 'info_dict': {
5237 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
5238 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
976ae3ea 5239 'tags': [],
a6213a49 5240 },
5241 'expected_warnings': [
976ae3ea 5242 'the playlist redirect gave error',
a6213a49 5243 ],
5244 'playlist_mincount': 9,
5245 }, {
5246 'note': 'Youtube music Album',
5247 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
5248 'info_dict': {
5249 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
5250 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
976ae3ea 5251 'tags': [],
5252 'view_count': int,
5253 'description': '',
5254 'availability': 'unlisted',
5255 'modified_date': r're:\d{8}',
a6213a49 5256 },
5257 'playlist_count': 50,
5258 }, {
5259 'note': 'unlisted single video playlist',
5260 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
5261 'info_dict': {
5262 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
5263 'uploader': 'colethedj',
5264 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
5265 'title': 'yt-dlp unlisted playlist test',
976ae3ea 5266 'availability': 'unlisted',
5267 'tags': [],
12a1b225 5268 'modified_date': '20220418',
976ae3ea 5269 'channel': 'colethedj',
5270 'view_count': int,
5271 'description': '',
5272 'uploader_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
5273 'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
5274 'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
a6213a49 5275 },
5276 'playlist_count': 1,
5277 }, {
5278 'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
5279 'url': 'https://www.youtube.com/feed/recommended',
5280 'info_dict': {
5281 'id': 'recommended',
5282 'title': 'recommended',
6c73052c 5283 'tags': [],
a6213a49 5284 },
5285 'playlist_mincount': 50,
5286 'params': {
5287 'skip_download': True,
5288 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
5289 },
5290 }, {
5291 'note': 'API Fallback: /videos tab, sorted by oldest first',
5292 'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',
5293 'info_dict': {
5294 'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
5295 'title': 'Cody\'sLab - Videos',
5296 'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',
5297 'uploader': 'Cody\'sLab',
5298 'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
976ae3ea 5299 'channel': 'Cody\'sLab',
5300 'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
5301 'tags': [],
5302 'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
5303 'uploader_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
6c73052c 5304 'channel_follower_count': int
a6213a49 5305 },
5306 'playlist_mincount': 650,
5307 'params': {
5308 'skip_download': True,
5309 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
5310 },
5311 }, {
5312 'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
5313 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
5314 'info_dict': {
5315 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
5316 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5317 'title': 'Uploads from Royalty Free Music - Topic',
5318 'uploader': 'Royalty Free Music - Topic',
976ae3ea 5319 'modified_date': r're:\d{8}',
5320 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5321 'description': '',
5322 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5323 'tags': [],
5324 'channel': 'Royalty Free Music - Topic',
5325 'view_count': int,
5326 'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
a6213a49 5327 },
5328 'expected_warnings': [
976ae3ea 5329 'does not have a videos tab',
5330 r'[Uu]navailable videos (are|will be) hidden',
a6213a49 5331 ],
5332 'playlist_mincount': 101,
5333 'params': {
5334 'skip_download': True,
5335 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
5336 },
7c219ea6 5337 }, {
5338 'note': 'non-standard redirect to regional channel',
5339 'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',
5340 'only_matching': True
61d3665d 5341 }, {
5342 'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',
5343 'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
5344 'info_dict': {
5345 'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
5346 'modified_date': '20220407',
5347 'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
5348 'tags': [],
5349 'uploader_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
5350 'uploader': 'pukkandan',
5351 'availability': 'unlisted',
5352 'channel_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
5353 'channel': 'pukkandan',
5354 'description': 'Test for collaborative playlist',
5355 'title': 'yt-dlp test - collaborative playlist',
12a1b225 5356 'view_count': int,
61d3665d 5357 'uploader_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
5358 },
5359 'playlist_mincount': 2
a6213a49 5360 }]
5361
@classmethod
def suitable(cls, url):
    """Decline any URL that YoutubeIE accepts; otherwise defer to the inherited check."""
    if YoutubeIE.suitable(url):
        return False
    return super().suitable(url)
9297939e 5365
# Splits a URL into three named parts:
#   pre  - the portion matched by _VALID_URL
#   tab  - an optional "/<word>" segment; the conditional group only tries to match it
#          when the `not_channel` group (defined inside _VALID_URL) did not participate
#   post - everything that remains up to the end of the string
_URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/\w+))?(?P<post>.*)$')
fe03a6cd 5367
@YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
def _real_extract(self, url, smuggled_data):
    """Resolve a tab/playlist/channel URL and dispatch to the matching extraction path.

    The URL is first normalised (host forced to www.youtube.com, music URLs mapped to
    their playlist/channel equivalents, bare channel URLs pointed at /videos), then the
    page data is fetched and the result is built from, in order of preference: the
    browse tabs, the watch-page playlist panel, or a single video.

    @param url            The URL to extract
    @param smuggled_data  Dict supplied by the decorator; only 'is_music_url' is read here
    @returns              A url_result, or whatever _extract_from_tabs /
                          _extract_from_playlist return
    @raises ExtractorError  If the page cannot be recognised, an album cannot be resolved,
                            or an explicitly requested tab does not exist
    @raises UserNotLive     If the /live tab was requested but tabs were returned instead
    """
    item_id = self._match_id(url)
    # Replace whatever host was given with www.youtube.com
    url = urllib.parse.urlunparse(
        urllib.parse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    def get_mobj(url):
        # Match against _URL_RE and turn unmatched (None) groups into '' so that
        # string methods can be used on every group without None checks
        mobj = self._URL_RE.match(url).groupdict()
        mobj.update((k, '') for k, v in mobj.items() if v is None)
        return mobj

    mobj, redirect_warning = get_mobj(url), None
    # Youtube returns incomplete data if tabname is not lower case
    pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']
    if is_channel:
        if smuggled_data.get('is_music_url'):
            if item_id[:2] == 'VL':  # Youtube music VL channels have an equivalent playlist
                item_id = item_id[2:]
                pre, tab, post, is_channel = f'https://www.youtube.com/playlist?list={item_id}', '', '', False
            elif item_id[:2] == 'MP':  # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
                mdata = self._extract_tab_endpoint(
                    f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
                murl = traverse_obj(mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                                    get_all=False, expected_type=str)
                if not murl:
                    raise ExtractorError('Failed to resolve album to playlist')
                # Restart extraction on the canonical URL reported for the album
                return self.url_result(murl, ie=YoutubeTabIE.ie_key())
            elif mobj['channel_type'] == 'browse':  # Youtube music /browse/ should be changed to /channel/
                pre = f'https://www.youtube.com/channel/{item_id}'

    # Remember what the user asked for before a default tab is substituted below;
    # this decides later whether a missing tab is an error or just a warning
    original_tab_name = tab
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # Home URLs should redirect to /videos/
        redirect_warning = ('A channel/user page was given. All the channel\'s videos will be downloaded. '
                            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    # Rebuild the URL from the (possibly rewritten) parts and re-parse it
    url = ''.join((pre, tab, post))
    mobj = get_mobj(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id, playlist_id = (qs.get(key, [None])[0] for key in ('v', 'list'))

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
        url = f'https://www.youtube.com/playlist?list={playlist_id}'
        mobj = get_mobj(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen(f'Downloading just video {video_id} because of --no-playlist')
            return self.url_result(f'https://www.youtube.com/watch?v={video_id}',
                                   ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen(f'Downloading playlist {playlist_id}; add --no-playlist to just download video {video_id}')

    data, ytcfg = self._extract_data(url, item_id)

    # YouTube may provide a non-standard redirect to the regional channel
    # See: https://github.com/yt-dlp/yt-dlp/issues/2694
    redirect_url = traverse_obj(
        data, ('onResponseReceivedActions', ..., 'navigateAction', 'endpoint', 'commandMetadata', 'webCommandMetadata', 'url'), get_all=False)
    if redirect_url and 'no-youtube-channel-redirect' not in compat_opts:
        # Carry the requested tab and trailing part of the URL over to the redirect target
        redirect_url = ''.join((
            urljoin('https://www.youtube.com', redirect_url), mobj['tab'], mobj['post']))
        self.to_screen(f'This playlist is likely not available in your region. Following redirect to regional playlist {redirect_url}')
        return self.url_result(redirect_url, ie=YoutubeTabIE.ie_key())

    tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        selected_tab_name = selected_tab.get('title', '').lower()
        # The tab titled "Home" is addressed as /featured in URLs
        if selected_tab_name == 'home':
            selected_tab_name = 'featured'
        requested_tab_name = mobj['tab'][1:]  # drop the leading "/"
        if 'no-youtube-channel-redirect' not in compat_opts:
            if requested_tab_name == 'live':  # Live tab should have redirected to the video
                raise UserNotLive(video_id=mobj['id'])
            if requested_tab_name not in ('', selected_tab_name):
                redirect_warning = f'The channel does not have a {requested_tab_name} tab'
                if not original_tab_name:
                    # The tab was only substituted by the home -> /videos default above,
                    # so fall back gracefully instead of failing
                    if item_id[:2] == 'UC':
                        # Topic channels don't have /videos. Use the equivalent playlist instead
                        pl_id = f'UU{item_id[2:]}'
                        pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
                        try:
                            data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
                        except ExtractorError:
                            # Keep the originally fetched data and only note the failure
                            redirect_warning += ' and the playlist redirect gave error'
                        else:
                            item_id, url, selected_tab_name = pl_id, pl_url, requested_tab_name
                            redirect_warning += f'. Redirecting to playlist {pl_id} instead'
                    if selected_tab_name and selected_tab_name != requested_tab_name:
                        redirect_warning += f'. {selected_tab_name} tab is being downloaded instead'
                else:
                    # The user explicitly asked for a tab that was not selected
                    raise ExtractorError(redirect_warning, expected=True)

    if redirect_warning:
        self.to_screen(redirect_warning)
    self.write_debug(f'Final URL: {url}')

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)
    # `data` may have been replaced above, so look the tabs up again
    tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
    if tabs:
        return self._extract_from_tabs(item_id, ytcfg, data, tabs)

    playlist = traverse_obj(
        data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

    # Neither tabs nor a playlist panel: fall back to a single video, preferring the
    # id reported in the response over the one taken from the URL
    video_id = traverse_obj(
        data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
    if video_id:
        if mobj['tab'] != '/live':  # live tab is expected to redirect to video
            self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
        return self.url_result(f'https://www.youtube.com/watch?v={video_id}',
                               ie=YoutubeIE.ie_key(), video_id=video_id)

    raise ExtractorError('Unable to recognize tab page')
c5e8d7af 5496
c5e8d7af 5497
class YoutubePlaylistIE(InfoExtractor):
    """Accept bare playlist IDs and playlist-carrying URLs and hand them to YoutubeTabIE."""

    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube playlists'
    # Either a bare playlist ID, or a youtube/youtubekids/invidious URL with a `list=` query
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                %(invidious)s
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickman',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
            'view_count': int,
            'uploader_url': 'https://www.youtube.com/user/Wickydoo',
            'modified_date': r're:\d{8}',
            'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'channel': 'Wickman',
            'tags': [],
            'channel_url': 'https://www.youtube.com/user/Wickydoo',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'description': '',
            'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'tags': [],
            'modified_date': '20140919',
            'view_count': int,
            'channel': 'milan',
            'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'uploader_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 455,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
            'channel': 'LBK',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/c/愛低音的國王',
            'tags': [],
            'uploader_url': 'https://www.youtube.com/c/愛低音的國王',
            'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'modified_date': r're:\d{8}',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs claimed by YoutubeTabIE and URLs that carry a video id (`v=`)."""
        if YoutubeTabIE.suitable(url):
            return False
        # NOTE(review): the import is kept at function scope exactly as in the original;
        # presumably this method has to stay self-contained - confirm before hoisting it
        from ..utils import parse_qs
        video_ids = parse_qs(url).get('v', [None])
        return False if video_ids[0] else super().suitable(url)

    def _real_extract(self, url):
        """Rewrite the input as a www.youtube.com/playlist URL and delegate to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        from_music = YoutubeBaseInfoExtractor.is_music_url(url)
        # Keep the original query string if there is one; a bare ID has none
        query = parse_qs(url) or {'list': playlist_id}
        tab_url = update_url_query('https://www.youtube.com/playlist', query)
        if from_music:
            tab_url = smuggle_url(tab_url, {'is_music_url': True})
        return self.url_result(tab_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
29f7c58a 5607
5608
class YoutubeYtBeIE(InfoExtractor):
    """Handle youtu.be short links that carry both a video id and a `list=` playlist id."""

    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'age_limit': 0,
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi_webp/yeWKywCrFtk/maxresdefault.webp',
            'channel': 'Backus-Page House Museum',
            'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
            'live_status': 'not_live',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
            'availability': 'public',
            'duration': 59,
            'comment_count': int,
            'channel_follower_count': int
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Expand the short link into a full /watch URL and delegate to YoutubeTabIE."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 5658
5659
class YoutubeLivestreamEmbedIE(InfoExtractor):
    """Map `embed/live_stream?channel=<id>` URLs onto the channel's /live URL."""

    IE_DESC = 'YouTube livestream embeds'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
    _TESTS = [{
        'url': 'https://www.youtube.com/embed/live_stream?channel=UC2_KI6RB__jGdlnK6dvFEZA',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the /live URL of the channel named in the query."""
        channel_id = self._match_id(url)
        live_url = f'https://www.youtube.com/channel/{channel_id}/live'
        return self.url_result(live_url, ie=YoutubeTabIE.ie_key(), video_id=channel_id)
5673
5674
class YoutubeYtUserIE(InfoExtractor):
    """Resolve the `ytuser:<name>` shorthand to that user's /videos page."""

    IE_NAME = 'youtube:user'
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the /user/<name>/videos URL."""
        user_id = self._match_id(url)
        videos_url = f'https://www.youtube.com/user/{user_id}/videos'
        return self.url_result(videos_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 5689
b05654f0 5690
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolve the `:ytfav` keyword (and its spelling variants) to the `LL` playlist."""

    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed playlist URL for list=LL."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
5708
5709
class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor):
    """Build a playlist out of the notification menu, following continuations page by page."""

    IE_NAME = 'youtube:notif'
    IE_DESC = 'YouTube notifications; ":ytnotif" keyword (requires cookies)'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytnotif(?:ication)?s?'
    _TESTS = [{
        'url': ':ytnotif',
        'only_matching': True,
    }, {
        'url': ':ytnotifications',
        'only_matching': True,
    }]

    def _extract_notification_menu(self, response, continuation_list):
        """Yield one entry per usable notification found in `response`.

        `continuation_list` is a one-element list used as an out-parameter: its slot is
        cleared, then set to the continuation renderer if the response contains one.
        """
        # First page: items of the popup menu; later pages: appended continuation items
        popup_items_path = (
            'actions', 0, 'openPopupAction', 'popup', 'multiPageMenuRenderer', 'sections', 0,
            'multiPageMenuNotificationSectionRenderer', 'items')
        appended_items_path = ('actions', 0, 'appendContinuationItemsAction', 'continuationItems')
        items = traverse_obj(response, popup_items_path, appended_items_path, expected_type=list) or []

        continuation_list[0] = None
        for item in items:
            entry = self._extract_notification_renderer(item.get('notificationRenderer'))
            if entry:
                yield entry
            next_page = item.get('continuationItemRenderer')
            if next_page:
                continuation_list[0] = next_page

    def _extract_notification_renderer(self, notification):
        """Convert one notification renderer into a `url` result dict.

        Returns None when the notification points to neither a video nor a
        community post with both a channel id and a post id.
        """
        video_id = traverse_obj(
            notification, ('navigationEndpoint', 'watchEndpoint', 'videoId'), expected_type=str)
        channel_id = None
        if video_id:
            url = f'https://www.youtube.com/watch?v={video_id}'
        else:
            browse_ep = traverse_obj(
                notification, ('navigationEndpoint', 'browseEndpoint'), expected_type=dict)
            channel_id = traverse_obj(browse_ep, 'browseId', expected_type=str)
            post_id = self._search_regex(
                r'/post/(.+)', traverse_obj(browse_ep, 'canonicalBaseUrl', expected_type=str),
                'post id', default=None)
            if not (channel_id and post_id):
                return
            # The direct /post url redirects to this in the browser
            url = f'https://www.youtube.com/channel/{channel_id}/community?lb={post_id}'

        channel = traverse_obj(
            notification,
            ('contextualMenu', 'menuRenderer', 'items', 1, 'menuServiceItemRenderer', 'text', 'runs', 1, 'text'),
            expected_type=str)

        notification_title = self._get_text(notification, 'shortMessage')
        if notification_title:
            notification_title = notification_title.replace('\xad', '')  # remove soft hyphens
        # TODO: handle recommended videos
        title = self._search_regex(
            rf'{re.escape(channel or "")}[^:]+: (.+)', notification_title,
            'video title', default=None)

        # The date is derived from the "sent" time text and only when explicitly requested
        upload_date = None
        if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE.ie_key()):
            sent_time = self._extract_time_text(notification, 'sentTimeText')[0]
            upload_date = strftime_or_none(sent_time, '%Y%m%d')

        target_ie = YoutubeIE if video_id else YoutubeTabIE
        return {
            '_type': 'url',
            'url': url,
            'ie_key': target_ie.ie_key(),
            'video_id': video_id,
            'title': title,
            'channel_id': channel_id,
            'channel': channel,
            'thumbnails': self._extract_thumbnails(notification, 'videoThumbnail'),
            'upload_date': upload_date,
        }

    def _notification_menu_entries(self, ytcfg):
        """Yield entries from every notification page until no continuation is left."""
        continuation_list = [None]
        response = None
        ctoken_path = (0, 'continuationEndpoint', 'getNotificationMenuEndpoint', 'ctoken')
        for page in itertools.count(1):
            ctoken = traverse_obj(continuation_list, ctoken_path, expected_type=str)
            query = {'ctoken': ctoken} if ctoken else {}
            # Visitor data is taken from the previous page's response (None on the first page)
            headers = self.generate_api_headers(
                ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
            response = self._extract_response(
                item_id=f'page {page}', query=query, ytcfg=ytcfg,
                ep='notification/get_notification_menu', check_get_keys='actions',
                headers=headers)
            yield from self._extract_notification_menu(response, continuation_list)
            if not continuation_list[0]:
                return

    def _real_extract(self, url):
        """Return the notifications as a playlist whose id and title are 'notifications'."""
        display_id = 'notifications'
        if self.skip_webpage:
            ytcfg = {}
        else:
            ytcfg = self._download_ytcfg('web', display_id)
        self._report_playlist_authcheck(ytcfg)
        entries = self._notification_menu_entries(ytcfg)
        return self.playlist_result(entries, display_id, display_id)
5799
5800
class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor for the `ytsearch` key; all behaviour comes from the base classes."""

    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'  # Videos only
    _TESTS = [{
        'url': 'ytsearch5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }]
b05654f0 5814
a61fd4cf 5815
class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor for the `ytsearchdate` key; differs from YoutubeSearchIE only in its constants."""

    IE_NAME = f'{YoutubeSearchIE.IE_NAME}:date'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAISAhAB'  # Videos only, sorted by date
    _TESTS = [{
        'url': 'ytsearchdate5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }]
75dff0ee 5829
c9ae7b95 5830
class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Extract results from a youtube.com /results or /search URL, forwarding its `sp` parameter."""

    IE_NAME = f'{YoutubeSearchIE.IE_NAME}_url'
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=%23cats',
        'playlist_mincount': 1,
        'info_dict': {
            'id': '#cats',
            'title': '#cats',
            # The test suite does not have support for nested playlists
            # 'entries': [{
            #     'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
            #     'title': '#cats',
            # }],
        },
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the search results as a playlist whose id and title are the query text."""
        query_string = parse_qs(url)
        # `search_query` takes precedence over `q` when both are present
        search_terms = query_string.get('search_query') or query_string.get('q')
        query = search_terms[0]
        search_params = query_string.get('sp', (None,))[0]
        entries = self._search_results(query, search_params)
        return self.playlist_result(entries, query, query)
3462ffa8 5870
5871
class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Extract music.youtube.com search URLs; a section is chosen via `sp=` or the URL fragment."""

    IE_NAME = 'youtube:music:search_url'
    IE_DESC = 'YouTube music search URLs with selectable sections, e.g. #songs'
    _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://music.youtube.com/search?q=royalty+free+music',
        'playlist_count': 16,
        'info_dict': {
            'id': 'royalty free music',
            'title': 'royalty free music',
        }
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - songs',
            'title': 'royalty free music - songs',
        },
        'params': {'extract_flat': 'in_playlist'}
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - community playlists',
            'title': 'royalty free music - community playlists',
        },
        'params': {'extract_flat': 'in_playlist'}
    }]

    # Section name (as written in the URL fragment) -> value of the `sp` search parameter
    _SECTIONS = {
        'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
        'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
        'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
        'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
        'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
        'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
    }

    def _real_extract(self, url):
        """Return the results as a playlist titled '<query>' or '<query> - <section>'."""
        query_string = parse_qs(url)
        search_terms = query_string.get('search_query') or query_string.get('q')
        query = search_terms[0]
        params = query_string.get('sp', (None,))[0]

        if params:
            # Explicit `sp`: label it with the known section name, else with the raw value
            section = params
            for name, value in self._SECTIONS.items():
                if value == params:
                    section = name
                    break
        else:
            # No `sp`: use the text between the first and second '#' as the section name
            url_parts = url.split('#')
            fragment = url_parts[1] if len(url_parts) > 1 else ''
            section = urllib.parse.unquote_plus(fragment).lower()
            params = self._SECTIONS.get(section)
            if not params:
                section = None

        title = join_nonempty(query, section, delim=' - ')
        entries = self._search_results(query, params, default_client='web_music')
        return self.playlist_result(entries, title, title)
16aa9ea4 5923
5924
class YoutubeFeedsInfoExtractor(InfoExtractor):
    """
    Common base for the feed keyword extractors.
    A subclass overrides _FEED_NAME; both IE_NAME and the /feed/ URL are built from it.
    """
    _FEED_NAME = 'feeds'
    _LOGIN_REQUIRED = True

    @classproperty
    def IE_NAME(self):
        feed_name = self._FEED_NAME
        return f'youtube:{feed_name}'

    def _real_initialize(self):
        # Reuse the login check from YoutubeBaseInfoExtractor, which is not a base of this class
        YoutubeBaseInfoExtractor._check_login_required(self)

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the /feed/<_FEED_NAME> URL."""
        feed_url = f'https://www.youtube.com/feed/{self._FEED_NAME}'
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
5943
5944
class YoutubeWatchLaterIE(InfoExtractor):
    # Keyword extractor: ":ytwatchlater" is forwarded to the "WL" playlist URL
    _VALID_URL = r':ytwatchlater'
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    _TESTS = [
        {'url': ':ytwatchlater', 'only_matching': True},
    ]

    def _real_extract(self, url):
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 5957
5958
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    # Matches the bare youtube.com front page as well as the
    # ":ytrec" / ":ytrecommended" keywords
    _FEED_NAME = 'recommended'
    _LOGIN_REQUIRED = False
    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [
        {'url': test_url, 'only_matching': True}
        for test_url in (':ytrec', ':ytrecommended', 'https://youtube.com')
    ]
1ed5b5c9 5974
1ed5b5c9 5975
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    # Keyword extractor for the "subscriptions" feed; accepts ":ytsub",
    # ":ytsubs", ":ytsubscription" and ":ytsubscriptions"
    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [
        {'url': test_url, 'only_matching': True}
        for test_url in (':ytsubs', ':ytsubscriptions')
    ]
1ed5b5c9 5987
1ed5b5c9 5988
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    # Keyword extractor for the "history" feed; accepts ":ythis" and ":ythistory"
    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [
        {'url': ':ythistory', 'only_matching': True},
    ]
1ed5b5c9
JMF
5997
5998
class YoutubeStoriesIE(InfoExtractor):
    # "ytstories:UC<rest>" is rewritten to the playlist "RLTD<rest>" and
    # handed over to YoutubeTabIE
    IE_NAME = 'youtube:stories'
    IE_DESC = 'YouTube channel stories; "ytstories:" prefix'
    _VALID_URL = r'ytstories:UC(?P<id>[A-Za-z0-9_-]{21}[AQgw])$'
    _TESTS = [
        {'url': 'ytstories:UCwFCb4jeqaKWnciAYM-ZVHg', 'only_matching': True},
    ]

    def _real_extract(self, url):
        # The "id" group holds the channel ID without its leading "UC"
        channel_suffix = self._match_id(url)
        playlist_id = f'RLTD{channel_suffix}'
        stories_url = f'https://www.youtube.com/playlist?list={playlist_id}&playnext=1'
        return self.url_result(stories_url, ie=YoutubeTabIE, video_id=playlist_id)
6013
6014
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    # Catches YouTube watch/attribution_link URLs that end right after a
    # single non-video parameter (or after "watch?" itself), i.e. URLs with
    # no "v=" part, and reports a helpful error instead of extracting.
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """
        Always fail with a hint about quoting the URL.

        Raises:
            ExtractorError: unconditionally; marked as expected so that it
                is reported as a user error rather than a bug.
        """
        # The example commands name this program's executable (yt-dlp)
        # so that the suggestion can be pasted as-is.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)
772fd5cc
PH
6062
6063
class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
    # Resolves a /clip/<id> page to its underlying video and passes the
    # clip's start/end (taken from the page data) as section_start/section_end.
    IE_NAME = 'youtube:clip'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/(?P<id>[^/?#]+)'
    _TESTS = [{
        # FIXME: Other metadata should be extracted from the clip, not from the base video
        'url': 'https://www.youtube.com/clip/UgytZKpehg-hEMBSn3F4AaABCQ',
        'info_dict': {
            'id': 'UgytZKpehg-hEMBSn3F4AaABCQ',
            'ext': 'mp4',
            'section_start': 29.0,
            'section_end': 39.7,
            'duration': 10.7,
            'age_limit': 0,
            'availability': 'public',
            'categories': ['Gaming'],
            'channel': 'Scott The Woz',
            'channel_id': 'UC4rqhyiTs7XyuODcECvuiiQ',
            'channel_url': 'https://www.youtube.com/channel/UC4rqhyiTs7XyuODcECvuiiQ',
            'description': 'md5:7a4517a17ea9b4bd98996399d8bb36e7',
            'like_count': int,
            'playable_in_embed': True,
            'tags': 'count:17',
            'thumbnail': 'https://i.ytimg.com/vi_webp/ScPX26pdQik/maxresdefault.webp',
            'title': 'Mobile Games on Console - Scott The Woz',
            'upload_date': '20210920',
            'uploader': 'Scott The Woz',
            'uploader_id': 'scottthewoz',
            'uploader_url': 'http://www.youtube.com/user/scottthewoz',
            'view_count': int,
            'live_status': 'not_live',
            'channel_follower_count': int,
        }
    }]

    def _real_extract(self, url):
        """
        Return a url_transparent result pointing at the clip's base video.

        Raises:
            ExtractorError: if the page data contains no video ID, or no
                usable start/end time for the clip.
        """
        clip_id = self._match_id(url)
        _, data = self._extract_webpage(url, clip_id)

        video_id = traverse_obj(data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'))
        if not video_id:
            raise ExtractorError('Unable to find video ID')

        # get_all=False returns only the first match, or None when the
        # expected panel is absent; fall back to an empty dict so that the
        # lookups below fail in a controlled way.
        clip_data = traverse_obj(data, (
            'engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'clipSectionRenderer',
            'contents', ..., 'clipAttributionRenderer', 'onScrubExit', 'commandExecutorCommand', 'commands', ...,
            'openPopupAction', 'popup', 'notificationActionRenderer', 'actionButton', 'buttonRenderer', 'command',
            'commandExecutorCommand', 'commands', ..., 'loopCommand'), get_all=False) or {}

        start_ms = int_or_none(clip_data.get('startTimeMs'))
        end_ms = int_or_none(clip_data.get('endTimeMs'))
        if start_ms is None or end_ms is None:
            # Without both bounds the result would not describe the clip at all
            raise ExtractorError('Unable to find clip start/end time')

        return {
            '_type': 'url_transparent',
            'url': f'https://www.youtube.com/watch?v={video_id}',
            'ie_key': YoutubeIE.ie_key(),
            'id': clip_id,
            'section_start': start_ms / 1000,
            'section_end': end_ms / 1000,
        }
3cd786db 6120
6121
772fd5cc
PH
class YoutubeTruncatedIDIE(InfoExtractor):
    # Catches watch URLs whose "v" value is only 1 to 10 characters long
    # and ends the URL, and reports them as truncated.
    IE_DESC = False  # Do not list
    IE_NAME = 'youtube:truncated_id'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [
        {'url': 'https://www.youtube.com/watch?v=N_708QY7Ob', 'only_matching': True},
    ]

    def _real_extract(self, url):
        # Never extracts anything: always raises an expected (user-facing) error
        partial_id = self._match_id(url)
        message = f'Incomplete YouTube ID {partial_id}. URL {url} looks truncated.'
        raise ExtractorError(message, expected=True)