]>
Commit | Line | Data |
---|---|---|
db137867 AC |
1 | /* |
2 | * ircd-ratbox: A slightly useful ircd. | |
3 | * epoll.c: Linux epoll compatible network routines. | |
4 | * | |
5 | * Copyright (C) 1990 Jarkko Oikarinen and University of Oulu, Co Center | |
6 | * Copyright (C) 1996-2002 Hybrid Development Team | |
7 | * Copyright (C) 2001 Adrian Chadd <adrian@creative.net.au> | |
8 | * Copyright (C) 2002-2005 ircd-ratbox development team | |
9 | * Copyright (C) 2002 Aaron Sethman <androsyn@ratbox.org> | |
10 | * | |
11 | * This program is free software; you can redistribute it and/or modify | |
12 | * it under the terms of the GNU General Public License as published by | |
13 | * the Free Software Foundation; either version 2 of the License, or | |
14 | * (at your option) any later version. | |
15 | * | |
16 | * This program is distributed in the hope that it will be useful, | |
17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
19 | * GNU General Public License for more details. | |
20 | * | |
21 | * You should have received a copy of the GNU General Public License | |
22 | * along with this program; if not, write to the Free Software | |
23 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 | |
24 | * USA | |
25 | * | |
030272f3 | 26 | * $Id: epoll.c 26294 2008-12-13 03:01:19Z androsyn $ |
db137867 AC |
27 | */ |
28 | #define _GNU_SOURCE 1 | |
29 | ||
30 | #include <libratbox_config.h> | |
31 | #include <ratbox_lib.h> | |
32 | #include <commio-int.h> | |
33 | #include <event-int.h> | |
db137867 AC |
34 | #if defined(HAVE_EPOLL_CTL) && (HAVE_SYS_EPOLL_H) |
35 | #define USING_EPOLL | |
36 | #include <fcntl.h> | |
37 | #include <sys/epoll.h> | |
38 | ||
39 | #if defined(HAVE_SIGNALFD) && (HAVE_SYS_SIGNALFD_H) && (USE_TIMER_CREATE) && (HAVE_SYS_UIO_H) | |
40 | #include <signal.h> | |
41 | #include <sys/signalfd.h> | |
42 | #include <sys/uio.h> | |
43 | #define EPOLL_SCHED_EVENT 1 | |
44 | #endif | |
45 | ||
3202e249 VY |
46 | #if defined(USE_TIMERFD_CREATE) |
47 | #include <sys/timerfd.h> | |
48 | #endif | |
49 | ||
db137867 AC |
50 | #define RTSIGNAL SIGRTMIN |
/* Bookkeeping for the single epoll instance used by this backend. */
struct epoll_info
{
	int ep;				/* descriptor returned by epoll_create() */
	struct epoll_event *pfd;	/* buffer handed to epoll_wait() for ready events */
	int pfd_size;			/* number of events pfd has room for */
};
57 | ||
/* Backend state; allocated by rb_init_netio_epoll(). */
static struct epoll_info *ep_info;
/* Timer-event probe result: 0 = not probed yet, 1 = usable, -1 = unusable. */
static int can_do_event;
/* Set to 1 once the timerfd_create() probe has succeeded. */
static int can_do_timerfd;
db137867 AC |
61 | |
62 | /* | |
63 | * rb_init_netio | |
64 | * | |
65 | * This is a needed exported function which will be called to initialise | |
66 | * the network loop code. | |
67 | */ | |
68 | int | |
69 | rb_init_netio_epoll(void) | |
70 | { | |
3202e249 | 71 | can_do_event = 0; /* shut up gcc */ |
030272f3 | 72 | can_do_timerfd = 0; |
db137867 AC |
73 | ep_info = rb_malloc(sizeof(struct epoll_info)); |
74 | ep_info->pfd_size = getdtablesize(); | |
75 | ep_info->ep = epoll_create(ep_info->pfd_size); | |
76 | if(ep_info->ep < 0) | |
77 | { | |
78 | return -1; | |
79 | } | |
80 | rb_open(ep_info->ep, RB_FD_UNKNOWN, "epoll file descriptor"); | |
81 | ep_info->pfd = rb_malloc(sizeof(struct epoll_event) * ep_info->pfd_size); | |
82 | ||
83 | return 0; | |
84 | } | |
85 | ||
86 | int | |
3202e249 | 87 | rb_setup_fd_epoll(rb_fde_t *F) |
db137867 AC |
88 | { |
89 | return 0; | |
90 | } | |
91 | ||
92 | ||
93 | /* | |
94 | * rb_setselect | |
95 | * | |
96 | * This is a needed exported function which will be called to register | |
97 | * and deregister interest in a pending IO state for a given FD. | |
98 | */ | |
99 | void | |
3202e249 | 100 | rb_setselect_epoll(rb_fde_t *F, unsigned int type, PF * handler, void *client_data) |
db137867 AC |
101 | { |
102 | struct epoll_event ep_event; | |
103 | int old_flags = F->pflags; | |
104 | int op = -1; | |
105 | ||
106 | lrb_assert(IsFDOpen(F)); | |
107 | ||
108 | /* Update the list, even though we're not using it .. */ | |
109 | if(type & RB_SELECT_READ) | |
110 | { | |
111 | if(handler != NULL) | |
112 | F->pflags |= EPOLLIN; | |
113 | else | |
114 | F->pflags &= ~EPOLLIN; | |
115 | F->read_handler = handler; | |
116 | F->read_data = client_data; | |
117 | } | |
118 | ||
119 | if(type & RB_SELECT_WRITE) | |
120 | { | |
121 | if(handler != NULL) | |
122 | F->pflags |= EPOLLOUT; | |
123 | else | |
124 | F->pflags &= ~EPOLLOUT; | |
125 | F->write_handler = handler; | |
126 | F->write_data = client_data; | |
127 | } | |
128 | ||
129 | if(old_flags == 0 && F->pflags == 0) | |
130 | return; | |
131 | else if(F->pflags <= 0) | |
132 | op = EPOLL_CTL_DEL; | |
133 | else if(old_flags == 0 && F->pflags > 0) | |
134 | op = EPOLL_CTL_ADD; | |
135 | else if(F->pflags != old_flags) | |
136 | op = EPOLL_CTL_MOD; | |
137 | ||
138 | if(op == -1) | |
139 | return; | |
140 | ||
141 | ep_event.events = F->pflags; | |
142 | ep_event.data.ptr = F; | |
143 | ||
144 | if(op == EPOLL_CTL_ADD || op == EPOLL_CTL_MOD) | |
145 | ep_event.events |= EPOLLET; | |
146 | ||
147 | if(epoll_ctl(ep_info->ep, op, F->fd, &ep_event) != 0) | |
148 | { | |
149 | rb_lib_log("rb_setselect_epoll(): epoll_ctl failed: %s", strerror(errno)); | |
150 | abort(); | |
151 | } | |
152 | ||
153 | ||
154 | } | |
155 | ||
156 | /* | |
157 | * rb_select | |
158 | * | |
159 | * Called to do the new-style IO, courtesy of squid (like most of this | |
160 | * new IO code). This routine handles the stuff we've hidden in | |
161 | * rb_setselect and fd_table[] and calls callbacks for IO ready | |
162 | * events. | |
163 | */ | |
164 | ||
165 | int | |
166 | rb_select_epoll(long delay) | |
167 | { | |
168 | int num, i, flags, old_flags, op; | |
169 | struct epoll_event ep_event; | |
170 | int o_errno; | |
171 | void *data; | |
172 | ||
173 | num = epoll_wait(ep_info->ep, ep_info->pfd, ep_info->pfd_size, delay); | |
174 | ||
175 | /* save errno as rb_set_time() will likely clobber it */ | |
176 | o_errno = errno; | |
177 | rb_set_time(); | |
178 | errno = o_errno; | |
179 | ||
180 | if(num < 0 && !rb_ignore_errno(o_errno)) | |
181 | return RB_ERROR; | |
182 | ||
183 | if(num <= 0) | |
184 | return RB_OK; | |
185 | ||
3202e249 | 186 | for(i = 0; i < num; i++) |
db137867 AC |
187 | { |
188 | PF *hdl; | |
189 | rb_fde_t *F = ep_info->pfd[i].data.ptr; | |
190 | old_flags = F->pflags; | |
191 | if(ep_info->pfd[i].events & (EPOLLIN | EPOLLHUP | EPOLLERR)) | |
192 | { | |
193 | hdl = F->read_handler; | |
194 | data = F->read_data; | |
195 | F->read_handler = NULL; | |
196 | F->read_data = NULL; | |
197 | if(hdl) | |
198 | { | |
199 | hdl(F, data); | |
200 | } | |
201 | } | |
202 | ||
203 | if(!IsFDOpen(F)) | |
204 | continue; | |
205 | if(ep_info->pfd[i].events & (EPOLLOUT | EPOLLHUP | EPOLLERR)) | |
206 | { | |
207 | hdl = F->write_handler; | |
208 | data = F->write_data; | |
209 | F->write_handler = NULL; | |
210 | F->write_data = NULL; | |
211 | ||
212 | if(hdl) | |
213 | { | |
214 | hdl(F, data); | |
215 | } | |
216 | } | |
217 | ||
218 | if(!IsFDOpen(F)) | |
219 | continue; | |
220 | ||
221 | flags = 0; | |
222 | ||
223 | if(F->read_handler != NULL) | |
224 | flags |= EPOLLIN; | |
225 | if(F->write_handler != NULL) | |
226 | flags |= EPOLLOUT; | |
227 | ||
228 | if(old_flags != flags) | |
229 | { | |
230 | if(flags == 0) | |
231 | op = EPOLL_CTL_DEL; | |
232 | else | |
233 | op = EPOLL_CTL_MOD; | |
234 | F->pflags = ep_event.events = flags; | |
235 | ep_event.data.ptr = F; | |
236 | if(op == EPOLL_CTL_MOD || op == EPOLL_CTL_ADD) | |
237 | ep_event.events |= EPOLLET; | |
238 | ||
239 | if(epoll_ctl(ep_info->ep, op, F->fd, &ep_event) != 0) | |
240 | { | |
241 | rb_lib_log("rb_select_epoll(): epoll_ctl failed: %s", | |
242 | strerror(errno)); | |
243 | } | |
244 | } | |
245 | ||
246 | } | |
247 | return RB_OK; | |
248 | } | |
249 | ||
3202e249 | 250 | #ifdef EPOLL_SCHED_EVENT |
db137867 AC |
251 | int |
252 | rb_epoll_supports_event(void) | |
253 | { | |
254 | /* try to detect at runtime if everything we need actually works */ | |
255 | timer_t timer; | |
256 | struct sigevent ev; | |
257 | int fd; | |
258 | sigset_t set; | |
3202e249 | 259 | |
db137867 AC |
260 | if(can_do_event == 1) |
261 | return 1; | |
262 | if(can_do_event == -1) | |
263 | return 0; | |
3202e249 VY |
264 | |
265 | #ifdef USE_TIMERFD_CREATE | |
266 | if((fd = timerfd_create(CLOCK_REALTIME, 0)) >= 0) | |
267 | { | |
268 | close(fd); | |
269 | can_do_event = 1; | |
270 | can_do_timerfd = 1; | |
030272f3 | 271 | return 1; |
3202e249 VY |
272 | } |
273 | #endif | |
274 | ||
db137867 AC |
275 | ev.sigev_signo = SIGVTALRM; |
276 | ev.sigev_notify = SIGEV_SIGNAL; | |
277 | if(timer_create(CLOCK_REALTIME, &ev, &timer) != 0) | |
278 | { | |
279 | can_do_event = -1; | |
280 | return 0; | |
281 | } | |
282 | timer_delete(timer); | |
283 | sigemptyset(&set); | |
284 | fd = signalfd(-1, &set, 0); | |
3202e249 | 285 | if(fd < 0) |
db137867 AC |
286 | { |
287 | can_do_event = -1; | |
288 | return 0; | |
289 | } | |
290 | close(fd); | |
291 | can_do_event = 1; | |
292 | return 1; | |
293 | } | |
294 | ||
295 | ||
/*
 * bleh..work around a glibc header bug on 32bit systems
 *
 * Private, fixed-width description of the records read from the signalfd
 * in signalfd_handler().  The field order appears to follow the kernel's
 * struct signalfd_siginfo (see signalfd(2)); only svptr is consumed here.
 */
struct our_signalfd_siginfo
{
	uint32_t signo;
	int32_t err;
	int32_t code;
	uint32_t pid;
	uint32_t uid;
	int32_t fd;
	uint32_t tid;
	uint32_t band;
	uint32_t overrun;
	uint32_t trapno;
	int32_t status;
	int32_t svint;
	uint64_t svptr;		/* carries the struct ev_entry pointer for the timer */
	uint64_t utime;
	uint64_t stime;
	uint64_t addr;
	uint8_t pad[48];	/* pads the record out to 128 bytes */
};
317 | ||
318 | ||
319 | #define SIGFDIOV_COUNT 16 | |
3202e249 | 320 | static void |
db137867 AC |
321 | signalfd_handler(rb_fde_t *F, void *data) |
322 | { | |
323 | static struct our_signalfd_siginfo fdsig[SIGFDIOV_COUNT]; | |
324 | static struct iovec iov[SIGFDIOV_COUNT]; | |
325 | struct ev_entry *ev; | |
326 | int ret, x; | |
3202e249 | 327 | |
db137867 AC |
328 | for(x = 0; x < SIGFDIOV_COUNT; x++) |
329 | { | |
330 | iov[x].iov_base = &fdsig[x]; | |
331 | iov[x].iov_len = sizeof(struct our_signalfd_siginfo); | |
332 | } | |
333 | ||
334 | while(1) | |
335 | { | |
336 | ret = readv(rb_get_fd(F), iov, SIGFDIOV_COUNT); | |
030272f3 | 337 | |
db137867 AC |
338 | if(ret == 0 || (ret < 0 && !rb_ignore_errno(errno))) |
339 | { | |
340 | rb_close(F); | |
341 | rb_epoll_init_event(); | |
342 | return; | |
343 | } | |
3202e249 VY |
344 | |
345 | if(ret < 0) | |
db137867 AC |
346 | { |
347 | rb_setselect(F, RB_SELECT_READ, signalfd_handler, NULL); | |
348 | return; | |
349 | } | |
3202e249 | 350 | for(x = 0; x < ret / (int)sizeof(struct our_signalfd_siginfo); x++) |
db137867 | 351 | { |
030272f3 VY |
352 | #if __WORDSIZE == 32 && defined(__sparc__) |
353 | uint32_t *q = (uint32_t *)&fdsig[x].svptr; | |
354 | ev = (struct ev_entry *)q[0]; | |
355 | #else | |
356 | ev = (struct ev_entry *)(uintptr_t)(fdsig[x].svptr); | |
357 | ||
358 | #endif | |
db137867 AC |
359 | if(ev == NULL) |
360 | continue; | |
361 | rb_run_event(ev); | |
362 | } | |
363 | } | |
364 | } | |
365 | ||
366 | void | |
367 | rb_epoll_init_event(void) | |
368 | { | |
3202e249 | 369 | |
db137867 AC |
370 | sigset_t ss; |
371 | rb_fde_t *F; | |
372 | int sfd; | |
3202e249 VY |
373 | rb_epoll_supports_event(); |
374 | if(!can_do_timerfd) | |
375 | { | |
376 | sigemptyset(&ss); | |
377 | sigaddset(&ss, RTSIGNAL); | |
378 | sigprocmask(SIG_BLOCK, &ss, 0); | |
379 | sigemptyset(&ss); | |
380 | sigaddset(&ss, RTSIGNAL); | |
381 | sfd = signalfd(-1, &ss, 0); | |
382 | if(sfd == -1) | |
383 | { | |
384 | can_do_event = -1; | |
385 | return; | |
386 | } | |
387 | F = rb_open(sfd, RB_FD_UNKNOWN, "signalfd"); | |
388 | rb_set_nb(F); | |
389 | signalfd_handler(F, NULL); | |
db137867 | 390 | } |
db137867 AC |
391 | } |
392 | ||
3202e249 VY |
393 | static int |
394 | rb_epoll_sched_event_signalfd(struct ev_entry *event, int when) | |
db137867 AC |
395 | { |
396 | timer_t *id; | |
397 | struct sigevent ev; | |
398 | struct itimerspec ts; | |
399 | ||
400 | memset(&ev, 0, sizeof(&ev)); | |
401 | event->comm_ptr = rb_malloc(sizeof(timer_t)); | |
402 | id = event->comm_ptr; | |
403 | ev.sigev_notify = SIGEV_SIGNAL; | |
404 | ev.sigev_signo = RTSIGNAL; | |
405 | ev.sigev_value.sival_ptr = event; | |
406 | ||
3202e249 | 407 | if(timer_create(CLOCK_REALTIME, &ev, id) < 0) |
db137867 AC |
408 | { |
409 | rb_lib_log("timer_create: %s\n", strerror(errno)); | |
410 | return 0; | |
411 | } | |
412 | memset(&ts, 0, sizeof(ts)); | |
413 | ts.it_value.tv_sec = when; | |
414 | ts.it_value.tv_nsec = 0; | |
415 | if(event->frequency != 0) | |
416 | ts.it_interval = ts.it_value; | |
3202e249 VY |
417 | |
418 | if(timer_settime(*id, 0, &ts, NULL) < 0) | |
db137867 AC |
419 | { |
420 | rb_lib_log("timer_settime: %s\n", strerror(errno)); | |
421 | return 0; | |
422 | } | |
423 | return 1; | |
424 | } | |
425 | ||
3202e249 VY |
426 | #ifdef USE_TIMERFD_CREATE |
427 | static void | |
428 | rb_read_timerfd(rb_fde_t *F, void *data) | |
429 | { | |
430 | struct ev_entry *event = (struct ev_entry *)data; | |
431 | int retlen; | |
432 | uint64_t count; | |
433 | ||
434 | if(event == NULL) | |
435 | { | |
436 | rb_close(F); | |
437 | return; | |
438 | } | |
439 | ||
440 | retlen = rb_read(F, &count, sizeof(count)); | |
030272f3 | 441 | |
3202e249 VY |
442 | if(retlen == 0 || (retlen < 0 && !rb_ignore_errno(errno))) |
443 | { | |
444 | rb_close(F); | |
445 | rb_lib_log("rb_read_timerfd: timerfd[%s] closed on error: %s", event->name, | |
446 | strerror(errno)); | |
447 | return; | |
448 | } | |
449 | rb_setselect(F, RB_SELECT_READ, rb_read_timerfd, event); | |
450 | rb_run_event(event); | |
451 | } | |
452 | ||
453 | ||
454 | static int | |
455 | rb_epoll_sched_event_timerfd(struct ev_entry *event, int when) | |
456 | { | |
457 | struct itimerspec ts; | |
458 | static char buf[FD_DESC_SZ + 8]; | |
459 | int fd; | |
460 | rb_fde_t *F; | |
461 | ||
462 | if((fd = timerfd_create(CLOCK_REALTIME, 0)) < 0) | |
463 | { | |
464 | rb_lib_log("timerfd_create: %s\n", strerror(errno)); | |
465 | return 0; | |
466 | } | |
467 | ||
468 | memset(&ts, 0, sizeof(ts)); | |
469 | ts.it_value.tv_sec = when; | |
470 | ts.it_value.tv_nsec = 0; | |
471 | if(event->frequency != 0) | |
472 | ts.it_interval = ts.it_value; | |
473 | ||
474 | if(timerfd_settime(fd, 0, &ts, NULL) < 0) | |
475 | { | |
476 | rb_lib_log("timerfd_settime: %s\n", strerror(errno)); | |
477 | close(fd); | |
478 | return 0; | |
479 | } | |
480 | rb_snprintf(buf, sizeof(buf), "timerfd: %s", event->name); | |
481 | F = rb_open(fd, RB_FD_UNKNOWN, buf); | |
482 | rb_set_nb(F); | |
483 | event->comm_ptr = F; | |
484 | rb_setselect(F, RB_SELECT_READ, rb_read_timerfd, event); | |
485 | return 1; | |
486 | } | |
487 | #endif | |
488 | ||
489 | ||
490 | ||
/*
 * rb_epoll_sched_event
 *
 * Schedules `event` to fire after `when` seconds.  Uses the timerfd
 * implementation when it is compiled in and the runtime probe found it
 * usable, and the POSIX timer + signalfd implementation otherwise.
 *
 * Returns whatever the chosen implementation returns (1 on success,
 * 0 on failure).
 */
int
rb_epoll_sched_event(struct ev_entry *event, int when)
{
#ifdef USE_TIMERFD_CREATE
	if(can_do_timerfd)
		return rb_epoll_sched_event_timerfd(event, when);
#endif
	return rb_epoll_sched_event_signalfd(event, when);
}
502 | ||
db137867 AC |
503 | void |
504 | rb_epoll_unsched_event(struct ev_entry *event) | |
505 | { | |
3202e249 VY |
506 | #ifdef USE_TIMERFD_CREATE |
507 | if(can_do_timerfd) | |
508 | { | |
509 | rb_close((rb_fde_t *)event->comm_ptr); | |
510 | event->comm_ptr = NULL; | |
511 | return; | |
512 | } | |
513 | #endif | |
514 | timer_delete(*((timer_t *) event->comm_ptr)); | |
db137867 AC |
515 | rb_free(event->comm_ptr); |
516 | event->comm_ptr = NULL; | |
517 | } | |
518 | #endif /* EPOLL_SCHED_EVENT */ | |
519 | ||
520 | #else /* epoll not supported here */ | |
/*
 * rb_init_netio_epoll (epoll not available)
 *
 * Stub used when epoll support is not compiled in.  Reports failure the
 * same way as the other stubs in this file: sets errno to ENOSYS and
 * returns -1.  The result is still non-zero, as it was when this returned
 * ENOSYS directly.
 */
int
rb_init_netio_epoll(void)
{
	errno = ENOSYS;
	return -1;
}
526 | ||
527 | void | |
3202e249 | 528 | rb_setselect_epoll(rb_fde_t *F, unsigned int type, PF * handler, void *client_data) |
db137867 AC |
529 | { |
530 | errno = ENOSYS; | |
531 | return; | |
532 | } | |
533 | ||
/*
 * rb_select_epoll (epoll not available)
 *
 * Stub: sets errno to ENOSYS and returns -1.
 */
int
rb_select_epoll(long delay)
{
	(void)delay;
	errno = ENOSYS;
	return -1;
}
540 | ||
541 | int | |
3202e249 | 542 | rb_setup_fd_epoll(rb_fde_t *F) |
db137867 AC |
543 | { |
544 | errno = ENOSYS; | |
545 | return -1; | |
546 | } | |
547 | ||
548 | ||
549 | #endif | |
550 | ||
551 | #if !defined(USING_EPOLL) || !defined(EPOLL_SCHED_EVENT) | |
3202e249 VY |
/*
 * rb_epoll_init_event (timer events not available)
 *
 * Stub: there is nothing to set up, so this does nothing.
 */
void
rb_epoll_init_event(void)
{
	/* intentionally empty */
}
557 | ||
/*
 * rb_epoll_sched_event (timer events not available)
 *
 * Stub: nothing is scheduled; sets errno to ENOSYS and returns -1.
 */
int
rb_epoll_sched_event(struct ev_entry *event, int when)
{
	(void)event;
	(void)when;
	errno = ENOSYS;
	return -1;
}
564 | ||
/*
 * rb_epoll_unsched_event (timer events not available)
 *
 * Stub: nothing can have been scheduled, so there is nothing to cancel.
 */
void
rb_epoll_unsched_event(struct ev_entry *event)
{
	(void)event;
}
570 | ||
/*
 * rb_epoll_supports_event (timer events not available)
 *
 * Stub: sets errno to ENOSYS and returns 0 (not supported).
 */
int
rb_epoll_supports_event(void)
{
	errno = ENOSYS;
	return 0;
}
577 | #endif /* !USING_EPOLL || !EPOLL_SCHED_EVENT */ |