]>
Commit | Line | Data |
---|---|---|
b57f37fb WP |
1 | /* |
2 | * ircd-ratbox: A slightly useful ircd. | |
3 | * epoll.c: Linux epoll compatible network routines. | |
4 | * | |
5 | * Copyright (C) 1990 Jarkko Oikarinen and University of Oulu, Co Center | |
6 | * Copyright (C) 1996-2002 Hybrid Development Team | |
7 | * Copyright (C) 2001 Adrian Chadd <adrian@creative.net.au> | |
8 | * Copyright (C) 2002-2005 ircd-ratbox development team | |
9 | * Copyright (C) 2002 Aaron Sethman <androsyn@ratbox.org> | |
10 | * | |
11 | * This program is free software; you can redistribute it and/or modify | |
12 | * it under the terms of the GNU General Public License as published by | |
13 | * the Free Software Foundation; either version 2 of the License, or | |
14 | * (at your option) any later version. | |
15 | * | |
16 | * This program is distributed in the hope that it will be useful, | |
17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
19 | * GNU General Public License for more details. | |
20 | * | |
21 | * You should have received a copy of the GNU General Public License | |
22 | * along with this program; if not, write to the Free Software | |
23 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 | |
24 | * USA | |
25 | * | |
94b4fbf9 | 26 | * $Id: epoll.c 26092 2008-09-19 15:13:52Z androsyn $ |
b57f37fb WP |
27 | */ |
28 | #define _GNU_SOURCE 1 | |
29 | ||
30 | #include <libratbox_config.h> | |
31 | #include <ratbox_lib.h> | |
32 | #include <commio-int.h> | |
33 | #include <event-int.h> | |
b57f37fb WP |
34 | #if defined(HAVE_EPOLL_CTL) && (HAVE_SYS_EPOLL_H) |
35 | #define USING_EPOLL | |
36 | #include <fcntl.h> | |
37 | #include <sys/epoll.h> | |
38 | ||
39 | #if defined(HAVE_SIGNALFD) && (HAVE_SYS_SIGNALFD_H) && (USE_TIMER_CREATE) && (HAVE_SYS_UIO_H) | |
40 | #include <signal.h> | |
41 | #include <sys/signalfd.h> | |
42 | #include <sys/uio.h> | |
43 | #define EPOLL_SCHED_EVENT 1 | |
44 | #endif | |
45 | ||
94b4fbf9 VY |
46 | #if defined(USE_TIMERFD_CREATE) |
47 | #include <sys/timerfd.h> | |
48 | #endif | |
49 | ||
b57f37fb WP |
50 | #define RTSIGNAL SIGRTMIN |
/* Bookkeeping for the single epoll instance used by this backend. */
struct epoll_info
{
	int ep;				/* descriptor returned by epoll_create() */
	struct epoll_event *pfd;	/* buffer epoll_wait() fills with ready events */
	int pfd_size;			/* number of entries in pfd; taken from getdtablesize() */
};

/* Allocated and filled in by rb_init_netio_epoll(). */
static struct epoll_info *ep_info;
/* Cached result of the timer-event probe: 0 = not probed, 1 = usable, -1 = unusable. */
static int can_do_event;
/* Set to 1 by the probe when timerfd_create() succeeded; selects the timerfd code paths. */
static int can_do_timerfd;
b57f37fb WP |
61 | |
62 | /* | |
63 | * rb_init_netio | |
64 | * | |
65 | * This is a needed exported function which will be called to initialise | |
66 | * the network loop code. | |
67 | */ | |
68 | int | |
69 | rb_init_netio_epoll(void) | |
70 | { | |
94b4fbf9 | 71 | can_do_event = 0; /* shut up gcc */ |
b57f37fb WP |
72 | ep_info = rb_malloc(sizeof(struct epoll_info)); |
73 | ep_info->pfd_size = getdtablesize(); | |
74 | ep_info->ep = epoll_create(ep_info->pfd_size); | |
75 | if(ep_info->ep < 0) | |
76 | { | |
77 | return -1; | |
78 | } | |
79 | rb_open(ep_info->ep, RB_FD_UNKNOWN, "epoll file descriptor"); | |
80 | ep_info->pfd = rb_malloc(sizeof(struct epoll_event) * ep_info->pfd_size); | |
81 | ||
82 | return 0; | |
83 | } | |
84 | ||
85 | int | |
94b4fbf9 | 86 | rb_setup_fd_epoll(rb_fde_t *F) |
b57f37fb WP |
87 | { |
88 | return 0; | |
89 | } | |
90 | ||
91 | ||
92 | /* | |
93 | * rb_setselect | |
94 | * | |
95 | * This is a needed exported function which will be called to register | |
96 | * and deregister interest in a pending IO state for a given FD. | |
97 | */ | |
98 | void | |
94b4fbf9 | 99 | rb_setselect_epoll(rb_fde_t *F, unsigned int type, PF * handler, void *client_data) |
b57f37fb WP |
100 | { |
101 | struct epoll_event ep_event; | |
102 | int old_flags = F->pflags; | |
103 | int op = -1; | |
104 | ||
105 | lrb_assert(IsFDOpen(F)); | |
106 | ||
107 | /* Update the list, even though we're not using it .. */ | |
108 | if(type & RB_SELECT_READ) | |
109 | { | |
110 | if(handler != NULL) | |
111 | F->pflags |= EPOLLIN; | |
112 | else | |
113 | F->pflags &= ~EPOLLIN; | |
114 | F->read_handler = handler; | |
115 | F->read_data = client_data; | |
116 | } | |
117 | ||
118 | if(type & RB_SELECT_WRITE) | |
119 | { | |
120 | if(handler != NULL) | |
121 | F->pflags |= EPOLLOUT; | |
122 | else | |
123 | F->pflags &= ~EPOLLOUT; | |
124 | F->write_handler = handler; | |
125 | F->write_data = client_data; | |
126 | } | |
127 | ||
128 | if(old_flags == 0 && F->pflags == 0) | |
129 | return; | |
130 | else if(F->pflags <= 0) | |
131 | op = EPOLL_CTL_DEL; | |
132 | else if(old_flags == 0 && F->pflags > 0) | |
133 | op = EPOLL_CTL_ADD; | |
134 | else if(F->pflags != old_flags) | |
135 | op = EPOLL_CTL_MOD; | |
136 | ||
137 | if(op == -1) | |
138 | return; | |
139 | ||
140 | ep_event.events = F->pflags; | |
141 | ep_event.data.ptr = F; | |
142 | ||
143 | if(op == EPOLL_CTL_ADD || op == EPOLL_CTL_MOD) | |
144 | ep_event.events |= EPOLLET; | |
145 | ||
146 | if(epoll_ctl(ep_info->ep, op, F->fd, &ep_event) != 0) | |
147 | { | |
148 | rb_lib_log("rb_setselect_epoll(): epoll_ctl failed: %s", strerror(errno)); | |
149 | abort(); | |
150 | } | |
151 | ||
152 | ||
153 | } | |
154 | ||
155 | /* | |
156 | * rb_select | |
157 | * | |
158 | * Called to do the new-style IO, courtesy of squid (like most of this | |
159 | * new IO code). This routine handles the stuff we've hidden in | |
160 | * rb_setselect and fd_table[] and calls callbacks for IO ready | |
161 | * events. | |
162 | */ | |
163 | ||
164 | int | |
165 | rb_select_epoll(long delay) | |
166 | { | |
167 | int num, i, flags, old_flags, op; | |
168 | struct epoll_event ep_event; | |
169 | int o_errno; | |
170 | void *data; | |
171 | ||
172 | num = epoll_wait(ep_info->ep, ep_info->pfd, ep_info->pfd_size, delay); | |
173 | ||
174 | /* save errno as rb_set_time() will likely clobber it */ | |
175 | o_errno = errno; | |
176 | rb_set_time(); | |
177 | errno = o_errno; | |
178 | ||
179 | if(num < 0 && !rb_ignore_errno(o_errno)) | |
180 | return RB_ERROR; | |
181 | ||
182 | if(num <= 0) | |
183 | return RB_OK; | |
184 | ||
94b4fbf9 | 185 | for(i = 0; i < num; i++) |
b57f37fb WP |
186 | { |
187 | PF *hdl; | |
188 | rb_fde_t *F = ep_info->pfd[i].data.ptr; | |
189 | old_flags = F->pflags; | |
190 | if(ep_info->pfd[i].events & (EPOLLIN | EPOLLHUP | EPOLLERR)) | |
191 | { | |
192 | hdl = F->read_handler; | |
193 | data = F->read_data; | |
194 | F->read_handler = NULL; | |
195 | F->read_data = NULL; | |
196 | if(hdl) | |
197 | { | |
198 | hdl(F, data); | |
199 | } | |
200 | } | |
201 | ||
202 | if(!IsFDOpen(F)) | |
203 | continue; | |
204 | if(ep_info->pfd[i].events & (EPOLLOUT | EPOLLHUP | EPOLLERR)) | |
205 | { | |
206 | hdl = F->write_handler; | |
207 | data = F->write_data; | |
208 | F->write_handler = NULL; | |
209 | F->write_data = NULL; | |
210 | ||
211 | if(hdl) | |
212 | { | |
213 | hdl(F, data); | |
214 | } | |
215 | } | |
216 | ||
217 | if(!IsFDOpen(F)) | |
218 | continue; | |
219 | ||
220 | flags = 0; | |
221 | ||
222 | if(F->read_handler != NULL) | |
223 | flags |= EPOLLIN; | |
224 | if(F->write_handler != NULL) | |
225 | flags |= EPOLLOUT; | |
226 | ||
227 | if(old_flags != flags) | |
228 | { | |
229 | if(flags == 0) | |
230 | op = EPOLL_CTL_DEL; | |
231 | else | |
232 | op = EPOLL_CTL_MOD; | |
233 | F->pflags = ep_event.events = flags; | |
234 | ep_event.data.ptr = F; | |
235 | if(op == EPOLL_CTL_MOD || op == EPOLL_CTL_ADD) | |
236 | ep_event.events |= EPOLLET; | |
237 | ||
238 | if(epoll_ctl(ep_info->ep, op, F->fd, &ep_event) != 0) | |
239 | { | |
240 | rb_lib_log("rb_select_epoll(): epoll_ctl failed: %s", | |
241 | strerror(errno)); | |
242 | } | |
243 | } | |
244 | ||
245 | } | |
246 | return RB_OK; | |
247 | } | |
248 | ||
94b4fbf9 | 249 | #ifdef EPOLL_SCHED_EVENT |
b57f37fb WP |
250 | int |
251 | rb_epoll_supports_event(void) | |
252 | { | |
253 | /* try to detect at runtime if everything we need actually works */ | |
254 | timer_t timer; | |
255 | struct sigevent ev; | |
256 | int fd; | |
257 | sigset_t set; | |
94b4fbf9 | 258 | |
b57f37fb WP |
259 | if(can_do_event == 1) |
260 | return 1; | |
261 | if(can_do_event == -1) | |
262 | return 0; | |
94b4fbf9 VY |
263 | |
264 | #ifdef USE_TIMERFD_CREATE | |
265 | if((fd = timerfd_create(CLOCK_REALTIME, 0)) >= 0) | |
266 | { | |
267 | close(fd); | |
268 | can_do_event = 1; | |
269 | can_do_timerfd = 1; | |
270 | return 0; | |
271 | } | |
272 | #endif | |
273 | ||
b57f37fb WP |
274 | ev.sigev_signo = SIGVTALRM; |
275 | ev.sigev_notify = SIGEV_SIGNAL; | |
276 | if(timer_create(CLOCK_REALTIME, &ev, &timer) != 0) | |
277 | { | |
278 | can_do_event = -1; | |
279 | return 0; | |
280 | } | |
281 | timer_delete(timer); | |
282 | sigemptyset(&set); | |
283 | fd = signalfd(-1, &set, 0); | |
94b4fbf9 | 284 | if(fd < 0) |
b57f37fb WP |
285 | { |
286 | can_do_event = -1; | |
287 | return 0; | |
288 | } | |
289 | close(fd); | |
290 | can_do_event = 1; | |
291 | return 1; | |
292 | } | |
293 | ||
294 | ||
/* bleh..work around a glibc header bug on 32bit systems */
/*
 * Local, fixed-width description of the records read from the signalfd
 * descriptor.  NOTE(review): the field order and sizes appear to mirror
 * the kernel's struct signalfd_siginfo (128 bytes) - confirm against
 * signalfd(2) before changing anything here.
 */
struct our_signalfd_siginfo
{
	uint32_t signo;
	int32_t err;
	int32_t code;
	uint32_t pid;
	uint32_t uid;
	int32_t fd;
	uint32_t tid;
	uint32_t band;
	uint32_t overrun;
	uint32_t trapno;
	int32_t status;
	int32_t svint;
	uint64_t svptr;		/* read back by signalfd_handler() as the struct ev_entry pointer */
	uint64_t utime;
	uint64_t stime;
	uint64_t addr;
	uint8_t pad[48];
};
316 | ||
317 | ||
318 | #define SIGFDIOV_COUNT 16 | |
94b4fbf9 | 319 | static void |
b57f37fb WP |
320 | signalfd_handler(rb_fde_t *F, void *data) |
321 | { | |
322 | static struct our_signalfd_siginfo fdsig[SIGFDIOV_COUNT]; | |
323 | static struct iovec iov[SIGFDIOV_COUNT]; | |
324 | struct ev_entry *ev; | |
325 | int ret, x; | |
94b4fbf9 | 326 | |
b57f37fb WP |
327 | for(x = 0; x < SIGFDIOV_COUNT; x++) |
328 | { | |
329 | iov[x].iov_base = &fdsig[x]; | |
330 | iov[x].iov_len = sizeof(struct our_signalfd_siginfo); | |
331 | } | |
332 | ||
333 | while(1) | |
334 | { | |
335 | ret = readv(rb_get_fd(F), iov, SIGFDIOV_COUNT); | |
336 | if(ret == 0 || (ret < 0 && !rb_ignore_errno(errno))) | |
337 | { | |
338 | rb_close(F); | |
339 | rb_epoll_init_event(); | |
340 | return; | |
341 | } | |
94b4fbf9 VY |
342 | |
343 | if(ret < 0) | |
b57f37fb WP |
344 | { |
345 | rb_setselect(F, RB_SELECT_READ, signalfd_handler, NULL); | |
346 | return; | |
347 | } | |
94b4fbf9 | 348 | for(x = 0; x < ret / (int)sizeof(struct our_signalfd_siginfo); x++) |
b57f37fb | 349 | { |
94b4fbf9 | 350 | ev = (struct ev_entry *)((uintptr_t)fdsig[x].svptr); |
b57f37fb WP |
351 | if(ev == NULL) |
352 | continue; | |
353 | rb_run_event(ev); | |
354 | } | |
355 | } | |
356 | } | |
357 | ||
358 | void | |
359 | rb_epoll_init_event(void) | |
360 | { | |
94b4fbf9 | 361 | |
b57f37fb WP |
362 | sigset_t ss; |
363 | rb_fde_t *F; | |
364 | int sfd; | |
94b4fbf9 VY |
365 | rb_epoll_supports_event(); |
366 | if(!can_do_timerfd) | |
367 | { | |
368 | sigemptyset(&ss); | |
369 | sigaddset(&ss, RTSIGNAL); | |
370 | sigprocmask(SIG_BLOCK, &ss, 0); | |
371 | sigemptyset(&ss); | |
372 | sigaddset(&ss, RTSIGNAL); | |
373 | sfd = signalfd(-1, &ss, 0); | |
374 | if(sfd == -1) | |
375 | { | |
376 | can_do_event = -1; | |
377 | return; | |
378 | } | |
379 | F = rb_open(sfd, RB_FD_UNKNOWN, "signalfd"); | |
380 | rb_set_nb(F); | |
381 | signalfd_handler(F, NULL); | |
b57f37fb | 382 | } |
b57f37fb WP |
383 | } |
384 | ||
94b4fbf9 VY |
385 | static int |
386 | rb_epoll_sched_event_signalfd(struct ev_entry *event, int when) | |
b57f37fb WP |
387 | { |
388 | timer_t *id; | |
389 | struct sigevent ev; | |
390 | struct itimerspec ts; | |
391 | ||
392 | memset(&ev, 0, sizeof(&ev)); | |
393 | event->comm_ptr = rb_malloc(sizeof(timer_t)); | |
394 | id = event->comm_ptr; | |
395 | ev.sigev_notify = SIGEV_SIGNAL; | |
396 | ev.sigev_signo = RTSIGNAL; | |
397 | ev.sigev_value.sival_ptr = event; | |
398 | ||
94b4fbf9 | 399 | if(timer_create(CLOCK_REALTIME, &ev, id) < 0) |
b57f37fb WP |
400 | { |
401 | rb_lib_log("timer_create: %s\n", strerror(errno)); | |
402 | return 0; | |
403 | } | |
404 | memset(&ts, 0, sizeof(ts)); | |
405 | ts.it_value.tv_sec = when; | |
406 | ts.it_value.tv_nsec = 0; | |
407 | if(event->frequency != 0) | |
408 | ts.it_interval = ts.it_value; | |
94b4fbf9 VY |
409 | |
410 | if(timer_settime(*id, 0, &ts, NULL) < 0) | |
b57f37fb WP |
411 | { |
412 | rb_lib_log("timer_settime: %s\n", strerror(errno)); | |
413 | return 0; | |
414 | } | |
415 | return 1; | |
416 | } | |
417 | ||
94b4fbf9 VY |
418 | #ifdef USE_TIMERFD_CREATE |
419 | static void | |
420 | rb_read_timerfd(rb_fde_t *F, void *data) | |
421 | { | |
422 | struct ev_entry *event = (struct ev_entry *)data; | |
423 | int retlen; | |
424 | uint64_t count; | |
425 | ||
426 | if(event == NULL) | |
427 | { | |
428 | rb_close(F); | |
429 | return; | |
430 | } | |
431 | ||
432 | retlen = rb_read(F, &count, sizeof(count)); | |
433 | if(retlen == 0 || (retlen < 0 && !rb_ignore_errno(errno))) | |
434 | { | |
435 | rb_close(F); | |
436 | rb_lib_log("rb_read_timerfd: timerfd[%s] closed on error: %s", event->name, | |
437 | strerror(errno)); | |
438 | return; | |
439 | } | |
440 | rb_setselect(F, RB_SELECT_READ, rb_read_timerfd, event); | |
441 | rb_run_event(event); | |
442 | } | |
443 | ||
444 | ||
445 | static int | |
446 | rb_epoll_sched_event_timerfd(struct ev_entry *event, int when) | |
447 | { | |
448 | struct itimerspec ts; | |
449 | static char buf[FD_DESC_SZ + 8]; | |
450 | int fd; | |
451 | rb_fde_t *F; | |
452 | ||
453 | if((fd = timerfd_create(CLOCK_REALTIME, 0)) < 0) | |
454 | { | |
455 | rb_lib_log("timerfd_create: %s\n", strerror(errno)); | |
456 | return 0; | |
457 | } | |
458 | ||
459 | memset(&ts, 0, sizeof(ts)); | |
460 | ts.it_value.tv_sec = when; | |
461 | ts.it_value.tv_nsec = 0; | |
462 | if(event->frequency != 0) | |
463 | ts.it_interval = ts.it_value; | |
464 | ||
465 | if(timerfd_settime(fd, 0, &ts, NULL) < 0) | |
466 | { | |
467 | rb_lib_log("timerfd_settime: %s\n", strerror(errno)); | |
468 | close(fd); | |
469 | return 0; | |
470 | } | |
471 | rb_snprintf(buf, sizeof(buf), "timerfd: %s", event->name); | |
472 | F = rb_open(fd, RB_FD_UNKNOWN, buf); | |
473 | rb_set_nb(F); | |
474 | event->comm_ptr = F; | |
475 | rb_setselect(F, RB_SELECT_READ, rb_read_timerfd, event); | |
476 | return 1; | |
477 | } | |
478 | #endif | |
479 | ||
480 | ||
481 | ||
/*
 * rb_epoll_sched_event
 *
 * Schedules event to fire after when seconds, using the timerfd
 * implementation when it was compiled in and detected as working, and
 * the timer_create()/signalfd implementation otherwise.
 * Returns whatever the chosen implementation returns (1 ok, 0 failed).
 */
int
rb_epoll_sched_event(struct ev_entry *event, int when)
{
#ifdef USE_TIMERFD_CREATE
	if(can_do_timerfd)
		return rb_epoll_sched_event_timerfd(event, when);
#endif

	return rb_epoll_sched_event_signalfd(event, when);
}
493 | ||
b57f37fb WP |
494 | void |
495 | rb_epoll_unsched_event(struct ev_entry *event) | |
496 | { | |
94b4fbf9 VY |
497 | #ifdef USE_TIMERFD_CREATE |
498 | if(can_do_timerfd) | |
499 | { | |
500 | rb_close((rb_fde_t *)event->comm_ptr); | |
501 | event->comm_ptr = NULL; | |
502 | return; | |
503 | } | |
504 | #endif | |
505 | timer_delete(*((timer_t *) event->comm_ptr)); | |
b57f37fb WP |
506 | rb_free(event->comm_ptr); |
507 | event->comm_ptr = NULL; | |
508 | } | |
509 | #endif /* EPOLL_SCHED_EVENT */ | |
510 | ||
511 | #else /* epoll not supported here */ | |
/*
 * rb_init_netio_epoll (epoll not available)
 *
 * Stub used when epoll support is not compiled in.  Always fails by
 * returning the non-zero value ENOSYS.
 */
int
rb_init_netio_epoll(void)
{
	const int unsupported = ENOSYS;

	return unsupported;
}
517 | ||
518 | void | |
94b4fbf9 | 519 | rb_setselect_epoll(rb_fde_t *F, unsigned int type, PF * handler, void *client_data) |
b57f37fb WP |
520 | { |
521 | errno = ENOSYS; | |
522 | return; | |
523 | } | |
524 | ||
/*
 * rb_select_epoll (epoll not available)
 *
 * Stub used when epoll support is not compiled in.  Sets errno to
 * ENOSYS and returns -1 without waiting.
 */
int
rb_select_epoll(long delay)
{
	(void)delay;	/* intentionally unused */

	errno = ENOSYS;
	return -1;
}
531 | ||
532 | int | |
94b4fbf9 | 533 | rb_setup_fd_epoll(rb_fde_t *F) |
b57f37fb WP |
534 | { |
535 | errno = ENOSYS; | |
536 | return -1; | |
537 | } | |
538 | ||
539 | ||
540 | #endif | |
541 | ||
542 | #if !defined(USING_EPOLL) || !defined(EPOLL_SCHED_EVENT) | |
94b4fbf9 VY |
/*
 * rb_epoll_init_event (event scheduling not available)
 *
 * Stub used when epoll or its timer event support is not compiled in.
 * Does nothing.
 */
void
rb_epoll_init_event(void)
{
	/* nothing to initialise */
}
548 | ||
/*
 * rb_epoll_sched_event (event scheduling not available)
 *
 * Stub used when epoll or its timer event support is not compiled in.
 * Schedules nothing; sets errno to ENOSYS and returns -1.
 */
int
rb_epoll_sched_event(struct ev_entry *event, int when)
{
	(void)event;
	(void)when;

	errno = ENOSYS;
	return -1;
}
555 | ||
/*
 * rb_epoll_unsched_event (event scheduling not available)
 *
 * Stub used when epoll or its timer event support is not compiled in.
 * Does nothing.
 */
void
rb_epoll_unsched_event(struct ev_entry *event)
{
	(void)event;	/* nothing was ever scheduled */
}
561 | ||
/*
 * rb_epoll_supports_event (event scheduling not available)
 *
 * Stub used when epoll or its timer event support is not compiled in.
 * Sets errno to ENOSYS and returns 0 (not supported).
 */
int
rb_epoll_supports_event(void)
{
	const int supported = 0;

	errno = ENOSYS;
	return supported;
}
568 | #endif /* !USING_EPOLL || !EPOLL_SCHED_EVENT */ |