]>
Commit | Line | Data |
---|---|---|
db137867 AC |
1 | /* |
2 | * ircd-ratbox: A slightly useful ircd. | |
3 | * epoll.c: Linux epoll compatible network routines. | |
4 | * | |
5 | * Copyright (C) 1990 Jarkko Oikarinen and University of Oulu, Co Center | |
6 | * Copyright (C) 1996-2002 Hybrid Development Team | |
7 | * Copyright (C) 2001 Adrian Chadd <adrian@creative.net.au> | |
8 | * Copyright (C) 2002-2005 ircd-ratbox development team | |
9 | * Copyright (C) 2002 Aaron Sethman <androsyn@ratbox.org> | |
10 | * | |
11 | * This program is free software; you can redistribute it and/or modify | |
12 | * it under the terms of the GNU General Public License as published by | |
13 | * the Free Software Foundation; either version 2 of the License, or | |
14 | * (at your option) any later version. | |
15 | * | |
16 | * This program is distributed in the hope that it will be useful, | |
17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
19 | * GNU General Public License for more details. | |
20 | * | |
21 | * You should have received a copy of the GNU General Public License | |
22 | * along with this program; if not, write to the Free Software | |
23 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 | |
24 | * USA | |
25 | * | |
db137867 | 26 | */ |
c056dba2 | 27 | |
db137867 AC |
28 | #define _GNU_SOURCE 1 |
29 | ||
fe037171 EM |
30 | #include <librb_config.h> |
31 | #include <rb_lib.h> | |
db137867 AC |
32 | #include <commio-int.h> |
33 | #include <event-int.h> | |
3c586ccf | 34 | #if defined(HAVE_EPOLL_CTL) && defined(HAVE_SYS_EPOLL_H) |
db137867 AC |
35 | #define USING_EPOLL |
36 | #include <fcntl.h> | |
37 | #include <sys/epoll.h> | |
38 | ||
3c586ccf | 39 | #if defined(HAVE_SIGNALFD) && defined(HAVE_SYS_SIGNALFD_H) && defined(USE_TIMER_CREATE) |
db137867 AC |
40 | #include <signal.h> |
41 | #include <sys/signalfd.h> | |
42 | #include <sys/uio.h> | |
43 | #define EPOLL_SCHED_EVENT 1 | |
44 | #endif | |
45 | ||
3c586ccf | 46 | #ifdef USE_TIMERFD_CREATE |
3202e249 VY |
47 | #include <sys/timerfd.h> |
48 | #endif | |
49 | ||
db137867 AC |
50 | #define RTSIGNAL SIGRTMIN |
/* Book-keeping for the single epoll instance driven by this backend. */
struct epoll_info
{
	int ep;				/* descriptor returned by epoll_create() */
	struct epoll_event *pfd;	/* result buffer handed to epoll_wait() */
	int pfd_size;			/* entry count of pfd; taken from getdtablesize() */
};
57 | ||
58 | static struct epoll_info *ep_info; | |
59 | static int can_do_event; | |
3202e249 | 60 | static int can_do_timerfd; |
db137867 AC |
61 | |
62 | /* | |
63 | * rb_init_netio | |
64 | * | |
65 | * This is a needed exported function which will be called to initialise | |
66 | * the network loop code. | |
67 | */ | |
68 | int | |
69 | rb_init_netio_epoll(void) | |
70 | { | |
3202e249 | 71 | can_do_event = 0; /* shut up gcc */ |
030272f3 | 72 | can_do_timerfd = 0; |
db137867 AC |
73 | ep_info = rb_malloc(sizeof(struct epoll_info)); |
74 | ep_info->pfd_size = getdtablesize(); | |
75 | ep_info->ep = epoll_create(ep_info->pfd_size); | |
76 | if(ep_info->ep < 0) | |
77 | { | |
78 | return -1; | |
79 | } | |
80 | rb_open(ep_info->ep, RB_FD_UNKNOWN, "epoll file descriptor"); | |
81 | ep_info->pfd = rb_malloc(sizeof(struct epoll_event) * ep_info->pfd_size); | |
82 | ||
83 | return 0; | |
84 | } | |
85 | ||
86 | int | |
8679c0fe | 87 | rb_setup_fd_epoll(rb_fde_t *F __attribute__((unused))) |
db137867 AC |
88 | { |
89 | return 0; | |
90 | } | |
91 | ||
92 | ||
93 | /* | |
94 | * rb_setselect | |
95 | * | |
96 | * This is a needed exported function which will be called to register | |
97 | * and deregister interest in a pending IO state for a given FD. | |
98 | */ | |
99 | void | |
3202e249 | 100 | rb_setselect_epoll(rb_fde_t *F, unsigned int type, PF * handler, void *client_data) |
db137867 AC |
101 | { |
102 | struct epoll_event ep_event; | |
103 | int old_flags = F->pflags; | |
104 | int op = -1; | |
105 | ||
106 | lrb_assert(IsFDOpen(F)); | |
107 | ||
108 | /* Update the list, even though we're not using it .. */ | |
109 | if(type & RB_SELECT_READ) | |
110 | { | |
111 | if(handler != NULL) | |
112 | F->pflags |= EPOLLIN; | |
113 | else | |
114 | F->pflags &= ~EPOLLIN; | |
115 | F->read_handler = handler; | |
116 | F->read_data = client_data; | |
117 | } | |
118 | ||
119 | if(type & RB_SELECT_WRITE) | |
120 | { | |
121 | if(handler != NULL) | |
122 | F->pflags |= EPOLLOUT; | |
123 | else | |
124 | F->pflags &= ~EPOLLOUT; | |
125 | F->write_handler = handler; | |
126 | F->write_data = client_data; | |
127 | } | |
128 | ||
129 | if(old_flags == 0 && F->pflags == 0) | |
130 | return; | |
131 | else if(F->pflags <= 0) | |
132 | op = EPOLL_CTL_DEL; | |
133 | else if(old_flags == 0 && F->pflags > 0) | |
134 | op = EPOLL_CTL_ADD; | |
135 | else if(F->pflags != old_flags) | |
136 | op = EPOLL_CTL_MOD; | |
137 | ||
138 | if(op == -1) | |
139 | return; | |
140 | ||
141 | ep_event.events = F->pflags; | |
142 | ep_event.data.ptr = F; | |
143 | ||
144 | if(op == EPOLL_CTL_ADD || op == EPOLL_CTL_MOD) | |
145 | ep_event.events |= EPOLLET; | |
146 | ||
147 | if(epoll_ctl(ep_info->ep, op, F->fd, &ep_event) != 0) | |
148 | { | |
149 | rb_lib_log("rb_setselect_epoll(): epoll_ctl failed: %s", strerror(errno)); | |
150 | abort(); | |
151 | } | |
152 | ||
153 | ||
154 | } | |
155 | ||
156 | /* | |
157 | * rb_select | |
158 | * | |
159 | * Called to do the new-style IO, courtesy of squid (like most of this | |
160 | * new IO code). This routine handles the stuff we've hidden in | |
161 | * rb_setselect and fd_table[] and calls callbacks for IO ready | |
162 | * events. | |
163 | */ | |
164 | ||
165 | int | |
166 | rb_select_epoll(long delay) | |
167 | { | |
168 | int num, i, flags, old_flags, op; | |
169 | struct epoll_event ep_event; | |
170 | int o_errno; | |
171 | void *data; | |
172 | ||
173 | num = epoll_wait(ep_info->ep, ep_info->pfd, ep_info->pfd_size, delay); | |
174 | ||
175 | /* save errno as rb_set_time() will likely clobber it */ | |
176 | o_errno = errno; | |
177 | rb_set_time(); | |
178 | errno = o_errno; | |
179 | ||
180 | if(num < 0 && !rb_ignore_errno(o_errno)) | |
181 | return RB_ERROR; | |
182 | ||
183 | if(num <= 0) | |
184 | return RB_OK; | |
185 | ||
3202e249 | 186 | for(i = 0; i < num; i++) |
db137867 AC |
187 | { |
188 | PF *hdl; | |
189 | rb_fde_t *F = ep_info->pfd[i].data.ptr; | |
190 | old_flags = F->pflags; | |
191 | if(ep_info->pfd[i].events & (EPOLLIN | EPOLLHUP | EPOLLERR)) | |
192 | { | |
193 | hdl = F->read_handler; | |
194 | data = F->read_data; | |
195 | F->read_handler = NULL; | |
196 | F->read_data = NULL; | |
197 | if(hdl) | |
198 | { | |
199 | hdl(F, data); | |
200 | } | |
201 | } | |
202 | ||
203 | if(!IsFDOpen(F)) | |
204 | continue; | |
205 | if(ep_info->pfd[i].events & (EPOLLOUT | EPOLLHUP | EPOLLERR)) | |
206 | { | |
207 | hdl = F->write_handler; | |
208 | data = F->write_data; | |
209 | F->write_handler = NULL; | |
210 | F->write_data = NULL; | |
211 | ||
212 | if(hdl) | |
213 | { | |
214 | hdl(F, data); | |
215 | } | |
216 | } | |
217 | ||
218 | if(!IsFDOpen(F)) | |
219 | continue; | |
220 | ||
221 | flags = 0; | |
222 | ||
223 | if(F->read_handler != NULL) | |
224 | flags |= EPOLLIN; | |
225 | if(F->write_handler != NULL) | |
226 | flags |= EPOLLOUT; | |
227 | ||
228 | if(old_flags != flags) | |
229 | { | |
230 | if(flags == 0) | |
231 | op = EPOLL_CTL_DEL; | |
232 | else | |
233 | op = EPOLL_CTL_MOD; | |
234 | F->pflags = ep_event.events = flags; | |
235 | ep_event.data.ptr = F; | |
236 | if(op == EPOLL_CTL_MOD || op == EPOLL_CTL_ADD) | |
237 | ep_event.events |= EPOLLET; | |
238 | ||
239 | if(epoll_ctl(ep_info->ep, op, F->fd, &ep_event) != 0) | |
240 | { | |
241 | rb_lib_log("rb_select_epoll(): epoll_ctl failed: %s", | |
242 | strerror(errno)); | |
243 | } | |
244 | } | |
245 | ||
246 | } | |
247 | return RB_OK; | |
248 | } | |
249 | ||
3202e249 | 250 | #ifdef EPOLL_SCHED_EVENT |
db137867 AC |
251 | int |
252 | rb_epoll_supports_event(void) | |
253 | { | |
254 | /* try to detect at runtime if everything we need actually works */ | |
255 | timer_t timer; | |
256 | struct sigevent ev; | |
21acd096 | 257 | struct stat st; |
db137867 AC |
258 | int fd; |
259 | sigset_t set; | |
3202e249 | 260 | |
db137867 AC |
261 | if(can_do_event == 1) |
262 | return 1; | |
263 | if(can_do_event == -1) | |
264 | return 0; | |
3202e249 | 265 | |
21acd096 AS |
266 | /* Check for openvz..it has a broken timerfd.. */ |
267 | if(stat("/proc/user_beancounters", &st) == 0) | |
268 | { | |
269 | can_do_event = -1; | |
270 | return 0; | |
271 | } | |
272 | ||
3202e249 VY |
273 | #ifdef USE_TIMERFD_CREATE |
274 | if((fd = timerfd_create(CLOCK_REALTIME, 0)) >= 0) | |
275 | { | |
276 | close(fd); | |
277 | can_do_event = 1; | |
278 | can_do_timerfd = 1; | |
030272f3 | 279 | return 1; |
3202e249 VY |
280 | } |
281 | #endif | |
282 | ||
db137867 AC |
283 | ev.sigev_signo = SIGVTALRM; |
284 | ev.sigev_notify = SIGEV_SIGNAL; | |
285 | if(timer_create(CLOCK_REALTIME, &ev, &timer) != 0) | |
286 | { | |
287 | can_do_event = -1; | |
288 | return 0; | |
289 | } | |
290 | timer_delete(timer); | |
291 | sigemptyset(&set); | |
292 | fd = signalfd(-1, &set, 0); | |
3202e249 | 293 | if(fd < 0) |
db137867 AC |
294 | { |
295 | can_do_event = -1; | |
296 | return 0; | |
297 | } | |
298 | close(fd); | |
299 | can_do_event = 1; | |
300 | return 1; | |
301 | } | |
302 | ||
303 | ||
/*
 * bleh..work around a glibc header bug on 32bit systems
 *
 * Private, fixed-width record type used for reads from the signalfd
 * descriptor (presumably mirroring the system's struct signalfd_siginfo --
 * confirm against <sys/signalfd.h>).  Within this file only svptr is
 * consumed; it carries the pointer that was attached to the timer.
 */
struct our_signalfd_siginfo
{
	uint32_t signo;
	int32_t err;
	int32_t code;
	uint32_t pid;
	uint32_t uid;
	int32_t fd;
	uint32_t tid;
	uint32_t band;
	uint32_t overrun;
	uint32_t trapno;
	int32_t status;
	int32_t svint;
	uint64_t svptr;		/* read by signalfd_handler to recover the ev_entry */
	uint64_t utime;
	uint64_t stime;
	uint64_t addr;
	uint8_t pad[48];	/* rounds the record up to 128 bytes */
};
325 | ||
326 | ||
327 | #define SIGFDIOV_COUNT 16 | |
3202e249 | 328 | static void |
8679c0fe | 329 | signalfd_handler(rb_fde_t *F, void *data __attribute__((unused))) |
db137867 AC |
330 | { |
331 | static struct our_signalfd_siginfo fdsig[SIGFDIOV_COUNT]; | |
332 | static struct iovec iov[SIGFDIOV_COUNT]; | |
333 | struct ev_entry *ev; | |
334 | int ret, x; | |
3202e249 | 335 | |
db137867 AC |
336 | for(x = 0; x < SIGFDIOV_COUNT; x++) |
337 | { | |
338 | iov[x].iov_base = &fdsig[x]; | |
339 | iov[x].iov_len = sizeof(struct our_signalfd_siginfo); | |
340 | } | |
341 | ||
342 | while(1) | |
343 | { | |
344 | ret = readv(rb_get_fd(F), iov, SIGFDIOV_COUNT); | |
030272f3 | 345 | |
db137867 AC |
346 | if(ret == 0 || (ret < 0 && !rb_ignore_errno(errno))) |
347 | { | |
348 | rb_close(F); | |
349 | rb_epoll_init_event(); | |
350 | return; | |
351 | } | |
3202e249 VY |
352 | |
353 | if(ret < 0) | |
db137867 AC |
354 | { |
355 | rb_setselect(F, RB_SELECT_READ, signalfd_handler, NULL); | |
356 | return; | |
357 | } | |
3202e249 | 358 | for(x = 0; x < ret / (int)sizeof(struct our_signalfd_siginfo); x++) |
db137867 | 359 | { |
030272f3 VY |
360 | #if __WORDSIZE == 32 && defined(__sparc__) |
361 | uint32_t *q = (uint32_t *)&fdsig[x].svptr; | |
362 | ev = (struct ev_entry *)q[0]; | |
363 | #else | |
364 | ev = (struct ev_entry *)(uintptr_t)(fdsig[x].svptr); | |
365 | ||
366 | #endif | |
db137867 AC |
367 | if(ev == NULL) |
368 | continue; | |
0e651b14 | 369 | rb_run_one_event(ev); |
db137867 AC |
370 | } |
371 | } | |
372 | } | |
373 | ||
374 | void | |
375 | rb_epoll_init_event(void) | |
376 | { | |
3202e249 | 377 | |
db137867 AC |
378 | sigset_t ss; |
379 | rb_fde_t *F; | |
380 | int sfd; | |
3202e249 VY |
381 | rb_epoll_supports_event(); |
382 | if(!can_do_timerfd) | |
383 | { | |
384 | sigemptyset(&ss); | |
385 | sigaddset(&ss, RTSIGNAL); | |
386 | sigprocmask(SIG_BLOCK, &ss, 0); | |
387 | sigemptyset(&ss); | |
388 | sigaddset(&ss, RTSIGNAL); | |
389 | sfd = signalfd(-1, &ss, 0); | |
390 | if(sfd == -1) | |
391 | { | |
392 | can_do_event = -1; | |
393 | return; | |
394 | } | |
395 | F = rb_open(sfd, RB_FD_UNKNOWN, "signalfd"); | |
396 | rb_set_nb(F); | |
397 | signalfd_handler(F, NULL); | |
db137867 | 398 | } |
db137867 AC |
399 | } |
400 | ||
3202e249 VY |
401 | static int |
402 | rb_epoll_sched_event_signalfd(struct ev_entry *event, int when) | |
db137867 AC |
403 | { |
404 | timer_t *id; | |
405 | struct sigevent ev; | |
406 | struct itimerspec ts; | |
407 | ||
7db54a1f | 408 | memset(&ev, 0, sizeof(ev)); |
db137867 AC |
409 | event->comm_ptr = rb_malloc(sizeof(timer_t)); |
410 | id = event->comm_ptr; | |
411 | ev.sigev_notify = SIGEV_SIGNAL; | |
412 | ev.sigev_signo = RTSIGNAL; | |
413 | ev.sigev_value.sival_ptr = event; | |
414 | ||
3202e249 | 415 | if(timer_create(CLOCK_REALTIME, &ev, id) < 0) |
db137867 AC |
416 | { |
417 | rb_lib_log("timer_create: %s\n", strerror(errno)); | |
418 | return 0; | |
419 | } | |
420 | memset(&ts, 0, sizeof(ts)); | |
421 | ts.it_value.tv_sec = when; | |
422 | ts.it_value.tv_nsec = 0; | |
423 | if(event->frequency != 0) | |
424 | ts.it_interval = ts.it_value; | |
3202e249 VY |
425 | |
426 | if(timer_settime(*id, 0, &ts, NULL) < 0) | |
db137867 AC |
427 | { |
428 | rb_lib_log("timer_settime: %s\n", strerror(errno)); | |
429 | return 0; | |
430 | } | |
431 | return 1; | |
432 | } | |
433 | ||
3202e249 VY |
434 | #ifdef USE_TIMERFD_CREATE |
435 | static void | |
436 | rb_read_timerfd(rb_fde_t *F, void *data) | |
437 | { | |
438 | struct ev_entry *event = (struct ev_entry *)data; | |
439 | int retlen; | |
440 | uint64_t count; | |
441 | ||
442 | if(event == NULL) | |
443 | { | |
444 | rb_close(F); | |
445 | return; | |
446 | } | |
447 | ||
448 | retlen = rb_read(F, &count, sizeof(count)); | |
030272f3 | 449 | |
3202e249 VY |
450 | if(retlen == 0 || (retlen < 0 && !rb_ignore_errno(errno))) |
451 | { | |
452 | rb_close(F); | |
453 | rb_lib_log("rb_read_timerfd: timerfd[%s] closed on error: %s", event->name, | |
454 | strerror(errno)); | |
455 | return; | |
456 | } | |
457 | rb_setselect(F, RB_SELECT_READ, rb_read_timerfd, event); | |
0e651b14 | 458 | rb_run_one_event(event); |
3202e249 VY |
459 | } |
460 | ||
461 | ||
462 | static int | |
463 | rb_epoll_sched_event_timerfd(struct ev_entry *event, int when) | |
464 | { | |
465 | struct itimerspec ts; | |
466 | static char buf[FD_DESC_SZ + 8]; | |
467 | int fd; | |
468 | rb_fde_t *F; | |
469 | ||
470 | if((fd = timerfd_create(CLOCK_REALTIME, 0)) < 0) | |
471 | { | |
472 | rb_lib_log("timerfd_create: %s\n", strerror(errno)); | |
473 | return 0; | |
474 | } | |
475 | ||
476 | memset(&ts, 0, sizeof(ts)); | |
477 | ts.it_value.tv_sec = when; | |
478 | ts.it_value.tv_nsec = 0; | |
479 | if(event->frequency != 0) | |
480 | ts.it_interval = ts.it_value; | |
481 | ||
482 | if(timerfd_settime(fd, 0, &ts, NULL) < 0) | |
483 | { | |
484 | rb_lib_log("timerfd_settime: %s\n", strerror(errno)); | |
485 | close(fd); | |
486 | return 0; | |
487 | } | |
5203cba5 | 488 | snprintf(buf, sizeof(buf), "timerfd: %s", event->name); |
3202e249 VY |
489 | F = rb_open(fd, RB_FD_UNKNOWN, buf); |
490 | rb_set_nb(F); | |
491 | event->comm_ptr = F; | |
492 | rb_setselect(F, RB_SELECT_READ, rb_read_timerfd, event); | |
493 | return 1; | |
494 | } | |
495 | #endif | |
496 | ||
497 | ||
498 | ||
/*
 * rb_epoll_sched_event
 *
 * Exported entry point for scheduling `event` to fire in `when` seconds.
 * Uses the timerfd implementation when it is compiled in and the runtime
 * probe enabled it, and the POSIX-timer/signalfd implementation
 * otherwise.  Returns whatever the chosen implementation returns
 * (1 = scheduled, 0 = failed).
 */
int
rb_epoll_sched_event(struct ev_entry *event, int when)
{
	int scheduled;

#ifdef USE_TIMERFD_CREATE
	if(can_do_timerfd)
		scheduled = rb_epoll_sched_event_timerfd(event, when);
	else
#endif
		scheduled = rb_epoll_sched_event_signalfd(event, when);

	return scheduled;
}
510 | ||
db137867 AC |
511 | void |
512 | rb_epoll_unsched_event(struct ev_entry *event) | |
513 | { | |
3202e249 VY |
514 | #ifdef USE_TIMERFD_CREATE |
515 | if(can_do_timerfd) | |
516 | { | |
517 | rb_close((rb_fde_t *)event->comm_ptr); | |
518 | event->comm_ptr = NULL; | |
519 | return; | |
520 | } | |
521 | #endif | |
522 | timer_delete(*((timer_t *) event->comm_ptr)); | |
db137867 AC |
523 | rb_free(event->comm_ptr); |
524 | event->comm_ptr = NULL; | |
525 | } | |
526 | #endif /* EPOLL_SCHED_EVENT */ | |
527 | ||
528 | #else /* epoll not supported here */ | |
/*
 * Stub used when epoll is not available at build time.  Reports failure
 * by returning ENOSYS itself; errno is not modified.
 */
int
rb_init_netio_epoll(void)
{
	return ENOSYS;
}
534 | ||
535 | void | |
8679c0fe | 536 | rb_setselect_epoll(rb_fde_t *F __attribute__((unused)), unsigned int type __attribute__((unused)), PF * handler __attribute__((unused)), void *client_data __attribute__((unused))) |
db137867 AC |
537 | { |
538 | errno = ENOSYS; | |
539 | return; | |
540 | } | |
541 | ||
/*
 * Stub used when epoll is not available at build time: sets errno to
 * ENOSYS and returns -1 without waiting.
 */
int
rb_select_epoll(long delay __attribute__((unused)))
{
	errno = ENOSYS;
	return -1;
}
548 | ||
549 | int | |
8679c0fe | 550 | rb_setup_fd_epoll(rb_fde_t *F __attribute__((unused))) |
db137867 AC |
551 | { |
552 | errno = ENOSYS; | |
553 | return -1; | |
554 | } | |
555 | ||
556 | ||
557 | #endif | |
558 | ||
559 | #if !defined(USING_EPOLL) || !defined(EPOLL_SCHED_EVENT) | |
3202e249 VY |
/*
 * Stub used when epoll or its event scheduling support is compiled out:
 * there is nothing to initialise.
 */
void
rb_epoll_init_event(void)
{
}
565 | ||
/*
 * Stub used when epoll or its event scheduling support is compiled out:
 * nothing is scheduled; sets errno to ENOSYS and returns -1.
 */
int
rb_epoll_sched_event(struct ev_entry *event __attribute__((unused)), int when __attribute__((unused)))
{
	errno = ENOSYS;
	return -1;
}
572 | ||
/*
 * Stub used when epoll or its event scheduling support is compiled out:
 * nothing was ever scheduled, so there is nothing to cancel.
 */
void
rb_epoll_unsched_event(struct ev_entry *event __attribute__((unused)))
{
}
578 | ||
/*
 * Stub used when epoll or its event scheduling support is compiled out:
 * always answers "not supported" (0) and sets errno to ENOSYS.
 */
int
rb_epoll_supports_event(void)
{
	errno = ENOSYS;
	return 0;
}
585 | #endif /* !USING_EPOLL || !EPOLL_SCHED_EVENT */ |