libosmocore/src/core/select.c

732 lines
20 KiB
C

/*! \file select.c
* select filedescriptor handling.
* Taken from:
* userspace logging daemon for the iptables ULOG target
* of the linux 2.4 netfilter subsystem. */
/*
* (C) 2000-2020 by Harald Welte <laforge@gnumonks.org>
* All Rights Reserved.
*
* SPDX-License-Identifier: GPL-2.0+
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <stdbool.h>
#include <errno.h>
#include <osmocom/core/select.h>
#include <osmocom/core/linuxlist.h>
#include <osmocom/core/timer.h>
#include <osmocom/core/logging.h>
#include <osmocom/core/talloc.h>
#include <osmocom/core/utils.h>
#include <osmocom/core/stat_item.h>
#include <osmocom/core/stats_tcp.h>
#include "config.h"
#if defined(HAVE_SYS_SELECT_H) && defined(HAVE_POLL_H)
#include <sys/select.h>
#include <poll.h>
/*! \addtogroup select
* @{
* select() loop abstraction
*
* \file select.c */
/* keep a set of file descriptors per-thread, so that each thread can have its own
 * distinct set of file descriptors to interact with */
/* Highest fd number passed to osmo_fd_register() so far in this thread. Within this file it is
 * only ever raised, never lowered again on unregister. */
static __thread int maxfd = 0;
/* List of all registered osmo_fd of this thread; initialized at run-time by osmo_select_init() */
static __thread struct llist_head osmo_fds; /* TLS cannot use LLIST_HEAD() */
/* Incremented by every osmo_fd_unregister(). osmo_fd_disp_fds() resets it before walking the
 * list and checks it after each callback to notice that the list was modified meanwhile. */
static __thread int unregistered_count;
/* Lookup table of struct osmo_fd *, indexed by "ofd->fd". Note that "size" is the allocated
 * size of "table" in bytes (not the number of entries), see osmo_fd_lookup_table_extend(). */
static __thread struct {
struct osmo_fd **table;
unsigned int size;
} osmo_fd_lookup;
/*! Grow the per-thread fd lookup table so that index \a new_max_fd is usable.
 *  \param[in] new_max_fd highest file descriptor number the table has to hold
 *
 *  The table is only ever grown, never shrunk. Newly added memory is zeroed, so unused
 *  slots read as NULL. An allocation failure or an absurdly large size is fatal (OSMO_ASSERT).
 */
static void osmo_fd_lookup_table_extend(unsigned int new_max_fd)
{
	unsigned int min_new_size;
	unsigned int pw2;
	unsigned int new_size;

	/* First calculate the minimally required new size of the array in bytes: */
	min_new_size = (new_max_fd + 1) * sizeof(struct osmo_fd *);
	/* Number of significant bits of min_new_size, i.e. (1 << pw2) is the smallest
	 * power of two that is strictly larger than min_new_size: */
	pw2 = 32 - __builtin_clz(min_new_size);
	/* add headroom, so the table is not reallocated for every slightly larger fd: */
	pw2++;
	/* The size is computed as 1U << (pw2 + 1) below. Shifting a 32 bit value by 32 or more
	 * bits is undefined behaviour, hence validate the shift count that is actually used
	 * (and shift an unsigned constant, so that bit 31 is a legal result). */
	OSMO_ASSERT(pw2 + 1 <= 31);
	new_size = 1U << (pw2 + 1);
	/* Let's make it at least 1024B, to avoid reallocating quickly at startup */
	if (new_size < 1024)
		new_size = 1024;

	if (new_size > osmo_fd_lookup.size) {
		uint8_t *ptr = talloc_realloc_size(OTC_GLOBAL, osmo_fd_lookup.table, new_size);
		OSMO_ASSERT(ptr);
		/* zero only the newly added tail; existing entries are preserved by realloc */
		memset(ptr + osmo_fd_lookup.size, 0, new_size - osmo_fd_lookup.size);
		osmo_fd_lookup.table = (struct osmo_fd **)ptr;
		osmo_fd_lookup.size = new_size;
	}
}
#ifndef FORCE_IO_SELECT
/* Bookkeeping of the poll() based main loop implementation */
struct poll_state {
/* array of pollfd; (re)allocated by osmo_fd_register() and filled by poll_fill_fds()
 * right before every poll() call */
struct pollfd *poll;
/* number of entries in pollfd allocated */
unsigned int poll_size;
/* number of osmo_fd registered */
unsigned int num_registered;
};
/* per-thread instance of the poll() bookkeeping */
static __thread struct poll_state g_poll;
#endif /* FORCE_IO_SELECT */
/*! Number of times osmo_select_shutdown_request() was called; a non-zero value switches the
 *  poll() based main loop into shutdown mode. See osmo_select_shutdown_request().
 *  NOTE(review): this is a plain int that is not thread-local, and the usage example
 *  increments it from a signal handler -- confirm whether volatile sig_atomic_t is needed. */
static int _osmo_select_shutdown_requested = 0;
/*! Set to true by poll_disp_fds() once shutdown was requested and a dispatch run saw no fd with
 *  pending OSMO_FD_WRITE anymore. See osmo_select_shutdown_request() */
static bool _osmo_select_shutdown_done = false;
/*! Fill in the user-supplied fields of an osmo_fd. The osmo_fd is not registered by this call.
 *  \param[inout] ofd Osmo FD to be initialized
 *  \param[in] fd OS-level file descriptor number
 *  \param[in] when bit-mask of OSMO_FD_{READ,WRITE,EXCEPT}
 *  \param[in] cb Call-back function to be called
 *  \param[in] data Private context pointer
 *  \param[in] priv_nr Private number
 */
void osmo_fd_setup(struct osmo_fd *ofd, int fd, unsigned int when,
		   int (*cb)(struct osmo_fd *fd, unsigned int what),
		   void *data, unsigned int priv_nr)
{
	/* user call-back and its private context */
	ofd->cb = cb;
	ofd->data = data;
	ofd->priv_nr = priv_nr;

	/* what to watch, and for which events */
	ofd->when = when;
	ofd->fd = fd;
}
/*! Change the event mask of an osmo_fd: "ofd->when = (ofd->when & when_mask) | when".
 *  \param[inout] ofd Osmo FD whose 'when' field is updated
 *  \param[in] when_mask bits of the current 'when' value that are kept
 *  \param[in] when bits that are set in addition
 *
 *  Use this function instead of directly modifying ofd->when, as the latter will be
 *  removed soon. */
void osmo_fd_update_when(struct osmo_fd *ofd, unsigned int when_mask, unsigned int when)
{
	ofd->when = (ofd->when & when_mask) | when;
}
/*! Tell whether an osmo_fd is currently part of this thread's list of registered fds.
 *  \param[in] fd osmocom file descriptor to look for (compared by pointer, not by fd number)
 *  \returns true if registered; otherwise false
 *
 *  This walks the whole list of registered file descriptors.
 */
bool osmo_fd_is_registered(struct osmo_fd *fd)
{
	struct osmo_fd *cur;
	bool found = false;

	llist_for_each_entry(cur, &osmo_fds, list) {
		if (cur != fd)
			continue;
		found = true;
		break;
	}

	return found;
}
/*! Register a new file descriptor with select loop abstraction
 *  \param[in] fd osmocom file descriptor to be registered
 *  \returns 0 on success; negative in case of error (the negative fcntl() result, or -ENOMEM)
 *
 *  The file descriptor is switched to non-blocking mode and gets the close-on-exec flag set.
 *  On any error return the osmo_fd has NOT been added to the list of registered fds, i.e.
 *  the caller must not call osmo_fd_unregister() for it.
 *
 *  The API expects fd field of the struct osmo_fd to remain unchanged while
 *  registered, ie until osmo_fd_unregister() is called on it.
 */
int osmo_fd_register(struct osmo_fd *fd)
{
	int flags;

	/* make FD nonblocking */
	flags = fcntl(fd->fd, F_GETFL);
	if (flags < 0)
		return flags;
	flags |= O_NONBLOCK;
	flags = fcntl(fd->fd, F_SETFL, flags);
	if (flags < 0)
		return flags;

	/* set close-on-exec flag */
	flags = fcntl(fd->fd, F_GETFD);
	if (flags < 0)
		return flags;
	flags |= FD_CLOEXEC;
	flags = fcntl(fd->fd, F_SETFD, flags);
	if (flags < 0)
		return flags;

	/* Register FD */
	if (fd->fd > maxfd) {
		maxfd = fd->fd;
		osmo_fd_lookup_table_extend(maxfd);
	}

#ifdef OSMO_FD_CHECK
	if (osmo_fd_is_registered(fd)) {
		fprintf(stderr, "Adding a osmo_fd that is already in the list.\n");
		return 0;
	}
#endif

#ifndef FORCE_IO_SELECT
	/* grow the pollfd array (doubling, starting at 1024 entries) if it cannot hold one more fd */
	if (g_poll.num_registered + 1 > g_poll.poll_size) {
		struct pollfd *p;
		unsigned int new_size = g_poll.poll_size ? g_poll.poll_size * 2 : 1024;

		p = talloc_realloc(OTC_GLOBAL, g_poll.poll, struct pollfd, new_size);
		if (!p)
			return -ENOMEM;
		/* zero all newly added entries; memset() counts bytes, not array elements */
		memset(p + g_poll.poll_size, 0, (new_size - g_poll.poll_size) * sizeof(*p));
		g_poll.poll = p;
		g_poll.poll_size = new_size;
	}
	g_poll.num_registered++;
#endif /* FORCE_IO_SELECT */

	llist_add_tail(&fd->list, &osmo_fds);
	osmo_fd_lookup.table[fd->fd] = fd;

	return 0;
}
/*! Unregister a file descriptor from select loop abstraction
 *  \param[in] fd osmocom file descriptor to be unregistered
 *
 *  Caller is responsible for ensuring the fd is really registered before calling this API.
 *  This function must be called before changing the value of the fd field in
 *  the struct osmo_fd.
 *
 *  If the fd field is out of range (e.g. already set to -1), the osmo_fd is still removed from
 *  the list of registered fds, but its lookup table entry and statistical data are left alone.
 */
void osmo_fd_unregister(struct osmo_fd *fd)
{
	/* Note: when fd is NOT inside the osmo_fds list (not registered before)
	 * this function will crash! If in doubt, check file descriptor with
	 * osmo_fd_is_registered() */
	unregistered_count++;
	llist_del(&fd->list);
#ifndef FORCE_IO_SELECT
	g_poll.num_registered--;
#endif /* FORCE_IO_SELECT */

	if (OSMO_UNLIKELY(fd->fd < 0 || fd->fd > maxfd)) {
		/* Some old users used to incorrectly set fd = -1 *before* calling osmo_unregister().
		 * Hence, in order to keep backward compatibility it's not possible to assert() here.
		 * Instead, print an error message since this is actually a bug in the API user.
		 * Both values are signed int, so print them as such (-1 instead of 4294967295). */
#ifdef OSMO_FD_CHECK
		osmo_panic("osmo_fd_unregister(fd=%d) out of expected range (0..%d), fix your code!!!\n",
			   fd->fd, maxfd);
#else
		fprintf(stderr, "osmo_fd_unregister(fd=%d) out of expected range (0..%d), fix your code!!!\n",
			fd->fd, maxfd);
		return;
#endif
	}

	osmo_fd_lookup.table[fd->fd] = NULL;

	/* If existent, free any statistical data */
	osmo_stats_tcp_osmo_fd_unregister(fd);
}
/*! Unregister (if needed) and close a file descriptor, then mark the osmo_fd as closed
 *  \param[in] fd osmocom file descriptor to be unregistered + closed
 *
 *  An osmo_fd that is still registered is first removed from the select() loop
 *  abstraction. The OS-level fd is closed unless it is already -1. Afterwards the
 *  fd field is -1 and all 'when' flags are cleared. */
void osmo_fd_close(struct osmo_fd *fd)
{
	const bool registered = osmo_fd_is_registered(fd);

	if (registered)
		osmo_fd_unregister(fd);

	if (fd->fd != -1)
		close(fd->fd);

	/* mark as closed / nothing to wait for */
	fd->when = 0;
	fd->fd = -1;
}
/*! Add all registered file descriptors to the given fd_sets, according to their 'when' flags
 *  \param[in] _rset The readfds to populate (fd_set *)
 *  \param[in] _wset The writefds to populate (fd_set *)
 *  \param[in] _eset The errorfds to populate (fd_set *)
 *
 *  The sets are only added to; the caller has to clear them beforehand.
 *
 *  \returns The highest file descriptor seen or 0 on an empty list
 */
inline int osmo_fd_fill_fds(void *_rset, void *_wset, void *_eset)
{
	fd_set *rd = _rset;
	fd_set *wr = _wset;
	fd_set *ex = _eset;
	struct osmo_fd *ofd;
	int highest = 0;

	llist_for_each_entry(ofd, &osmo_fds, list) {
		const int fd = ofd->fd;

		if (ofd->when & OSMO_FD_READ)
			FD_SET(fd, rd);
		if (ofd->when & OSMO_FD_WRITE)
			FD_SET(fd, wr);
		if (ofd->when & OSMO_FD_EXCEPT)
			FD_SET(fd, ex);

		highest = (fd > highest) ? fd : highest;
	}

	return highest;
}
/*! Call the call-backs of all registered file descriptors that are marked in the given fd_sets
 *  \param[in] _rset The readfds as filled by select() (fd_set *)
 *  \param[in] _wset The writefds as filled by select() (fd_set *)
 *  \param[in] _eset The errorfds as filled by select() (fd_set *)
 *
 *  Every fd that is dispatched is cleared from the sets, so a restarted list walk does not
 *  dispatch the same event twice.
 *
 *  \returns 1 if at least one call-back was invoked; 0 otherwise
 */
inline int osmo_fd_disp_fds(void *_rset, void *_wset, void *_eset)
{
	fd_set *rd = _rset, *wr = _wset, *ex = _eset;
	struct osmo_fd *ofd, *next;
	int handled = 0;
	bool rescan;

	do {
		rescan = false;
		unregistered_count = 0;

		llist_for_each_entry_safe(ofd, next, &osmo_fds, list) {
			int what = 0;

			if (FD_ISSET(ofd->fd, rd)) {
				FD_CLR(ofd->fd, rd);
				what |= OSMO_FD_READ;
			}
			if (FD_ISSET(ofd->fd, wr)) {
				FD_CLR(ofd->fd, wr);
				what |= OSMO_FD_WRITE;
			}
			if (FD_ISSET(ofd->fd, ex)) {
				FD_CLR(ofd->fd, ex);
				what |= OSMO_FD_EXCEPT;
			}

			if (what != 0) {
				handled = 1;
				/* Reset the log context before every call-back, so that context set while
				 * processing one message does not leak into the processing of the next
				 * message within the same walk over the file descriptors. See OS#3813 */
				log_reset_context();
				ofd->cb(ofd, what);
			}

			/* Ugly hack: the call-back may have unregistered any osmo_fd, including the
			 * element saved as 'next' (this was also observed with the last element of
			 * the list). The "safe" iteration is not safe against that, so as soon as
			 * anything was unregistered the walk starts over from the list head. */
			if (unregistered_count >= 1) {
				rescan = true;
				break;
			}
		}
	} while (rescan);

	return handled;
}
#ifndef FORCE_IO_SELECT
/* Build the pollfd array (g_poll.poll) from the list of registered osmo_fd.
 * Entries without any 'when' flag are left out. Returns the number of entries filled. */
static unsigned int poll_fill_fds(void)
{
	struct osmo_fd *ofd;
	unsigned int n_filled = 0;

	llist_for_each_entry(ofd, &osmo_fds, list) {
		struct pollfd *slot;

		if (ofd->when == 0)
			continue;

		slot = &g_poll.poll[n_filled];
		n_filled++;

		slot->fd = ofd->fd;
		slot->revents = 0;
		slot->events = 0;
		/* use the same mapping as the Linux kernel does in fs/select.c */
		if (ofd->when & OSMO_FD_READ)
			slot->events |= POLLIN | POLLHUP | POLLERR;
		if (ofd->when & OSMO_FD_WRITE)
			slot->events |= POLLOUT | POLLERR;
		if (ofd->when & OSMO_FD_EXCEPT)
			slot->events |= POLLPRI;
	}

	return n_filled;
}
/* Iterate over the first n_fd entries of g_poll.poll and dispatch the reported events to the
 * call-backs of the matching osmo_fd.
 *
 * While a shutdown is requested (see osmo_select_shutdown_request()), only file descriptors
 * with a pending OSMO_FD_WRITE are serviced; all others are skipped. Once a run encounters no
 * such pending write anymore, the shutdown is marked as done.
 *
 * Returns 1 if at least one call-back was invoked; 0 otherwise. */
static int poll_disp_fds(unsigned int n_fd)
{
	struct osmo_fd *ufd;
	unsigned int i;
	int work = 0;
	int shutdown_pending_writes = 0;

	for (i = 0; i < n_fd; i++) {
		struct pollfd *p = &g_poll.poll[i];
		int flags = 0;

		if (!p->revents)
			continue;

		ufd = osmo_fd_get_by_fd(p->fd);
		if (!ufd) {
			/* FD might have been unregistered meanwhile */
			continue;
		}
		/* use the same mapping as the Linux kernel does in fs/select.c */
		if (p->revents & (POLLIN | POLLHUP | POLLERR))
			flags |= OSMO_FD_READ;
		if (p->revents & (POLLOUT | POLLERR))
			flags |= OSMO_FD_WRITE;
		if (p->revents & POLLPRI)
			flags |= OSMO_FD_EXCEPT;

		/* make sure we never report more than the user requested */
		flags &= ufd->when;

		if (_osmo_select_shutdown_requested > 0) {
			if (ufd->when & OSMO_FD_WRITE)
				shutdown_pending_writes++;
			else
				/* be write-only: do not service fds without pending write during shutdown */
				continue;
		}

		if (flags) {
			work = 1;
			/* make sure to clear any log context before processing the next incoming message
			 * as part of some file descriptor callback. This effectively prevents "context
			 * leaking" from processing of one message into processing of the next message as part
			 * of one iteration through the list of file descriptors here. See OS#3813 */
			log_reset_context();
			ufd->cb(ufd, flags);
		}
	}

	if (_osmo_select_shutdown_requested > 0 && !shutdown_pending_writes)
		_osmo_select_shutdown_done = true;

	return work;
}
/* One iteration of the poll() based main loop: wait for events (or only check for them
 * if 'polling' is non-zero), run expired timers and dispatch the fd call-backs.
 * Returns the result of poll_disp_fds(), or 0 if poll() itself failed. */
static int _osmo_select_main(int polling)
{
	/* translate the registered osmo_fd into the pollfd array */
	const unsigned int n_poll = poll_fill_fds();
	int timeout_ms = 0;

	if (!polling) {
		osmo_timers_prepare();
		timeout_ms = osmo_timers_nearest_ms();
		/* never block indefinitely (-1) while a shutdown is in progress */
		if (timeout_ms == -1 && _osmo_select_shutdown_requested)
			timeout_ms = 0;
	}

	if (poll(g_poll.poll, n_poll, timeout_ms) < 0)
		return 0;

	/* fire timers, unless we are shutting down */
	if (_osmo_select_shutdown_requested == 0)
		osmo_timers_update();

	OSMO_ASSERT(osmo_ctx->select);

	/* call registered callback functions */
	return poll_disp_fds(n_poll);
}
#else /* FORCE_IO_SELECT */
/* the old implementation based on select, used 2008-2020.
 * One iteration of the main loop: wait for events (or only check for them if 'polling' is
 * non-zero), run expired timers and dispatch the fd call-backs.
 * Returns the result of osmo_fd_disp_fds(), or 0 if select() itself failed. */
static int _osmo_select_main(int polling)
{
	fd_set readset, writeset, exceptset;
	struct timeval no_time = {0, 0};
	struct timeval *timeout;

	FD_ZERO(&readset);
	FD_ZERO(&writeset);
	FD_ZERO(&exceptset);

	/* prepare read and write fdsets */
	osmo_fd_fill_fds(&readset, &writeset, &exceptset);

	if (polling) {
		/* return immediately */
		timeout = &no_time;
	} else {
		/* block until the nearest timer expires */
		osmo_timers_prepare();
		timeout = osmo_timers_nearest();
	}

	if (select(maxfd+1, &readset, &writeset, &exceptset, timeout) < 0)
		return 0;

	/* fire timers */
	osmo_timers_update();

	OSMO_ASSERT(osmo_ctx->select);

	/* call registered callback functions */
	return osmo_fd_disp_fds(&readset, &writeset, &exceptset);
}
#endif /* FORCE_IO_SELECT */
/*! Run one iteration of the main loop (without handling the volatile 'select' talloc context)
 *  \param[in] polling should we pollonly (1) or block on select (0)
 *  \returns 0 if no fd handled; 1 if fd handled; negative in case of error
 *
 *  Unless built with EMBEDDED, this panics if anything was allocated from the volatile
 *  'select' talloc context, as only osmo_select_main_ctx() frees that context.
 */
int osmo_select_main(int polling)
{
	const int handled = _osmo_select_main(polling);

#ifndef EMBEDDED
	if (talloc_total_size(osmo_ctx->select) > 0)
		osmo_panic("You cannot use the 'select' volatile "
			   "context if you don't use osmo_select_main_ctx()!\n");
#endif

	return handled;
}
#ifndef EMBEDDED
/*! Run one iteration of the main loop, then empty the temporary select-dispatch talloc context
 *  \param[in] polling should we pollonly (1) or block on select (0)
 *  \returns 0 if no fd handled; 1 if fd handled; negative in case of error
 */
int osmo_select_main_ctx(int polling)
{
	int handled;

	handled = _osmo_select_main(polling);

	/* whatever the call-backs allocated below the volatile 'select' context dies here */
	talloc_free_children(osmo_ctx->select);

	return handled;
}
#endif
/*! Look up the registered osmo_fd that belongs to an integer fd (in the calling thread)
 *  \param[in] fd file descriptor to use as search key
 *  \returns \ref osmo_fd for \ref fd; NULL if \a fd is negative, larger than any fd
 *           registered so far, or not registered */
struct osmo_fd *osmo_fd_get_by_fd(int fd)
{
	const bool in_range = (fd >= 0) && (fd <= maxfd);

	return in_range ? osmo_fd_lookup.table[fd] : NULL;
}
/*! Prepare the per-thread state of the osmocom select abstraction for the calling thread:
 *  allocate the initial fd lookup table and set up the (empty) list of registered fds. */
void osmo_select_init(void)
{
	/* make sure the lookup table exists (it is at least 1024 bytes large) */
	osmo_fd_lookup_table_extend(0);
	/* thread-local list heads cannot be initialized statically */
	INIT_LLIST_HEAD(&osmo_fds);
}
/* ensure main thread always has pre-initialized osmo_fds
 * priority 102: must run after on_dso_load_ctx */
/* Runs as a constructor (see attribute below), i.e. without any explicit call. As osmo_fds and
 * the lookup table are thread-local, this only covers the thread executing the constructor;
 * other threads presumably have to call osmo_select_init() themselves -- TODO confirm. */
static __attribute__((constructor(102))) void on_dso_load_select(void)
{
osmo_select_init();
}
#ifdef HAVE_SYS_TIMERFD_H
#include <sys/timerfd.h>
/*! Disarm the osmocom-wrapped timerfd by programming an all-zero timer specification
 *  \param[in] ofd Osmocom wrapped timerfd
 *  \returns the result of timerfd_settime() */
int osmo_timerfd_disable(struct osmo_fd *ofd)
{
	struct itimerspec disarm;

	/* both it_value and it_interval are zero */
	memset(&disarm, 0, sizeof(disarm));

	return timerfd_settime(ofd->fd, 0, &disarm, NULL);
}
/*! Arm the osmocom-wrapped timerfd: first expiry at \a first, then periodically every \a interval
 *  \param[in] ofd Osmocom wrapped timerfd
 *  \param[in] first Relative time at which the timer should first execute (NULL = \a interval)
 *  \param[in] interval Time interval at which subsequent timer shall fire (dereferenced
 *             unconditionally, so it must not be NULL)
 *  \returns 0 on success; negative on error (-EINVAL if the timerfd was not created yet) */
int osmo_timerfd_schedule(struct osmo_fd *ofd, const struct timespec *first,
			  const struct timespec *interval)
{
	/* without an explicit first expiry, the first period is a regular interval */
	const struct timespec *initial = first ? first : interval;
	struct itimerspec spec;

	if (ofd->fd < 0)
		return -EINVAL;

	spec.it_interval = *interval;
	spec.it_value = *initial;

	return timerfd_settime(ofd->fd, 0, &spec, NULL);
}
/*! setup osmocom-wrapped timerfd
 *  \param[inout] ofd Osmocom-wrapped timerfd on which to operate
 *  \param[in] cb Call-back function called when timerfd becomes readable
 *  \param[in] data Opaque data to be passed on to call-back
 *  \returns 0 on success; negative on error
 *
 *  We simply initialize the data structures here, but do not yet
 *  schedule the timer.
 *
 *  If ofd->fd is negative, a new non-blocking CLOCK_MONOTONIC timerfd is created and
 *  registered. If ofd->fd is already valid, only the call-back, data and 'when' fields are
 *  updated. On error ofd->fd is negative again.
 */
int osmo_timerfd_setup(struct osmo_fd *ofd, int (*cb)(struct osmo_fd *, unsigned int), void *data)
{
	ofd->cb = cb;
	ofd->data = data;
	ofd->when = OSMO_FD_READ;

	if (ofd->fd < 0) {
		int rc;

		ofd->fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK);
		if (ofd->fd < 0)
			return ofd->fd;

		rc = osmo_fd_register(ofd);
		if (rc < 0) {
			/* A failed osmo_fd_register() never added ofd to the list of registered
			 * fds, so it must not be unregistered here (that would operate on an
			 * unlinked list entry and corrupt the registration counter).
			 * Just release the timerfd again. */
			close(ofd->fd);
			ofd->fd = -1;
			return rc;
		}
	}
	return 0;
}
#endif /* HAVE_SYS_TIMERFD_H */
#ifdef HAVE_SYS_SIGNALFD_H
#include <sys/signalfd.h>
/* osmo_fd call-back of the signalfd: read one signalfd_siginfo and hand it to the user
 * call-back of the osmo_signalfd. If read() fails, the fd is unregistered and closed and
 * the negative read() result is returned; otherwise 0 is returned. */
static int signalfd_callback(struct osmo_fd *ofd, unsigned int what)
{
	struct osmo_signalfd *sfd = ofd->data;
	struct signalfd_siginfo info;
	int nread;

	nread = read(ofd->fd, &info, sizeof(info));
	if (nread >= 0) {
		sfd->cb(sfd, &info);
		return 0;
	}

	/* reading failed: give up on this signalfd */
	osmo_fd_unregister(ofd);
	close(ofd->fd);
	ofd->fd = -1;

	return nread;
}
/*! Allocate an osmo_signalfd, create its signalfd and register it with the osmocom select loop.
 *  \param[in] ctx talloc context from which osmo_signalfd is to be allocated
 *  \param[in] set set of signals to be accepted via this file descriptor
 *  \param[in] cb call-back function to be called for each arriving signal
 *  \param[in] data opaque user-provided data to pass to callback
 *  \returns pointer to newly-allocated + registered osmo_signalfd; NULL on error
 *
 *  On error, nothing stays allocated or open. */
struct osmo_signalfd *
osmo_signalfd_setup(void *ctx, sigset_t set, osmo_signalfd_cb *cb, void *data)
{
	struct osmo_signalfd *osfd;
	int fd;

	osfd = talloc_size(ctx, sizeof(*osfd));
	if (osfd == NULL)
		return NULL;

	osfd->cb = cb;
	osfd->sigset = set;
	osfd->data = data;

	fd = signalfd(-1, &osfd->sigset, SFD_NONBLOCK);
	if (fd < 0)
		goto err_free;

	osmo_fd_setup(&osfd->ofd, fd, OSMO_FD_READ, signalfd_callback, osfd, 0);
	if (osmo_fd_register(&osfd->ofd) < 0)
		goto err_close;

	return osfd;

err_close:
	close(fd);
err_free:
	talloc_free(osfd);
	return NULL;
}
#endif /* HAVE_SYS_SIGNALFD_H */
/*! Request osmo_select_* to only service pending OSMO_FD_WRITE requests. Once all writes are done,
 *  osmo_select_shutdown_done() returns true. This allows for example to send all outbound packets
 *  before terminating the process.
 *
 *  Every call increments a counter, which can be queried by osmo_select_shutdown_requested().
 *
 *  Usage example:
 *
 *  static void signal_handler(int signum)
 *  {
 *          fprintf(stdout, "signal %u received\n", signum);
 *
 *          switch (signum) {
 *          case SIGINT:
 *          case SIGTERM:
 *                  // If the user hits Ctrl-C the third time, just terminate immediately.
 *                  if (osmo_select_shutdown_requested() >= 2)
 *                          exit(-1);
 *                  // Request write-only mode in osmo_select_main_ctx()
 *                  osmo_select_shutdown_request();
 *                  break;
 *          [...]
 *          }
 *  }
 *
 *  main()
 *  {
 *          signal(SIGINT, &signal_handler);
 *          signal(SIGTERM, &signal_handler);
 *
 *          [...]
 *
 *          // After the signal_handler issued osmo_select_shutdown_request(),
 *          // osmo_select_shutdown_done() returns true as soon as all write queues are empty.
 *          while (!osmo_select_shutdown_done()) {
 *                  osmo_select_main_ctx(0);
 *          }
 *  }
 */
void osmo_select_shutdown_request(void)
{
_osmo_select_shutdown_requested++;
};
/*! Return the number of times osmo_select_shutdown_request() was called before.
 *  \returns 0 if no shutdown was requested so far; the number of requests otherwise */
int osmo_select_shutdown_requested(void)
{
return _osmo_select_shutdown_requested;
};
/*! Return true after osmo_select_shutdown_request() was called, and after an osmo_select poll loop found no more
 *  pending OSMO_FD_WRITE on any registered socket.
 *  Within this file the flag is only ever set by the poll() based dispatcher (poll_disp_fds()),
 *  i.e. not when built with FORCE_IO_SELECT. */
bool osmo_select_shutdown_done(void) {
return _osmo_select_shutdown_done;
};
/*! @} */
#endif /* _HAVE_SYS_SELECT_H */