select: Migrate over to poll()

select is an ancient interface with weird restrictions, such as
the fact that it cannot be used for file descriptor values > 1024.

This may have been sufficient 40 years ago, but certainly is not in
2020.  I wanted to migrate to epoll(), but unfortunately it doesn't
work well with the fact that existing programs simply set osmo_fd.flags
without making any API calls at the time they change those flags.

So let's do the migration to poll() as a first step, and then consider
epoll() as a second step further down the road, after introducing new
APIs and porting applications over.

The poll() code introduced in this patch is not extremely efficient,
as it needs to do extensive linked list iterations after poll() returns
in order to find the osmo_fd from the fd.  Optimization is possible,
but let's postpone that to a follow-up patch.

At compile time, a new --enable-force-io-select argument can be given
to configure, forcing the use of the old select() backend instead of the
new poll() based backend.

Change-Id: I9e80da68a144b36926066610d0d3df06abe09bca
This commit is contained in:
Harald Welte 2020-10-18 21:24:13 +02:00 committed by laforge
parent 6760845da6
commit b904e428aa
4 changed files with 164 additions and 3 deletions

View File

@ -62,7 +62,7 @@ AC_SUBST(LTLDFLAGS_OSMOCTRL)
dnl checks for header files
AC_HEADER_STDC
AC_CHECK_HEADERS(execinfo.h sys/select.h sys/socket.h sys/signalfd.h sys/timerfd.h syslog.h ctype.h netinet/tcp.h netinet/in.h)
AC_CHECK_HEADERS(execinfo.h poll.h sys/select.h sys/socket.h sys/signalfd.h sys/timerfd.h syslog.h ctype.h netinet/tcp.h netinet/in.h)
# for src/conv.c
AC_FUNC_ALLOCA
AC_SEARCH_LIBS([dlopen], [dl dld], [LIBRARY_DLOPEN="$LIBS";LIBS=""])
@ -252,6 +252,16 @@ then
AC_DEFINE([OSMO_FD_CHECK],[1],[Instrument the osmo_fd_register])
fi
AC_ARG_ENABLE([force_io_select],
[AS_HELP_STRING(
[--enable-force-io-select],
[Build with old select I/O instead of poll]
)],
[force_io_select=$enableval], [force_io_select="no"])
AS_IF([test "x$force_io_select" = "xyes"], [
AC_DEFINE([FORCE_IO_SELECT], [1], [Force the use of select() instaed of poll()])
])
AC_ARG_ENABLE(msgfile,
[AS_HELP_STRING(
[--disable-msgfile],

View File

@ -84,6 +84,7 @@ int osmo_timer_remaining(const struct osmo_timer_list *timer,
* internal timer list management
*/
struct timeval *osmo_timers_nearest(void);
int osmo_timers_nearest_ms(void);
void osmo_timers_prepare(void);
int osmo_timers_update(void);
int osmo_timers_check(void);

View File

@ -4,7 +4,7 @@
* userspace logging daemon for the iptables ULOG target
* of the linux 2.4 netfilter subsystem. */
/*
* (C) 2000-2009 by Harald Welte <laforge@gnumonks.org>
* (C) 2000-2020 by Harald Welte <laforge@gnumonks.org>
* All Rights Reserverd.
*
* SPDX-License-Identifier: GPL-2.0+
@ -41,8 +41,9 @@
#include "../config.h"
#ifdef HAVE_SYS_SELECT_H
#if defined(HAVE_SYS_SELECT_H) && defined(HAVE_POLL_H)
#include <sys/select.h>
#include <poll.h>
/*! \addtogroup select
* @{
@ -56,6 +57,18 @@ static __thread int maxfd = 0;
static __thread struct llist_head osmo_fds; /* TLS cannot use LLIST_HEAD() */
static __thread int unregistered_count;
#ifndef FORCE_IO_SELECT
struct poll_state {
/* array of pollfd */
struct pollfd *poll;
/* number of entries in pollfd allocated */
unsigned int poll_size;
/* number of osmo_fd registered */
unsigned int num_registered;
};
static __thread struct poll_state g_poll;
#endif /* FORCE_IO_SELECT */
/*! Set up an osmo-fd. Will not register it.
* \param[inout] ofd Osmo FD to be set-up
* \param[in] fd OS-level file descriptor number
@ -136,6 +149,19 @@ int osmo_fd_register(struct osmo_fd *fd)
return 0;
}
#endif
#ifndef FORCE_IO_SELECT
if (g_poll.num_registered + 1 > g_poll.poll_size) {
struct pollfd *p;
unsigned int new_size = g_poll.poll_size ? g_poll.poll_size * 2 : 1024;
p = talloc_realloc(OTC_GLOBAL, g_poll.poll, struct pollfd, new_size);
if (!p)
return -ENOMEM;
memset(p + g_poll.poll_size, 0, new_size - g_poll.poll_size);
g_poll.poll = p;
g_poll.poll_size = new_size;
}
g_poll.num_registered++;
#endif /* FORCE_IO_SELECT */
llist_add_tail(&fd->list, &osmo_fds);
@ -152,6 +178,9 @@ void osmo_fd_unregister(struct osmo_fd *fd)
* osmo_fd_is_registered() */
unregistered_count++;
llist_del(&fd->list);
#ifndef FORCE_IO_SELECT
g_poll.num_registered--;
#endif /* FORCE_IO_SELECT */
}
/*! Close a file descriptor, mark it as closed + unregister from select loop abstraction
@ -246,6 +275,110 @@ restart:
return work;
}
#ifndef FORCE_IO_SELECT
/* fill g_poll.poll and return the number of entries filled */
static unsigned int poll_fill_fds(void)
{
struct osmo_fd *ufd;
unsigned int i = 0;
llist_for_each_entry(ufd, &osmo_fds, list) {
struct pollfd *p;
if (!ufd->when)
continue;
p = &g_poll.poll[i++];
p->fd = ufd->fd;
p->events = 0;
p->revents = 0;
/* use the same mapping as the Linux kernel does in fs/select.c */
if (ufd->when & OSMO_FD_READ)
p->events |= POLLIN | POLLHUP | POLLERR;
if (ufd->when & OSMO_FD_WRITE)
p->events |= POLLOUT | POLLERR;
if (ufd->when & OSMO_FD_EXCEPT)
p->events |= POLLPRI;
}
return i;
}
/* iterate over first n_fd entries of g_poll.poll + dispatch */
static int poll_disp_fds(int n_fd)
{
struct osmo_fd *ufd;
unsigned int i;
int work = 0;
for (i = 0; i < n_fd; i++) {
struct pollfd *p = &g_poll.poll[i];
int flags = 0;
if (!p->revents)
continue;
ufd = osmo_fd_get_by_fd(p->fd);
if (!ufd) {
/* FD might have been unregistered meanwhile */
continue;
}
/* use the same mapping as the Linux kernel does in fs/select.c */
if (p->revents & (POLLIN | POLLHUP | POLLERR))
flags |= OSMO_FD_READ;
if (p->revents & (POLLOUT | POLLERR))
flags |= OSMO_FD_WRITE;
if (p->revents & POLLPRI)
flags |= OSMO_FD_EXCEPT;
/* make sure we never report more than the user requested */
flags &= ufd->when;
if (flags) {
work = 1;
/* make sure to clear any log context before processing the next incoming message
* as part of some file descriptor callback. This effectively prevents "context
* leaking" from processing of one message into processing of the next message as part
* of one iteration through the list of file descriptors here. See OS#3813 */
log_reset_context();
ufd->cb(ufd, flags);
}
}
return work;
}
static int _osmo_select_main(int polling)
{
unsigned int n_poll;
int rc;
/* prepare read and write fdsets */
n_poll = poll_fill_fds();
if (!polling)
osmo_timers_prepare();
rc = poll(g_poll.poll, n_poll, polling ? 0 : osmo_timers_nearest_ms());
if (rc < 0)
return 0;
/* fire timers */
osmo_timers_update();
OSMO_ASSERT(osmo_ctx->select);
/* call registered callback functions */
return poll_disp_fds(n_poll);
}
#else /* FORCE_IO_SELECT */
/* the old implementation based on select, used 2008-2020 */
static int _osmo_select_main(int polling)
{
fd_set readset, writeset, exceptset;
@ -273,6 +406,7 @@ static int _osmo_select_main(int polling)
/* call registered callback functions */
return osmo_fd_disp_fds(&readset, &writeset, &exceptset);
}
#endif /* FORCE_IO_SELECT */
/*! select main loop integration
* \param[in] polling should we pollonly (1) or block on select (0)

View File

@ -184,6 +184,22 @@ struct timeval *osmo_timers_nearest(void)
return nearest_p;
}
/*! Determine time between now and the nearest timer in milliseconds
* \returns number of milliseconds until nearest timer expires; -1 if no timers pending
*/
int osmo_timers_nearest_ms(void)
{
int nearest_ms;
if (!nearest_p)
return -1;
nearest_ms = nearest_p->tv_sec * 1000;
nearest_ms += nearest_p->tv_usec / 1000;
return nearest_ms;
}
static void update_nearest(struct timeval *cand, struct timeval *current)
{
if (cand->tv_sec != LONG_MAX) {