From b1ef8f5f691db8d245428568e4934795943f39fd Mon Sep 17 00:00:00 2001 From: Philipp Maier Date: Mon, 6 Dec 2021 16:31:02 +0100 Subject: [PATCH] select: gather statistics for TCP connections osmocom applications are deployed in a variety of different situations. Dependung on the medium that interconnects the network components unexpected behaviour may occur. To debug problems with the interconnection between network components it might help to monitor the health of the related TCP connections. Change-Id: I1416f95aff2adcf13689646b7574845de169fa3d Related: SYS#5701 --- configure.ac | 22 +++ include/Makefile.am | 1 + include/osmocom/core/stats_tcp.h | 16 ++ src/Makefile.am | 1 + src/select.c | 5 + src/stats.c | 5 + src/stats_tcp.c | 322 +++++++++++++++++++++++++++++++ src/vty/stats_vty.c | 33 ++++ tests/stats/stats_vty_test.vty | 1 + 9 files changed, 406 insertions(+) create mode 100644 include/osmocom/core/stats_tcp.h create mode 100644 src/stats_tcp.c diff --git a/configure.ac b/configure.ac index a69f793f9..8fb299b9e 100644 --- a/configure.ac +++ b/configure.ac @@ -516,6 +516,28 @@ dnl Check if the compiler supports runtime SIMD detection CHECK_BUILTIN_SUPPORT([__builtin_cpu_supports], [Runtime SIMD detection will be disabled]) +dnl There are some members in struct tcp_info that might not exist on all linux versions +AC_CHECK_MEMBER([struct tcp_info.tcpi_rwnd_limited], + AC_DEFINE([HAVE_TCP_INFO_TCPI_RWND_LIMITED], + [1], + [Define to 1 if your header file have the tcpi_rwnd_limited member in struct tcp_info]), + [], + [#include ]) + +AC_CHECK_MEMBER([struct tcp_info.tcpi_sndbuf_limited], + AC_DEFINE([HAVE_TCP_INFO_TCPI_SNDBUF_LIMITED], + [1], + [Define to 1 if your header file have the tcpi_sndbuf_limited member in struct tcp_info]), + [], + [#include ]) + +AC_CHECK_MEMBER([struct tcp_info.tcpi_reord_seen], + AC_DEFINE([HAVE_TCP_INFO_TCPI_REORD_SEEN], + [1], + [Define to 1 if your header file have the tcpi_reord_seen member in struct tcp_info]), + [], + [#include ]) + AC_MSG_RESULT([CFLAGS="$CFLAGS"]) AC_MSG_RESULT([CPPFLAGS="$CPPFLAGS"]) diff --git a/include/Makefile.am b/include/Makefile.am index eab04894c..a2340147d 100644 --- a/include/Makefile.am +++ b/include/Makefile.am @@ -46,6 +46,7 @@ nobase_include_HEADERS = \ osmocom/core/process.h \ osmocom/core/rate_ctr.h \ osmocom/core/stat_item.h \ + osmocom/core/stats_tcp.h \ osmocom/core/select.h \ osmocom/core/sercomm.h \ osmocom/core/signal.h \ diff --git a/include/osmocom/core/stats_tcp.h b/include/osmocom/core/stats_tcp.h new file mode 100644 index 000000000..6b9657a87 --- /dev/null +++ b/include/osmocom/core/stats_tcp.h @@ -0,0 +1,16 @@ +#pragma once + +#define TCP_STATS_DEFAULT_INTERVAL 0 /* secs */ +#define TCP_STATS_DEFAULT_BATCH_SIZE 1 /* sockets per interval */ + +struct osmo_tcp_stats_config { + /* poll interval in seconds, use osmo_stats_tcp_set_interval() to manipulate this value */ + int interval; + /* specify how many sockets are processed when the interval timer expires */ + int batch_size; +}; +extern struct osmo_tcp_stats_config *osmo_tcp_stats_config; + +int osmo_stats_tcp_osmo_fd_register(const struct osmo_fd *fd, const char *name); +int osmo_stats_tcp_osmo_fd_unregister(const struct osmo_fd *fd); +int osmo_stats_tcp_set_interval(int interval); diff --git a/src/Makefile.am b/src/Makefile.am index 4bda4564e..5777c31a9 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -22,6 +22,7 @@ libosmocore_la_SOURCES = context.c timer.c timer_gettimeofday.c timer_clockgetti conv.c application.c rbtree.c strrb.c \ loggingrb.c crc8gen.c crc16gen.c crc32gen.c crc64gen.c \ macaddr.c stat_item.c stats.c stats_statsd.c prim.c \ + stats_tcp.c \ conv_acc.c conv_acc_generic.c sercomm.c prbs.c \ isdnhdlc.c \ tdef.c \ diff --git a/src/select.c b/src/select.c index 590c3db64..b48d13e76 100644 --- a/src/select.c +++ b/src/select.c @@ -33,6 +33,8 @@ #include #include #include +#include +#include #include "../config.h" @@ -181,6 +183,9 @@ void osmo_fd_unregister(struct osmo_fd *fd) #ifndef FORCE_IO_SELECT g_poll.num_registered--; #endif /* FORCE_IO_SELECT */ + + /* If existent, free any statistical data */ + osmo_stats_tcp_osmo_fd_unregister(fd); } /*! Close a file descriptor, mark it as closed + unregister from select loop abstraction diff --git a/src/stats.c b/src/stats.c index f8c91fd7e..4267284b4 100644 --- a/src/stats.c +++ b/src/stats.c @@ -85,6 +85,7 @@ #include #include #include +#include #ifdef HAVE_SYSTEMTAP /* include the generated probes header and put markers in code */ @@ -240,6 +241,10 @@ void osmo_stats_init(void *ctx) osmo_stats_ctx = ctx; is_initialised = 1; start_timer(); + + /* Make sure that the tcp-stats interval timer also runs at its + * preconfigured rate. The vty might change this setting later. */ + osmo_stats_tcp_set_interval(osmo_tcp_stats_config->interval); } /*! Find a stats_reporter of given \a type and \a name. diff --git a/src/stats_tcp.c b/src/stats_tcp.c new file mode 100644 index 000000000..fffb10aa9 --- /dev/null +++ b/src/stats_tcp.c @@ -0,0 +1,322 @@ +/* + * (C) 2021 by sysmocom - s.f.m.c. GmbH + * Author: Philipp Maier + * All Rights Reserved + * + * SPDX-License-Identifier: GPL-2.0+ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +/*! \addtogroup stats + * @{ + * \file stats_tcp.c */ + +#include "config.h" +#if !defined(EMBEDDED) + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static struct osmo_tcp_stats_config s_tcp_stats_config = { + .interval = TCP_STATS_DEFAULT_INTERVAL, +}; + +struct osmo_tcp_stats_config *osmo_tcp_stats_config = &s_tcp_stats_config; + +static struct osmo_timer_list stats_tcp_poll_timer; + +static LLIST_HEAD(stats_tcp); +static struct stats_tcp_entry *stats_tcp_entry_cur; +pthread_mutex_t stats_tcp_lock; + +struct stats_tcp_entry { + struct llist_head entry; + const struct osmo_fd *fd; + struct osmo_stat_item_group *stats_tcp; + const char *name; +}; + +enum { + STATS_TCP_UNACKED, + STATS_TCP_LOST, + STATS_TCP_RETRANS, + STATS_TCP_RTT, + STATS_TCP_RCV_RTT, + STATS_TCP_NOTSENT_BYTES, + STATS_TCP_RWND_LIMITED, + STATS_TCP_SNDBUF_LIMITED, + STATS_TCP_REORD_SEEN, +}; + +static struct osmo_stat_item_desc stats_tcp_item_desc[] = { + [STATS_TCP_UNACKED] = { "tcp:unacked", "unacknowledged packets", "", 60, 0 }, + [STATS_TCP_LOST] = { "tcp:lost", "lost packets", "", 60, 0 }, + [STATS_TCP_RETRANS] = { "tcp:retrans", "retransmitted packets", "", 60, 0 }, + [STATS_TCP_RTT] = { "tcp:rtt", "roundtrip-time", "", 60, 0 }, + [STATS_TCP_RCV_RTT] = { "tcp:rcv_rtt", "roundtrip-time (receive)", "", 60, 0 }, + [STATS_TCP_NOTSENT_BYTES] = { "tcp:notsent_bytes", "bytes not yet sent", "", 60, 0 }, + [STATS_TCP_RWND_LIMITED] = { "tcp:rwnd_limited", "time (usec) limited by receive window", "", 60, 0 }, + [STATS_TCP_SNDBUF_LIMITED] = { "tcp:sndbuf_limited", "Time (usec) limited by send buffer", "", 60, 0 }, + [STATS_TCP_REORD_SEEN] = { "tcp:sndbuf_limited", "reordering events seen", "", 60, 0 }, +}; + +static struct osmo_stat_item_group_desc stats_tcp_desc = { + .group_name_prefix = "tcp", + .group_description = "stats tcp", + .class_id = OSMO_STATS_CLASS_GLOBAL, + .num_items = ARRAY_SIZE(stats_tcp_item_desc), + .item_desc = stats_tcp_item_desc, +}; + +static void fill_stats(struct stats_tcp_entry *stats_tcp_entry) +{ + int rc; + struct tcp_info tcp_info; + socklen_t tcp_info_len = sizeof(tcp_info); + char stat_name[256]; + + /* Do not fill in anything before the socket is connected to a remote end */ + if (osmo_sock_get_ip_and_port(stats_tcp_entry->fd->fd, NULL, 0, NULL, 0, false) != 0) + return; + + /* Gather TCP statistics and update the stats items */ + rc = getsockopt(stats_tcp_entry->fd->fd, IPPROTO_TCP, TCP_INFO, &tcp_info, &tcp_info_len); + if (rc < 0) + return; + + /* Create stats items if they do not exist yet */ + if (!stats_tcp_entry->stats_tcp) { + stats_tcp_entry->stats_tcp = + osmo_stat_item_group_alloc(stats_tcp_entry, &stats_tcp_desc, stats_tcp_entry->fd->fd); + OSMO_ASSERT(stats_tcp_entry->stats_tcp); + } + + /* Update statistics */ + if (stats_tcp_entry->name) + snprintf(stat_name, sizeof(stat_name), "%s,%s", stats_tcp_entry->name, + osmo_sock_get_name2(stats_tcp_entry->fd->fd)); + else + snprintf(stat_name, sizeof(stat_name), "%s", osmo_sock_get_name2(stats_tcp_entry->fd->fd)); + osmo_stat_item_group_set_name(stats_tcp_entry->stats_tcp, stat_name); + + osmo_stat_item_set(osmo_stat_item_group_get_item(stats_tcp_entry->stats_tcp, STATS_TCP_UNACKED), + tcp_info.tcpi_unacked); + osmo_stat_item_set(osmo_stat_item_group_get_item(stats_tcp_entry->stats_tcp, STATS_TCP_LOST), + tcp_info.tcpi_lost); + osmo_stat_item_set(osmo_stat_item_group_get_item(stats_tcp_entry->stats_tcp, STATS_TCP_RETRANS), + tcp_info.tcpi_retrans); + osmo_stat_item_set(osmo_stat_item_group_get_item(stats_tcp_entry->stats_tcp, STATS_TCP_RTT), tcp_info.tcpi_rtt); + osmo_stat_item_set(osmo_stat_item_group_get_item(stats_tcp_entry->stats_tcp, STATS_TCP_RCV_RTT), + tcp_info.tcpi_rcv_rtt); + osmo_stat_item_set(osmo_stat_item_group_get_item(stats_tcp_entry->stats_tcp, STATS_TCP_NOTSENT_BYTES), + tcp_info.tcpi_notsent_bytes); + +#if HAVE_TCP_INFO_TCPI_RWND_LIMITED == 1 + osmo_stat_item_set(osmo_stat_item_group_get_item(stats_tcp_entry->stats_tcp, STATS_TCP_RWND_LIMITED), + tcp_info.tcpi_rwnd_limited); +#else + osmo_stat_item_set(osmo_stat_item_group_get_item(stats_tcp_entry->stats_tcp, STATS_TCP_RWND_LIMITED), -1); +#endif + +#if STATS_TCP_SNDBUF_LIMITED == 1 + osmo_stat_item_set(osmo_stat_item_group_get_item(stats_tcp_entry->stats_tcp, STATS_TCP_REORD_SEEN), + tcp_info.tcpi_sndbuf_limited); +#else + osmo_stat_item_set(osmo_stat_item_group_get_item(stats_tcp_entry->stats_tcp, STATS_TCP_REORD_SEEN), -1); +#endif + +#if HAVE_TCP_INFO_TCPI_REORD_SEEN == 1 + osmo_stat_item_set(osmo_stat_item_group_get_item(stats_tcp_entry->stats_tcp, STATS_TCP_REORD_SEEN), + tcp_info.tcpi_reord_seen); +#else + osmo_stat_item_set(osmo_stat_item_group_get_item(stats_tcp_entry->stats_tcp, STATS_TCP_REORD_SEEN), -1); +#endif + +} + +static bool is_tcp(const struct osmo_fd *fd) +{ + int rc; + struct stat fd_stat; + int so_protocol = 0; + socklen_t so_protocol_len = sizeof(so_protocol); + + /* Is this a socket? */ + rc = fstat(fd->fd, &fd_stat); + if (rc < 0) + return false; + if (!S_ISSOCK(fd_stat.st_mode)) + return false; + + /* Is it a TCP socket? */ + rc = getsockopt(fd->fd, SOL_SOCKET, SO_PROTOCOL, &so_protocol, &so_protocol_len); + if (rc < 0) + return false; + if (so_protocol == IPPROTO_TCP) + return true; + + return false; +} + +/*! Register an osmo_fd for TCP stats monitoring. + * \param[in] fd osmocom file descriptor to be registered. + * \param[in] human readbla name that is used as prefix for the related stats item. + * \returns 0 on success; negative in case of error. */ +int osmo_stats_tcp_osmo_fd_register(const struct osmo_fd *fd, const char *name) +{ + struct stats_tcp_entry *stats_tcp_entry; + + /* Only TCP sockets can be registered for monitoring, anything else will fall through. */ + if (!is_tcp(fd)) + return -EINVAL; + + /* When the osmo_fd is registered and unregistered properly there shouldn't be any leftovers from already closed + * osmo_fds in the stats_tcp list. But lets proactively make sure that any leftovers are cleaned up. */ + osmo_stats_tcp_osmo_fd_unregister(fd); + + /* Make a new list object, attach the osmo_fd... */ + stats_tcp_entry = talloc_zero(OTC_GLOBAL, struct stats_tcp_entry); + OSMO_ASSERT(stats_tcp_entry); + stats_tcp_entry->fd = fd; + stats_tcp_entry->name = talloc_strdup(stats_tcp_entry, name); + + pthread_mutex_lock(&stats_tcp_lock); + llist_add_tail(&stats_tcp_entry->entry, &stats_tcp); + pthread_mutex_unlock(&stats_tcp_lock); + + return 0; +} + +static void next_stats_tcp_entry(void) +{ + struct stats_tcp_entry *last; + + if (llist_empty(&stats_tcp)) { + stats_tcp_entry_cur = NULL; + return; + } + + last = (struct stats_tcp_entry *)llist_last_entry(&stats_tcp, struct stats_tcp_entry, entry); + + if (!stats_tcp_entry_cur || stats_tcp_entry_cur == last) + stats_tcp_entry_cur = + (struct stats_tcp_entry *)llist_first_entry(&stats_tcp, struct stats_tcp_entry, entry); + else + stats_tcp_entry_cur = + (struct stats_tcp_entry *)llist_entry(stats_tcp_entry_cur->entry.next, struct stats_tcp_entry, + entry); +} + +/*! Register an osmo_fd for TCP stats monitoring. + * \param[in] fd osmocom file descriptor to be unregistered. + * \returns 0 on success; negative in case of error. */ +int osmo_stats_tcp_osmo_fd_unregister(const struct osmo_fd *fd) +{ + struct stats_tcp_entry *stats_tcp_entry; + int rc = -EINVAL; + + pthread_mutex_lock(&stats_tcp_lock); + llist_for_each_entry(stats_tcp_entry, &stats_tcp, entry) { + if (fd->fd == stats_tcp_entry->fd->fd) { + /* In case we want to remove exactly that item which is also selected as the current itemy, we + * must designate either a different item or invalidate the current item. */ + if (stats_tcp_entry == stats_tcp_entry_cur) { + if (llist_count(&stats_tcp) > 2) + next_stats_tcp_entry(); + else + stats_tcp_entry_cur = NULL; + } + + /* Date item from list */ + llist_del(&stats_tcp_entry->entry); + osmo_stat_item_group_free(stats_tcp_entry->stats_tcp); + talloc_free(stats_tcp_entry); + rc = 0; + break; + } + } + pthread_mutex_unlock(&stats_tcp_lock); + + return rc; +} + +static void stats_tcp_poll_timer_cb(void *data) +{ + int i; + int batch_size; + int llist_size; + + pthread_mutex_lock(&stats_tcp_lock); + + /* Make sure we do not run over the same sockets multiple times if the + * configured llist_size is larger then the actual list */ + batch_size = osmo_tcp_stats_config->batch_size; + llist_size = llist_count(&stats_tcp); + if (llist_size < batch_size) + batch_size = llist_size; + + /* Process a batch of sockets */ + for (i = 0; i < batch_size; i++) { + next_stats_tcp_entry(); + if (stats_tcp_entry_cur) + fill_stats(stats_tcp_entry_cur); + } + + pthread_mutex_unlock(&stats_tcp_lock); + + if (osmo_tcp_stats_config->interval > 0) + osmo_timer_schedule(&stats_tcp_poll_timer, osmo_tcp_stats_config->interval, 0); +} + +/*! Set the polling interval (common for all sockets) + * \param[in] interval Poll interval in seconds + * \returns 0 on success; negative on error */ +int osmo_stats_tcp_set_interval(int interval) +{ + osmo_tcp_stats_config->interval = interval; + if (osmo_tcp_stats_config->interval > 0) + osmo_timer_schedule(&stats_tcp_poll_timer, osmo_tcp_stats_config->interval, 0); + return 0; +} + +static __attribute__((constructor)) +void on_dso_load_stats_tcp(void) +{ + stats_tcp_entry_cur = NULL; + pthread_mutex_init(&stats_tcp_lock, NULL); + + osmo_tcp_stats_config->interval = TCP_STATS_DEFAULT_INTERVAL; + osmo_tcp_stats_config->batch_size = TCP_STATS_DEFAULT_BATCH_SIZE; + + osmo_timer_setup(&stats_tcp_poll_timer, stats_tcp_poll_timer_cb, NULL); +} + +#endif /* !EMBEDDED */ + +/* @} */ diff --git a/src/vty/stats_vty.c b/src/vty/stats_vty.c index e2247a3b7..a73fafbb3 100644 --- a/src/vty/stats_vty.c +++ b/src/vty/stats_vty.c @@ -33,6 +33,7 @@ #include #include #include +#include #define CFG_STATS_STR "Configure stats sub-system\n" #define CFG_REPORTER_STR "Configure a stats reporter\n" @@ -389,6 +390,32 @@ DEFUN(cfg_stats_interval, cfg_stats_interval_cmd, return CMD_SUCCESS; } +DEFUN(cfg_tcp_stats_interval, cfg_tcp_stats_interval_cmd, + "stats-tcp interval <0-65535>", + CFG_STATS_STR "Set the tcp socket stats polling interval\n" + "Interval in seconds (0 disables the polling interval)\n") +{ + int rc; + int interval = atoi(argv[0]); + rc = osmo_stats_tcp_set_interval(interval); + if (rc < 0) { + vty_out(vty, "%% Unable to set interval: %s%s", + strerror(-rc), VTY_NEWLINE); + return CMD_WARNING; + } + + return CMD_SUCCESS; +} + +DEFUN(cfg_tcp_stats_batch_size, cfg_tcp_stats_batch_size_cmd, + "stats-tcp batch-size <1-65535>", + CFG_STATS_STR "Set the number of tcp sockets that are processed per stats polling interval\n" + "Number of sockets per interval\n") +{ + osmo_tcp_stats_config->batch_size = atoi(argv[0]); + return CMD_SUCCESS; +} + DEFUN(show_stats, show_stats_cmd, "show stats", @@ -677,6 +704,10 @@ static int config_write_stats(struct vty *vty) struct osmo_stats_reporter *srep; vty_out(vty, "stats interval %d%s", osmo_stats_config->interval, VTY_NEWLINE); + if (osmo_tcp_stats_config->interval != TCP_STATS_DEFAULT_INTERVAL) + vty_out(vty, "stats-tcp interval %d%s", osmo_tcp_stats_config->interval, VTY_NEWLINE); + if (osmo_tcp_stats_config->batch_size != TCP_STATS_DEFAULT_BATCH_SIZE) + vty_out(vty, "stats-tcp batch-size %d%s", osmo_tcp_stats_config->batch_size, VTY_NEWLINE); /* Loop through all reporters */ llist_for_each_entry(srep, &osmo_stats_reporter_list, list) @@ -699,6 +730,8 @@ void osmo_stats_vty_add_cmds() install_lib_element(CONFIG_NODE, &cfg_stats_reporter_log_cmd); install_lib_element(CONFIG_NODE, &cfg_no_stats_reporter_log_cmd); install_lib_element(CONFIG_NODE, &cfg_stats_interval_cmd); + install_lib_element(CONFIG_NODE, &cfg_tcp_stats_interval_cmd); + install_lib_element(CONFIG_NODE, &cfg_tcp_stats_batch_size_cmd); install_node(&cfg_stats_node, config_write_stats); diff --git a/tests/stats/stats_vty_test.vty b/tests/stats/stats_vty_test.vty index 94cc7e8e1..8732d50b8 100644 --- a/tests/stats/stats_vty_test.vty +++ b/tests/stats/stats_vty_test.vty @@ -7,6 +7,7 @@ stats_vty_test(config)# list stats reporter log [NAME] no stats reporter log [NAME] stats interval <0-65535> + stats-tcp interval <0-65535> ... stats_vty_test(config)# ### No reporters shall be configured by default