select: gather statistics for TCP connections

osmocom applications are deployed in a variety of different situations.
Depending on the medium that interconnects the network components,
unexpected behaviour may occur. To debug problems with the
interconnection between network components it might help to monitor the
health of the related TCP connections.

Change-Id: I1416f95aff2adcf13689646b7574845de169fa3d
Related: SYS#5701
This commit is contained in:
Philipp Maier 2021-12-06 16:31:02 +01:00 committed by laforge
parent 9fe1f9fb0b
commit b1ef8f5f69
9 changed files with 406 additions and 0 deletions

View File

@ -516,6 +516,28 @@ dnl Check if the compiler supports runtime SIMD detection
CHECK_BUILTIN_SUPPORT([__builtin_cpu_supports],
[Runtime SIMD detection will be disabled])
dnl There are some members in struct tcp_info that might not exist on all linux versions
AC_CHECK_MEMBER([struct tcp_info.tcpi_rwnd_limited],
AC_DEFINE([HAVE_TCP_INFO_TCPI_RWND_LIMITED],
[1],
[Define to 1 if your <linux/tcp.h> header file have the tcpi_rwnd_limited member in struct tcp_info]),
[],
[#include <linux/tcp.h>])
AC_CHECK_MEMBER([struct tcp_info.tcpi_sndbuf_limited],
AC_DEFINE([HAVE_TCP_INFO_TCPI_SNDBUF_LIMITED],
[1],
[Define to 1 if your <linux/tcp.h> header file have the tcpi_sndbuf_limited member in struct tcp_info]),
[],
[#include <linux/tcp.h>])
AC_CHECK_MEMBER([struct tcp_info.tcpi_reord_seen],
AC_DEFINE([HAVE_TCP_INFO_TCPI_REORD_SEEN],
[1],
[Define to 1 if your <linux/tcp.h> header file have the tcpi_reord_seen member in struct tcp_info]),
[],
[#include <linux/tcp.h>])
AC_MSG_RESULT([CFLAGS="$CFLAGS"])
AC_MSG_RESULT([CPPFLAGS="$CPPFLAGS"])

View File

@ -46,6 +46,7 @@ nobase_include_HEADERS = \
osmocom/core/process.h \
osmocom/core/rate_ctr.h \
osmocom/core/stat_item.h \
osmocom/core/stats_tcp.h \
osmocom/core/select.h \
osmocom/core/sercomm.h \
osmocom/core/signal.h \

View File

@ -0,0 +1,16 @@
#pragma once

/*! \file stats_tcp.h
 *  Periodic collection of TCP connection statistics for registered osmo_fd sockets. */

#define TCP_STATS_DEFAULT_INTERVAL 0 /* secs */
#define TCP_STATS_DEFAULT_BATCH_SIZE 1 /* sockets per interval */

/* Forward declaration so that this header can be included without pulling in
 * <osmocom/core/select.h> first; only pointers to struct osmo_fd are used here. */
struct osmo_fd;

/*! Configuration of the TCP statistics poller. */
struct osmo_tcp_stats_config {
	/* poll interval in seconds, use osmo_stats_tcp_set_interval() to manipulate this value */
	int interval;
	/* specify how many sockets are processed when the interval timer expires */
	int batch_size;
};

/*! Pointer to the active TCP statistics configuration. */
extern struct osmo_tcp_stats_config *osmo_tcp_stats_config;

/*! Register an osmo_fd for TCP stats monitoring.
 *  \param[in] fd osmocom file descriptor to be registered.
 *  \param[in] name human readable name that is used as prefix for the related stats item.
 *  \returns 0 on success; negative in case of error. */
int osmo_stats_tcp_osmo_fd_register(const struct osmo_fd *fd, const char *name);

/*! Unregister an osmo_fd from TCP stats monitoring.
 *  \param[in] fd osmocom file descriptor to be unregistered.
 *  \returns 0 on success; negative in case of error. */
int osmo_stats_tcp_osmo_fd_unregister(const struct osmo_fd *fd);

/*! Set the polling interval (common for all sockets).
 *  \param[in] interval Poll interval in seconds.
 *  \returns 0 on success; negative on error. */
int osmo_stats_tcp_set_interval(int interval);

View File

@ -22,6 +22,7 @@ libosmocore_la_SOURCES = context.c timer.c timer_gettimeofday.c timer_clockgetti
conv.c application.c rbtree.c strrb.c \
loggingrb.c crc8gen.c crc16gen.c crc32gen.c crc64gen.c \
macaddr.c stat_item.c stats.c stats_statsd.c prim.c \
stats_tcp.c \
conv_acc.c conv_acc_generic.c sercomm.c prbs.c \
isdnhdlc.c \
tdef.c \

View File

@ -33,6 +33,8 @@
#include <osmocom/core/logging.h>
#include <osmocom/core/talloc.h>
#include <osmocom/core/utils.h>
#include <osmocom/core/stat_item.h>
#include <osmocom/core/stats_tcp.h>
#include "../config.h"
@ -181,6 +183,9 @@ void osmo_fd_unregister(struct osmo_fd *fd)
#ifndef FORCE_IO_SELECT
g_poll.num_registered--;
#endif /* FORCE_IO_SELECT */
/* If existent, free any statistical data */
osmo_stats_tcp_osmo_fd_unregister(fd);
}
/*! Close a file descriptor, mark it as closed + unregister from select loop abstraction

View File

@ -85,6 +85,7 @@
#include <osmocom/core/select.h>
#include <osmocom/core/counter.h>
#include <osmocom/core/msgb.h>
#include <osmocom/core/stats_tcp.h>
#ifdef HAVE_SYSTEMTAP
/* include the generated probes header and put markers in code */
@ -240,6 +241,10 @@ void osmo_stats_init(void *ctx)
osmo_stats_ctx = ctx;
is_initialised = 1;
start_timer();
/* Make sure that the tcp-stats interval timer also runs at its
* preconfigured rate. The vty might change this setting later. */
osmo_stats_tcp_set_interval(osmo_tcp_stats_config->interval);
}
/*! Find a stats_reporter of given \a type and \a name.

322
src/stats_tcp.c Normal file
View File

@ -0,0 +1,322 @@
/*
* (C) 2021 by sysmocom - s.f.m.c. GmbH
* Author: Philipp Maier <pmaier@sysmocom.de>
* All Rights Reserved
*
* SPDX-License-Identifier: GPL-2.0+
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*/
/*! \addtogroup stats
* @{
* \file stats_tcp.c */
#include "config.h"
#if !defined(EMBEDDED)
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <linux/tcp.h>
#include <errno.h>
#include <pthread.h>
#include <osmocom/core/select.h>
#include <osmocom/core/linuxlist.h>
#include <osmocom/core/talloc.h>
#include <osmocom/core/utils.h>
#include <osmocom/core/timer.h>
#include <osmocom/core/stat_item.h>
#include <osmocom/core/stats.h>
#include <osmocom/core/socket.h>
#include <osmocom/core/stats_tcp.h>
/* Backing storage for the configuration. Only the interval is initialized
 * statically; batch_size is assigned by the constructor on_dso_load_stats_tcp(). */
static struct osmo_tcp_stats_config s_tcp_stats_config = {
.interval = TCP_STATS_DEFAULT_INTERVAL,
};
/* Publicly visible handle to the configuration above (declared in stats_tcp.h). */
struct osmo_tcp_stats_config *osmo_tcp_stats_config = &s_tcp_stats_config;
/* Timer that drives stats_tcp_poll_timer_cb(). */
static struct osmo_timer_list stats_tcp_poll_timer;
/* List of all registered sockets (struct stats_tcp_entry). */
static LLIST_HEAD(stats_tcp);
/* Entry that was polled most recently; NULL if there is none. The poll timer
 * advances this cursor through the list in a round-robin fashion. */
static struct stats_tcp_entry *stats_tcp_entry_cur;
/* Taken by register, unregister and the poll timer callback while they access
 * the stats_tcp list and stats_tcp_entry_cur. */
pthread_mutex_t stats_tcp_lock;
/* One socket that is registered for TCP statistics monitoring. */
struct stats_tcp_entry {
/* Membership in the stats_tcp list */
struct llist_head entry;
/* The osmo_fd handed in on registration (the pointer is stored, not a copy) */
const struct osmo_fd *fd;
/* Stat item group for this socket; allocated by fill_stats() on demand, NULL until then */
struct osmo_stat_item_group *stats_tcp;
/* Copy of the name given on registration, used as prefix of the group name (may be NULL) */
const char *name;
};
/* Indexes of the individual stat items; used as array index in
 * stats_tcp_item_desc[] and when looking up items in fill_stats(). */
enum {
STATS_TCP_UNACKED,
STATS_TCP_LOST,
STATS_TCP_RETRANS,
STATS_TCP_RTT,
STATS_TCP_RCV_RTT,
STATS_TCP_NOTSENT_BYTES,
STATS_TCP_RWND_LIMITED,
STATS_TCP_SNDBUF_LIMITED,
STATS_TCP_REORD_SEEN,
};
/* Description of the stat items that are exported per monitored socket.
 * The array is indexed by the STATS_TCP_* enum values. Every item needs a
 * unique name, otherwise two different values get reported under the same key. */
static struct osmo_stat_item_desc stats_tcp_item_desc[] = {
	[STATS_TCP_UNACKED] = { "tcp:unacked", "unacknowledged packets", "", 60, 0 },
	[STATS_TCP_LOST] = { "tcp:lost", "lost packets", "", 60, 0 },
	[STATS_TCP_RETRANS] = { "tcp:retrans", "retransmitted packets", "", 60, 0 },
	[STATS_TCP_RTT] = { "tcp:rtt", "roundtrip-time", "", 60, 0 },
	[STATS_TCP_RCV_RTT] = { "tcp:rcv_rtt", "roundtrip-time (receive)", "", 60, 0 },
	[STATS_TCP_NOTSENT_BYTES] = { "tcp:notsent_bytes", "bytes not yet sent", "", 60, 0 },
	[STATS_TCP_RWND_LIMITED] = { "tcp:rwnd_limited", "time (usec) limited by receive window", "", 60, 0 },
	[STATS_TCP_SNDBUF_LIMITED] = { "tcp:sndbuf_limited", "Time (usec) limited by send buffer", "", 60, 0 },
	[STATS_TCP_REORD_SEEN] = { "tcp:reord_seen", "reordering events seen", "", 60, 0 },
};
/* Description of the stat item group that fill_stats() allocates for each
 * monitored socket; it bundles all items from stats_tcp_item_desc[]. */
static struct osmo_stat_item_group_desc stats_tcp_desc = {
.group_name_prefix = "tcp",
.group_description = "stats tcp",
.class_id = OSMO_STATS_CLASS_GLOBAL,
.num_items = ARRAY_SIZE(stats_tcp_item_desc),
.item_desc = stats_tcp_item_desc,
};
static void fill_stats(struct stats_tcp_entry *stats_tcp_entry)
{
int rc;
struct tcp_info tcp_info;
socklen_t tcp_info_len = sizeof(tcp_info);
char stat_name[256];
/* Do not fill in anything before the socket is connected to a remote end */
if (osmo_sock_get_ip_and_port(stats_tcp_entry->fd->fd, NULL, 0, NULL, 0, false) != 0)
return;
/* Gather TCP statistics and update the stats items */
rc = getsockopt(stats_tcp_entry->fd->fd, IPPROTO_TCP, TCP_INFO, &tcp_info, &tcp_info_len);
if (rc < 0)
return;
/* Create stats items if they do not exist yet */
if (!stats_tcp_entry->stats_tcp) {
stats_tcp_entry->stats_tcp =
osmo_stat_item_group_alloc(stats_tcp_entry, &stats_tcp_desc, stats_tcp_entry->fd->fd);
OSMO_ASSERT(stats_tcp_entry->stats_tcp);
}
/* Update statistics */
if (stats_tcp_entry->name)
snprintf(stat_name, sizeof(stat_name), "%s,%s", stats_tcp_entry->name,
osmo_sock_get_name2(stats_tcp_entry->fd->fd));
else
snprintf(stat_name, sizeof(stat_name), "%s", osmo_sock_get_name2(stats_tcp_entry->fd->fd));
osmo_stat_item_group_set_name(stats_tcp_entry->stats_tcp, stat_name);
osmo_stat_item_set(osmo_stat_item_group_get_item(stats_tcp_entry->stats_tcp, STATS_TCP_UNACKED),
tcp_info.tcpi_unacked);
osmo_stat_item_set(osmo_stat_item_group_get_item(stats_tcp_entry->stats_tcp, STATS_TCP_LOST),
tcp_info.tcpi_lost);
osmo_stat_item_set(osmo_stat_item_group_get_item(stats_tcp_entry->stats_tcp, STATS_TCP_RETRANS),
tcp_info.tcpi_retrans);
osmo_stat_item_set(osmo_stat_item_group_get_item(stats_tcp_entry->stats_tcp, STATS_TCP_RTT), tcp_info.tcpi_rtt);
osmo_stat_item_set(osmo_stat_item_group_get_item(stats_tcp_entry->stats_tcp, STATS_TCP_RCV_RTT),
tcp_info.tcpi_rcv_rtt);
osmo_stat_item_set(osmo_stat_item_group_get_item(stats_tcp_entry->stats_tcp, STATS_TCP_NOTSENT_BYTES),
tcp_info.tcpi_notsent_bytes);
#if HAVE_TCP_INFO_TCPI_RWND_LIMITED == 1
osmo_stat_item_set(osmo_stat_item_group_get_item(stats_tcp_entry->stats_tcp, STATS_TCP_RWND_LIMITED),
tcp_info.tcpi_rwnd_limited);
#else
osmo_stat_item_set(osmo_stat_item_group_get_item(stats_tcp_entry->stats_tcp, STATS_TCP_RWND_LIMITED), -1);
#endif
#if STATS_TCP_SNDBUF_LIMITED == 1
osmo_stat_item_set(osmo_stat_item_group_get_item(stats_tcp_entry->stats_tcp, STATS_TCP_REORD_SEEN),
tcp_info.tcpi_sndbuf_limited);
#else
osmo_stat_item_set(osmo_stat_item_group_get_item(stats_tcp_entry->stats_tcp, STATS_TCP_REORD_SEEN), -1);
#endif
#if HAVE_TCP_INFO_TCPI_REORD_SEEN == 1
osmo_stat_item_set(osmo_stat_item_group_get_item(stats_tcp_entry->stats_tcp, STATS_TCP_REORD_SEEN),
tcp_info.tcpi_reord_seen);
#else
osmo_stat_item_set(osmo_stat_item_group_get_item(stats_tcp_entry->stats_tcp, STATS_TCP_REORD_SEEN), -1);
#endif
}
/* Check whether the given osmo_fd refers to a TCP socket.
 * Returns false when the descriptor can not be inspected, when it is not a
 * socket at all, or when the socket uses a protocol other than TCP. */
static bool is_tcp(const struct osmo_fd *fd)
{
	struct stat st;
	int proto = 0;
	socklen_t proto_len = sizeof(proto);

	/* The descriptor must refer to a socket in the first place */
	if (fstat(fd->fd, &st) < 0)
		return false;
	if (!S_ISSOCK(st.st_mode))
		return false;

	/* Ask the socket which protocol it speaks */
	if (getsockopt(fd->fd, SOL_SOCKET, SO_PROTOCOL, &proto, &proto_len) < 0)
		return false;

	return proto == IPPROTO_TCP;
}
/*! Register an osmo_fd for TCP stats monitoring.
 *  \param[in] fd osmocom file descriptor to be registered.
 *  \param[in] name human readable name that is used as prefix for the related stats item.
 *  \returns 0 on success; negative in case of error. */
int osmo_stats_tcp_osmo_fd_register(const struct osmo_fd *fd, const char *name)
{
	struct stats_tcp_entry *new_entry;

	/* Anything that is not a TCP socket is refused. */
	if (is_tcp(fd) == false)
		return -EINVAL;

	/* With proper register/unregister pairing there is no stale entry for this fd in the list.
	 * Drop a possibly remaining one anyway; the result is of no interest here. */
	osmo_stats_tcp_osmo_fd_unregister(fd);

	/* Set up the new list entry, the name copy is owned by the entry */
	new_entry = talloc_zero(OTC_GLOBAL, struct stats_tcp_entry);
	OSMO_ASSERT(new_entry);
	new_entry->name = talloc_strdup(new_entry, name);
	new_entry->fd = fd;

	/* Publish the entry to the poller */
	pthread_mutex_lock(&stats_tcp_lock);
	llist_add_tail(&new_entry->entry, &stats_tcp);
	pthread_mutex_unlock(&stats_tcp_lock);

	return 0;
}
/* Advance stats_tcp_entry_cur to the next list entry. The cursor wraps around
 * to the first entry after the last one and also starts at the first entry
 * when it is unset. With an empty list the cursor becomes NULL. */
static void next_stats_tcp_entry(void)
{
	struct stats_tcp_entry *tail;
	bool wrap;

	if (llist_empty(&stats_tcp)) {
		stats_tcp_entry_cur = NULL;
		return;
	}

	tail = llist_last_entry(&stats_tcp, struct stats_tcp_entry, entry);
	wrap = (stats_tcp_entry_cur == NULL) || (stats_tcp_entry_cur == tail);

	if (wrap) {
		stats_tcp_entry_cur = llist_first_entry(&stats_tcp, struct stats_tcp_entry, entry);
		return;
	}

	stats_tcp_entry_cur = llist_entry(stats_tcp_entry_cur->entry.next, struct stats_tcp_entry, entry);
}
/*! Register an osmo_fd for TCP stats monitoring.
* \param[in] fd osmocom file descriptor to be unregistered.
* \returns 0 on success; negative in case of error. */
int osmo_stats_tcp_osmo_fd_unregister(const struct osmo_fd *fd)
{
struct stats_tcp_entry *stats_tcp_entry;
int rc = -EINVAL;
pthread_mutex_lock(&stats_tcp_lock);
llist_for_each_entry(stats_tcp_entry, &stats_tcp, entry) {
if (fd->fd == stats_tcp_entry->fd->fd) {
/* In case we want to remove exactly that item which is also selected as the current itemy, we
* must designate either a different item or invalidate the current item. */
if (stats_tcp_entry == stats_tcp_entry_cur) {
if (llist_count(&stats_tcp) > 2)
next_stats_tcp_entry();
else
stats_tcp_entry_cur = NULL;
}
/* Date item from list */
llist_del(&stats_tcp_entry->entry);
osmo_stat_item_group_free(stats_tcp_entry->stats_tcp);
talloc_free(stats_tcp_entry);
rc = 0;
break;
}
}
pthread_mutex_unlock(&stats_tcp_lock);
return rc;
}
/* Timer callback: poll the next batch of registered sockets and re-arm the
 * timer as long as a positive interval is configured. */
static void stats_tcp_poll_timer_cb(void *data)
{
	int remaining;
	int num_sockets;

	pthread_mutex_lock(&stats_tcp_lock);

	/* Never process more sockets than the list holds, otherwise the same
	 * socket would be visited more than once per timer expiry */
	remaining = osmo_tcp_stats_config->batch_size;
	num_sockets = llist_count(&stats_tcp);
	if (remaining > num_sockets)
		remaining = num_sockets;

	/* Walk the cursor over one batch of sockets */
	while (remaining-- > 0) {
		next_stats_tcp_entry();
		if (!stats_tcp_entry_cur)
			continue;
		fill_stats(stats_tcp_entry_cur);
	}

	pthread_mutex_unlock(&stats_tcp_lock);

	/* An interval of zero (or less) stops the periodic polling */
	if (osmo_tcp_stats_config->interval <= 0)
		return;
	osmo_timer_schedule(&stats_tcp_poll_timer, osmo_tcp_stats_config->interval, 0);
}
/*! Set the polling interval (common for all sockets).
 *  The value is stored in the configuration; the poll timer is only (re-)scheduled
 *  for a positive interval.
 *  \param[in] interval Poll interval in seconds
 *  \returns 0 on success; negative on error */
int osmo_stats_tcp_set_interval(int interval)
{
	osmo_tcp_stats_config->interval = interval;

	/* Nothing to schedule when polling is switched off */
	if (interval <= 0)
		return 0;

	osmo_timer_schedule(&stats_tcp_poll_timer, interval, 0);
	return 0;
}
/* Constructor: set up the lock, the poll timer and the default configuration
 * and reset the poll cursor. */
static __attribute__((constructor))
void on_dso_load_stats_tcp(void)
{
	pthread_mutex_init(&stats_tcp_lock, NULL);
	osmo_timer_setup(&stats_tcp_poll_timer, stats_tcp_poll_timer_cb, NULL);

	/* Defaults, may be changed later on (e.g. through the VTY) */
	osmo_tcp_stats_config->batch_size = TCP_STATS_DEFAULT_BATCH_SIZE;
	osmo_tcp_stats_config->interval = TCP_STATS_DEFAULT_INTERVAL;

	stats_tcp_entry_cur = NULL;
}
#endif /* !EMBEDDED */
/* @} */

View File

@ -33,6 +33,7 @@
#include <osmocom/core/stats.h>
#include <osmocom/core/counter.h>
#include <osmocom/core/rate_ctr.h>
#include <osmocom/core/stats_tcp.h>
#define CFG_STATS_STR "Configure stats sub-system\n"
#define CFG_REPORTER_STR "Configure a stats reporter\n"
@ -389,6 +390,32 @@ DEFUN(cfg_stats_interval, cfg_stats_interval_cmd,
return CMD_SUCCESS;
}
/* VTY command: set the poll interval of the TCP socket statistics. */
DEFUN(cfg_tcp_stats_interval, cfg_tcp_stats_interval_cmd,
	"stats-tcp interval <0-65535>",
	CFG_STATS_STR "Set the tcp socket stats polling interval\n"
	"Interval in seconds (0 disables the polling interval)\n")
{
	const int rc = osmo_stats_tcp_set_interval(atoi(argv[0]));

	if (rc >= 0)
		return CMD_SUCCESS;

	/* Report the error to the user; rc carries a negative error code */
	vty_out(vty, "%% Unable to set interval: %s%s",
		strerror(-rc), VTY_NEWLINE);
	return CMD_WARNING;
}
/* VTY command: set how many sockets are polled each time the TCP stats timer expires. */
DEFUN(cfg_tcp_stats_batch_size, cfg_tcp_stats_batch_size_cmd,
	"stats-tcp batch-size <1-65535>",
	CFG_STATS_STR "Set the number of tcp sockets that are processed per stats polling interval\n"
	"Number of sockets per interval\n")
{
	const int batch_size = atoi(argv[0]);

	osmo_tcp_stats_config->batch_size = batch_size;

	return CMD_SUCCESS;
}
DEFUN(show_stats,
show_stats_cmd,
"show stats",
@ -677,6 +704,10 @@ static int config_write_stats(struct vty *vty)
struct osmo_stats_reporter *srep;
vty_out(vty, "stats interval %d%s", osmo_stats_config->interval, VTY_NEWLINE);
if (osmo_tcp_stats_config->interval != TCP_STATS_DEFAULT_INTERVAL)
vty_out(vty, "stats-tcp interval %d%s", osmo_tcp_stats_config->interval, VTY_NEWLINE);
if (osmo_tcp_stats_config->batch_size != TCP_STATS_DEFAULT_BATCH_SIZE)
vty_out(vty, "stats-tcp batch-size %d%s", osmo_tcp_stats_config->batch_size, VTY_NEWLINE);
/* Loop through all reporters */
llist_for_each_entry(srep, &osmo_stats_reporter_list, list)
@ -699,6 +730,8 @@ void osmo_stats_vty_add_cmds()
install_lib_element(CONFIG_NODE, &cfg_stats_reporter_log_cmd);
install_lib_element(CONFIG_NODE, &cfg_no_stats_reporter_log_cmd);
install_lib_element(CONFIG_NODE, &cfg_stats_interval_cmd);
install_lib_element(CONFIG_NODE, &cfg_tcp_stats_interval_cmd);
install_lib_element(CONFIG_NODE, &cfg_tcp_stats_batch_size_cmd);
install_node(&cfg_stats_node, config_write_stats);

View File

@ -7,6 +7,7 @@ stats_vty_test(config)# list
stats reporter log [NAME]
no stats reporter log [NAME]
stats interval <0-65535>
stats-tcp interval <0-65535>
...
stats_vty_test(config)# ### No reporters shall be configured by default