nft_kpi: retrieve counters in a separate thread
Introduce an NFT thread which does: - periodically run nftables command to read all counters - parse the response - update rate_ctr values. The main thread still runs the rule addition / removal when a HNB registers or deregisters. See the comment added in nft_kpi.c, starting with "A more scalable solution...". This patch requires the new osmo_stats_report_lock(), see 'Depends'. Related: SYS#6773 Depends: libosmocore Ib335bea7d2a440ca284e6c439066f96456bf2c2d Change-Id: I9dc54e6bc94c553f45adfa71ae8ad70be4afbc8f
This commit is contained in:
parent
532968d766
commit
9ec7279329
|
@ -513,6 +513,12 @@ struct hnb_persistent *hnb_persistent_alloc(const struct umts_cell_id *id)
|
|||
if (!hnbp)
|
||||
return NULL;
|
||||
|
||||
/* nft_kpi.c updates hnb stats, and it locks osmo_stats_report_lock() while it does so. This here should run
|
||||
* in the same thread as stats reporting, so there should be no conflict with stats. But to avoid a hnb in flux
|
||||
* while nft_kpi.c looks up hnb, also obtain the osmo_stats_report_lock() while manipulating hnb records. */
|
||||
osmo_stats_report_lock();
|
||||
/* { */
|
||||
|
||||
hnbp->id = *id;
|
||||
hnbp->id_str = talloc_strdup(hnbp, umts_cell_id_name(id));
|
||||
hnbp->ctrs = rate_ctr_group_alloc(hnbp, &hnb_ctrg_desc, 0);
|
||||
|
@ -525,14 +531,21 @@ struct hnb_persistent *hnb_persistent_alloc(const struct umts_cell_id *id)
|
|||
osmo_stat_item_group_set_name(hnbp->statg, hnbp->id_str);
|
||||
|
||||
llist_add(&hnbp->list, &g_hnbgw->hnb_persistent_list);
|
||||
/* success */
|
||||
goto out_unlock;
|
||||
|
||||
return hnbp;
|
||||
|
||||
/* failure */
|
||||
out_free_ctrs:
|
||||
rate_ctr_group_free(hnbp->ctrs);
|
||||
out_free:
|
||||
talloc_free(hnbp);
|
||||
return NULL;
|
||||
hnbp = NULL;
|
||||
|
||||
/* for both success and failure: */
|
||||
out_unlock:
|
||||
/* } */
|
||||
osmo_stats_report_unlock();
|
||||
return hnbp;
|
||||
}
|
||||
|
||||
struct hnb_persistent *hnb_persistent_find_by_id(const struct umts_cell_id *id)
|
||||
|
|
|
@ -15,7 +15,11 @@
|
|||
* GNU Affero General Public License for more details.
|
||||
*/
|
||||
|
||||
#include <pthread.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <osmocom/core/logging.h>
|
||||
#include <osmocom/core/stats.h>
|
||||
#include <osmocom/hnbgw/hnbgw.h>
|
||||
|
||||
#include "config.h"
|
||||
|
@ -46,13 +50,68 @@ int hnb_nft_kpi_end(struct hnb_persistent *hnbp)
|
|||
#include <osmocom/hnbgw/nft_kpi.h>
|
||||
#include <osmocom/hnbgw/tdefs.h>
|
||||
|
||||
/* Threading and locks in this implementation:
|
||||
*
|
||||
* - osmo_stats_report_lock() held while updating rate_ctr from nft results.
|
||||
* - g_nft_kpi_state.lock held while running an nftables command buffer.
|
||||
*
|
||||
* contrived example:
|
||||
*
|
||||
* Main Thread
|
||||
* |
|
||||
* osmo_stats_report_use_lock(true)
|
||||
* |
|
||||
* nft_kpi_init()
|
||||
* create nft ctx, create table
|
||||
* |
|
||||
* +---------------------------> NFT thread
|
||||
* | |
|
||||
* | sleep(X34)
|
||||
* | |
|
||||
* | LOCK(g_nft_kpi_state.lock)
|
||||
* | |
|
||||
* osmo_stats_report() query all nft counters
|
||||
* LOCK(osmo_stats_report_lock) |
|
||||
* | LOCK(osmo_stats_report_lock)
|
||||
* collect stats : wait because libosmocore stats reporting is busy
|
||||
* | :
|
||||
* UNLOCK(osmo_stats_report_lock) LOCK------|
|
||||
* send out stats for all hnbp: rate_ctr_add2()
|
||||
* | |
|
||||
* | UNLOCK(osmo_stats_report_lock)
|
||||
* | |
|
||||
* | UNLOCK(g_nft_kpi_state.lock)
|
||||
* | |
|
||||
* hnbgw_rx_hnb_register_req() sleep(X34)
|
||||
* hnb_nft_kpi_start() |
|
||||
* LOCK(g_nft_kpi_state.lock) ...
|
||||
* |
|
||||
* nftables: add new rule
|
||||
* |
|
||||
* UNLOCK(g_nft_kpi_state.lock)
|
||||
* |
|
||||
* ...
|
||||
*
|
||||
* So the NFT thread only retrieves counters. The main thread adds and removes NFT rules for counters. It is possible
|
||||
* that a HNBAP HNB Register or HNB De-Register occurrs while the NFT thread holds the g_nft_kpi_state.lock, so that the
|
||||
* main thread blocks until the NFT thread is done reading counters. Note, this happens only for HNB attach or detach.
|
||||
*
|
||||
* A more scalable solution is to move all NFT interaction to the thread. Instead of submitting rules from the main
|
||||
* thread, we could submit instructions to an inter-thread queue that the NFT thread works off. This would add
|
||||
* considerable complexity -- for now we accept the possible but rarely occurring short delay for HNB de/registration.
|
||||
*/
|
||||
|
||||
struct nft_kpi_state {
|
||||
/* lock this while modifying g_nft_kpi_state */
|
||||
pthread_mutex_t lock;
|
||||
|
||||
struct {
|
||||
struct nft_ctx *nft_ctx;
|
||||
char *table_name;
|
||||
bool table_created;
|
||||
} nft;
|
||||
struct osmo_timer_list period;
|
||||
|
||||
pthread_t thread;
|
||||
};
|
||||
|
||||
static struct nft_kpi_state g_nft_kpi_state = {};
|
||||
|
@ -98,15 +157,22 @@ static int nft_run_now(const char *buffer)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static void nft_kpi_period_cb(void *data);
|
||||
static void nft_kpi_period_cb(void);
|
||||
|
||||
static void nft_kpi_period_schedule(void)
|
||||
void *nft_kpi_thread(void *arg)
|
||||
{
|
||||
unsigned long period = osmo_tdef_get(hnbgw_T_defs, -34, OSMO_TDEF_S, 10);
|
||||
if (period < 1)
|
||||
period = 1;
|
||||
osmo_timer_setup(&g_nft_kpi_state.period, nft_kpi_period_cb, NULL);
|
||||
osmo_timer_schedule(&g_nft_kpi_state.period, period, 0);
|
||||
OSMO_ASSERT(osmo_ctx_init(__func__) == 0);
|
||||
while (1) {
|
||||
/* Let's just hope that the unsigned long in the hnbgw_T_defs is not modified non-atomically while
|
||||
* reading the timeout value. */
|
||||
unsigned long period = osmo_tdef_get(hnbgw_T_defs, -34, OSMO_TDEF_US, 1000000);
|
||||
if (period < 1)
|
||||
period = 1;
|
||||
usleep(period);
|
||||
|
||||
nft_kpi_period_cb();
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
int nft_kpi_init(const char *table_name)
|
||||
|
@ -145,7 +211,11 @@ int nft_kpi_init(const char *table_name)
|
|||
return -EIO;
|
||||
|
||||
s->nft.table_created = true;
|
||||
nft_kpi_period_schedule();
|
||||
|
||||
/* Up to here, it was fine to dispatch nft without locking, because this is the initialization from the main
|
||||
* thread. From now on, whoever wants to use the g_nft_ctx must lock this mutex first. */
|
||||
pthread_mutex_init(&s->lock, NULL);
|
||||
pthread_create(&s->thread, NULL, nft_kpi_thread, NULL);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -163,14 +233,19 @@ int hnb_nft_kpi_start(struct hnb_persistent *hnbp, const struct osmo_sockaddr_st
|
|||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* Manipulating nft state, obtain lock */
|
||||
pthread_mutex_lock(&s->lock);
|
||||
/* { */
|
||||
|
||||
if (!osmo_sockaddr_str_cmp(gtpu_remote, &hnbp->nft_kpi.addr_remote)) {
|
||||
/* The remote address is unchanged, no need to update the nft probe */
|
||||
return 0;
|
||||
rc = 0;
|
||||
goto unlock_return;
|
||||
}
|
||||
|
||||
/* When there is no table created, it means nft is disabled. Do not attempt to set up counters. */
|
||||
if (!s->nft.table_created)
|
||||
return 0;
|
||||
goto unlock_return;
|
||||
|
||||
/* The remote address has changed. Cancel previous probe, if any, and start a new one. */
|
||||
if (osmo_sockaddr_str_is_nonzero(&hnbp->nft_kpi.addr_remote))
|
||||
|
@ -196,6 +271,10 @@ int hnb_nft_kpi_start(struct hnb_persistent *hnbp, const struct osmo_sockaddr_st
|
|||
/* There was an error running the rule, clear addr_remote to indicate that no rule exists. */
|
||||
hnbp->nft_kpi.addr_remote = (struct osmo_sockaddr_str){};
|
||||
}
|
||||
|
||||
unlock_return:
|
||||
/* } */
|
||||
pthread_mutex_unlock(&s->lock);
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
@ -206,15 +285,19 @@ int hnb_nft_kpi_end(struct hnb_persistent *hnbp)
|
|||
{
|
||||
struct nft_kpi_state *s = &g_nft_kpi_state;
|
||||
char *cmd;
|
||||
int rc = 0;
|
||||
|
||||
/* When there is no table created, neither can there be any rules to be deleted.
|
||||
* The rules get removed, but the table remains present for as long as osmo-hnbgw runs. */
|
||||
if (!s->nft.table_created)
|
||||
return 0;
|
||||
|
||||
pthread_mutex_lock(&s->lock);
|
||||
/* { */
|
||||
|
||||
/* presence of addr_remote indicates whether an nft rule has been submitted and still needs to be removed */
|
||||
if (!osmo_sockaddr_str_is_nonzero(&hnbp->nft_kpi.addr_remote))
|
||||
return 0;
|
||||
goto unlock_return;
|
||||
|
||||
if (!hnbp->nft_kpi.last.ul.handle_present
|
||||
|| !hnbp->nft_kpi.last.dl.handle_present) {
|
||||
|
@ -235,7 +318,12 @@ int hnb_nft_kpi_end(struct hnb_persistent *hnbp)
|
|||
hnbp->nft_kpi.last.ul.handle,
|
||||
s->nft.table_name,
|
||||
hnbp->nft_kpi.last.dl.handle);
|
||||
return nft_run_now(cmd);
|
||||
rc = nft_run_now(cmd);
|
||||
|
||||
unlock_return:
|
||||
/* } */
|
||||
pthread_mutex_unlock(&s->lock);
|
||||
return rc;
|
||||
}
|
||||
|
||||
static void update_ctr(struct rate_ctr_group *cg, int cgidx, uint64_t *last_val, uint64_t new_val)
|
||||
|
@ -380,6 +468,7 @@ static void decode_nft_response(const char *response)
|
|||
LOGP(DNFT, LOGL_DEBUG, "read %d counters from nft table %s\n", count, s->nft.table_name);
|
||||
}
|
||||
|
||||
/* The caller must hold the g_nft_kpi_state.lock! */
|
||||
static void nft_kpi_read_counters(void)
|
||||
{
|
||||
int rc;
|
||||
|
@ -396,6 +485,12 @@ static void nft_kpi_read_counters(void)
|
|||
OSMO_STRBUF_PRINTF(sb, "list table inet %s", s->nft.table_name);
|
||||
OSMO_ASSERT(sb.chars_needed < sizeof(cmd));
|
||||
|
||||
size_t l = strlen(cmd);
|
||||
LOGP(DNFT, LOGL_DEBUG, "running nft request, %zu chars: \"%s%s\"\n",
|
||||
l,
|
||||
osmo_escape_cstr_c(OTC_SELECT, cmd, OSMO_MIN(logmax, l)),
|
||||
l > logmax ? "..." : "");
|
||||
|
||||
rc = nft_ctx_buffer_output(nft);
|
||||
if (rc) {
|
||||
LOGP(DNFT, LOGL_ERROR, "error: nft_ctx_buffer_output() returned failure: rc=%d cmd=%s\n",
|
||||
|
@ -410,29 +505,29 @@ static void nft_kpi_read_counters(void)
|
|||
}
|
||||
|
||||
output = nft_ctx_get_output_buffer(nft);
|
||||
if (log_check_level(DNFT, LOGL_DEBUG)) {
|
||||
size_t l = strlen(cmd);
|
||||
LOGP(DNFT, LOGL_DEBUG, "ran nft request, %zu chars: \"%s%s\"\n",
|
||||
l,
|
||||
osmo_escape_cstr_c(OTC_SELECT, cmd, OSMO_MIN(logmax, l)),
|
||||
l > logmax ? "..." : "");
|
||||
l = strlen(output);
|
||||
LOGP(DNFT, LOGL_DEBUG, "got nft response, %zu chars: \"%s%s\"\n",
|
||||
l,
|
||||
osmo_escape_cstr_c(OTC_SELECT, output, OSMO_MIN(logmax, l)),
|
||||
l > logmax ? "..." : "");
|
||||
}
|
||||
l = strlen(output);
|
||||
LOGP(DNFT, LOGL_DEBUG, "got nft response, %zu chars: \"%s%s\"\n",
|
||||
l,
|
||||
osmo_escape_cstr_c(OTC_SELECT, output, OSMO_MIN(logmax, l)),
|
||||
l > logmax ? "..." : "");
|
||||
|
||||
osmo_stats_report_lock();
|
||||
/* { */
|
||||
decode_nft_response(output);
|
||||
/* } */
|
||||
osmo_stats_report_unlock();
|
||||
|
||||
unbuffer_and_exit:
|
||||
nft_ctx_unbuffer_output(nft);
|
||||
}
|
||||
|
||||
static void nft_kpi_period_cb(void *data)
|
||||
static void nft_kpi_period_cb(void)
|
||||
{
|
||||
pthread_mutex_lock(&g_nft_kpi_state.lock);
|
||||
/* { */
|
||||
nft_kpi_read_counters();
|
||||
nft_kpi_period_schedule();
|
||||
/* } */
|
||||
pthread_mutex_unlock(&g_nft_kpi_state.lock);
|
||||
}
|
||||
|
||||
#endif // ENABLE_NFTABLES
|
||||
|
|
|
@ -328,16 +328,23 @@ int main(int argc, char **argv)
|
|||
/* If UPF is configured, set up PFCP socket and send Association Setup Request to UPF */
|
||||
hnbgw_pfcp_init();
|
||||
#endif
|
||||
#if ENABLE_NFTABLES
|
||||
|
||||
/* If nftables is enabled, initialize the nft table now or fail startup. This is important to immediately let
|
||||
* the user know if cap_net_admin privileges are missing, and not only when the first hNodeB connects. */
|
||||
if (g_hnbgw->config.nft_kpi.enable) {
|
||||
#if ENABLE_NFTABLES
|
||||
if (nft_kpi_init(g_hnbgw->config.nft_kpi.table_name)) {
|
||||
perror("Failed to initialize nft KPI, probably missing cap_net_admin");
|
||||
perror("ERROR: Failed to initialize nft KPI, probably missing cap_net_admin");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
/* nft_kpi.c manipulates rate_ctr state directly. Enable the mutex lock around stats reporting, so
|
||||
* nft_kpi.c can make use of it. */
|
||||
osmo_stats_report_use_lock(true);
|
||||
#else
|
||||
fprintf(stderr, "ERROR: Cannot enable nft KPI, this binary was built without nftables support\n");
|
||||
exit(1);
|
||||
#endif
|
||||
}
|
||||
|
||||
hnbgw_cnpool_start(&g_hnbgw->sccp.cnpool_iucs);
|
||||
hnbgw_cnpool_start(&g_hnbgw->sccp.cnpool_iups);
|
||||
|
|
|
@ -35,8 +35,8 @@ struct osmo_tdef hnbgw_T_defs[] = {
|
|||
{.T = 3113, .default_val = 15, .desc = "Time to keep Paging record, for CN pools with more than one link" },
|
||||
{.T = 4, .default_val = 5, .desc = "Timeout to receive RANAP RESET ACKNOWLEDGE from an MSC/SGSN" },
|
||||
{.T = -31, .default_val = 15, .desc = "Timeout for establishing and releasing context maps (RUA <-> SCCP)" },
|
||||
{.T = -34, .default_val = 1000, .unit = OSMO_TDEF_MS, .desc = "Period to query network traffic stats from netfilter" },
|
||||
{.T = -1002, .default_val = 10, .desc = "Timeout for the HNB to respond to PS RAB Assignment Request" },
|
||||
{.T = -34, .default_val = 1, .desc = "Period to query network traffic stats from netfilter" },
|
||||
{ }
|
||||
};
|
||||
|
||||
|
|
Loading…
Reference in New Issue