stats: Add counters and gauges for BORKEN lchans/TS

Now we can monitor the situation with the BORKEN lchans and TS in our
BTSs over time.

Change-Id: I427bbe1613a0e92bff432a7d76592fe50f620ebe
This commit is contained in:
Alexander Chemeris 2020-05-11 00:30:11 +03:00 committed by laforge
parent 8013b02685
commit 5d63827318
5 changed files with 148 additions and 11 deletions

View File

@ -1396,7 +1396,7 @@ static inline struct gsm_bts *conn_get_bts(struct gsm_subscriber_connection *con
void conn_update_ms_power_class(struct gsm_subscriber_connection *conn, uint8_t power_class);
void lchan_update_ms_power_ctrl_level(struct gsm_lchan *lchan, int ms_power_dbm);
enum {
enum bts_counter_id {
BTS_CTR_CHREQ_TOTAL,
BTS_CTR_CHREQ_NO_CHANNEL,
BTS_CTR_CHAN_RF_FAIL,
@ -1417,6 +1417,27 @@ enum {
BTS_CTR_RSL_UNKNOWN,
BTS_CTR_RSL_IPA_NACK,
BTS_CTR_MODE_MODIFY_NACK,
BTS_CTR_LCHAN_BORKEN_FROM_UNUSED,
BTS_CTR_LCHAN_BORKEN_FROM_WAIT_ACTIV_ACK,
BTS_CTR_LCHAN_BORKEN_FROM_WAIT_RF_RELEASE_ACK,
BTS_CTR_LCHAN_BORKEN_FROM_BORKEN,
BTS_CTR_LCHAN_BORKEN_FROM_UNKNOWN,
BTS_CTR_LCHAN_BORKEN_EV_CHAN_ACTIV_ACK,
BTS_CTR_LCHAN_BORKEN_EV_CHAN_ACTIV_NACK,
BTS_CTR_LCHAN_BORKEN_EV_RF_CHAN_REL_ACK,
BTS_CTR_LCHAN_BORKEN_EV_VTY,
BTS_CTR_LCHAN_BORKEN_EV_TEARDOWN,
BTS_CTR_TS_BORKEN_FROM_NOT_INITIALIZED,
BTS_CTR_TS_BORKEN_FROM_UNUSED,
BTS_CTR_TS_BORKEN_FROM_WAIT_PDCH_ACT,
BTS_CTR_TS_BORKEN_FROM_PDCH,
BTS_CTR_TS_BORKEN_FROM_WAIT_PDCH_DEACT,
BTS_CTR_TS_BORKEN_FROM_IN_USE,
BTS_CTR_TS_BORKEN_FROM_BORKEN,
BTS_CTR_TS_BORKEN_FROM_UNKNOWN,
BTS_CTR_TS_BORKEN_EV_PDCH_ACT_ACK_NACK,
BTS_CTR_TS_BORKEN_EV_PDCH_DEACT_ACK_NACK,
BTS_CTR_TS_BORKEN_EV_TEARDOWN,
};
static const struct rate_ctr_desc bts_ctr_description[] = {
@ -1441,6 +1462,29 @@ static const struct rate_ctr_desc bts_ctr_description[] = {
[BTS_CTR_RSL_UNKNOWN] = {"rsl:unknown", "Number of unknown/unsupported RSL messages received from BTS"},
[BTS_CTR_RSL_IPA_NACK] = {"rsl:ipa_nack", "Number of IPA (RTP/dyn-PDCH) related NACKs received from BTS"},
[BTS_CTR_MODE_MODIFY_NACK] = {"chan:mode_modify_nack", "Number of Channel Mode Modify NACKs received from BTS"},
/* lchan/TS BORKEN state counters */
[BTS_CTR_LCHAN_BORKEN_FROM_UNUSED] = {"lchan_borken:from_state:unused", "Transitions from lchan UNUSED state to BORKEN state"},
[BTS_CTR_LCHAN_BORKEN_FROM_WAIT_ACTIV_ACK] = {"lchan_borken:from_state:wait_activ_ack", "Transitions from lchan WAIT_ACTIV_ACK state to BORKEN state"},
[BTS_CTR_LCHAN_BORKEN_FROM_WAIT_RF_RELEASE_ACK] = {"lchan_borken:from_state:wait_rf_release_ack", "Transitions from lchan WAIT_RF_RELEASE_ACK state to BORKEN state"},
[BTS_CTR_LCHAN_BORKEN_FROM_BORKEN] = {"lchan_borken:from_state:borken", "Transitions from lchan BORKEN state to BORKEN state"},
[BTS_CTR_LCHAN_BORKEN_FROM_UNKNOWN] = {"lchan_borken:from_state:unknown", "Transitions from an unknown lchan state to BORKEN state"},
[BTS_CTR_LCHAN_BORKEN_EV_CHAN_ACTIV_ACK] = {"lchan_borken:event:chan_activ_ack", "CHAN_ACTIV_ACK received in the lchan BORKEN state"},
[BTS_CTR_LCHAN_BORKEN_EV_CHAN_ACTIV_NACK] = {"lchan_borken:event:chan_activ_nack", "CHAN_ACTIV_NACK received in the lchan BORKEN state"},
[BTS_CTR_LCHAN_BORKEN_EV_RF_CHAN_REL_ACK] = {"lchan_borken:event:rf_chan_rel_ack", "RF_CHAN_REL_ACK received in the lchan BORKEN state"},
[BTS_CTR_LCHAN_BORKEN_EV_VTY] = {"lchan_borken:event:vty", "VTY commands received in the lchan BORKEN state"},
[BTS_CTR_LCHAN_BORKEN_EV_TEARDOWN] = {"lchan_borken:event:teardown", "lchan in a BORKEN state is shutting down (BTS disconnected?)"},
[BTS_CTR_TS_BORKEN_FROM_NOT_INITIALIZED] = {"ts_borken:from_state:not_initialized", "Transitions from TS NOT_INITIALIZED state to BORKEN state"},
[BTS_CTR_TS_BORKEN_FROM_UNUSED] = {"ts_borken:from_state:unused", "Transitions from TS UNUSED state to BORKEN state"},
[BTS_CTR_TS_BORKEN_FROM_WAIT_PDCH_ACT] = {"ts_borken:from_state:wait_pdch_act", "Transitions from TS WAIT_PDCH_ACT state to BORKEN state"},
[BTS_CTR_TS_BORKEN_FROM_PDCH] = {"ts_borken:from_state:pdch", "Transitions from TS PDCH state to BORKEN state"},
[BTS_CTR_TS_BORKEN_FROM_WAIT_PDCH_DEACT] = {"ts_borken:from_state:wait_pdch_deact", "Transitions from TS WAIT_PDCH_DEACT state to BORKEN state"},
[BTS_CTR_TS_BORKEN_FROM_IN_USE] = {"ts_borken:from_state:in_use", "Transitions from TS IN_USE state to BORKEN state"},
[BTS_CTR_TS_BORKEN_FROM_BORKEN] = {"ts_borken:from_state:borken", "Transitions from TS BORKEN state to BORKEN state"},
[BTS_CTR_TS_BORKEN_FROM_UNKNOWN] = {"ts_borken:from_state:unknown", "Transitions from an unknown TS state to BORKEN state"},
[BTS_CTR_TS_BORKEN_EV_PDCH_ACT_ACK_NACK] = {"ts_borken:event:pdch_act_ack_nack", "PDCH_ACT_ACK/NACK received in the TS BORKEN state"},
[BTS_CTR_TS_BORKEN_EV_PDCH_DEACT_ACK_NACK] = {"ts_borken:event:pdch_deact_ack_nack", "PDCH_DEACT_ACK/NACK received in the TS BORKEN state"},
[BTS_CTR_TS_BORKEN_EV_TEARDOWN] = {"ts_borken:event:teardown", "TS in a BORKEN state is shutting down (BTS disconnected?)"},
};
static const struct rate_ctr_group_desc bts_ctrg_desc = {
@ -1474,6 +1518,8 @@ enum {
BTS_STAT_RACH_ACCESS,
BTS_STAT_OML_CONNECTED,
BTS_STAT_RSL_CONNECTED,
BTS_STAT_LCHAN_BORKEN,
BTS_STAT_TS_BORKEN,
};
enum {

View File

@ -5018,9 +5018,10 @@ DEFUN_HIDDEN(lchan_set_borken, lchan_set_borken_cmd,
return CMD_WARNING;
}
} else {
if (lchan->fi->state == LCHAN_ST_BORKEN)
if (lchan->fi->state == LCHAN_ST_BORKEN) {
rate_ctr_inc(&lchan->ts->trx->bts->bts_ctrs->ctr[BTS_CTR_LCHAN_BORKEN_EV_VTY]);
osmo_fsm_inst_state_chg(lchan->fi, LCHAN_ST_UNUSED, 0, 0);
else {
} else {
vty_out(vty,
"%% lchan is in state %s, only lchans that are in state %s may be moved to state %s manually%s",
osmo_fsm_state_name(lchan->fi->fsm, lchan->fi->state),

View File

@ -392,6 +392,8 @@ static const struct osmo_stat_item_desc bts_stat_desc[] = {
{ "rach_access", "RACH slots with access bursts in them", "%", 16, 0 },
{ "oml_connected", "Number of OML links connected", "", 16, 0 },
{ "rsl_connected", "Number of RSL links connected", "", 16, 0 },
{ "lchan_borken", "Number of lchans in the BORKEN state", "", 16, 0 },
{ "ts_borken", "Number of timeslots in the BORKEN state", "", 16, 0 },
};
static const struct osmo_stat_item_group_desc bts_statg_desc = {

View File

@ -1071,6 +1071,28 @@ static void lchan_fsm_wait_rf_release_ack(struct osmo_fsm_inst *fi, uint32_t eve
static void lchan_fsm_borken_onenter(struct osmo_fsm_inst *fi, uint32_t prev_state)
{
struct gsm_lchan *lchan = lchan_fi_lchan(fi);
enum bts_counter_id ctr;
switch (prev_state) {
case LCHAN_ST_UNUSED:
ctr = BTS_CTR_LCHAN_BORKEN_FROM_UNUSED;
break;
case LCHAN_ST_WAIT_ACTIV_ACK:
ctr = BTS_CTR_LCHAN_BORKEN_FROM_WAIT_ACTIV_ACK;
break;
case LCHAN_ST_WAIT_RF_RELEASE_ACK:
ctr = BTS_CTR_LCHAN_BORKEN_FROM_WAIT_RF_RELEASE_ACK;
break;
case LCHAN_ST_BORKEN:
ctr = BTS_CTR_LCHAN_BORKEN_FROM_BORKEN;
break;
default:
ctr = BTS_CTR_LCHAN_BORKEN_FROM_UNKNOWN;
}
rate_ctr_inc(&lchan->ts->trx->bts->bts_ctrs->ctr[ctr]);
if (prev_state != LCHAN_ST_BORKEN)
osmo_stat_item_inc(lchan->ts->trx->bts->bts_statg->items[BTS_STAT_LCHAN_BORKEN], 1);
/* The actual action besides all the beancounting above */
lchan_reset(lchan);
}
@ -1081,6 +1103,8 @@ static void lchan_fsm_borken(struct osmo_fsm_inst *fi, uint32_t event, void *dat
case LCHAN_EV_RSL_CHAN_ACTIV_ACK:
/* A late Chan Activ ACK? Release. */
rate_ctr_inc(&lchan->ts->trx->bts->bts_ctrs->ctr[BTS_CTR_LCHAN_BORKEN_EV_CHAN_ACTIV_ACK]);
osmo_stat_item_dec(lchan->ts->trx->bts->bts_statg->items[BTS_STAT_LCHAN_BORKEN], 1);
lchan->release.in_error = true;
lchan->release.rsl_error_cause = RSL_ERR_INTERWORKING;
lchan_fsm_state_chg(LCHAN_ST_WAIT_RF_RELEASE_ACK);
@ -1088,11 +1112,15 @@ static void lchan_fsm_borken(struct osmo_fsm_inst *fi, uint32_t event, void *dat
case LCHAN_EV_RSL_CHAN_ACTIV_NACK:
/* A late Chan Activ NACK? Ok then, unused. */
rate_ctr_inc(&lchan->ts->trx->bts->bts_ctrs->ctr[BTS_CTR_LCHAN_BORKEN_EV_CHAN_ACTIV_NACK]);
osmo_stat_item_dec(lchan->ts->trx->bts->bts_statg->items[BTS_STAT_LCHAN_BORKEN], 1);
lchan_fsm_state_chg(LCHAN_ST_UNUSED);
return;
case LCHAN_EV_RSL_RF_CHAN_REL_ACK:
/* A late Release ACK? */
rate_ctr_inc(&lchan->ts->trx->bts->bts_ctrs->ctr[BTS_CTR_LCHAN_BORKEN_EV_RF_CHAN_REL_ACK]);
osmo_stat_item_dec(lchan->ts->trx->bts->bts_statg->items[BTS_STAT_LCHAN_BORKEN], 1);
lchan->release.in_error = true;
lchan->release.rsl_error_cause = RSL_ERR_INTERWORKING;
lchan_fsm_state_chg(LCHAN_ST_WAIT_AFTER_ERROR);
@ -1384,6 +1412,10 @@ exit_release_handler:
void lchan_fsm_cleanup(struct osmo_fsm_inst *fi, enum osmo_fsm_term_cause cause)
{
struct gsm_lchan *lchan = lchan_fi_lchan(fi);
if (lchan->fi->state == LCHAN_ST_BORKEN) {
rate_ctr_inc(&lchan->ts->trx->bts->bts_ctrs->ctr[BTS_CTR_LCHAN_BORKEN_EV_TEARDOWN]);
osmo_stat_item_dec(lchan->ts->trx->bts->bts_statg->items[BTS_STAT_LCHAN_BORKEN], 1);
}
lchan_reset(lchan);
if (lchan->last_error) {
talloc_free(lchan->last_error);

View File

@ -654,6 +654,39 @@ static void ts_fsm_in_use(struct osmo_fsm_inst *fi, uint32_t event, void *data)
}
}
static void ts_fsm_borken_onenter(struct osmo_fsm_inst *fi, uint32_t prev_state)
{
struct gsm_bts_trx_ts *ts = ts_fi_ts(fi);
enum bts_counter_id ctr;
switch (prev_state) {
case TS_ST_NOT_INITIALIZED:
ctr = BTS_CTR_TS_BORKEN_FROM_NOT_INITIALIZED;
break;
case TS_ST_UNUSED:
ctr = BTS_CTR_TS_BORKEN_FROM_UNUSED;
break;
case TS_ST_WAIT_PDCH_ACT:
ctr = BTS_CTR_TS_BORKEN_FROM_WAIT_PDCH_ACT;
break;
case TS_ST_PDCH:
ctr = BTS_CTR_TS_BORKEN_FROM_PDCH;
break;
case TS_ST_WAIT_PDCH_DEACT:
ctr = BTS_CTR_TS_BORKEN_FROM_WAIT_PDCH_DEACT;
break;
case TS_ST_IN_USE:
ctr = BTS_CTR_TS_BORKEN_FROM_IN_USE;
break;
case TS_ST_BORKEN:
ctr = BTS_CTR_TS_BORKEN_FROM_BORKEN;
break;
default:
ctr = BTS_CTR_TS_BORKEN_FROM_UNKNOWN;
}
rate_ctr_inc(&ts->trx->bts->bts_ctrs->ctr[ctr]);
osmo_stat_item_inc(ts->trx->bts->bts_statg->items[BTS_STAT_TS_BORKEN], 1);
}
static void ts_fsm_borken(struct osmo_fsm_inst *fi, uint32_t event, void *data)
{
switch (event) {
@ -670,17 +703,29 @@ static void ts_fsm_borken(struct osmo_fsm_inst *fi, uint32_t event, void *data)
case TS_EV_PDCH_ACT_ACK:
case TS_EV_PDCH_ACT_NACK:
/* Late PDCH activation ACK/NACK is not a crime.
* Just process them as normal. */
ts_fsm_wait_pdch_act(fi, event, data);
return;
{
struct gsm_bts_trx_ts *ts = ts_fi_ts(fi);
struct gsm_bts *bts = ts->trx->bts;
/* Late PDCH activation ACK/NACK is not a crime.
* Just process them as normal. */
rate_ctr_inc(&bts->bts_ctrs->ctr[BTS_CTR_TS_BORKEN_EV_PDCH_ACT_ACK_NACK]);
osmo_stat_item_dec(bts->bts_statg->items[BTS_STAT_TS_BORKEN], 1);
ts_fsm_wait_pdch_act(fi, event, data);
return;
}
case TS_EV_PDCH_DEACT_ACK:
case TS_EV_PDCH_DEACT_NACK:
/* Late PDCH deactivation ACK/NACK is also not a crime.
* Just process them as normal. */
ts_fsm_wait_pdch_deact(fi, event, data);
return;
{
struct gsm_bts_trx_ts *ts = ts_fi_ts(fi);
struct gsm_bts *bts = ts->trx->bts;
/* Late PDCH deactivation ACK/NACK is also not a crime.
* Just process them as normal. */
rate_ctr_inc(&bts->bts_ctrs->ctr[BTS_CTR_TS_BORKEN_EV_PDCH_DEACT_ACK_NACK]);
osmo_stat_item_dec(bts->bts_statg->items[BTS_STAT_TS_BORKEN], 1);
ts_fsm_wait_pdch_deact(fi, event, data);
return;
}
default:
OSMO_ASSERT(false);
@ -733,6 +778,15 @@ static void ts_fsm_allstate(struct osmo_fsm_inst *fi, uint32_t event, void *data
}
}
static void ts_fsm_cleanup(struct osmo_fsm_inst *fi, enum osmo_fsm_term_cause cause)
{
struct gsm_bts_trx_ts *ts = ts_fi_ts(fi);
if (ts->fi->state == TS_ST_BORKEN) {
rate_ctr_inc(&ts->trx->bts->bts_ctrs->ctr[BTS_CTR_TS_BORKEN_EV_TEARDOWN]);
osmo_stat_item_dec(ts->trx->bts->bts_statg->items[BTS_STAT_TS_BORKEN], 1);
}
}
#define S(x) (1 << (x))
static const struct osmo_fsm_state ts_fsm_states[] = {
@ -830,6 +884,7 @@ static const struct osmo_fsm_state ts_fsm_states[] = {
},
[TS_ST_BORKEN] = {
.name = "BORKEN",
.onenter = ts_fsm_borken_onenter,
.action = ts_fsm_borken,
.in_event_mask = 0
| S(TS_EV_LCHAN_REQUESTED)
@ -875,6 +930,7 @@ static struct osmo_fsm ts_fsm = {
| S(TS_EV_RSL_DOWN)
,
.allstate_action = ts_fsm_allstate,
.cleanup = ts_fsm_cleanup,
};
/* Return true if any lchans are waiting for this timeslot to become a specific PCHAN. If target_pchan is