Don't route messages to an SGSN if it is down

If an SGSN in a pool is down, we expect the messages to be sent to a
different SGSN in the pool instead. That SGSN will not necessarily know
what to do with those messages, but it should (implicitly) detach the
affected UE so that it can reattach at the new SGSN. Otherwise, UEs on a
failed SGSN would simply stop working, as their messages would never be
forwarded anywhere.

Fixes: OS#4952
Change-Id: I3f794659866e1f31496a39ca631b3b042a60aa27
This commit is contained in:
Daniel Willmann 2021-05-27 18:13:36 +02:00
parent a42963421f
commit 37518b3409
3 changed files with 34 additions and 11 deletions

View File

@ -168,6 +168,7 @@ struct gbproxy_nse {
/* Are we facing towards a SGSN (true) or BSS (false) */
bool sgsn_facing;
bool alive;
/* List of all BVCs in this NSE */
DECLARE_HASHTABLE(bvcs, 10);
@ -306,3 +307,4 @@ struct gbproxy_sgsn *gbproxy_sgsn_by_nsei_or_new(struct gbproxy_config *cfg, uin
struct gbproxy_sgsn *gbproxy_sgsn_by_nri(struct gbproxy_config *cfg, uint16_t nri, bool *null_nri);
struct gbproxy_sgsn *gbproxy_sgsn_by_tlli(struct gbproxy_config *cfg, struct gbproxy_sgsn *sgsn_avoid,
uint32_t tlli);
struct gbproxy_sgsn *gbproxy_sgsn_by_available(struct gbproxy_config *cfg);

View File

@ -265,22 +265,24 @@ static struct gbproxy_sgsn *gbproxy_select_sgsn(struct gbproxy_config *cfg, cons
bool null_nri = false;
if (!tlli) {
sgsn = llist_first_entry(&cfg->sgsns, struct gbproxy_sgsn, list);
sgsn = gbproxy_sgsn_by_available(cfg);
if (!sgsn) {
LOGP(DGPRS, LOGL_ERROR, "Could not find any available SGSN\n");
return NULL;
}
LOGPSGSN(sgsn, LOGL_INFO, "Could not get TLLI, using first SGSN\n");
LOGPSGSN(sgsn, LOGL_INFO, "Could not get TLLI, using first available SGSN\n");
return sgsn;
}
if (cfg->pool.nri_bitlen == 0) {
/* Pooling is disabled */
sgsn = llist_first_entry(&cfg->sgsns, struct gbproxy_sgsn, list);
sgsn = gbproxy_sgsn_by_available(cfg);
if (!sgsn) {
LOGP(DGPRS, LOGL_ERROR, "Could not find any available SGSN\n");
return NULL;
}
LOGPSGSN(sgsn, LOGL_INFO, "Pooling disabled, using first configured SGSN\n");
LOGPSGSN(sgsn, LOGL_INFO, "Pooling disabled, using first available SGSN\n");
} else {
/* Pooling is enabled, try to use the NRI for routing to an SGSN
* See 3GPP TS 23.236 Ch. 5.3.2 */
@ -1527,9 +1529,6 @@ int gbprox_rcvmsg(void *ctx, struct msgb *msg)
void gprs_ns_prim_status_cb(struct gbproxy_config *cfg, struct osmo_gprs_ns2_prim *nsp)
{
/* TODO: bss nsei available/unavailable bssgp_tx_simple_bvci(BSSGP_PDUT_BVC_BLOCK, nsvc->nsei, bvc->bvci, 0);
*/
int i;
struct gbproxy_bvc *bvc;
struct gbproxy_nse *nse;
@ -1543,6 +1542,7 @@ void gprs_ns_prim_status_cb(struct gbproxy_config *cfg, struct osmo_gprs_ns2_pri
LOGP(DGPRS, LOGL_NOTICE, "NS-NSE %d became available\n", nsp->nsei);
nse = gbproxy_nse_by_nsei(cfg, nsp->nsei, NSE_F_SGSN);
if (nse) {
nse->alive = true;
// Update the NSE max SDU len
nse->max_sdu_len = nsp->u.status.mtu;
@ -1560,6 +1560,8 @@ void gprs_ns_prim_status_cb(struct gbproxy_config *cfg, struct osmo_gprs_ns2_pri
LOGP(DGPRS, LOGL_ERROR, "Unknown NSE(%05d) became unavailable\n", nsp->nsei);
break;
}
nse->alive = false;
if (nse->sgsn_facing) {
struct hlist_node *ntmp;
/* SGSN */

View File

@ -625,7 +625,7 @@ void gbproxy_sgsn_free(struct gbproxy_sgsn *sgsn)
if (!sgsn)
return;
OSMO_ASSERT(sgsn->nse)
OSMO_ASSERT(sgsn->nse);
_nse_free(sgsn->nse);
_sgsn_free(sgsn);
@ -696,6 +696,9 @@ struct gbproxy_sgsn *gbproxy_sgsn_by_nri(struct gbproxy_config *cfg, uint16_t nr
OSMO_ASSERT(cfg);
llist_for_each_entry(sgsn, &cfg->sgsns, list) {
if (!sgsn->nse->alive)
continue;
if (osmo_nri_v_matches_ranges(nri, sgsn->pool.nri_ranges)) {
/* Also check if the NRI we're looking for is a NULL NRI */
if (null_nri) {
@ -711,7 +714,7 @@ struct gbproxy_sgsn *gbproxy_sgsn_by_nri(struct gbproxy_config *cfg, uint16_t nr
return NULL;
}
/*! Select a pseudo-random SGSN for a given TLLI, ignoring any SGSN that is not accepting connections
/*! Select a pseudo-random SGSN for a given TLLI, ignoring any SGSN that is not accepting connections or down
* \param[in] cfg The gbproxy configuration
* \param[in] sgsn_avoid If not NULL then avoid this SGSN when selecting a new one. Use for load redistribution
* \param[in] tlli The tlli to choose an SGSN for. The same tlli will map to the same SGSN as long as no SGSN is
@ -743,7 +746,7 @@ struct gbproxy_sgsn *gbproxy_sgsn_by_tlli(struct gbproxy_config *cfg, struct gbp
/* Get the first enabled SGSN after index */
llist_for_each_entry(sgsn, &cfg->sgsns, list) {
if (i >= index && sgsn->pool.allow_attach) {
if (i >= index && sgsn->pool.allow_attach && sgsn->nse->alive) {
return sgsn;
}
i++;
@ -753,7 +756,7 @@ struct gbproxy_sgsn *gbproxy_sgsn_by_tlli(struct gbproxy_config *cfg, struct gbp
llist_for_each_entry(sgsn, &cfg->sgsns, list) {
if (i >= index) {
break;
} else if (sgsn->pool.allow_attach) {
} else if (sgsn->pool.allow_attach && sgsn->nse->alive) {
return sgsn;
}
i++;
@ -761,3 +764,19 @@ struct gbproxy_sgsn *gbproxy_sgsn_by_tlli(struct gbproxy_config *cfg, struct gbp
return NULL;
}
/*! Return the first available gbproxy_sgsn
* \param[in] cfg proxy in which we operate
* \return The SGSN, NULL if no matching SGSN could be found
*/
struct gbproxy_sgsn *gbproxy_sgsn_by_available(struct gbproxy_config *cfg)
{
struct gbproxy_sgsn *sgsn;
OSMO_ASSERT(cfg);
llist_for_each_entry(sgsn, &cfg->sgsns, list)
if (sgsn->nse->alive &&sgsn->pool.allow_attach)
return sgsn;
return NULL;
}