osmo-hnbgw/src/osmo-hnbgw/hnbgw_cn.c

572 lines
17 KiB
C
Raw Normal View History

/* IuCS/IuPS Core Network interface of HNB-GW */
/* (C) 2015 by Harald Welte <laforge@gnumonks.org>
* All Rights Reserved
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
#include "config.h"
#include <arpa/inet.h>
#include <errno.h>
#include <osmocom/core/msgb.h>
#include <osmocom/core/utils.h>
#include <osmocom/core/timer.h>
#include <osmocom/sigtran/protocol/m3ua.h>
#include <osmocom/sigtran/sccp_sap.h>
#include <osmocom/sigtran/sccp_helpers.h>
#if ENABLE_PFCP
#include <osmocom/pfcp/pfcp_cp_peer.h>
#endif
#include <osmocom/hnbgw/hnbgw.h>
#include <osmocom/hnbgw/hnbgw_rua.h>
#include <osmocom/ranap/ranap_ies_defs.h>
#include <osmocom/ranap/ranap_msg_factory.h>
#include <osmocom/hnbgw/context_map.h>
#include <osmocom/hnbgw/mgw_fsm.h>
#include <osmocom/hnbgw/ps_rab_ass_fsm.h>
#include <osmocom/ranap/RANAP_ProcedureCode.h>
#include <osmocom/ranap/ranap_common.h>
#include <osmocom/ranap/ranap_common_ran.h>
#include <osmocom/ranap/RANAP_RANAP-PDU.h>
/***********************************************************************
* Outbound RANAP RESET to CN
***********************************************************************/
void hnbgw_cnlink_change_state(struct hnbgw_cnlink *cnlink, enum hnbgw_cnlink_state state);
static int transmit_rst(struct hnb_gw *gw, RANAP_CN_DomainIndicator_t domain,
struct osmo_sccp_addr *remote_addr)
{
struct msgb *msg;
RANAP_Cause_t cause = {
.present = RANAP_Cause_PR_transmissionNetwork,
.choice. transmissionNetwork = RANAP_CauseTransmissionNetwork_signalling_transport_resource_failure,
};
LOGP(DRANAP, LOGL_NOTICE, "Tx RESET to %s %s\n",
domain == RANAP_CN_DomainIndicator_cs_domain ? "IuCS" : "IuPS",
osmo_sccp_inst_addr_name(gw->sccp.cnlink->sccp, remote_addr));
msg = ranap_new_msg_reset(domain, &cause);
return osmo_sccp_tx_unitdata_msg(gw->sccp.cnlink->sccp_user,
&gw->sccp.local_addr,
remote_addr,
msg);
}
static int transmit_reset_ack(struct hnb_gw *gw, RANAP_CN_DomainIndicator_t domain,
const struct osmo_sccp_addr *remote_addr)
{
struct msgb *msg;
LOGP(DRANAP, LOGL_NOTICE, "Tx RESET ACK to %s %s\n",
domain == RANAP_CN_DomainIndicator_cs_domain ? "IuCS" : "IuPS",
osmo_sccp_inst_addr_name(gw->sccp.cnlink->sccp, remote_addr));
msg = ranap_new_msg_reset_ack(domain, NULL);
return osmo_sccp_tx_unitdata_msg(gw->sccp.cnlink->sccp_user,
&gw->sccp.local_addr,
remote_addr,
msg);
}
/* Timer callback once T_RafC expires */
static void cnlink_trafc_cb(void *data)
{
struct hnb_gw *gw = data;
transmit_rst(gw, RANAP_CN_DomainIndicator_cs_domain, &gw->sccp.iucs_remote_addr);
transmit_rst(gw, RANAP_CN_DomainIndicator_ps_domain, &gw->sccp.iups_remote_addr);
hnbgw_cnlink_change_state(gw->sccp.cnlink, CNLINK_S_EST_RST_TX_WAIT_ACK);
/* The spec states that we should abandon after a configurable
* number of times. We decide to simply continue trying */
}
/* change the state of a CN Link */
void hnbgw_cnlink_change_state(struct hnbgw_cnlink *cnlink, enum hnbgw_cnlink_state state)
{
switch (state) {
case CNLINK_S_NULL:
case CNLINK_S_EST_PEND:
break;
case CNLINK_S_EST_CONF:
cnlink_trafc_cb(cnlink->gw);
break;
case CNLINK_S_EST_RST_TX_WAIT_ACK:
osmo_timer_schedule(&cnlink->T_RafC, 5, 0);
break;
case CNLINK_S_EST_ACTIVE:
osmo_timer_del(&cnlink->T_RafC);
break;
}
}
/***********************************************************************
* Incoming primitives from SCCP User SAP
***********************************************************************/
static int cn_ranap_rx_reset_cmd(struct hnbgw_cnlink *cnlink,
const struct osmo_scu_unitdata_param *unitdata,
RANAP_InitiatingMessage_t *imsg)
{
RANAP_CN_DomainIndicator_t domain;
RANAP_ResetIEs_t ies;
int rc;
rc = ranap_decode_reseties(&ies, &imsg->value);
domain = ies.cN_DomainIndicator;
ranap_free_reseties(&ies);
LOGP(DRANAP, LOGL_NOTICE, "Rx RESET from %s %s, returning ACK\n",
domain == RANAP_CN_DomainIndicator_cs_domain ? "IuCS" : "IuPS",
osmo_sccp_inst_addr_name(cnlink->sccp, &unitdata->calling_addr));
/* FIXME: actually reset connections, if any */
if (transmit_reset_ack(cnlink->gw, domain, &unitdata->calling_addr))
LOGP(DRANAP, LOGL_ERROR, "Error: cannot send RESET ACK to %s %s\n",
domain == RANAP_CN_DomainIndicator_cs_domain ? "IuCS" : "IuPS",
osmo_sccp_inst_addr_name(cnlink->sccp, &unitdata->calling_addr));
return rc;
}
static int cn_ranap_rx_reset_ack(struct hnbgw_cnlink *cnlink,
RANAP_SuccessfulOutcome_t *omsg)
{
RANAP_ResetAcknowledgeIEs_t ies;
int rc;
rc = ranap_decode_resetacknowledgeies(&ies, &omsg->value);
hnbgw_cnlink_change_state(cnlink, CNLINK_S_EST_ACTIVE);
ranap_free_resetacknowledgeies(&ies);
return rc;
}
static int cn_ranap_rx_paging_cmd(struct hnbgw_cnlink *cnlink,
RANAP_InitiatingMessage_t *imsg,
const uint8_t *data, unsigned int len)
{
struct hnb_gw *gw = cnlink->gw;
struct hnb_context *hnb;
RANAP_PagingIEs_t ies;
int rc;
rc = ranap_decode_pagingies(&ies, &imsg->value);
if (rc < 0)
return rc;
/* FIXME: determine which HNBs to send this Paging command,
* rather than broadcasting to all HNBs */
llist_for_each_entry(hnb, &gw->hnb_list, list) {
rc = rua_tx_udt(hnb, data, len);
}
ranap_free_pagingies(&ies);
return 0;
}
static int cn_ranap_rx_initiating_msg(struct hnbgw_cnlink *cnlink,
const struct osmo_scu_unitdata_param *unitdata,
RANAP_InitiatingMessage_t *imsg,
const uint8_t *data, unsigned int len)
{
switch (imsg->procedureCode) {
case RANAP_ProcedureCode_id_Reset:
return cn_ranap_rx_reset_cmd(cnlink, unitdata, imsg);
case RANAP_ProcedureCode_id_Paging:
return cn_ranap_rx_paging_cmd(cnlink, imsg, data, len);
case RANAP_ProcedureCode_id_OverloadControl: /* Overload ind */
break;
case RANAP_ProcedureCode_id_ErrorIndication: /* Error ind */
break;
case RANAP_ProcedureCode_id_ResetResource: /* request */
case RANAP_ProcedureCode_id_InformationTransfer:
case RANAP_ProcedureCode_id_DirectInformationTransfer:
case RANAP_ProcedureCode_id_UplinkInformationExchange:
LOGP(DRANAP, LOGL_NOTICE, "Received unsupported RANAP "
"Procedure %ld from CN, ignoring\n", imsg->procedureCode);
break;
default:
LOGP(DRANAP, LOGL_NOTICE, "Received suspicious RANAP "
"Procedure %ld from CN, ignoring\n", imsg->procedureCode);
break;
}
return 0;
}
static int cn_ranap_rx_successful_msg(struct hnbgw_cnlink *cnlink,
RANAP_SuccessfulOutcome_t *omsg)
{
switch (omsg->procedureCode) {
case RANAP_ProcedureCode_id_Reset: /* Reset acknowledge */
return cn_ranap_rx_reset_ack(cnlink, omsg);
case RANAP_ProcedureCode_id_ResetResource: /* response */
case RANAP_ProcedureCode_id_InformationTransfer:
case RANAP_ProcedureCode_id_DirectInformationTransfer:
case RANAP_ProcedureCode_id_UplinkInformationExchange:
LOGP(DRANAP, LOGL_NOTICE, "Received unsupported RANAP "
"Procedure %ld from CN, ignoring\n", omsg->procedureCode);
break;
default:
LOGP(DRANAP, LOGL_NOTICE, "Received suspicious RANAP "
"Procedure %ld from CN, ignoring\n", omsg->procedureCode);
break;
}
return 0;
}
static int _cn_ranap_rx(struct hnbgw_cnlink *cnlink,
const struct osmo_scu_unitdata_param *unitdata,
RANAP_RANAP_PDU_t *pdu, const uint8_t *data, unsigned int len)
{
int rc;
switch (pdu->present) {
case RANAP_RANAP_PDU_PR_initiatingMessage:
rc = cn_ranap_rx_initiating_msg(cnlink, unitdata, &pdu->choice.initiatingMessage,
data, len);
break;
case RANAP_RANAP_PDU_PR_successfulOutcome:
rc = cn_ranap_rx_successful_msg(cnlink, &pdu->choice.successfulOutcome);
break;
case RANAP_RANAP_PDU_PR_unsuccessfulOutcome:
LOGP(DRANAP, LOGL_NOTICE, "Received unsupported RANAP "
"unsuccessful outcome procedure %ld from CN, ignoring\n",
pdu->choice.unsuccessfulOutcome.procedureCode);
rc = -ENOTSUP;
break;
default:
LOGP(DRANAP, LOGL_NOTICE, "Received suspicious RANAP "
"presence %u from CN, ignoring\n", pdu->present);
rc = -EINVAL;
break;
}
return rc;
}
static int handle_cn_ranap(struct hnbgw_cnlink *cnlink, const struct osmo_scu_unitdata_param *unitdata,
const uint8_t *data, unsigned int len)
{
RANAP_RANAP_PDU_t _pdu, *pdu = &_pdu;
asn_dec_rval_t dec_ret;
int rc;
memset(pdu, 0, sizeof(*pdu));
dec_ret = aper_decode(NULL, &asn_DEF_RANAP_RANAP_PDU, (void **) &pdu,
data, len, 0, 0);
if (dec_ret.code != RC_OK) {
LOGP(DRANAP, LOGL_ERROR, "Error in RANAP ASN.1 decode\n");
return -1;
}
rc = _cn_ranap_rx(cnlink, unitdata, pdu, data, len);
return rc;
}
static bool pc_and_ssn_match(const struct osmo_sccp_addr *a, const struct osmo_sccp_addr *b)
{
return (a == b)
|| ((a->pc == b->pc)
&& (a->ssn == b->ssn));
}
static int classify_cn_remote_addr(const struct hnb_gw *gw,
const struct osmo_sccp_addr *cn_remote_addr,
bool *is_ps)
{
if (pc_and_ssn_match(cn_remote_addr, &gw->sccp.iucs_remote_addr)) {
if (is_ps)
*is_ps = false;
return 0;
}
if (pc_and_ssn_match(cn_remote_addr, &gw->sccp.iups_remote_addr)) {
if (is_ps)
*is_ps = true;
return 0;
}
LOGP(DMAIN, LOGL_ERROR, "Unexpected remote address, matches neither CS nor PS address: %s\n",
osmo_sccp_addr_dump(cn_remote_addr));
return -1;
}
static int handle_cn_unitdata(struct hnbgw_cnlink *cnlink,
const struct osmo_scu_unitdata_param *param,
struct osmo_prim_hdr *oph)
{
if (param->called_addr.ssn != OSMO_SCCP_SSN_RANAP) {
LOGP(DMAIN, LOGL_NOTICE, "N-UNITDATA.ind for unknown SSN %u\n",
param->called_addr.ssn);
return -1;
}
if (classify_cn_remote_addr(cnlink->gw, &param->calling_addr, NULL) < 0)
return -1;
return handle_cn_ranap(cnlink, param, msgb_l2(oph->msg), msgb_l2len(oph->msg));
}
static int handle_cn_conn_conf(struct hnbgw_cnlink *cnlink,
const struct osmo_scu_connect_param *param,
struct osmo_prim_hdr *oph)
{
struct osmo_ss7_instance *ss7 = osmo_sccp_get_ss7(cnlink->gw->sccp.client);
struct hnbgw_context_map *map;
LOGP(DMAIN, LOGL_DEBUG, "handle_cn_conn_conf() conn_id=%d, addrs: called=%s calling=%s responding=%s\n",
param->conn_id,
osmo_sccp_addr_to_str_c(OTC_SELECT, ss7, &param->called_addr),
osmo_sccp_addr_to_str_c(OTC_SELECT, ss7, &param->calling_addr),
osmo_sccp_addr_to_str_c(OTC_SELECT, ss7, &param->responding_addr));
map = context_map_by_cn(cnlink, param->conn_id);
context map: introduce RUA and SCCP FSMs to fix leaks Refactor the entire RUA <-> SCCP connection-oriented message forwarding: - conquer confusion about hnbgw_context_map release behavior, and - eradicate SCCP connection leaks. Finer points: == Context map state == So far, we had a single context map state and some flags to keep track of both the RUA and the SCCP connections. It was easy to miss connection cleanup steps, especially on the SCCP side. Instead, the two FSMs clearly define the RUA and SCCP conn states separately, and each side takes care of its own release needs for all possible scenarios. - When both RUA and SCCP are released, the context map is discarded. - A context map can stay around to wait for proper SCCP release, even if the RUA side has lost the HNB connection. - Completely drop the async "context mapper garbage collection", because the FSMs clarify the release and free steps, synchronously. - We still keep a (simplified) enum for global context map state, but this is only used so that VTY reporting remains mostly unchanged. == Context map cleanup confusion == The function context_map_hnb_released() was the general cleanup function for a context map. Instead, add separate context_map_free(). == Free context maps separately from HNB == When a HNB releases, talloc_steal() the context maps out of the HNB specific hnb_ctx, so that they are not freed along with the HNB state, possibly leaving SCCP connections afloat. (It is still nice to normally keep context maps as talloc children of their respective hnb_ctx, so talloc reports show which belongs to which.) So far, context map handling found the global hnb_gw pointer via map->hnb_ctx->gw. But in fact, a HNB may disappear at any point in time. Instead, use a separate hnb_gw pointer in map->gw. == RUA procedure codes vs. SCCP prims == So far, the RUA rx side composed SCCP prims to pass on: RUA rx ---SCCP-prim--> RANAP handling ---SCCP-prim--> SCCP tx That is a source of confusion: a RUA procedure code should not translate 1:1 to SCCP prims, especially for RUA id-Disconnect (see release charts below). Instead, move SCCP prim composition over to the SCCP side, using FSM events to forward: RUA rx --event--> RUA FSM --event--> SCCP FSM --SCCP-prim--> SCCP tx +RANAP +RANAP +RANAP RUA tx <--RUA---- RUA FSM <--event-- SCCP FSM <--event-- SCCP rx +RANAP +RANAP +RANAP Hence choose the correct prim according to the SCCP FSM state. - in hnbgw_rua.c, use RUA procedure codes, not prim types. - via the new FSM events' data args, pass msgb containing RANAP PDUs. == Fix SCCP Release behavior == So far, the normal conn release behavior was HNB HNBGW CN | --id-Disconnect--> | ---SCCP-Released--> | Iu-ReleaseComplete | | <--SCCP-RLC-------- | (no data) Instead, the SCCP release is now in accordance with 3GPP TS 48.006 9.2 'Connection release': The MSC sends a SCCP released message. This message shall not contain any user data field. i.e.: HNB HNBGW CN | --id-Disconnect--> | ---Data-Form-1(!)--> | Iu-ReleaseComplete | | <--SCCP-Released---- | (no data) | | ---SCCP-RLC--------> | (no data) (Side note, the final SCCP Release Confirm step is taken care of implicitly by libosmo-sigtran's sccp_scoc.c FSM.) If the CN fails to respond with SCCP-Released, on new X31 timeout, osmo-hnbgw will send an SCCP Released to the CN as fallback. == Memory model for message dispatch == So far, an osmo_scu_prim aka "oph" was passed between RUA and SCCP handling code, and the final dispatch freed it. Every error path had to take care not to leak any oph. Instead, use a much easier and much more leakage proof memory model, inspired by fixeria: - on rx, dispatch RANAP msgb that live in OTC_SELECT. - no code path needs to msgb_free() -- the msgb is discarded via OTC_SELECT when handling is done, error or no error. - any code path may also choose to store the msgb for async dispatch, using talloc_steal(). The user plane mapping via MGW and UPF do that. - if any code path does msgb_free(), that would be no problem either (but none do so now, for simplicity). == Layer separation == Dispatch *all* connection-oriented RUA tx via the RUA FSM and SCCP tx via the SCCP FSM, do not call rua_tx_dt() or osmo_sccp_user_sap_down() directly. == Memory model for decoded ranap_message IEs == Use a talloc destructor to make sure that the ranap_message IEs are always implicitly freed upon talloc_free(), so that no code path can possibly forget to do so. == Implicit cleanup by talloc == Use talloc scoping to remove a bunch of explicit cleanup code. For example, make a chached message a talloc child of its handler: talloc_steal(mgw_fsm_priv, message); mgw_fsm_priv->ranap_rab_ass_req_message = message; and later implicitly free 'message' by only freeing the handler: talloc_free(mgw_fsm_priv) Related: SYS#6297 Change-Id: I6ff7e36532ff57c6f2d3e7e419dd22ef27dafd19
2023-02-12 04:02:48 +00:00
if (!map) {
/* We have no such SCCP connection. Ignore. */
return 0;
context map: introduce RUA and SCCP FSMs to fix leaks Refactor the entire RUA <-> SCCP connection-oriented message forwarding: - conquer confusion about hnbgw_context_map release behavior, and - eradicate SCCP connection leaks. Finer points: == Context map state == So far, we had a single context map state and some flags to keep track of both the RUA and the SCCP connections. It was easy to miss connection cleanup steps, especially on the SCCP side. Instead, the two FSMs clearly define the RUA and SCCP conn states separately, and each side takes care of its own release needs for all possible scenarios. - When both RUA and SCCP are released, the context map is discarded. - A context map can stay around to wait for proper SCCP release, even if the RUA side has lost the HNB connection. - Completely drop the async "context mapper garbage collection", because the FSMs clarify the release and free steps, synchronously. - We still keep a (simplified) enum for global context map state, but this is only used so that VTY reporting remains mostly unchanged. == Context map cleanup confusion == The function context_map_hnb_released() was the general cleanup function for a context map. Instead, add separate context_map_free(). == Free context maps separately from HNB == When a HNB releases, talloc_steal() the context maps out of the HNB specific hnb_ctx, so that they are not freed along with the HNB state, possibly leaving SCCP connections afloat. (It is still nice to normally keep context maps as talloc children of their respective hnb_ctx, so talloc reports show which belongs to which.) So far, context map handling found the global hnb_gw pointer via map->hnb_ctx->gw. But in fact, a HNB may disappear at any point in time. Instead, use a separate hnb_gw pointer in map->gw. == RUA procedure codes vs. SCCP prims == So far, the RUA rx side composed SCCP prims to pass on: RUA rx ---SCCP-prim--> RANAP handling ---SCCP-prim--> SCCP tx That is a source of confusion: a RUA procedure code should not translate 1:1 to SCCP prims, especially for RUA id-Disconnect (see release charts below). Instead, move SCCP prim composition over to the SCCP side, using FSM events to forward: RUA rx --event--> RUA FSM --event--> SCCP FSM --SCCP-prim--> SCCP tx +RANAP +RANAP +RANAP RUA tx <--RUA---- RUA FSM <--event-- SCCP FSM <--event-- SCCP rx +RANAP +RANAP +RANAP Hence choose the correct prim according to the SCCP FSM state. - in hnbgw_rua.c, use RUA procedure codes, not prim types. - via the new FSM events' data args, pass msgb containing RANAP PDUs. == Fix SCCP Release behavior == So far, the normal conn release behavior was HNB HNBGW CN | --id-Disconnect--> | ---SCCP-Released--> | Iu-ReleaseComplete | | <--SCCP-RLC-------- | (no data) Instead, the SCCP release is now in accordance with 3GPP TS 48.006 9.2 'Connection release': The MSC sends a SCCP released message. This message shall not contain any user data field. i.e.: HNB HNBGW CN | --id-Disconnect--> | ---Data-Form-1(!)--> | Iu-ReleaseComplete | | <--SCCP-Released---- | (no data) | | ---SCCP-RLC--------> | (no data) (Side note, the final SCCP Release Confirm step is taken care of implicitly by libosmo-sigtran's sccp_scoc.c FSM.) If the CN fails to respond with SCCP-Released, on new X31 timeout, osmo-hnbgw will send an SCCP Released to the CN as fallback. == Memory model for message dispatch == So far, an osmo_scu_prim aka "oph" was passed between RUA and SCCP handling code, and the final dispatch freed it. Every error path had to take care not to leak any oph. Instead, use a much easier and much more leakage proof memory model, inspired by fixeria: - on rx, dispatch RANAP msgb that live in OTC_SELECT. - no code path needs to msgb_free() -- the msgb is discarded via OTC_SELECT when handling is done, error or no error. - any code path may also choose to store the msgb for async dispatch, using talloc_steal(). The user plane mapping via MGW and UPF do that. - if any code path does msgb_free(), that would be no problem either (but none do so now, for simplicity). == Layer separation == Dispatch *all* connection-oriented RUA tx via the RUA FSM and SCCP tx via the SCCP FSM, do not call rua_tx_dt() or osmo_sccp_user_sap_down() directly. == Memory model for decoded ranap_message IEs == Use a talloc destructor to make sure that the ranap_message IEs are always implicitly freed upon talloc_free(), so that no code path can possibly forget to do so. == Implicit cleanup by talloc == Use talloc scoping to remove a bunch of explicit cleanup code. For example, make a chached message a talloc child of its handler: talloc_steal(mgw_fsm_priv, message); mgw_fsm_priv->ranap_rab_ass_req_message = message; and later implicitly free 'message' by only freeing the handler: talloc_free(mgw_fsm_priv) Related: SYS#6297 Change-Id: I6ff7e36532ff57c6f2d3e7e419dd22ef27dafd19
2023-02-12 04:02:48 +00:00
}
context map: introduce RUA and SCCP FSMs to fix leaks Refactor the entire RUA <-> SCCP connection-oriented message forwarding: - conquer confusion about hnbgw_context_map release behavior, and - eradicate SCCP connection leaks. Finer points: == Context map state == So far, we had a single context map state and some flags to keep track of both the RUA and the SCCP connections. It was easy to miss connection cleanup steps, especially on the SCCP side. Instead, the two FSMs clearly define the RUA and SCCP conn states separately, and each side takes care of its own release needs for all possible scenarios. - When both RUA and SCCP are released, the context map is discarded. - A context map can stay around to wait for proper SCCP release, even if the RUA side has lost the HNB connection. - Completely drop the async "context mapper garbage collection", because the FSMs clarify the release and free steps, synchronously. - We still keep a (simplified) enum for global context map state, but this is only used so that VTY reporting remains mostly unchanged. == Context map cleanup confusion == The function context_map_hnb_released() was the general cleanup function for a context map. Instead, add separate context_map_free(). == Free context maps separately from HNB == When a HNB releases, talloc_steal() the context maps out of the HNB specific hnb_ctx, so that they are not freed along with the HNB state, possibly leaving SCCP connections afloat. (It is still nice to normally keep context maps as talloc children of their respective hnb_ctx, so talloc reports show which belongs to which.) So far, context map handling found the global hnb_gw pointer via map->hnb_ctx->gw. But in fact, a HNB may disappear at any point in time. Instead, use a separate hnb_gw pointer in map->gw. == RUA procedure codes vs. SCCP prims == So far, the RUA rx side composed SCCP prims to pass on: RUA rx ---SCCP-prim--> RANAP handling ---SCCP-prim--> SCCP tx That is a source of confusion: a RUA procedure code should not translate 1:1 to SCCP prims, especially for RUA id-Disconnect (see release charts below). Instead, move SCCP prim composition over to the SCCP side, using FSM events to forward: RUA rx --event--> RUA FSM --event--> SCCP FSM --SCCP-prim--> SCCP tx +RANAP +RANAP +RANAP RUA tx <--RUA---- RUA FSM <--event-- SCCP FSM <--event-- SCCP rx +RANAP +RANAP +RANAP Hence choose the correct prim according to the SCCP FSM state. - in hnbgw_rua.c, use RUA procedure codes, not prim types. - via the new FSM events' data args, pass msgb containing RANAP PDUs. == Fix SCCP Release behavior == So far, the normal conn release behavior was HNB HNBGW CN | --id-Disconnect--> | ---SCCP-Released--> | Iu-ReleaseComplete | | <--SCCP-RLC-------- | (no data) Instead, the SCCP release is now in accordance with 3GPP TS 48.006 9.2 'Connection release': The MSC sends a SCCP released message. This message shall not contain any user data field. i.e.: HNB HNBGW CN | --id-Disconnect--> | ---Data-Form-1(!)--> | Iu-ReleaseComplete | | <--SCCP-Released---- | (no data) | | ---SCCP-RLC--------> | (no data) (Side note, the final SCCP Release Confirm step is taken care of implicitly by libosmo-sigtran's sccp_scoc.c FSM.) If the CN fails to respond with SCCP-Released, on new X31 timeout, osmo-hnbgw will send an SCCP Released to the CN as fallback. == Memory model for message dispatch == So far, an osmo_scu_prim aka "oph" was passed between RUA and SCCP handling code, and the final dispatch freed it. Every error path had to take care not to leak any oph. Instead, use a much easier and much more leakage proof memory model, inspired by fixeria: - on rx, dispatch RANAP msgb that live in OTC_SELECT. - no code path needs to msgb_free() -- the msgb is discarded via OTC_SELECT when handling is done, error or no error. - any code path may also choose to store the msgb for async dispatch, using talloc_steal(). The user plane mapping via MGW and UPF do that. - if any code path does msgb_free(), that would be no problem either (but none do so now, for simplicity). == Layer separation == Dispatch *all* connection-oriented RUA tx via the RUA FSM and SCCP tx via the SCCP FSM, do not call rua_tx_dt() or osmo_sccp_user_sap_down() directly. == Memory model for decoded ranap_message IEs == Use a talloc destructor to make sure that the ranap_message IEs are always implicitly freed upon talloc_free(), so that no code path can possibly forget to do so. == Implicit cleanup by talloc == Use talloc scoping to remove a bunch of explicit cleanup code. For example, make a chached message a talloc child of its handler: talloc_steal(mgw_fsm_priv, message); mgw_fsm_priv->ranap_rab_ass_req_message = message; and later implicitly free 'message' by only freeing the handler: talloc_free(mgw_fsm_priv) Related: SYS#6297 Change-Id: I6ff7e36532ff57c6f2d3e7e419dd22ef27dafd19
2023-02-12 04:02:48 +00:00
map_sccp_dispatch(map, MAP_SCCP_EV_RX_CONNECTION_CONFIRM, oph->msg);
return 0;
}
static int handle_cn_data_ind(struct hnbgw_cnlink *cnlink,
const struct osmo_scu_data_param *param,
struct osmo_prim_hdr *oph)
{
struct hnbgw_context_map *map;
map = context_map_by_cn(cnlink, param->conn_id);
if (!map) {
context map: introduce RUA and SCCP FSMs to fix leaks Refactor the entire RUA <-> SCCP connection-oriented message forwarding: - conquer confusion about hnbgw_context_map release behavior, and - eradicate SCCP connection leaks. Finer points: == Context map state == So far, we had a single context map state and some flags to keep track of both the RUA and the SCCP connections. It was easy to miss connection cleanup steps, especially on the SCCP side. Instead, the two FSMs clearly define the RUA and SCCP conn states separately, and each side takes care of its own release needs for all possible scenarios. - When both RUA and SCCP are released, the context map is discarded. - A context map can stay around to wait for proper SCCP release, even if the RUA side has lost the HNB connection. - Completely drop the async "context mapper garbage collection", because the FSMs clarify the release and free steps, synchronously. - We still keep a (simplified) enum for global context map state, but this is only used so that VTY reporting remains mostly unchanged. == Context map cleanup confusion == The function context_map_hnb_released() was the general cleanup function for a context map. Instead, add separate context_map_free(). == Free context maps separately from HNB == When a HNB releases, talloc_steal() the context maps out of the HNB specific hnb_ctx, so that they are not freed along with the HNB state, possibly leaving SCCP connections afloat. (It is still nice to normally keep context maps as talloc children of their respective hnb_ctx, so talloc reports show which belongs to which.) So far, context map handling found the global hnb_gw pointer via map->hnb_ctx->gw. But in fact, a HNB may disappear at any point in time. Instead, use a separate hnb_gw pointer in map->gw. == RUA procedure codes vs. SCCP prims == So far, the RUA rx side composed SCCP prims to pass on: RUA rx ---SCCP-prim--> RANAP handling ---SCCP-prim--> SCCP tx That is a source of confusion: a RUA procedure code should not translate 1:1 to SCCP prims, especially for RUA id-Disconnect (see release charts below). Instead, move SCCP prim composition over to the SCCP side, using FSM events to forward: RUA rx --event--> RUA FSM --event--> SCCP FSM --SCCP-prim--> SCCP tx +RANAP +RANAP +RANAP RUA tx <--RUA---- RUA FSM <--event-- SCCP FSM <--event-- SCCP rx +RANAP +RANAP +RANAP Hence choose the correct prim according to the SCCP FSM state. - in hnbgw_rua.c, use RUA procedure codes, not prim types. - via the new FSM events' data args, pass msgb containing RANAP PDUs. == Fix SCCP Release behavior == So far, the normal conn release behavior was HNB HNBGW CN | --id-Disconnect--> | ---SCCP-Released--> | Iu-ReleaseComplete | | <--SCCP-RLC-------- | (no data) Instead, the SCCP release is now in accordance with 3GPP TS 48.006 9.2 'Connection release': The MSC sends a SCCP released message. This message shall not contain any user data field. i.e.: HNB HNBGW CN | --id-Disconnect--> | ---Data-Form-1(!)--> | Iu-ReleaseComplete | | <--SCCP-Released---- | (no data) | | ---SCCP-RLC--------> | (no data) (Side note, the final SCCP Release Confirm step is taken care of implicitly by libosmo-sigtran's sccp_scoc.c FSM.) If the CN fails to respond with SCCP-Released, on new X31 timeout, osmo-hnbgw will send an SCCP Released to the CN as fallback. == Memory model for message dispatch == So far, an osmo_scu_prim aka "oph" was passed between RUA and SCCP handling code, and the final dispatch freed it. Every error path had to take care not to leak any oph. Instead, use a much easier and much more leakage proof memory model, inspired by fixeria: - on rx, dispatch RANAP msgb that live in OTC_SELECT. - no code path needs to msgb_free() -- the msgb is discarded via OTC_SELECT when handling is done, error or no error. - any code path may also choose to store the msgb for async dispatch, using talloc_steal(). The user plane mapping via MGW and UPF do that. - if any code path does msgb_free(), that would be no problem either (but none do so now, for simplicity). == Layer separation == Dispatch *all* connection-oriented RUA tx via the RUA FSM and SCCP tx via the SCCP FSM, do not call rua_tx_dt() or osmo_sccp_user_sap_down() directly. == Memory model for decoded ranap_message IEs == Use a talloc destructor to make sure that the ranap_message IEs are always implicitly freed upon talloc_free(), so that no code path can possibly forget to do so. == Implicit cleanup by talloc == Use talloc scoping to remove a bunch of explicit cleanup code. For example, make a chached message a talloc child of its handler: talloc_steal(mgw_fsm_priv, message); mgw_fsm_priv->ranap_rab_ass_req_message = message; and later implicitly free 'message' by only freeing the handler: talloc_free(mgw_fsm_priv) Related: SYS#6297 Change-Id: I6ff7e36532ff57c6f2d3e7e419dd22ef27dafd19
2023-02-12 04:02:48 +00:00
/* We have no such SCCP connection. Ignore. */
return 0;
}
context map: introduce RUA and SCCP FSMs to fix leaks Refactor the entire RUA <-> SCCP connection-oriented message forwarding: - conquer confusion about hnbgw_context_map release behavior, and - eradicate SCCP connection leaks. Finer points: == Context map state == So far, we had a single context map state and some flags to keep track of both the RUA and the SCCP connections. It was easy to miss connection cleanup steps, especially on the SCCP side. Instead, the two FSMs clearly define the RUA and SCCP conn states separately, and each side takes care of its own release needs for all possible scenarios. - When both RUA and SCCP are released, the context map is discarded. - A context map can stay around to wait for proper SCCP release, even if the RUA side has lost the HNB connection. - Completely drop the async "context mapper garbage collection", because the FSMs clarify the release and free steps, synchronously. - We still keep a (simplified) enum for global context map state, but this is only used so that VTY reporting remains mostly unchanged. == Context map cleanup confusion == The function context_map_hnb_released() was the general cleanup function for a context map. Instead, add separate context_map_free(). == Free context maps separately from HNB == When a HNB releases, talloc_steal() the context maps out of the HNB specific hnb_ctx, so that they are not freed along with the HNB state, possibly leaving SCCP connections afloat. (It is still nice to normally keep context maps as talloc children of their respective hnb_ctx, so talloc reports show which belongs to which.) So far, context map handling found the global hnb_gw pointer via map->hnb_ctx->gw. But in fact, a HNB may disappear at any point in time. Instead, use a separate hnb_gw pointer in map->gw. == RUA procedure codes vs. SCCP prims == So far, the RUA rx side composed SCCP prims to pass on: RUA rx ---SCCP-prim--> RANAP handling ---SCCP-prim--> SCCP tx That is a source of confusion: a RUA procedure code should not translate 1:1 to SCCP prims, especially for RUA id-Disconnect (see release charts below). Instead, move SCCP prim composition over to the SCCP side, using FSM events to forward: RUA rx --event--> RUA FSM --event--> SCCP FSM --SCCP-prim--> SCCP tx +RANAP +RANAP +RANAP RUA tx <--RUA---- RUA FSM <--event-- SCCP FSM <--event-- SCCP rx +RANAP +RANAP +RANAP Hence choose the correct prim according to the SCCP FSM state. - in hnbgw_rua.c, use RUA procedure codes, not prim types. - via the new FSM events' data args, pass msgb containing RANAP PDUs. == Fix SCCP Release behavior == So far, the normal conn release behavior was HNB HNBGW CN | --id-Disconnect--> | ---SCCP-Released--> | Iu-ReleaseComplete | | <--SCCP-RLC-------- | (no data) Instead, the SCCP release is now in accordance with 3GPP TS 48.006 9.2 'Connection release': The MSC sends a SCCP released message. This message shall not contain any user data field. i.e.: HNB HNBGW CN | --id-Disconnect--> | ---Data-Form-1(!)--> | Iu-ReleaseComplete | | <--SCCP-Released---- | (no data) | | ---SCCP-RLC--------> | (no data) (Side note, the final SCCP Release Confirm step is taken care of implicitly by libosmo-sigtran's sccp_scoc.c FSM.) If the CN fails to respond with SCCP-Released, on new X31 timeout, osmo-hnbgw will send an SCCP Released to the CN as fallback. == Memory model for message dispatch == So far, an osmo_scu_prim aka "oph" was passed between RUA and SCCP handling code, and the final dispatch freed it. Every error path had to take care not to leak any oph. Instead, use a much easier and much more leakage proof memory model, inspired by fixeria: - on rx, dispatch RANAP msgb that live in OTC_SELECT. - no code path needs to msgb_free() -- the msgb is discarded via OTC_SELECT when handling is done, error or no error. - any code path may also choose to store the msgb for async dispatch, using talloc_steal(). The user plane mapping via MGW and UPF do that. - if any code path does msgb_free(), that would be no problem either (but none do so now, for simplicity). == Layer separation == Dispatch *all* connection-oriented RUA tx via the RUA FSM and SCCP tx via the SCCP FSM, do not call rua_tx_dt() or osmo_sccp_user_sap_down() directly. == Memory model for decoded ranap_message IEs == Use a talloc destructor to make sure that the ranap_message IEs are always implicitly freed upon talloc_free(), so that no code path can possibly forget to do so. == Implicit cleanup by talloc == Use talloc scoping to remove a bunch of explicit cleanup code. For example, make a chached message a talloc child of its handler: talloc_steal(mgw_fsm_priv, message); mgw_fsm_priv->ranap_rab_ass_req_message = message; and later implicitly free 'message' by only freeing the handler: talloc_free(mgw_fsm_priv) Related: SYS#6297 Change-Id: I6ff7e36532ff57c6f2d3e7e419dd22ef27dafd19
2023-02-12 04:02:48 +00:00
return map_sccp_dispatch(map, MAP_SCCP_EV_RX_DATA_INDICATION, oph->msg);
}
static int handle_cn_disc_ind(struct hnbgw_cnlink *cnlink,
const struct osmo_scu_disconn_param *param,
struct osmo_prim_hdr *oph)
{
struct hnbgw_context_map *map;
LOGP(DMAIN, LOGL_DEBUG, "handle_cn_disc_ind() conn_id=%d originator=%d\n",
param->conn_id, param->originator);
LOGP(DMAIN, LOGL_DEBUG, "handle_cn_disc_ind() responding_addr=%s\n",
inet_ntoa(param->responding_addr.ip.v4));
map = context_map_by_cn(cnlink, param->conn_id);
if (!map) {
context map: introduce RUA and SCCP FSMs to fix leaks Refactor the entire RUA <-> SCCP connection-oriented message forwarding: - conquer confusion about hnbgw_context_map release behavior, and - eradicate SCCP connection leaks. Finer points: == Context map state == So far, we had a single context map state and some flags to keep track of both the RUA and the SCCP connections. It was easy to miss connection cleanup steps, especially on the SCCP side. Instead, the two FSMs clearly define the RUA and SCCP conn states separately, and each side takes care of its own release needs for all possible scenarios. - When both RUA and SCCP are released, the context map is discarded. - A context map can stay around to wait for proper SCCP release, even if the RUA side has lost the HNB connection. - Completely drop the async "context mapper garbage collection", because the FSMs clarify the release and free steps, synchronously. - We still keep a (simplified) enum for global context map state, but this is only used so that VTY reporting remains mostly unchanged. == Context map cleanup confusion == The function context_map_hnb_released() was the general cleanup function for a context map. Instead, add separate context_map_free(). == Free context maps separately from HNB == When a HNB releases, talloc_steal() the context maps out of the HNB specific hnb_ctx, so that they are not freed along with the HNB state, possibly leaving SCCP connections afloat. (It is still nice to normally keep context maps as talloc children of their respective hnb_ctx, so talloc reports show which belongs to which.) So far, context map handling found the global hnb_gw pointer via map->hnb_ctx->gw. But in fact, a HNB may disappear at any point in time. Instead, use a separate hnb_gw pointer in map->gw. == RUA procedure codes vs. SCCP prims == So far, the RUA rx side composed SCCP prims to pass on: RUA rx ---SCCP-prim--> RANAP handling ---SCCP-prim--> SCCP tx That is a source of confusion: a RUA procedure code should not translate 1:1 to SCCP prims, especially for RUA id-Disconnect (see release charts below). Instead, move SCCP prim composition over to the SCCP side, using FSM events to forward: RUA rx --event--> RUA FSM --event--> SCCP FSM --SCCP-prim--> SCCP tx +RANAP +RANAP +RANAP RUA tx <--RUA---- RUA FSM <--event-- SCCP FSM <--event-- SCCP rx +RANAP +RANAP +RANAP Hence choose the correct prim according to the SCCP FSM state. - in hnbgw_rua.c, use RUA procedure codes, not prim types. - via the new FSM events' data args, pass msgb containing RANAP PDUs. == Fix SCCP Release behavior == So far, the normal conn release behavior was HNB HNBGW CN | --id-Disconnect--> | ---SCCP-Released--> | Iu-ReleaseComplete | | <--SCCP-RLC-------- | (no data) Instead, the SCCP release is now in accordance with 3GPP TS 48.006 9.2 'Connection release': The MSC sends a SCCP released message. This message shall not contain any user data field. i.e.: HNB HNBGW CN | --id-Disconnect--> | ---Data-Form-1(!)--> | Iu-ReleaseComplete | | <--SCCP-Released---- | (no data) | | ---SCCP-RLC--------> | (no data) (Side note, the final SCCP Release Confirm step is taken care of implicitly by libosmo-sigtran's sccp_scoc.c FSM.) If the CN fails to respond with SCCP-Released, on new X31 timeout, osmo-hnbgw will send an SCCP Released to the CN as fallback. == Memory model for message dispatch == So far, an osmo_scu_prim aka "oph" was passed between RUA and SCCP handling code, and the final dispatch freed it. Every error path had to take care not to leak any oph. Instead, use a much easier and much more leakage proof memory model, inspired by fixeria: - on rx, dispatch RANAP msgb that live in OTC_SELECT. - no code path needs to msgb_free() -- the msgb is discarded via OTC_SELECT when handling is done, error or no error. - any code path may also choose to store the msgb for async dispatch, using talloc_steal(). The user plane mapping via MGW and UPF do that. - if any code path does msgb_free(), that would be no problem either (but none do so now, for simplicity). == Layer separation == Dispatch *all* connection-oriented RUA tx via the RUA FSM and SCCP tx via the SCCP FSM, do not call rua_tx_dt() or osmo_sccp_user_sap_down() directly. == Memory model for decoded ranap_message IEs == Use a talloc destructor to make sure that the ranap_message IEs are always implicitly freed upon talloc_free(), so that no code path can possibly forget to do so. == Implicit cleanup by talloc == Use talloc scoping to remove a bunch of explicit cleanup code. For example, make a chached message a talloc child of its handler: talloc_steal(mgw_fsm_priv, message); mgw_fsm_priv->ranap_rab_ass_req_message = message; and later implicitly free 'message' by only freeing the handler: talloc_free(mgw_fsm_priv) Related: SYS#6297 Change-Id: I6ff7e36532ff57c6f2d3e7e419dd22ef27dafd19
2023-02-12 04:02:48 +00:00
/* We have no connection. Ignore. */
return 0;
}
context map: introduce RUA and SCCP FSMs to fix leaks Refactor the entire RUA <-> SCCP connection-oriented message forwarding: - conquer confusion about hnbgw_context_map release behavior, and - eradicate SCCP connection leaks. Finer points: == Context map state == So far, we had a single context map state and some flags to keep track of both the RUA and the SCCP connections. It was easy to miss connection cleanup steps, especially on the SCCP side. Instead, the two FSMs clearly define the RUA and SCCP conn states separately, and each side takes care of its own release needs for all possible scenarios. - When both RUA and SCCP are released, the context map is discarded. - A context map can stay around to wait for proper SCCP release, even if the RUA side has lost the HNB connection. - Completely drop the async "context mapper garbage collection", because the FSMs clarify the release and free steps, synchronously. - We still keep a (simplified) enum for global context map state, but this is only used so that VTY reporting remains mostly unchanged. == Context map cleanup confusion == The function context_map_hnb_released() was the general cleanup function for a context map. Instead, add separate context_map_free(). == Free context maps separately from HNB == When a HNB releases, talloc_steal() the context maps out of the HNB specific hnb_ctx, so that they are not freed along with the HNB state, possibly leaving SCCP connections afloat. (It is still nice to normally keep context maps as talloc children of their respective hnb_ctx, so talloc reports show which belongs to which.) So far, context map handling found the global hnb_gw pointer via map->hnb_ctx->gw. But in fact, a HNB may disappear at any point in time. Instead, use a separate hnb_gw pointer in map->gw. == RUA procedure codes vs. SCCP prims == So far, the RUA rx side composed SCCP prims to pass on: RUA rx ---SCCP-prim--> RANAP handling ---SCCP-prim--> SCCP tx That is a source of confusion: a RUA procedure code should not translate 1:1 to SCCP prims, especially for RUA id-Disconnect (see release charts below). Instead, move SCCP prim composition over to the SCCP side, using FSM events to forward: RUA rx --event--> RUA FSM --event--> SCCP FSM --SCCP-prim--> SCCP tx +RANAP +RANAP +RANAP RUA tx <--RUA---- RUA FSM <--event-- SCCP FSM <--event-- SCCP rx +RANAP +RANAP +RANAP Hence choose the correct prim according to the SCCP FSM state. - in hnbgw_rua.c, use RUA procedure codes, not prim types. - via the new FSM events' data args, pass msgb containing RANAP PDUs. == Fix SCCP Release behavior == So far, the normal conn release behavior was HNB HNBGW CN | --id-Disconnect--> | ---SCCP-Released--> | Iu-ReleaseComplete | | <--SCCP-RLC-------- | (no data) Instead, the SCCP release is now in accordance with 3GPP TS 48.006 9.2 'Connection release': The MSC sends a SCCP released message. This message shall not contain any user data field. i.e.: HNB HNBGW CN | --id-Disconnect--> | ---Data-Form-1(!)--> | Iu-ReleaseComplete | | <--SCCP-Released---- | (no data) | | ---SCCP-RLC--------> | (no data) (Side note, the final SCCP Release Confirm step is taken care of implicitly by libosmo-sigtran's sccp_scoc.c FSM.) If the CN fails to respond with SCCP-Released, on new X31 timeout, osmo-hnbgw will send an SCCP Released to the CN as fallback. == Memory model for message dispatch == So far, an osmo_scu_prim aka "oph" was passed between RUA and SCCP handling code, and the final dispatch freed it. Every error path had to take care not to leak any oph. Instead, use a much easier and much more leakage proof memory model, inspired by fixeria: - on rx, dispatch RANAP msgb that live in OTC_SELECT. - no code path needs to msgb_free() -- the msgb is discarded via OTC_SELECT when handling is done, error or no error. - any code path may also choose to store the msgb for async dispatch, using talloc_steal(). The user plane mapping via MGW and UPF do that. - if any code path does msgb_free(), that would be no problem either (but none do so now, for simplicity). == Layer separation == Dispatch *all* connection-oriented RUA tx via the RUA FSM and SCCP tx via the SCCP FSM, do not call rua_tx_dt() or osmo_sccp_user_sap_down() directly. == Memory model for decoded ranap_message IEs == Use a talloc destructor to make sure that the ranap_message IEs are always implicitly freed upon talloc_free(), so that no code path can possibly forget to do so. == Implicit cleanup by talloc == Use talloc scoping to remove a bunch of explicit cleanup code. For example, make a chached message a talloc child of its handler: talloc_steal(mgw_fsm_priv, message); mgw_fsm_priv->ranap_rab_ass_req_message = message; and later implicitly free 'message' by only freeing the handler: talloc_free(mgw_fsm_priv) Related: SYS#6297 Change-Id: I6ff7e36532ff57c6f2d3e7e419dd22ef27dafd19
2023-02-12 04:02:48 +00:00
return map_sccp_dispatch(map, MAP_SCCP_EV_RX_RELEASED, oph->msg);
}
/* Entry point for primitives coming up from SCCP User SAP */
static int sccp_sap_up(struct osmo_prim_hdr *oph, void *ctx)
{
struct osmo_sccp_user *scu = ctx;
struct hnbgw_cnlink *cnlink;
struct osmo_scu_prim *prim = (struct osmo_scu_prim *) oph;
int rc = 0;
LOGP(DMAIN, LOGL_DEBUG, "sccp_sap_up(%s)\n", osmo_scu_prim_name(oph));
if (!scu) {
LOGP(DMAIN, LOGL_ERROR,
"sccp_sap_up(): NULL osmo_sccp_user, cannot send prim (sap %u prim %u op %d)\n",
oph->sap, oph->primitive, oph->operation);
return -1;
}
cnlink = osmo_sccp_user_get_priv(scu);
if (!cnlink) {
LOGP(DMAIN, LOGL_ERROR,
"sccp_sap_up(): NULL hnbgw_cnlink, cannot send prim (sap %u prim %u op %d)\n",
oph->sap, oph->primitive, oph->operation);
return -1;
}
context map: introduce RUA and SCCP FSMs to fix leaks Refactor the entire RUA <-> SCCP connection-oriented message forwarding: - conquer confusion about hnbgw_context_map release behavior, and - eradicate SCCP connection leaks. Finer points: == Context map state == So far, we had a single context map state and some flags to keep track of both the RUA and the SCCP connections. It was easy to miss connection cleanup steps, especially on the SCCP side. Instead, the two FSMs clearly define the RUA and SCCP conn states separately, and each side takes care of its own release needs for all possible scenarios. - When both RUA and SCCP are released, the context map is discarded. - A context map can stay around to wait for proper SCCP release, even if the RUA side has lost the HNB connection. - Completely drop the async "context mapper garbage collection", because the FSMs clarify the release and free steps, synchronously. - We still keep a (simplified) enum for global context map state, but this is only used so that VTY reporting remains mostly unchanged. == Context map cleanup confusion == The function context_map_hnb_released() was the general cleanup function for a context map. Instead, add separate context_map_free(). == Free context maps separately from HNB == When a HNB releases, talloc_steal() the context maps out of the HNB specific hnb_ctx, so that they are not freed along with the HNB state, possibly leaving SCCP connections afloat. (It is still nice to normally keep context maps as talloc children of their respective hnb_ctx, so talloc reports show which belongs to which.) So far, context map handling found the global hnb_gw pointer via map->hnb_ctx->gw. But in fact, a HNB may disappear at any point in time. Instead, use a separate hnb_gw pointer in map->gw. == RUA procedure codes vs. SCCP prims == So far, the RUA rx side composed SCCP prims to pass on: RUA rx ---SCCP-prim--> RANAP handling ---SCCP-prim--> SCCP tx That is a source of confusion: a RUA procedure code should not translate 1:1 to SCCP prims, especially for RUA id-Disconnect (see release charts below). Instead, move SCCP prim composition over to the SCCP side, using FSM events to forward: RUA rx --event--> RUA FSM --event--> SCCP FSM --SCCP-prim--> SCCP tx +RANAP +RANAP +RANAP RUA tx <--RUA---- RUA FSM <--event-- SCCP FSM <--event-- SCCP rx +RANAP +RANAP +RANAP Hence choose the correct prim according to the SCCP FSM state. - in hnbgw_rua.c, use RUA procedure codes, not prim types. - via the new FSM events' data args, pass msgb containing RANAP PDUs. == Fix SCCP Release behavior == So far, the normal conn release behavior was HNB HNBGW CN | --id-Disconnect--> | ---SCCP-Released--> | Iu-ReleaseComplete | | <--SCCP-RLC-------- | (no data) Instead, the SCCP release is now in accordance with 3GPP TS 48.006 9.2 'Connection release': The MSC sends a SCCP released message. This message shall not contain any user data field. i.e.: HNB HNBGW CN | --id-Disconnect--> | ---Data-Form-1(!)--> | Iu-ReleaseComplete | | <--SCCP-Released---- | (no data) | | ---SCCP-RLC--------> | (no data) (Side note, the final SCCP Release Confirm step is taken care of implicitly by libosmo-sigtran's sccp_scoc.c FSM.) If the CN fails to respond with SCCP-Released, on new X31 timeout, osmo-hnbgw will send an SCCP Released to the CN as fallback. == Memory model for message dispatch == So far, an osmo_scu_prim aka "oph" was passed between RUA and SCCP handling code, and the final dispatch freed it. Every error path had to take care not to leak any oph. Instead, use a much easier and much more leakage proof memory model, inspired by fixeria: - on rx, dispatch RANAP msgb that live in OTC_SELECT. - no code path needs to msgb_free() -- the msgb is discarded via OTC_SELECT when handling is done, error or no error. - any code path may also choose to store the msgb for async dispatch, using talloc_steal(). The user plane mapping via MGW and UPF do that. - if any code path does msgb_free(), that would be no problem either (but none do so now, for simplicity). == Layer separation == Dispatch *all* connection-oriented RUA tx via the RUA FSM and SCCP tx via the SCCP FSM, do not call rua_tx_dt() or osmo_sccp_user_sap_down() directly. == Memory model for decoded ranap_message IEs == Use a talloc destructor to make sure that the ranap_message IEs are always implicitly freed upon talloc_free(), so that no code path can possibly forget to do so. == Implicit cleanup by talloc == Use talloc scoping to remove a bunch of explicit cleanup code. For example, make a chached message a talloc child of its handler: talloc_steal(mgw_fsm_priv, message); mgw_fsm_priv->ranap_rab_ass_req_message = message; and later implicitly free 'message' by only freeing the handler: talloc_free(mgw_fsm_priv) Related: SYS#6297 Change-Id: I6ff7e36532ff57c6f2d3e7e419dd22ef27dafd19
2023-02-12 04:02:48 +00:00
talloc_steal(OTC_SELECT, oph->msg);
switch (OSMO_PRIM_HDR(oph)) {
case OSMO_PRIM(OSMO_SCU_PRIM_N_UNITDATA, PRIM_OP_INDICATION):
rc = handle_cn_unitdata(cnlink, &prim->u.unitdata, oph);
break;
case OSMO_PRIM(OSMO_SCU_PRIM_N_CONNECT, PRIM_OP_CONFIRM):
rc = handle_cn_conn_conf(cnlink, &prim->u.connect, oph);
break;
case OSMO_PRIM(OSMO_SCU_PRIM_N_DATA, PRIM_OP_INDICATION):
rc = handle_cn_data_ind(cnlink, &prim->u.data, oph);
break;
case OSMO_PRIM(OSMO_SCU_PRIM_N_DISCONNECT, PRIM_OP_INDICATION):
rc = handle_cn_disc_ind(cnlink, &prim->u.disconnect, oph);
break;
case OSMO_PRIM(OSMO_SCU_PRIM_N_PCSTATE, PRIM_OP_INDICATION):
LOGP(DMAIN, LOGL_DEBUG, "Ignoring prim %s from SCCP USER SAP\n",
osmo_scu_prim_hdr_name_c(OTC_SELECT, oph));
break;
default:
LOGP(DMAIN, LOGL_ERROR,
"Received unknown prim %u from SCCP USER SAP\n",
OSMO_PRIM_HDR(oph));
break;
}
return rc;
}
static bool addr_has_pc_and_ssn(const struct osmo_sccp_addr *addr)
{
if (!(addr->presence & OSMO_SCCP_ADDR_T_SSN))
return false;
if (!(addr->presence & OSMO_SCCP_ADDR_T_PC))
return false;
return true;
}
static int resolve_addr_name(struct osmo_sccp_addr *dest, struct osmo_ss7_instance **ss7,
const char *addr_name, const char *label,
uint32_t default_pc)
{
struct osmo_ss7_instance *ss7_tmp;
if (!addr_name) {
osmo_sccp_make_addr_pc_ssn(dest, default_pc, OSMO_SCCP_SSN_RANAP);
LOGP(DMAIN, LOGL_INFO, "%s remote addr not configured, using default: %s\n", label,
osmo_sccp_addr_name(*ss7, dest));
return 0;
}
ss7_tmp = osmo_sccp_addr_by_name(dest, addr_name);
if (!ss7_tmp) {
LOGP(DMAIN, LOGL_ERROR, "%s remote addr: no such SCCP address book entry: '%s'\n",
label, addr_name);
return -1;
}
if (*ss7 && (*ss7 != ss7_tmp)) {
LOGP(DMAIN, LOGL_ERROR, "IuCS and IuPS cannot be served from separate CS7 instances,"
" cs7 instance %d != %d\n", (*ss7)->cfg.id, ss7_tmp->cfg.id);
return -1;
}
*ss7 = ss7_tmp;
osmo_sccp_addr_set_ssn(dest, OSMO_SCCP_SSN_RANAP);
if (!addr_has_pc_and_ssn(dest)) {
LOGP(DMAIN, LOGL_ERROR, "Invalid/incomplete %s remote-addr: %s\n",
label, osmo_sccp_addr_name(*ss7, dest));
return -1;
}
LOGP(DRANAP, LOGL_NOTICE, "Remote %s SCCP addr: %s\n",
label, osmo_sccp_addr_name(*ss7, dest));
return 0;
}
int hnbgw_cnlink_init(struct hnb_gw *gw, const char *stp_host, uint16_t stp_port, const char *local_ip)
{
struct hnbgw_cnlink *cnlink;
struct osmo_ss7_instance *ss7;
uint32_t local_pc;
OSMO_ASSERT(!gw->sccp.client);
OSMO_ASSERT(!gw->sccp.cnlink);
ss7 = NULL;
if (resolve_addr_name(&gw->sccp.iucs_remote_addr, &ss7,
gw->config.iucs_remote_addr_name, "IuCS", (23 << 3) + 1))
return -1;
if (resolve_addr_name(&gw->sccp.iups_remote_addr, &ss7,
gw->config.iups_remote_addr_name, "IuPS", (23 << 3) + 4))
return -1;
if (!ss7) {
LOGP(DRANAP, LOGL_NOTICE, "No cs7 instance configured for IuCS nor IuPS,"
" creating default instance\n");
ss7 = osmo_ss7_instance_find_or_create(gw, 0);
if (!ss7)
return -1;
ss7->cfg.primary_pc = (23 << 3) + 5;
}
if (!osmo_ss7_pc_is_valid(ss7->cfg.primary_pc)) {
LOGP(DMAIN, LOGL_ERROR, "IuCS/IuPS uplink cannot be setup: CS7 instance %d has no point-code set\n",
ss7->cfg.id);
return -1;
}
local_pc = ss7->cfg.primary_pc;
osmo_sccp_make_addr_pc_ssn(&gw->sccp.local_addr, local_pc, OSMO_SCCP_SSN_RANAP);
LOGP(DRANAP, LOGL_NOTICE, "Local SCCP addr: %s\n", osmo_sccp_addr_name(ss7, &gw->sccp.local_addr));
gw->sccp.client = osmo_sccp_simple_client_on_ss7_id(gw, ss7->cfg.id, "OsmoHNBGW",
local_pc, OSMO_SS7_ASP_PROT_M3UA,
0, local_ip, stp_port, stp_host);
if (!gw->sccp.client) {
LOGP(DMAIN, LOGL_ERROR, "Failed to init SCCP Client\n");
return -1;
}
cnlink = talloc_zero(gw, struct hnbgw_cnlink);
cnlink->gw = gw;
INIT_LLIST_HEAD(&cnlink->map_list);
cnlink->T_RafC.cb = cnlink_trafc_cb;
cnlink->T_RafC.data = gw;
cnlink->next_conn_id = 1000;
cnlink->sccp_user = osmo_sccp_user_bind_pc(gw->sccp.client, "OsmoHNBGW", sccp_sap_up,
OSMO_SCCP_SSN_RANAP, gw->sccp.local_addr.pc);
if (!cnlink->sccp_user) {
LOGP(DMAIN, LOGL_ERROR, "Failed to init SCCP User\n");
return -1;
}
LOGP(DRANAP, LOGL_NOTICE, "Remote SCCP addr: IuCS: %s\n",
osmo_sccp_addr_name(ss7, &gw->sccp.iucs_remote_addr));
LOGP(DRANAP, LOGL_NOTICE, "Remote SCCP addr: IuPS: %s\n",
osmo_sccp_addr_name(ss7, &gw->sccp.iups_remote_addr));
/* In sccp_sap_up() we expect the cnlink in the user's priv. */
osmo_sccp_user_set_priv(cnlink->sccp_user, cnlink);
gw->sccp.cnlink = cnlink;
return 0;
}
context map: introduce RUA and SCCP FSMs to fix leaks Refactor the entire RUA <-> SCCP connection-oriented message forwarding: - conquer confusion about hnbgw_context_map release behavior, and - eradicate SCCP connection leaks. Finer points: == Context map state == So far, we had a single context map state and some flags to keep track of both the RUA and the SCCP connections. It was easy to miss connection cleanup steps, especially on the SCCP side. Instead, the two FSMs clearly define the RUA and SCCP conn states separately, and each side takes care of its own release needs for all possible scenarios. - When both RUA and SCCP are released, the context map is discarded. - A context map can stay around to wait for proper SCCP release, even if the RUA side has lost the HNB connection. - Completely drop the async "context mapper garbage collection", because the FSMs clarify the release and free steps, synchronously. - We still keep a (simplified) enum for global context map state, but this is only used so that VTY reporting remains mostly unchanged. == Context map cleanup confusion == The function context_map_hnb_released() was the general cleanup function for a context map. Instead, add separate context_map_free(). == Free context maps separately from HNB == When a HNB releases, talloc_steal() the context maps out of the HNB specific hnb_ctx, so that they are not freed along with the HNB state, possibly leaving SCCP connections afloat. (It is still nice to normally keep context maps as talloc children of their respective hnb_ctx, so talloc reports show which belongs to which.) So far, context map handling found the global hnb_gw pointer via map->hnb_ctx->gw. But in fact, a HNB may disappear at any point in time. Instead, use a separate hnb_gw pointer in map->gw. == RUA procedure codes vs. SCCP prims == So far, the RUA rx side composed SCCP prims to pass on: RUA rx ---SCCP-prim--> RANAP handling ---SCCP-prim--> SCCP tx That is a source of confusion: a RUA procedure code should not translate 1:1 to SCCP prims, especially for RUA id-Disconnect (see release charts below). Instead, move SCCP prim composition over to the SCCP side, using FSM events to forward: RUA rx --event--> RUA FSM --event--> SCCP FSM --SCCP-prim--> SCCP tx +RANAP +RANAP +RANAP RUA tx <--RUA---- RUA FSM <--event-- SCCP FSM <--event-- SCCP rx +RANAP +RANAP +RANAP Hence choose the correct prim according to the SCCP FSM state. - in hnbgw_rua.c, use RUA procedure codes, not prim types. - via the new FSM events' data args, pass msgb containing RANAP PDUs. == Fix SCCP Release behavior == So far, the normal conn release behavior was HNB HNBGW CN | --id-Disconnect--> | ---SCCP-Released--> | Iu-ReleaseComplete | | <--SCCP-RLC-------- | (no data) Instead, the SCCP release is now in accordance with 3GPP TS 48.006 9.2 'Connection release': The MSC sends a SCCP released message. This message shall not contain any user data field. i.e.: HNB HNBGW CN | --id-Disconnect--> | ---Data-Form-1(!)--> | Iu-ReleaseComplete | | <--SCCP-Released---- | (no data) | | ---SCCP-RLC--------> | (no data) (Side note, the final SCCP Release Confirm step is taken care of implicitly by libosmo-sigtran's sccp_scoc.c FSM.) If the CN fails to respond with SCCP-Released, on new X31 timeout, osmo-hnbgw will send an SCCP Released to the CN as fallback. == Memory model for message dispatch == So far, an osmo_scu_prim aka "oph" was passed between RUA and SCCP handling code, and the final dispatch freed it. Every error path had to take care not to leak any oph. Instead, use a much easier and much more leakage proof memory model, inspired by fixeria: - on rx, dispatch RANAP msgb that live in OTC_SELECT. - no code path needs to msgb_free() -- the msgb is discarded via OTC_SELECT when handling is done, error or no error. - any code path may also choose to store the msgb for async dispatch, using talloc_steal(). The user plane mapping via MGW and UPF do that. - if any code path does msgb_free(), that would be no problem either (but none do so now, for simplicity). == Layer separation == Dispatch *all* connection-oriented RUA tx via the RUA FSM and SCCP tx via the SCCP FSM, do not call rua_tx_dt() or osmo_sccp_user_sap_down() directly. == Memory model for decoded ranap_message IEs == Use a talloc destructor to make sure that the ranap_message IEs are always implicitly freed upon talloc_free(), so that no code path can possibly forget to do so. == Implicit cleanup by talloc == Use talloc scoping to remove a bunch of explicit cleanup code. For example, make a chached message a talloc child of its handler: talloc_steal(mgw_fsm_priv, message); mgw_fsm_priv->ranap_rab_ass_req_message = message; and later implicitly free 'message' by only freeing the handler: talloc_free(mgw_fsm_priv) Related: SYS#6297 Change-Id: I6ff7e36532ff57c6f2d3e7e419dd22ef27dafd19
2023-02-12 04:02:48 +00:00
const struct osmo_sccp_addr *hnbgw_cn_get_remote_addr(struct hnb_gw *gw, bool is_ps)
{
return is_ps ? &gw->sccp.iups_remote_addr : &gw->sccp.iucs_remote_addr;
}