2022-01-06 16:05:08 +00:00
|
|
|
/* (C) 2021 by sysmocom s.f.m.c. GmbH <info@sysmocom.de>
|
|
|
|
* All Rights Reserved
|
|
|
|
*
|
|
|
|
* Author: Philipp Maier
|
|
|
|
*
|
|
|
|
* This program is free software; you can redistribute it and/or modify
|
|
|
|
* it under the terms of the GNU General Public License as published by
|
|
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
|
|
* (at your option) any later version.
|
|
|
|
*
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <errno.h>
|
|
|
|
|
|
|
|
#include <osmocom/core/msgb.h>
|
|
|
|
#include <osmocom/core/utils.h>
|
|
|
|
#include <osmocom/core/prim.h>
|
|
|
|
|
|
|
|
#include <osmocom/core/fsm.h>
|
|
|
|
#include <osmocom/core/byteswap.h>
|
|
|
|
#include <arpa/inet.h>
|
|
|
|
#include <osmocom/core/logging.h>
|
|
|
|
#include <osmocom/core/sockaddr_str.h>
|
|
|
|
|
|
|
|
#include <osmocom/ranap/ranap_common.h>
|
|
|
|
#include <osmocom/ranap/ranap_common_cn.h>
|
|
|
|
#include <osmocom/ranap/ranap_common_ran.h>
|
|
|
|
#include <osmocom/ranap/ranap_msg_factory.h>
|
|
|
|
|
|
|
|
#include <osmocom/ranap/ranap_ies_defs.h>
|
|
|
|
#include <osmocom/ranap/iu_helpers.h>
|
|
|
|
#include <asn1c/asn1helpers.h>
|
|
|
|
|
|
|
|
#include <osmocom/hnbgw/hnbgw.h>
|
|
|
|
#include <osmocom/hnbgw/context_map.h>
|
|
|
|
#include <osmocom/hnbgw/ranap_rab_ass.h>
|
|
|
|
|
|
|
|
#include <osmocom/hnbgw/hnbgw_rua.h>
|
|
|
|
|
|
|
|
#include <osmocom/core/tdef.h>
|
|
|
|
#include <osmocom/hnbgw/tdefs.h>
|
|
|
|
#include <osmocom/mgcp_client/mgcp_client_endpoint_fsm.h>
|
|
|
|
|
|
|
|
/* NOTE: This implementation can only handle one RAB per hnbgw context. This simplification was made because usually
|
|
|
|
* a voice call will require only one RAB at a time. An exception may be corner cases like video calls, which we
|
|
|
|
* do not support at the moment. */
|
|
|
|
|
|
|
|
/* Send Iu Release Request, this is done in erroneous cases from which we cannot recover */
|
|
|
|
static void tx_release_req(struct hnbgw_context_map *map)
|
|
|
|
{
|
|
|
|
struct msgb *msg;
|
|
|
|
static const struct RANAP_Cause cause = {
|
|
|
|
.present = RANAP_Cause_PR_transmissionNetwork,
|
|
|
|
.choice.transmissionNetwork =
|
|
|
|
RANAP_CauseTransmissionNetwork_iu_transport_connection_failed_to_establish,
|
|
|
|
};
|
|
|
|
|
|
|
|
msg = ranap_new_msg_iu_rel_req(&cause);
|
|
|
|
msg->l2h = msg->data;
|
context map: introduce RUA and SCCP FSMs to fix leaks
Refactor the entire RUA <-> SCCP connection-oriented message forwarding:
- conquer confusion about hnbgw_context_map release behavior, and
- eradicate SCCP connection leaks.
Finer points:
== Context map state ==
So far, we had a single context map state and some flags to keep track
of both the RUA and the SCCP connections. It was easy to miss connection
cleanup steps, especially on the SCCP side.
Instead, the two FSMs clearly define the RUA and SCCP conn states
separately, and each side takes care of its own release needs for all
possible scenarios.
- When both RUA and SCCP are released, the context map is discarded.
- A context map can stay around to wait for proper SCCP release, even if
the RUA side has lost the HNB connection.
- Completely drop the async "context mapper garbage collection", because
the FSMs clarify the release and free steps, synchronously.
- We still keep a (simplified) enum for global context map state, but
this is only used so that VTY reporting remains mostly unchanged.
== Context map cleanup confusion ==
The function context_map_hnb_released() was the general cleanup function
for a context map. Instead, add separate context_map_free().
== Free context maps separately from HNB ==
When a HNB releases, talloc_steal() the context maps out of the HNB
specific hnb_ctx, so that they are not freed along with the HNB state,
possibly leaving SCCP connections afloat.
(It is still nice to normally keep context maps as talloc children of
their respective hnb_ctx, so talloc reports show which belongs to
which.)
So far, context map handling found the global hnb_gw pointer via
map->hnb_ctx->gw. But in fact, a HNB may disappear at any point in time.
Instead, use a separate hnb_gw pointer in map->gw.
== RUA procedure codes vs. SCCP prims ==
So far, the RUA rx side composed SCCP prims to pass on:
RUA rx ---SCCP-prim--> RANAP handling ---SCCP-prim--> SCCP tx
That is a source of confusion: a RUA procedure code should not translate
1:1 to SCCP prims, especially for RUA id-Disconnect (see release charts
below).
Instead, move SCCP prim composition over to the SCCP side, using FSM
events to forward:
RUA rx --event--> RUA FSM --event--> SCCP FSM --SCCP-prim--> SCCP tx
+RANAP +RANAP +RANAP
RUA tx <--RUA---- RUA FSM <--event-- SCCP FSM <--event-- SCCP rx
+RANAP +RANAP +RANAP
Hence choose the correct prim according to the SCCP FSM state.
- in hnbgw_rua.c, use RUA procedure codes, not prim types.
- via the new FSM events' data args, pass msgb containing RANAP PDUs.
== Fix SCCP Release behavior ==
So far, the normal conn release behavior was
HNB HNBGW CN
| --id-Disconnect--> | ---SCCP-Released--> | Iu-ReleaseComplete
| | <--SCCP-RLC-------- | (no data)
Instead, the SCCP release is now in accordance with 3GPP TS 48.006 9.2
'Connection release':
The MSC sends a SCCP released message. This message shall not contain
any user data field.
i.e.:
HNB HNBGW CN
| --id-Disconnect--> | ---Data-Form-1(!)--> | Iu-ReleaseComplete
| | <--SCCP-Released---- | (no data)
| | ---SCCP-RLC--------> | (no data)
(Side note, the final SCCP Release Confirm step is taken care of
implicitly by libosmo-sigtran's sccp_scoc.c FSM.)
If the CN fails to respond with SCCP-Released, on new X31 timeout,
osmo-hnbgw will send an SCCP Released to the CN as fallback.
== Memory model for message dispatch ==
So far, an osmo_scu_prim aka "oph" was passed between RUA and SCCP
handling code, and the final dispatch freed it. Every error path had to
take care not to leak any oph.
Instead, use a much easier and much more leakage proof memory model,
inspired by fixeria:
- on rx, dispatch RANAP msgb that live in OTC_SELECT.
- no code path needs to msgb_free() -- the msgb is discarded via
OTC_SELECT when handling is done, error or no error.
- any code path may also choose to store the msgb for async dispatch,
using talloc_steal(). The user plane mapping via MGW and UPF do that.
- if any code path does msgb_free(), that would be no problem either
(but none do so now, for simplicity).
== Layer separation ==
Dispatch *all* connection-oriented RUA tx via the RUA FSM and SCCP tx
via the SCCP FSM, do not call rua_tx_dt() or osmo_sccp_user_sap_down()
directly.
== Memory model for decoded ranap_message IEs ==
Use a talloc destructor to make sure that the ranap_message IEs are
always implicitly freed upon talloc_free(), so that no code path can
possibly forget to do so.
== Implicit cleanup by talloc ==
Use talloc scoping to remove a bunch of explicit cleanup code. For
example, make a chached message a talloc child of its handler:
talloc_steal(mgw_fsm_priv, message);
mgw_fsm_priv->ranap_rab_ass_req_message = message;
and later implicitly free 'message' by only freeing the handler:
talloc_free(mgw_fsm_priv)
Related: SYS#6297
Change-Id: I6ff7e36532ff57c6f2d3e7e419dd22ef27dafd19
2023-02-12 04:02:48 +00:00
|
|
|
talloc_steal(OTC_SELECT, msg);
|
|
|
|
map_sccp_dispatch(map, MAP_SCCP_EV_TX_DATA_REQUEST, msg);
|
2022-01-06 16:05:08 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Bit mask with only bit number x set. The constant is unsigned so that S(31) is well defined: left-shifting a
 * signed 1 into the sign bit is undefined behavior. */
#define S(x) (1U << (x))
|
|
|
|
|
|
|
|
/* Declaration only, no definition is visible here.
 * NOTE(review): presumably the ASN.1 XER debug-print switch of the RANAP/asn1c code -- confirm. */
extern int asn1_xer_print;
|
|
|
|
|
|
|
|
/* Events handled by the MGW FSM. */
enum mgw_fsm_event {
	/* Passed to osmo_mgcpc_ep_ci_request() as the event to dispatch on success */
	MGW_EV_MGCP_OK = 0,
	/* Passed to osmo_mgcpc_ep_ci_request() as the event to dispatch on failure */
	MGW_EV_MGCP_FAIL,
	/* Passed to osmo_mgcpc_ep_alloc() as the parent termination event of the MGCP endpoint */
	MGW_EV_MGCP_TERM,
	/* Accepted in MGW_ST_ASSIGN, moves the FSM on to MGW_ST_MDCX_HNB */
	MGW_EV_RAB_ASS_RESP,
	/* NOTE(review): presumably requests teardown of the call legs -- handler not visible here, confirm */
	MGW_EV_RELEASE,
};
|
|
|
|
|
|
|
|
static const struct value_string mgw_fsm_event_names[] = {
|
|
|
|
OSMO_VALUE_STRING(MGW_EV_MGCP_OK),
|
|
|
|
OSMO_VALUE_STRING(MGW_EV_MGCP_FAIL),
|
|
|
|
OSMO_VALUE_STRING(MGW_EV_MGCP_TERM),
|
|
|
|
OSMO_VALUE_STRING(MGW_EV_RAB_ASS_RESP),
|
|
|
|
OSMO_VALUE_STRING(MGW_EV_RELEASE),
|
|
|
|
{}
|
|
|
|
};
|
|
|
|
|
|
|
|
/* States of the MGW FSM. The values also index mgw_fsm_timeouts[]. */
enum mgw_fsm_state {
	/* Creating the HNB side call-leg on the MGW */
	MGW_ST_CRCX_HNB = 0,
	/* Modified RAB-AssignmentRequest was forwarded to the HNB, waiting for MGW_EV_RAB_ASS_RESP */
	MGW_ST_ASSIGN,
	/* RAB-AssignmentResponse received, completing the HNB side call-leg on the MGW */
	MGW_ST_MDCX_HNB,
	/* NOTE(review): presumably creating the MSC side call-leg on the MGW -- handler not visible here */
	MGW_ST_CRCX_MSC,
	/* NOTE(review): presumably both call-legs are set up -- handler not visible here */
	MGW_ST_ESTABLISHED,
	/* NOTE(review): presumably the call-legs are being torn down -- handler not visible here */
	MGW_ST_RELEASE,
	/* Entered whenever one of the steps above fails */
	MGW_ST_FAILURE,
};
|
|
|
|
|
|
|
|
struct mgw_fsm_priv {
|
|
|
|
/* Backpointer to HNBGW context */
|
|
|
|
struct hnbgw_context_map *map;
|
|
|
|
|
|
|
|
/* RAB-ID from RANAP RAB AssignmentRequest message */
|
|
|
|
uint8_t rab_id;
|
|
|
|
|
|
|
|
/* Pointers to messages and prim header we take ownership of */
|
|
|
|
ranap_message *ranap_rab_ass_req_message;
|
|
|
|
ranap_message *ranap_rab_ass_resp_message;
|
context map: introduce RUA and SCCP FSMs to fix leaks
Refactor the entire RUA <-> SCCP connection-oriented message forwarding:
- conquer confusion about hnbgw_context_map release behavior, and
- eradicate SCCP connection leaks.
Finer points:
== Context map state ==
So far, we had a single context map state and some flags to keep track
of both the RUA and the SCCP connections. It was easy to miss connection
cleanup steps, especially on the SCCP side.
Instead, the two FSMs clearly define the RUA and SCCP conn states
separately, and each side takes care of its own release needs for all
possible scenarios.
- When both RUA and SCCP are released, the context map is discarded.
- A context map can stay around to wait for proper SCCP release, even if
the RUA side has lost the HNB connection.
- Completely drop the async "context mapper garbage collection", because
the FSMs clarify the release and free steps, synchronously.
- We still keep a (simplified) enum for global context map state, but
this is only used so that VTY reporting remains mostly unchanged.
== Context map cleanup confusion ==
The function context_map_hnb_released() was the general cleanup function
for a context map. Instead, add separate context_map_free().
== Free context maps separately from HNB ==
When a HNB releases, talloc_steal() the context maps out of the HNB
specific hnb_ctx, so that they are not freed along with the HNB state,
possibly leaving SCCP connections afloat.
(It is still nice to normally keep context maps as talloc children of
their respective hnb_ctx, so talloc reports show which belongs to
which.)
So far, context map handling found the global hnb_gw pointer via
map->hnb_ctx->gw. But in fact, a HNB may disappear at any point in time.
Instead, use a separate hnb_gw pointer in map->gw.
== RUA procedure codes vs. SCCP prims ==
So far, the RUA rx side composed SCCP prims to pass on:
RUA rx ---SCCP-prim--> RANAP handling ---SCCP-prim--> SCCP tx
That is a source of confusion: a RUA procedure code should not translate
1:1 to SCCP prims, especially for RUA id-Disconnect (see release charts
below).
Instead, move SCCP prim composition over to the SCCP side, using FSM
events to forward:
RUA rx --event--> RUA FSM --event--> SCCP FSM --SCCP-prim--> SCCP tx
+RANAP +RANAP +RANAP
RUA tx <--RUA---- RUA FSM <--event-- SCCP FSM <--event-- SCCP rx
+RANAP +RANAP +RANAP
Hence choose the correct prim according to the SCCP FSM state.
- in hnbgw_rua.c, use RUA procedure codes, not prim types.
- via the new FSM events' data args, pass msgb containing RANAP PDUs.
== Fix SCCP Release behavior ==
So far, the normal conn release behavior was
HNB HNBGW CN
| --id-Disconnect--> | ---SCCP-Released--> | Iu-ReleaseComplete
| | <--SCCP-RLC-------- | (no data)
Instead, the SCCP release is now in accordance with 3GPP TS 48.006 9.2
'Connection release':
The MSC sends a SCCP released message. This message shall not contain
any user data field.
i.e.:
HNB HNBGW CN
| --id-Disconnect--> | ---Data-Form-1(!)--> | Iu-ReleaseComplete
| | <--SCCP-Released---- | (no data)
| | ---SCCP-RLC--------> | (no data)
(Side note, the final SCCP Release Confirm step is taken care of
implicitly by libosmo-sigtran's sccp_scoc.c FSM.)
If the CN fails to respond with SCCP-Released, on new X31 timeout,
osmo-hnbgw will send an SCCP Released to the CN as fallback.
== Memory model for message dispatch ==
So far, an osmo_scu_prim aka "oph" was passed between RUA and SCCP
handling code, and the final dispatch freed it. Every error path had to
take care not to leak any oph.
Instead, use a much easier and much more leakage proof memory model,
inspired by fixeria:
- on rx, dispatch RANAP msgb that live in OTC_SELECT.
- no code path needs to msgb_free() -- the msgb is discarded via
OTC_SELECT when handling is done, error or no error.
- any code path may also choose to store the msgb for async dispatch,
using talloc_steal(). The user plane mapping via MGW and UPF do that.
- if any code path does msgb_free(), that would be no problem either
(but none do so now, for simplicity).
== Layer separation ==
Dispatch *all* connection-oriented RUA tx via the RUA FSM and SCCP tx
via the SCCP FSM, do not call rua_tx_dt() or osmo_sccp_user_sap_down()
directly.
== Memory model for decoded ranap_message IEs ==
Use a talloc destructor to make sure that the ranap_message IEs are
always implicitly freed upon talloc_free(), so that no code path can
possibly forget to do so.
== Implicit cleanup by talloc ==
Use talloc scoping to remove a bunch of explicit cleanup code. For
example, make a chached message a talloc child of its handler:
talloc_steal(mgw_fsm_priv, message);
mgw_fsm_priv->ranap_rab_ass_req_message = message;
and later implicitly free 'message' by only freeing the handler:
talloc_free(mgw_fsm_priv)
Related: SYS#6297
Change-Id: I6ff7e36532ff57c6f2d3e7e419dd22ef27dafd19
2023-02-12 04:02:48 +00:00
|
|
|
struct msgb *ranap_rab_ass_resp_msgb;
|
2022-01-06 16:05:08 +00:00
|
|
|
|
|
|
|
/* MGW context */
|
2023-01-17 22:27:03 +00:00
|
|
|
struct mgcp_client *mgcpc;
|
2022-01-06 16:05:08 +00:00
|
|
|
struct osmo_mgcpc_ep *mgcpc_ep;
|
|
|
|
struct osmo_mgcpc_ep_ci *mgcpc_ep_ci_hnb;
|
|
|
|
struct osmo_mgcpc_ep_ci *mgcpc_ep_ci_msc;
|
|
|
|
char msc_rtp_addr[INET6_ADDRSTRLEN];
|
|
|
|
uint16_t msc_rtp_port;
|
|
|
|
};
|
|
|
|
|
2022-04-02 00:10:16 +00:00
|
|
|
struct osmo_tdef_state_timeout mgw_fsm_timeouts[32] = {
|
2022-01-06 16:05:08 +00:00
|
|
|
[MGW_ST_CRCX_HNB] = {.T = -1001 },
|
|
|
|
[MGW_ST_ASSIGN] = {.T = -1002 },
|
|
|
|
[MGW_ST_MDCX_HNB] = {.T = -1003 },
|
|
|
|
[MGW_ST_CRCX_MSC] = {.T = -1004 },
|
|
|
|
};
|
|
|
|
|
2022-06-14 16:39:01 +00:00
|
|
|
/* Change the state of the MGW FSM instance, using the timeout configured for the new state in mgw_fsm_timeouts[]
 * and mgw_fsm_T_defs. */
#define mgw_fsm_state_chg(fi, state) \
	osmo_tdef_fsm_inst_state_chg((fi), (state), mgw_fsm_timeouts, mgw_fsm_T_defs, -1)
|
|
|
|
|
|
|
|
static void mgw_fsm_crcx_hnb_onenter(struct osmo_fsm_inst *fi, uint32_t prev_state)
|
|
|
|
{
|
|
|
|
struct mgw_fsm_priv *mgw_fsm_priv = fi->priv;
|
|
|
|
struct hnbgw_context_map *map = mgw_fsm_priv->map;
|
2022-06-14 16:29:56 +00:00
|
|
|
struct osmo_sockaddr addr;
|
|
|
|
struct osmo_sockaddr_str addr_str;
|
|
|
|
RANAP_RAB_AssignmentRequestIEs_t *ies;
|
2022-01-06 16:05:08 +00:00
|
|
|
const char *epname;
|
|
|
|
struct mgcp_conn_peer mgw_info;
|
2022-06-14 16:29:56 +00:00
|
|
|
int rc;
|
2022-01-06 16:05:08 +00:00
|
|
|
|
|
|
|
LOGPFSML(fi, LOGL_DEBUG, "RAB-AssignmentRequest received, creating HNB side call-leg on MGW...\n");
|
|
|
|
|
2022-06-14 16:29:56 +00:00
|
|
|
/* Parse the RAB Assignment Request now */
|
|
|
|
ies = &mgw_fsm_priv->ranap_rab_ass_req_message->msg.raB_AssignmentRequestIEs;
|
|
|
|
rc = ranap_rab_ass_req_ies_extract_inet_addr(&addr, &mgw_fsm_priv->rab_id, ies, 0);
|
|
|
|
if (rc < 0) {
|
|
|
|
LOGPFSML(fi, LOGL_ERROR, "Invalid RAB-AssignmentRequest -- abort\n");
|
|
|
|
osmo_fsm_inst_state_chg(fi, MGW_ST_FAILURE, 0, 0);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
rc = osmo_sockaddr_str_from_sockaddr(&addr_str, &addr.u.sas);
|
|
|
|
if (rc < 0) {
|
|
|
|
LOGPFSML(fi, LOGL_ERROR,
|
|
|
|
"Invalid RTP IP-address or port in RAB-AssignmentRequest -- abort\n");
|
|
|
|
osmo_fsm_inst_state_chg(fi, MGW_ST_FAILURE, 0, 0);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
osmo_strlcpy(mgw_fsm_priv->msc_rtp_addr, addr_str.ip, sizeof(mgw_fsm_priv->msc_rtp_addr));
|
|
|
|
mgw_fsm_priv->msc_rtp_port = addr_str.port;
|
|
|
|
|
2022-01-06 16:05:08 +00:00
|
|
|
mgw_info = (struct mgcp_conn_peer) {
|
|
|
|
.call_id = (map->rua_ctx_id << 8) | mgw_fsm_priv->rab_id,
|
|
|
|
.ptime = 20,
|
|
|
|
.conn_mode = MGCP_CONN_LOOPBACK,
|
|
|
|
};
|
|
|
|
mgw_info.codecs[0] = CODEC_IUFP;
|
|
|
|
mgw_info.codecs_len = 1;
|
|
|
|
|
context map: introduce RUA and SCCP FSMs to fix leaks
Refactor the entire RUA <-> SCCP connection-oriented message forwarding:
- conquer confusion about hnbgw_context_map release behavior, and
- eradicate SCCP connection leaks.
Finer points:
== Context map state ==
So far, we had a single context map state and some flags to keep track
of both the RUA and the SCCP connections. It was easy to miss connection
cleanup steps, especially on the SCCP side.
Instead, the two FSMs clearly define the RUA and SCCP conn states
separately, and each side takes care of its own release needs for all
possible scenarios.
- When both RUA and SCCP are released, the context map is discarded.
- A context map can stay around to wait for proper SCCP release, even if
the RUA side has lost the HNB connection.
- Completely drop the async "context mapper garbage collection", because
the FSMs clarify the release and free steps, synchronously.
- We still keep a (simplified) enum for global context map state, but
this is only used so that VTY reporting remains mostly unchanged.
== Context map cleanup confusion ==
The function context_map_hnb_released() was the general cleanup function
for a context map. Instead, add separate context_map_free().
== Free context maps separately from HNB ==
When a HNB releases, talloc_steal() the context maps out of the HNB
specific hnb_ctx, so that they are not freed along with the HNB state,
possibly leaving SCCP connections afloat.
(It is still nice to normally keep context maps as talloc children of
their respective hnb_ctx, so talloc reports show which belongs to
which.)
So far, context map handling found the global hnb_gw pointer via
map->hnb_ctx->gw. But in fact, a HNB may disappear at any point in time.
Instead, use a separate hnb_gw pointer in map->gw.
== RUA procedure codes vs. SCCP prims ==
So far, the RUA rx side composed SCCP prims to pass on:
RUA rx ---SCCP-prim--> RANAP handling ---SCCP-prim--> SCCP tx
That is a source of confusion: a RUA procedure code should not translate
1:1 to SCCP prims, especially for RUA id-Disconnect (see release charts
below).
Instead, move SCCP prim composition over to the SCCP side, using FSM
events to forward:
RUA rx --event--> RUA FSM --event--> SCCP FSM --SCCP-prim--> SCCP tx
+RANAP +RANAP +RANAP
RUA tx <--RUA---- RUA FSM <--event-- SCCP FSM <--event-- SCCP rx
+RANAP +RANAP +RANAP
Hence choose the correct prim according to the SCCP FSM state.
- in hnbgw_rua.c, use RUA procedure codes, not prim types.
- via the new FSM events' data args, pass msgb containing RANAP PDUs.
== Fix SCCP Release behavior ==
So far, the normal conn release behavior was
HNB HNBGW CN
| --id-Disconnect--> | ---SCCP-Released--> | Iu-ReleaseComplete
| | <--SCCP-RLC-------- | (no data)
Instead, the SCCP release is now in accordance with 3GPP TS 48.006 9.2
'Connection release':
The MSC sends a SCCP released message. This message shall not contain
any user data field.
i.e.:
HNB HNBGW CN
| --id-Disconnect--> | ---Data-Form-1(!)--> | Iu-ReleaseComplete
| | <--SCCP-Released---- | (no data)
| | ---SCCP-RLC--------> | (no data)
(Side note, the final SCCP Release Confirm step is taken care of
implicitly by libosmo-sigtran's sccp_scoc.c FSM.)
If the CN fails to respond with SCCP-Released, on new X31 timeout,
osmo-hnbgw will send an SCCP Released to the CN as fallback.
== Memory model for message dispatch ==
So far, an osmo_scu_prim aka "oph" was passed between RUA and SCCP
handling code, and the final dispatch freed it. Every error path had to
take care not to leak any oph.
Instead, use a much easier and much more leakage proof memory model,
inspired by fixeria:
- on rx, dispatch RANAP msgb that live in OTC_SELECT.
- no code path needs to msgb_free() -- the msgb is discarded via
OTC_SELECT when handling is done, error or no error.
- any code path may also choose to store the msgb for async dispatch,
using talloc_steal(). The user plane mapping via MGW and UPF do that.
- if any code path does msgb_free(), that would be no problem either
(but none do so now, for simplicity).
== Layer separation ==
Dispatch *all* connection-oriented RUA tx via the RUA FSM and SCCP tx
via the SCCP FSM, do not call rua_tx_dt() or osmo_sccp_user_sap_down()
directly.
== Memory model for decoded ranap_message IEs ==
Use a talloc destructor to make sure that the ranap_message IEs are
always implicitly freed upon talloc_free(), so that no code path can
possibly forget to do so.
== Implicit cleanup by talloc ==
Use talloc scoping to remove a bunch of explicit cleanup code. For
example, make a chached message a talloc child of its handler:
talloc_steal(mgw_fsm_priv, message);
mgw_fsm_priv->ranap_rab_ass_req_message = message;
and later implicitly free 'message' by only freeing the handler:
talloc_free(mgw_fsm_priv)
Related: SYS#6297
Change-Id: I6ff7e36532ff57c6f2d3e7e419dd22ef27dafd19
2023-02-12 04:02:48 +00:00
|
|
|
mgw_fsm_priv->mgcpc = mgcp_client_pool_get(map->gw->mgw_pool);
|
2023-01-17 22:27:03 +00:00
|
|
|
if (!mgw_fsm_priv->mgcpc) {
|
2022-10-19 12:19:22 +00:00
|
|
|
LOGPFSML(fi, LOGL_ERROR,
|
|
|
|
"cannot ensure MGW endpoint -- no MGW configured, check configuration!\n");
|
|
|
|
osmo_fsm_inst_state_chg(fi, MGW_ST_FAILURE, 0, 0);
|
|
|
|
return;
|
|
|
|
}
|
2023-01-17 22:27:03 +00:00
|
|
|
epname = mgcp_client_rtpbridge_wildcard(mgw_fsm_priv->mgcpc);
|
2022-01-06 16:05:08 +00:00
|
|
|
mgw_fsm_priv->mgcpc_ep =
|
2023-01-17 22:27:03 +00:00
|
|
|
osmo_mgcpc_ep_alloc(fi, MGW_EV_MGCP_TERM, mgw_fsm_priv->mgcpc, mgw_fsm_T_defs, fi->id, "%s", epname);
|
2022-01-06 16:05:08 +00:00
|
|
|
mgw_fsm_priv->mgcpc_ep_ci_hnb = osmo_mgcpc_ep_ci_add(mgw_fsm_priv->mgcpc_ep, "to-HNB");
|
|
|
|
|
|
|
|
osmo_mgcpc_ep_ci_request(mgw_fsm_priv->mgcpc_ep_ci_hnb, MGCP_VERB_CRCX, &mgw_info, fi, MGW_EV_MGCP_OK,
|
|
|
|
MGW_EV_MGCP_FAIL, NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void mgw_fsm_crcx_hnb(struct osmo_fsm_inst *fi, uint32_t event, void *data)
|
|
|
|
{
|
|
|
|
struct mgw_fsm_priv *mgw_fsm_priv = fi->priv;
|
|
|
|
const struct mgcp_conn_peer *mgw_info;
|
|
|
|
struct osmo_sockaddr addr;
|
|
|
|
struct osmo_sockaddr_str addr_str;
|
|
|
|
RANAP_RAB_AssignmentRequestIEs_t *ies;
|
|
|
|
int rc;
|
|
|
|
|
|
|
|
switch (event) {
|
|
|
|
case MGW_EV_MGCP_OK:
|
|
|
|
mgw_info = osmo_mgcpc_ep_ci_get_rtp_info(mgw_fsm_priv->mgcpc_ep_ci_hnb);
|
|
|
|
if (!mgw_info) {
|
2022-06-14 15:50:46 +00:00
|
|
|
LOGPFSML(fi, LOGL_ERROR, "Got no RTP info response from MGW\n");
|
2022-01-06 16:05:08 +00:00
|
|
|
osmo_fsm_inst_state_chg(fi, MGW_ST_FAILURE, 0, 0);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (strchr(mgw_info->addr, '.'))
|
|
|
|
addr_str.af = AF_INET;
|
|
|
|
else
|
|
|
|
addr_str.af = AF_INET6;
|
|
|
|
addr_str.port = mgw_info->port;
|
|
|
|
osmo_strlcpy(addr_str.ip, mgw_info->addr, sizeof(addr_str.ip));
|
|
|
|
rc = osmo_sockaddr_str_to_sockaddr(&addr_str, &addr.u.sas);
|
|
|
|
if (rc < 0) {
|
|
|
|
LOGPFSML(fi, LOGL_ERROR,
|
|
|
|
"Failed to convert RTP IP-address (%s) and Port (%u) to its binary representation\n",
|
|
|
|
mgw_info->addr, mgw_info->port);
|
|
|
|
osmo_fsm_inst_state_chg(fi, MGW_ST_FAILURE, 0, 0);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
ies = &mgw_fsm_priv->ranap_rab_ass_req_message->msg.raB_AssignmentRequestIEs;
|
|
|
|
rc = ranap_rab_ass_req_ies_replace_inet_addr(ies, &addr, mgw_fsm_priv->rab_id);
|
|
|
|
if (rc < 0) {
|
|
|
|
LOGPFSML(fi, LOGL_ERROR,
|
|
|
|
"Failed to replace RTP IP-address (%s) and Port (%u) in RAB-AssignmentRequest\n",
|
|
|
|
mgw_info->addr, mgw_info->port);
|
|
|
|
osmo_fsm_inst_state_chg(fi, MGW_ST_FAILURE, 0, 0);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2022-06-14 16:39:01 +00:00
|
|
|
mgw_fsm_state_chg(fi, MGW_ST_ASSIGN);
|
2022-01-06 16:05:08 +00:00
|
|
|
return;
|
|
|
|
default:
|
|
|
|
OSMO_ASSERT(false);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void mgw_fsm_assign_onenter(struct osmo_fsm_inst *fi, uint32_t prev_state)
|
|
|
|
{
|
|
|
|
struct mgw_fsm_priv *mgw_fsm_priv = fi->priv;
|
|
|
|
struct hnbgw_context_map *map = mgw_fsm_priv->map;
|
|
|
|
RANAP_RAB_AssignmentRequestIEs_t *ies;
|
2022-04-09 22:28:07 +00:00
|
|
|
struct msgb *msg;
|
2022-01-06 16:05:08 +00:00
|
|
|
|
|
|
|
ies = &mgw_fsm_priv->ranap_rab_ass_req_message->msg.raB_AssignmentRequestIEs;
|
2022-04-09 22:28:07 +00:00
|
|
|
msg = ranap_rab_ass_req_encode(ies);
|
|
|
|
if (!msg) {
|
2022-01-06 16:05:08 +00:00
|
|
|
LOGPFSML(fi, LOGL_ERROR, "failed to re-encode RAB-AssignmentRequest message\n");
|
|
|
|
osmo_fsm_inst_state_chg(fi, MGW_ST_FAILURE, 0, 0);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
LOGPFSML(fi, LOGL_DEBUG, "forwarding modified RAB-AssignmentRequest to HNB\n");
|
context map: introduce RUA and SCCP FSMs to fix leaks
Refactor the entire RUA <-> SCCP connection-oriented message forwarding:
- conquer confusion about hnbgw_context_map release behavior, and
- eradicate SCCP connection leaks.
Finer points:
== Context map state ==
So far, we had a single context map state and some flags to keep track
of both the RUA and the SCCP connections. It was easy to miss connection
cleanup steps, especially on the SCCP side.
Instead, the two FSMs clearly define the RUA and SCCP conn states
separately, and each side takes care of its own release needs for all
possible scenarios.
- When both RUA and SCCP are released, the context map is discarded.
- A context map can stay around to wait for proper SCCP release, even if
the RUA side has lost the HNB connection.
- Completely drop the async "context mapper garbage collection", because
the FSMs clarify the release and free steps, synchronously.
- We still keep a (simplified) enum for global context map state, but
this is only used so that VTY reporting remains mostly unchanged.
== Context map cleanup confusion ==
The function context_map_hnb_released() was the general cleanup function
for a context map. Instead, add separate context_map_free().
== Free context maps separately from HNB ==
When a HNB releases, talloc_steal() the context maps out of the HNB
specific hnb_ctx, so that they are not freed along with the HNB state,
possibly leaving SCCP connections afloat.
(It is still nice to normally keep context maps as talloc children of
their respective hnb_ctx, so talloc reports show which belongs to
which.)
So far, context map handling found the global hnb_gw pointer via
map->hnb_ctx->gw. But in fact, a HNB may disappear at any point in time.
Instead, use a separate hnb_gw pointer in map->gw.
== RUA procedure codes vs. SCCP prims ==
So far, the RUA rx side composed SCCP prims to pass on:
RUA rx ---SCCP-prim--> RANAP handling ---SCCP-prim--> SCCP tx
That is a source of confusion: a RUA procedure code should not translate
1:1 to SCCP prims, especially for RUA id-Disconnect (see release charts
below).
Instead, move SCCP prim composition over to the SCCP side, using FSM
events to forward:
RUA rx --event--> RUA FSM --event--> SCCP FSM --SCCP-prim--> SCCP tx
+RANAP +RANAP +RANAP
RUA tx <--RUA---- RUA FSM <--event-- SCCP FSM <--event-- SCCP rx
+RANAP +RANAP +RANAP
Hence choose the correct prim according to the SCCP FSM state.
- in hnbgw_rua.c, use RUA procedure codes, not prim types.
- via the new FSM events' data args, pass msgb containing RANAP PDUs.
== Fix SCCP Release behavior ==
So far, the normal conn release behavior was
HNB HNBGW CN
| --id-Disconnect--> | ---SCCP-Released--> | Iu-ReleaseComplete
| | <--SCCP-RLC-------- | (no data)
Instead, the SCCP release is now in accordance with 3GPP TS 48.006 9.2
'Connection release':
The MSC sends a SCCP released message. This message shall not contain
any user data field.
i.e.:
HNB HNBGW CN
| --id-Disconnect--> | ---Data-Form-1(!)--> | Iu-ReleaseComplete
| | <--SCCP-Released---- | (no data)
| | ---SCCP-RLC--------> | (no data)
(Side note, the final SCCP Release Confirm step is taken care of
implicitly by libosmo-sigtran's sccp_scoc.c FSM.)
If the CN fails to respond with SCCP-Released, on new X31 timeout,
osmo-hnbgw will send an SCCP Released to the CN as fallback.
== Memory model for message dispatch ==
So far, an osmo_scu_prim aka "oph" was passed between RUA and SCCP
handling code, and the final dispatch freed it. Every error path had to
take care not to leak any oph.
Instead, use a much easier and much more leakage proof memory model,
inspired by fixeria:
- on rx, dispatch RANAP msgb that live in OTC_SELECT.
- no code path needs to msgb_free() -- the msgb is discarded via
OTC_SELECT when handling is done, error or no error.
- any code path may also choose to store the msgb for async dispatch,
using talloc_steal(). The user plane mapping via MGW and UPF do that.
- if any code path does msgb_free(), that would be no problem either
(but none do so now, for simplicity).
== Layer separation ==
Dispatch *all* connection-oriented RUA tx via the RUA FSM and SCCP tx
via the SCCP FSM, do not call rua_tx_dt() or osmo_sccp_user_sap_down()
directly.
== Memory model for decoded ranap_message IEs ==
Use a talloc destructor to make sure that the ranap_message IEs are
always implicitly freed upon talloc_free(), so that no code path can
possibly forget to do so.
== Implicit cleanup by talloc ==
Use talloc scoping to remove a bunch of explicit cleanup code. For
example, make a chached message a talloc child of its handler:
talloc_steal(mgw_fsm_priv, message);
mgw_fsm_priv->ranap_rab_ass_req_message = message;
and later implicitly free 'message' by only freeing the handler:
talloc_free(mgw_fsm_priv)
Related: SYS#6297
Change-Id: I6ff7e36532ff57c6f2d3e7e419dd22ef27dafd19
2023-02-12 04:02:48 +00:00
|
|
|
msg->l2h = msg->data;
|
|
|
|
talloc_steal(OTC_SELECT, msg);
|
|
|
|
map_rua_dispatch(map, MAP_RUA_EV_TX_DIRECT_TRANSFER, msg);
|
2022-01-06 16:05:08 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static void mgw_fsm_assign(struct osmo_fsm_inst *fi, uint32_t event, void *data)
|
|
|
|
{
|
|
|
|
switch (event) {
|
|
|
|
case MGW_EV_RAB_ASS_RESP:
|
2022-06-14 16:39:01 +00:00
|
|
|
mgw_fsm_state_chg(fi, MGW_ST_MDCX_HNB);
|
2022-01-06 16:05:08 +00:00
|
|
|
return;
|
|
|
|
default:
|
|
|
|
OSMO_ASSERT(false);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void mgw_fsm_mdcx_hnb_onenter(struct osmo_fsm_inst *fi, uint32_t prev_state)
|
|
|
|
{
|
|
|
|
struct mgw_fsm_priv *mgw_fsm_priv = fi->priv;
|
|
|
|
struct hnbgw_context_map *map = mgw_fsm_priv->map;
|
|
|
|
struct mgcp_conn_peer mgw_info;
|
|
|
|
struct osmo_sockaddr addr;
|
|
|
|
struct osmo_sockaddr_str addr_str;
|
|
|
|
RANAP_RAB_AssignmentResponseIEs_t *ies;
|
|
|
|
int rc;
|
|
|
|
bool rab_failed_at_hnb;
|
|
|
|
|
|
|
|
LOGPFSML(fi, LOGL_DEBUG, "RAB-AssignmentResponse received, completing HNB side call-leg on MGW...\n");
|
|
|
|
|
|
|
|
mgw_info = (struct mgcp_conn_peer) {
|
|
|
|
.call_id = map->rua_ctx_id,
|
|
|
|
.ptime = 20,
|
|
|
|
.conn_mode = MGCP_CONN_RECV_SEND,
|
|
|
|
};
|
|
|
|
mgw_info.codecs[0] = CODEC_IUFP;
|
|
|
|
mgw_info.codecs_len = 1;
|
|
|
|
|
|
|
|
ies = &mgw_fsm_priv->ranap_rab_ass_resp_message->msg.raB_AssignmentResponseIEs;
|
|
|
|
rc = ranap_rab_ass_resp_ies_extract_inet_addr(&addr, ies, mgw_fsm_priv->rab_id);
|
|
|
|
if (rc < 0) {
|
|
|
|
rab_failed_at_hnb = ranap_rab_ass_resp_ies_check_failure(ies, mgw_fsm_priv->rab_id);
|
|
|
|
if (rab_failed_at_hnb) {
|
context map: introduce RUA and SCCP FSMs to fix leaks
Refactor the entire RUA <-> SCCP connection-oriented message forwarding:
- conquer confusion about hnbgw_context_map release behavior, and
- eradicate SCCP connection leaks.
Finer points:
== Context map state ==
So far, we had a single context map state and some flags to keep track
of both the RUA and the SCCP connections. It was easy to miss connection
cleanup steps, especially on the SCCP side.
Instead, the two FSMs clearly define the RUA and SCCP conn states
separately, and each side takes care of its own release needs for all
possible scenarios.
- When both RUA and SCCP are released, the context map is discarded.
- A context map can stay around to wait for proper SCCP release, even if
the RUA side has lost the HNB connection.
- Completely drop the async "context mapper garbage collection", because
the FSMs clarify the release and free steps, synchronously.
- We still keep a (simplified) enum for global context map state, but
this is only used so that VTY reporting remains mostly unchanged.
== Context map cleanup confusion ==
The function context_map_hnb_released() was the general cleanup function
for a context map. Instead, add separate context_map_free().
== Free context maps separately from HNB ==
When a HNB releases, talloc_steal() the context maps out of the HNB
specific hnb_ctx, so that they are not freed along with the HNB state,
possibly leaving SCCP connections afloat.
(It is still nice to normally keep context maps as talloc children of
their respective hnb_ctx, so talloc reports show which belongs to
which.)
So far, context map handling found the global hnb_gw pointer via
map->hnb_ctx->gw. But in fact, a HNB may disappear at any point in time.
Instead, use a separate hnb_gw pointer in map->gw.
== RUA procedure codes vs. SCCP prims ==
So far, the RUA rx side composed SCCP prims to pass on:
RUA rx ---SCCP-prim--> RANAP handling ---SCCP-prim--> SCCP tx
That is a source of confusion: a RUA procedure code should not translate
1:1 to SCCP prims, especially for RUA id-Disconnect (see release charts
below).
Instead, move SCCP prim composition over to the SCCP side, using FSM
events to forward:
RUA rx --event--> RUA FSM --event--> SCCP FSM --SCCP-prim--> SCCP tx
+RANAP +RANAP +RANAP
RUA tx <--RUA---- RUA FSM <--event-- SCCP FSM <--event-- SCCP rx
+RANAP +RANAP +RANAP
Hence choose the correct prim according to the SCCP FSM state.
- in hnbgw_rua.c, use RUA procedure codes, not prim types.
- via the new FSM events' data args, pass msgb containing RANAP PDUs.
== Fix SCCP Release behavior ==
So far, the normal conn release behavior was
HNB HNBGW CN
| --id-Disconnect--> | ---SCCP-Released--> | Iu-ReleaseComplete
| | <--SCCP-RLC-------- | (no data)
Instead, the SCCP release is now in accordance with 3GPP TS 48.006 9.2
'Connection release':
The MSC sends a SCCP released message. This message shall not contain
any user data field.
i.e.:
HNB HNBGW CN
| --id-Disconnect--> | ---Data-Form-1(!)--> | Iu-ReleaseComplete
| | <--SCCP-Released---- | (no data)
| | ---SCCP-RLC--------> | (no data)
(Side note, the final SCCP Release Confirm step is taken care of
implicitly by libosmo-sigtran's sccp_scoc.c FSM.)
If the CN fails to respond with SCCP-Released, on new X31 timeout,
osmo-hnbgw will send an SCCP Released to the CN as fallback.
== Memory model for message dispatch ==
So far, an osmo_scu_prim aka "oph" was passed between RUA and SCCP
handling code, and the final dispatch freed it. Every error path had to
take care not to leak any oph.
Instead, use a much easier and much more leakage proof memory model,
inspired by fixeria:
- on rx, dispatch RANAP msgb that live in OTC_SELECT.
- no code path needs to msgb_free() -- the msgb is discarded via
OTC_SELECT when handling is done, error or no error.
- any code path may also choose to store the msgb for async dispatch,
using talloc_steal(). The user plane mapping via MGW and UPF do that.
- if any code path does msgb_free(), that would be no problem either
(but none do so now, for simplicity).
== Layer separation ==
Dispatch *all* connection-oriented RUA tx via the RUA FSM and SCCP tx
via the SCCP FSM, do not call rua_tx_dt() or osmo_sccp_user_sap_down()
directly.
== Memory model for decoded ranap_message IEs ==
Use a talloc destructor to make sure that the ranap_message IEs are
always implicitly freed upon talloc_free(), so that no code path can
possibly forget to do so.
== Implicit cleanup by talloc ==
Use talloc scoping to remove a bunch of explicit cleanup code. For
example, make a cached message a talloc child of its handler:
talloc_steal(mgw_fsm_priv, message);
mgw_fsm_priv->ranap_rab_ass_req_message = message;
and later implicitly free 'message' by only freeing the handler:
talloc_free(mgw_fsm_priv)
Related: SYS#6297
Change-Id: I6ff7e36532ff57c6f2d3e7e419dd22ef27dafd19
2023-02-12 04:02:48 +00:00
|
|
|
struct msgb *msg;
|
|
|
|
|
2022-01-06 16:05:08 +00:00
|
|
|
LOGPFSML(fi, LOGL_ERROR,
|
|
|
|
"The RAB-AssignmentResponse contains a RAB-FailedList, RAB-Assignment (%u) failed.\n",
|
|
|
|
mgw_fsm_priv->rab_id);
|
|
|
|
|
|
|
|
/* Forward the RAB-AssignmentResponse transparently. This will ensure that the MSC is informed
|
|
|
|
* about the problem. */
|
|
|
|
LOGPFSML(fi, LOGL_DEBUG, "forwarding unmodified RAB-AssignmentResponse to MSC\n");
|
context map: introduce RUA and SCCP FSMs to fix leaks
Refactor the entire RUA <-> SCCP connection-oriented message forwarding:
- conquer confusion about hnbgw_context_map release behavior, and
- eradicate SCCP connection leaks.
Finer points:
== Context map state ==
So far, we had a single context map state and some flags to keep track
of both the RUA and the SCCP connections. It was easy to miss connection
cleanup steps, especially on the SCCP side.
Instead, the two FSMs clearly define the RUA and SCCP conn states
separately, and each side takes care of its own release needs for all
possible scenarios.
- When both RUA and SCCP are released, the context map is discarded.
- A context map can stay around to wait for proper SCCP release, even if
the RUA side has lost the HNB connection.
- Completely drop the async "context mapper garbage collection", because
the FSMs clarify the release and free steps, synchronously.
- We still keep a (simplified) enum for global context map state, but
this is only used so that VTY reporting remains mostly unchanged.
== Context map cleanup confusion ==
The function context_map_hnb_released() was the general cleanup function
for a context map. Instead, add separate context_map_free().
== Free context maps separately from HNB ==
When a HNB releases, talloc_steal() the context maps out of the HNB
specific hnb_ctx, so that they are not freed along with the HNB state,
possibly leaving SCCP connections afloat.
(It is still nice to normally keep context maps as talloc children of
their respective hnb_ctx, so talloc reports show which belongs to
which.)
So far, context map handling found the global hnb_gw pointer via
map->hnb_ctx->gw. But in fact, a HNB may disappear at any point in time.
Instead, use a separate hnb_gw pointer in map->gw.
== RUA procedure codes vs. SCCP prims ==
So far, the RUA rx side composed SCCP prims to pass on:
RUA rx ---SCCP-prim--> RANAP handling ---SCCP-prim--> SCCP tx
That is a source of confusion: a RUA procedure code should not translate
1:1 to SCCP prims, especially for RUA id-Disconnect (see release charts
below).
Instead, move SCCP prim composition over to the SCCP side, using FSM
events to forward:
RUA rx --event--> RUA FSM --event--> SCCP FSM --SCCP-prim--> SCCP tx
+RANAP +RANAP +RANAP
RUA tx <--RUA---- RUA FSM <--event-- SCCP FSM <--event-- SCCP rx
+RANAP +RANAP +RANAP
Hence choose the correct prim according to the SCCP FSM state.
- in hnbgw_rua.c, use RUA procedure codes, not prim types.
- via the new FSM events' data args, pass msgb containing RANAP PDUs.
== Fix SCCP Release behavior ==
So far, the normal conn release behavior was
HNB HNBGW CN
| --id-Disconnect--> | ---SCCP-Released--> | Iu-ReleaseComplete
| | <--SCCP-RLC-------- | (no data)
Instead, the SCCP release is now in accordance with 3GPP TS 48.006 9.2
'Connection release':
The MSC sends a SCCP released message. This message shall not contain
any user data field.
i.e.:
HNB HNBGW CN
| --id-Disconnect--> | ---Data-Form-1(!)--> | Iu-ReleaseComplete
| | <--SCCP-Released---- | (no data)
| | ---SCCP-RLC--------> | (no data)
(Side note, the final SCCP Release Confirm step is taken care of
implicitly by libosmo-sigtran's sccp_scoc.c FSM.)
If the CN fails to respond with SCCP-Released, on new X31 timeout,
osmo-hnbgw will send an SCCP Released to the CN as fallback.
== Memory model for message dispatch ==
So far, an osmo_scu_prim aka "oph" was passed between RUA and SCCP
handling code, and the final dispatch freed it. Every error path had to
take care not to leak any oph.
Instead, use a much easier and much more leakage proof memory model,
inspired by fixeria:
- on rx, dispatch RANAP msgb that live in OTC_SELECT.
- no code path needs to msgb_free() -- the msgb is discarded via
OTC_SELECT when handling is done, error or no error.
- any code path may also choose to store the msgb for async dispatch,
using talloc_steal(). The user plane mapping via MGW and UPF do that.
- if any code path does msgb_free(), that would be no problem either
(but none do so now, for simplicity).
== Layer separation ==
Dispatch *all* connection-oriented RUA tx via the RUA FSM and SCCP tx
via the SCCP FSM, do not call rua_tx_dt() or osmo_sccp_user_sap_down()
directly.
== Memory model for decoded ranap_message IEs ==
Use a talloc destructor to make sure that the ranap_message IEs are
always implicitly freed upon talloc_free(), so that no code path can
possibly forget to do so.
== Implicit cleanup by talloc ==
Use talloc scoping to remove a bunch of explicit cleanup code. For
example, make a cached message a talloc child of its handler:
talloc_steal(mgw_fsm_priv, message);
mgw_fsm_priv->ranap_rab_ass_req_message = message;
and later implicitly free 'message' by only freeing the handler:
talloc_free(mgw_fsm_priv)
Related: SYS#6297
Change-Id: I6ff7e36532ff57c6f2d3e7e419dd22ef27dafd19
2023-02-12 04:02:48 +00:00
|
|
|
|
|
|
|
msg = mgw_fsm_priv->ranap_rab_ass_resp_msgb;
|
|
|
|
mgw_fsm_priv->ranap_rab_ass_resp_msgb = NULL;
|
|
|
|
talloc_steal(OTC_SELECT, msg);
|
|
|
|
|
|
|
|
rc = map_sccp_dispatch(map, MAP_SCCP_EV_TX_DATA_REQUEST, msg);
|
2022-01-06 16:05:08 +00:00
|
|
|
if (rc < 0) {
|
|
|
|
LOGPFSML(fi, LOGL_DEBUG, "failed to forward RAB-AssignmentResponse message\n");
|
|
|
|
osmo_fsm_inst_state_chg(fi, MGW_ST_FAILURE, 0, 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Even though this is a failure situation, we still release normally as the error is located
|
|
|
|
* at the HNB. */
|
|
|
|
osmo_fsm_inst_state_chg(fi, MGW_ST_RELEASE, 0, 0);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* The RAB-ID we are dealing with is not on an FailedList and we were unable to parse the response
|
|
|
|
* normally. This is a situation we cannot recover from. */
|
|
|
|
LOGPFSML(fi, LOGL_ERROR, "Failed to extract RTP IP-address and Port from RAB-AssignmentResponse\n");
|
|
|
|
osmo_fsm_inst_state_chg(fi, MGW_ST_FAILURE, 0, 0);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
rc = osmo_sockaddr_str_from_sockaddr(&addr_str, &addr.u.sas);
|
|
|
|
if (rc < 0) {
|
|
|
|
LOGPFSML(fi, LOGL_ERROR, "Invalid RTP IP-address or Port in RAB-AssignmentResponse\n");
|
|
|
|
osmo_fsm_inst_state_chg(fi, MGW_ST_FAILURE, 0, 0);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
osmo_strlcpy(mgw_info.addr, addr_str.ip, sizeof(mgw_info.addr));
|
|
|
|
mgw_info.port = addr_str.port;
|
|
|
|
|
|
|
|
osmo_mgcpc_ep_ci_request(mgw_fsm_priv->mgcpc_ep_ci_hnb, MGCP_VERB_MDCX, &mgw_info, fi, MGW_EV_MGCP_OK,
|
|
|
|
MGW_EV_MGCP_FAIL, NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void mgw_fsm_mdcx_hnb(struct osmo_fsm_inst *fi, uint32_t event, void *data)
|
|
|
|
{
|
|
|
|
struct mgw_fsm_priv *mgw_fsm_priv = fi->priv;
|
|
|
|
const struct mgcp_conn_peer *mgw_info;
|
|
|
|
|
|
|
|
switch (event) {
|
|
|
|
case MGW_EV_MGCP_OK:
|
|
|
|
mgw_info = osmo_mgcpc_ep_ci_get_rtp_info(mgw_fsm_priv->mgcpc_ep_ci_hnb);
|
|
|
|
if (!mgw_info) {
|
2022-06-14 15:50:46 +00:00
|
|
|
LOGPFSML(fi, LOGL_ERROR, "Got no RTP info response from MGW\n");
|
2022-01-06 16:05:08 +00:00
|
|
|
osmo_fsm_inst_state_chg(fi, MGW_ST_FAILURE, 0, 0);
|
|
|
|
return;
|
|
|
|
}
|
2022-06-14 16:39:01 +00:00
|
|
|
mgw_fsm_state_chg(fi, MGW_ST_CRCX_MSC);
|
2022-01-06 16:05:08 +00:00
|
|
|
return;
|
|
|
|
default:
|
|
|
|
OSMO_ASSERT(false);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void mgw_fsm_crcx_msc_onenter(struct osmo_fsm_inst *fi, uint32_t prev_state)
|
|
|
|
{
|
|
|
|
struct mgw_fsm_priv *mgw_fsm_priv = fi->priv;
|
|
|
|
struct hnbgw_context_map *map = mgw_fsm_priv->map;
|
|
|
|
struct mgcp_conn_peer mgw_info;
|
|
|
|
|
|
|
|
LOGPFSML(fi, LOGL_DEBUG, "creating MSC side call-leg on MGW...\n");
|
|
|
|
|
|
|
|
mgw_info = (struct mgcp_conn_peer) {
|
|
|
|
.call_id = (map->rua_ctx_id << 8) | mgw_fsm_priv->rab_id,
|
|
|
|
.ptime = 20,
|
|
|
|
.port = mgw_fsm_priv->msc_rtp_port,
|
|
|
|
};
|
|
|
|
|
|
|
|
osmo_strlcpy(mgw_info.addr, mgw_fsm_priv->msc_rtp_addr, sizeof(mgw_info.addr));
|
|
|
|
mgw_info.codecs[0] = CODEC_IUFP;
|
|
|
|
mgw_info.codecs_len = 1;
|
|
|
|
|
|
|
|
mgw_fsm_priv->mgcpc_ep_ci_msc = osmo_mgcpc_ep_ci_add(mgw_fsm_priv->mgcpc_ep, "to-MSC");
|
|
|
|
osmo_mgcpc_ep_ci_request(mgw_fsm_priv->mgcpc_ep_ci_msc, MGCP_VERB_CRCX, &mgw_info, fi, MGW_EV_MGCP_OK,
|
|
|
|
MGW_EV_MGCP_FAIL, NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void mgw_fsm_crcx_msc(struct osmo_fsm_inst *fi, uint32_t event, void *data)
|
|
|
|
{
|
|
|
|
struct mgw_fsm_priv *mgw_fsm_priv = fi->priv;
|
|
|
|
const struct mgcp_conn_peer *mgw_info;
|
|
|
|
struct osmo_sockaddr addr;
|
|
|
|
struct osmo_sockaddr_str addr_str;
|
|
|
|
int rc;
|
|
|
|
int msg_max_len;
|
|
|
|
RANAP_RAB_AssignmentResponseIEs_t *ies;
|
|
|
|
|
|
|
|
switch (event) {
|
|
|
|
case MGW_EV_MGCP_OK:
|
|
|
|
ies = &mgw_fsm_priv->ranap_rab_ass_resp_message->msg.raB_AssignmentResponseIEs;
|
|
|
|
|
|
|
|
mgw_info = osmo_mgcpc_ep_ci_get_rtp_info(mgw_fsm_priv->mgcpc_ep_ci_msc);
|
|
|
|
if (!mgw_info) {
|
|
|
|
LOGPFSML(fi, LOGL_ERROR, "Got no response from MGW\n");
|
|
|
|
osmo_fsm_inst_state_chg(fi, MGW_ST_FAILURE, 0, 0);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Replace RTP IP-Address/Port in ranap message container */
|
|
|
|
if (strchr(mgw_info->addr, '.'))
|
|
|
|
addr_str.af = AF_INET;
|
|
|
|
else
|
|
|
|
addr_str.af = AF_INET6;
|
|
|
|
addr_str.port = mgw_info->port;
|
|
|
|
osmo_strlcpy(addr_str.ip, mgw_info->addr, sizeof(addr_str.ip));
|
|
|
|
rc = osmo_sockaddr_str_to_sockaddr(&addr_str, &addr.u.sas);
|
|
|
|
if (rc < 0) {
|
|
|
|
LOGPFSML(fi, LOGL_ERROR,
|
|
|
|
"Failed to convert RTP IP-address (%s) and Port (%u) to its binary representation\n",
|
|
|
|
mgw_info->addr, mgw_info->port);
|
|
|
|
osmo_fsm_inst_state_chg(fi, MGW_ST_FAILURE, 0, 0);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
rc = ranap_rab_ass_resp_ies_replace_inet_addr(ies, &addr, mgw_fsm_priv->rab_id);
|
|
|
|
if (rc < 0) {
|
|
|
|
LOGPFSML(fi, LOGL_ERROR,
|
|
|
|
"Failed to replace RTP IP-address (%s) and Port (%u) in RAB-AssignmentResponse\n",
|
|
|
|
mgw_info->addr, mgw_info->port);
|
|
|
|
osmo_fsm_inst_state_chg(fi, MGW_ST_FAILURE, 0, 0);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* When the modified ranap message container is re-encoded, the resulting message might be larger then
|
|
|
|
* the original message. Ensure that there is enough room in l2h to grow. (The current implementation
|
|
|
|
* should yield a message with the same size, but there is no guarantee for that) */
|
|
|
|
msg_max_len =
|
context map: introduce RUA and SCCP FSMs to fix leaks
Refactor the entire RUA <-> SCCP connection-oriented message forwarding:
- conquer confusion about hnbgw_context_map release behavior, and
- eradicate SCCP connection leaks.
Finer points:
== Context map state ==
So far, we had a single context map state and some flags to keep track
of both the RUA and the SCCP connections. It was easy to miss connection
cleanup steps, especially on the SCCP side.
Instead, the two FSMs clearly define the RUA and SCCP conn states
separately, and each side takes care of its own release needs for all
possible scenarios.
- When both RUA and SCCP are released, the context map is discarded.
- A context map can stay around to wait for proper SCCP release, even if
the RUA side has lost the HNB connection.
- Completely drop the async "context mapper garbage collection", because
the FSMs clarify the release and free steps, synchronously.
- We still keep a (simplified) enum for global context map state, but
this is only used so that VTY reporting remains mostly unchanged.
== Context map cleanup confusion ==
The function context_map_hnb_released() was the general cleanup function
for a context map. Instead, add separate context_map_free().
== Free context maps separately from HNB ==
When a HNB releases, talloc_steal() the context maps out of the HNB
specific hnb_ctx, so that they are not freed along with the HNB state,
possibly leaving SCCP connections afloat.
(It is still nice to normally keep context maps as talloc children of
their respective hnb_ctx, so talloc reports show which belongs to
which.)
So far, context map handling found the global hnb_gw pointer via
map->hnb_ctx->gw. But in fact, a HNB may disappear at any point in time.
Instead, use a separate hnb_gw pointer in map->gw.
== RUA procedure codes vs. SCCP prims ==
So far, the RUA rx side composed SCCP prims to pass on:
RUA rx ---SCCP-prim--> RANAP handling ---SCCP-prim--> SCCP tx
That is a source of confusion: a RUA procedure code should not translate
1:1 to SCCP prims, especially for RUA id-Disconnect (see release charts
below).
Instead, move SCCP prim composition over to the SCCP side, using FSM
events to forward:
RUA rx --event--> RUA FSM --event--> SCCP FSM --SCCP-prim--> SCCP tx
+RANAP +RANAP +RANAP
RUA tx <--RUA---- RUA FSM <--event-- SCCP FSM <--event-- SCCP rx
+RANAP +RANAP +RANAP
Hence choose the correct prim according to the SCCP FSM state.
- in hnbgw_rua.c, use RUA procedure codes, not prim types.
- via the new FSM events' data args, pass msgb containing RANAP PDUs.
== Fix SCCP Release behavior ==
So far, the normal conn release behavior was
HNB HNBGW CN
| --id-Disconnect--> | ---SCCP-Released--> | Iu-ReleaseComplete
| | <--SCCP-RLC-------- | (no data)
Instead, the SCCP release is now in accordance with 3GPP TS 48.006 9.2
'Connection release':
The MSC sends a SCCP released message. This message shall not contain
any user data field.
i.e.:
HNB HNBGW CN
| --id-Disconnect--> | ---Data-Form-1(!)--> | Iu-ReleaseComplete
| | <--SCCP-Released---- | (no data)
| | ---SCCP-RLC--------> | (no data)
(Side note, the final SCCP Release Confirm step is taken care of
implicitly by libosmo-sigtran's sccp_scoc.c FSM.)
If the CN fails to respond with SCCP-Released, on new X31 timeout,
osmo-hnbgw will send an SCCP Released to the CN as fallback.
== Memory model for message dispatch ==
So far, an osmo_scu_prim aka "oph" was passed between RUA and SCCP
handling code, and the final dispatch freed it. Every error path had to
take care not to leak any oph.
Instead, use a much easier and much more leakage proof memory model,
inspired by fixeria:
- on rx, dispatch RANAP msgb that live in OTC_SELECT.
- no code path needs to msgb_free() -- the msgb is discarded via
OTC_SELECT when handling is done, error or no error.
- any code path may also choose to store the msgb for async dispatch,
using talloc_steal(). The user plane mapping via MGW and UPF do that.
- if any code path does msgb_free(), that would be no problem either
(but none do so now, for simplicity).
== Layer separation ==
Dispatch *all* connection-oriented RUA tx via the RUA FSM and SCCP tx
via the SCCP FSM, do not call rua_tx_dt() or osmo_sccp_user_sap_down()
directly.
== Memory model for decoded ranap_message IEs ==
Use a talloc destructor to make sure that the ranap_message IEs are
always implicitly freed upon talloc_free(), so that no code path can
possibly forget to do so.
== Implicit cleanup by talloc ==
Use talloc scoping to remove a bunch of explicit cleanup code. For
example, make a chached message a talloc child of its handler:
talloc_steal(mgw_fsm_priv, message);
mgw_fsm_priv->ranap_rab_ass_req_message = message;
and later implicitly free 'message' by only freeing the handler:
talloc_free(mgw_fsm_priv)
Related: SYS#6297
Change-Id: I6ff7e36532ff57c6f2d3e7e419dd22ef27dafd19
2023-02-12 04:02:48 +00:00
|
|
|
msgb_l2len(mgw_fsm_priv->ranap_rab_ass_resp_msgb) +
|
|
|
|
msgb_tailroom(mgw_fsm_priv->ranap_rab_ass_resp_msgb);
|
|
|
|
rc = msgb_resize_area(mgw_fsm_priv->ranap_rab_ass_resp_msgb,
|
|
|
|
mgw_fsm_priv->ranap_rab_ass_resp_msgb->l2h,
|
|
|
|
msgb_l2len(mgw_fsm_priv->ranap_rab_ass_resp_msgb), msg_max_len);
|
2022-01-06 16:05:08 +00:00
|
|
|
OSMO_ASSERT(rc == 0);
|
|
|
|
|
context map: introduce RUA and SCCP FSMs to fix leaks
Refactor the entire RUA <-> SCCP connection-oriented message forwarding:
- conquer confusion about hnbgw_context_map release behavior, and
- eradicate SCCP connection leaks.
Finer points:
== Context map state ==
So far, we had a single context map state and some flags to keep track
of both the RUA and the SCCP connections. It was easy to miss connection
cleanup steps, especially on the SCCP side.
Instead, the two FSMs clearly define the RUA and SCCP conn states
separately, and each side takes care of its own release needs for all
possible scenarios.
- When both RUA and SCCP are released, the context map is discarded.
- A context map can stay around to wait for proper SCCP release, even if
the RUA side has lost the HNB connection.
- Completely drop the async "context mapper garbage collection", because
the FSMs clarify the release and free steps, synchronously.
- We still keep a (simplified) enum for global context map state, but
this is only used so that VTY reporting remains mostly unchanged.
== Context map cleanup confusion ==
The function context_map_hnb_released() was the general cleanup function
for a context map. Instead, add separate context_map_free().
== Free context maps separately from HNB ==
When a HNB releases, talloc_steal() the context maps out of the HNB
specific hnb_ctx, so that they are not freed along with the HNB state,
possibly leaving SCCP connections afloat.
(It is still nice to normally keep context maps as talloc children of
their respective hnb_ctx, so talloc reports show which belongs to
which.)
So far, context map handling found the global hnb_gw pointer via
map->hnb_ctx->gw. But in fact, a HNB may disappear at any point in time.
Instead, use a separate hnb_gw pointer in map->gw.
== RUA procedure codes vs. SCCP prims ==
So far, the RUA rx side composed SCCP prims to pass on:
RUA rx ---SCCP-prim--> RANAP handling ---SCCP-prim--> SCCP tx
That is a source of confusion: a RUA procedure code should not translate
1:1 to SCCP prims, especially for RUA id-Disconnect (see release charts
below).
Instead, move SCCP prim composition over to the SCCP side, using FSM
events to forward:
RUA rx --event--> RUA FSM --event--> SCCP FSM --SCCP-prim--> SCCP tx
+RANAP +RANAP +RANAP
RUA tx <--RUA---- RUA FSM <--event-- SCCP FSM <--event-- SCCP rx
+RANAP +RANAP +RANAP
Hence choose the correct prim according to the SCCP FSM state.
- in hnbgw_rua.c, use RUA procedure codes, not prim types.
- via the new FSM events' data args, pass msgb containing RANAP PDUs.
== Fix SCCP Release behavior ==
So far, the normal conn release behavior was
HNB HNBGW CN
| --id-Disconnect--> | ---SCCP-Released--> | Iu-ReleaseComplete
| | <--SCCP-RLC-------- | (no data)
Instead, the SCCP release is now in accordance with 3GPP TS 48.006 9.2
'Connection release':
The MSC sends a SCCP released message. This message shall not contain
any user data field.
i.e.:
HNB HNBGW CN
| --id-Disconnect--> | ---Data-Form-1(!)--> | Iu-ReleaseComplete
| | <--SCCP-Released---- | (no data)
| | ---SCCP-RLC--------> | (no data)
(Side note, the final SCCP Release Confirm step is taken care of
implicitly by libosmo-sigtran's sccp_scoc.c FSM.)
If the CN fails to respond with SCCP-Released, on new X31 timeout,
osmo-hnbgw will send an SCCP Released to the CN as fallback.
== Memory model for message dispatch ==
So far, an osmo_scu_prim aka "oph" was passed between RUA and SCCP
handling code, and the final dispatch freed it. Every error path had to
take care not to leak any oph.
Instead, use a much easier and much more leakage proof memory model,
inspired by fixeria:
- on rx, dispatch RANAP msgb that live in OTC_SELECT.
- no code path needs to msgb_free() -- the msgb is discarded via
OTC_SELECT when handling is done, error or no error.
- any code path may also choose to store the msgb for async dispatch,
using talloc_steal(). The user plane mapping via MGW and UPF do that.
- if any code path does msgb_free(), that would be no problem either
(but none do so now, for simplicity).
== Layer separation ==
Dispatch *all* connection-oriented RUA tx via the RUA FSM and SCCP tx
via the SCCP FSM, do not call rua_tx_dt() or osmo_sccp_user_sap_down()
directly.
== Memory model for decoded ranap_message IEs ==
Use a talloc destructor to make sure that the ranap_message IEs are
always implicitly freed upon talloc_free(), so that no code path can
possibly forget to do so.
== Implicit cleanup by talloc ==
Use talloc scoping to remove a bunch of explicit cleanup code. For
example, make a chached message a talloc child of its handler:
talloc_steal(mgw_fsm_priv, message);
mgw_fsm_priv->ranap_rab_ass_req_message = message;
and later implicitly free 'message' by only freeing the handler:
talloc_free(mgw_fsm_priv)
Related: SYS#6297
Change-Id: I6ff7e36532ff57c6f2d3e7e419dd22ef27dafd19
2023-02-12 04:02:48 +00:00
|
|
|
rc = ranap_rab_ass_resp_encode(msgb_l2(mgw_fsm_priv->ranap_rab_ass_resp_msgb),
|
|
|
|
msgb_l2len(mgw_fsm_priv->ranap_rab_ass_resp_msgb), ies);
|
2022-01-06 16:05:08 +00:00
|
|
|
if (rc < 0) {
|
|
|
|
LOGPFSML(fi, LOGL_ERROR, "failed to re-encode RAB-AssignmentResponse message\n");
|
|
|
|
osmo_fsm_inst_state_chg(fi, MGW_ST_FAILURE, 0, 0);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Resize l2h back to the actual message length */
|
context map: introduce RUA and SCCP FSMs to fix leaks
Refactor the entire RUA <-> SCCP connection-oriented message forwarding:
- conquer confusion about hnbgw_context_map release behavior, and
- eradicate SCCP connection leaks.
Finer points:
== Context map state ==
So far, we had a single context map state and some flags to keep track
of both the RUA and the SCCP connections. It was easy to miss connection
cleanup steps, especially on the SCCP side.
Instead, the two FSMs clearly define the RUA and SCCP conn states
separately, and each side takes care of its own release needs for all
possible scenarios.
- When both RUA and SCCP are released, the context map is discarded.
- A context map can stay around to wait for proper SCCP release, even if
the RUA side has lost the HNB connection.
- Completely drop the async "context mapper garbage collection", because
the FSMs clarify the release and free steps, synchronously.
- We still keep a (simplified) enum for global context map state, but
this is only used so that VTY reporting remains mostly unchanged.
== Context map cleanup confusion ==
The function context_map_hnb_released() was the general cleanup function
for a context map. Instead, add separate context_map_free().
== Free context maps separately from HNB ==
When a HNB releases, talloc_steal() the context maps out of the HNB
specific hnb_ctx, so that they are not freed along with the HNB state,
possibly leaving SCCP connections afloat.
(It is still nice to normally keep context maps as talloc children of
their respective hnb_ctx, so talloc reports show which belongs to
which.)
So far, context map handling found the global hnb_gw pointer via
map->hnb_ctx->gw. But in fact, a HNB may disappear at any point in time.
Instead, use a separate hnb_gw pointer in map->gw.
== RUA procedure codes vs. SCCP prims ==
So far, the RUA rx side composed SCCP prims to pass on:
RUA rx ---SCCP-prim--> RANAP handling ---SCCP-prim--> SCCP tx
That is a source of confusion: a RUA procedure code should not translate
1:1 to SCCP prims, especially for RUA id-Disconnect (see release charts
below).
Instead, move SCCP prim composition over to the SCCP side, using FSM
events to forward:
RUA rx --event--> RUA FSM --event--> SCCP FSM --SCCP-prim--> SCCP tx
+RANAP +RANAP +RANAP
RUA tx <--RUA---- RUA FSM <--event-- SCCP FSM <--event-- SCCP rx
+RANAP +RANAP +RANAP
Hence choose the correct prim according to the SCCP FSM state.
- in hnbgw_rua.c, use RUA procedure codes, not prim types.
- via the new FSM events' data args, pass msgb containing RANAP PDUs.
== Fix SCCP Release behavior ==
So far, the normal conn release behavior was
HNB HNBGW CN
| --id-Disconnect--> | ---SCCP-Released--> | Iu-ReleaseComplete
| | <--SCCP-RLC-------- | (no data)
Instead, the SCCP release is now in accordance with 3GPP TS 48.006 9.2
'Connection release':
The MSC sends a SCCP released message. This message shall not contain
any user data field.
i.e.:
HNB HNBGW CN
| --id-Disconnect--> | ---Data-Form-1(!)--> | Iu-ReleaseComplete
| | <--SCCP-Released---- | (no data)
| | ---SCCP-RLC--------> | (no data)
(Side note, the final SCCP Release Confirm step is taken care of
implicitly by libosmo-sigtran's sccp_scoc.c FSM.)
If the CN fails to respond with SCCP-Released, on new X31 timeout,
osmo-hnbgw will send an SCCP Released to the CN as fallback.
== Memory model for message dispatch ==
So far, an osmo_scu_prim aka "oph" was passed between RUA and SCCP
handling code, and the final dispatch freed it. Every error path had to
take care not to leak any oph.
Instead, use a much easier and much more leakage proof memory model,
inspired by fixeria:
- on rx, dispatch RANAP msgb that live in OTC_SELECT.
- no code path needs to msgb_free() -- the msgb is discarded via
OTC_SELECT when handling is done, error or no error.
- any code path may also choose to store the msgb for async dispatch,
using talloc_steal(). The user plane mapping via MGW and UPF do that.
- if any code path does msgb_free(), that would be no problem either
(but none do so now, for simplicity).
== Layer separation ==
Dispatch *all* connection-oriented RUA tx via the RUA FSM and SCCP tx
via the SCCP FSM, do not call rua_tx_dt() or osmo_sccp_user_sap_down()
directly.
== Memory model for decoded ranap_message IEs ==
Use a talloc destructor to make sure that the ranap_message IEs are
always implicitly freed upon talloc_free(), so that no code path can
possibly forget to do so.
== Implicit cleanup by talloc ==
Use talloc scoping to remove a bunch of explicit cleanup code. For
example, make a chached message a talloc child of its handler:
talloc_steal(mgw_fsm_priv, message);
mgw_fsm_priv->ranap_rab_ass_req_message = message;
and later implicitly free 'message' by only freeing the handler:
talloc_free(mgw_fsm_priv)
Related: SYS#6297
Change-Id: I6ff7e36532ff57c6f2d3e7e419dd22ef27dafd19
2023-02-12 04:02:48 +00:00
|
|
|
rc = msgb_resize_area(mgw_fsm_priv->ranap_rab_ass_resp_msgb,
|
|
|
|
mgw_fsm_priv->ranap_rab_ass_resp_msgb->l2h,
|
|
|
|
msgb_l2len(mgw_fsm_priv->ranap_rab_ass_resp_msgb), rc);
|
2022-01-06 16:05:08 +00:00
|
|
|
OSMO_ASSERT(rc == 0);
|
|
|
|
|
|
|
|
/* When the established state is entered, the modified RAB AssignmentResponse is forwarded to the MSC.
|
|
|
|
* The call is then established any way may stay for an indefinate amount of time in this state until
|
|
|
|
* there is an IU Release happening. */
|
|
|
|
osmo_fsm_inst_state_chg(fi, MGW_ST_ESTABLISHED, 0, 0);
|
|
|
|
return;
|
|
|
|
default:
|
|
|
|
OSMO_ASSERT(false);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void mgw_fsm_established_onenter(struct osmo_fsm_inst *fi, uint32_t prev_state)
|
|
|
|
{
|
|
|
|
struct mgw_fsm_priv *mgw_fsm_priv = fi->priv;
|
|
|
|
struct hnbgw_context_map *map = mgw_fsm_priv->map;
|
context map: introduce RUA and SCCP FSMs to fix leaks
Refactor the entire RUA <-> SCCP connection-oriented message forwarding:
- conquer confusion about hnbgw_context_map release behavior, and
- eradicate SCCP connection leaks.
Finer points:
== Context map state ==
So far, we had a single context map state and some flags to keep track
of both the RUA and the SCCP connections. It was easy to miss connection
cleanup steps, especially on the SCCP side.
Instead, the two FSMs clearly define the RUA and SCCP conn states
separately, and each side takes care of its own release needs for all
possible scenarios.
- When both RUA and SCCP are released, the context map is discarded.
- A context map can stay around to wait for proper SCCP release, even if
the RUA side has lost the HNB connection.
- Completely drop the async "context mapper garbage collection", because
the FSMs clarify the release and free steps, synchronously.
- We still keep a (simplified) enum for global context map state, but
this is only used so that VTY reporting remains mostly unchanged.
== Context map cleanup confusion ==
The function context_map_hnb_released() was the general cleanup function
for a context map. Instead, add separate context_map_free().
== Free context maps separately from HNB ==
When a HNB releases, talloc_steal() the context maps out of the HNB
specific hnb_ctx, so that they are not freed along with the HNB state,
possibly leaving SCCP connections afloat.
(It is still nice to normally keep context maps as talloc children of
their respective hnb_ctx, so talloc reports show which belongs to
which.)
So far, context map handling found the global hnb_gw pointer via
map->hnb_ctx->gw. But in fact, a HNB may disappear at any point in time.
Instead, use a separate hnb_gw pointer in map->gw.
== RUA procedure codes vs. SCCP prims ==
So far, the RUA rx side composed SCCP prims to pass on:
RUA rx ---SCCP-prim--> RANAP handling ---SCCP-prim--> SCCP tx
That is a source of confusion: a RUA procedure code should not translate
1:1 to SCCP prims, especially for RUA id-Disconnect (see release charts
below).
Instead, move SCCP prim composition over to the SCCP side, using FSM
events to forward:
RUA rx --event--> RUA FSM --event--> SCCP FSM --SCCP-prim--> SCCP tx
+RANAP +RANAP +RANAP
RUA tx <--RUA---- RUA FSM <--event-- SCCP FSM <--event-- SCCP rx
+RANAP +RANAP +RANAP
Hence choose the correct prim according to the SCCP FSM state.
- in hnbgw_rua.c, use RUA procedure codes, not prim types.
- via the new FSM events' data args, pass msgb containing RANAP PDUs.
== Fix SCCP Release behavior ==
So far, the normal conn release behavior was
HNB HNBGW CN
| --id-Disconnect--> | ---SCCP-Released--> | Iu-ReleaseComplete
| | <--SCCP-RLC-------- | (no data)
Instead, the SCCP release is now in accordance with 3GPP TS 48.006 9.2
'Connection release':
The MSC sends a SCCP released message. This message shall not contain
any user data field.
i.e.:
HNB HNBGW CN
| --id-Disconnect--> | ---Data-Form-1(!)--> | Iu-ReleaseComplete
| | <--SCCP-Released---- | (no data)
| | ---SCCP-RLC--------> | (no data)
(Side note, the final SCCP Release Confirm step is taken care of
implicitly by libosmo-sigtran's sccp_scoc.c FSM.)
If the CN fails to respond with SCCP-Released, on new X31 timeout,
osmo-hnbgw will send an SCCP Released to the CN as fallback.
== Memory model for message dispatch ==
So far, an osmo_scu_prim aka "oph" was passed between RUA and SCCP
handling code, and the final dispatch freed it. Every error path had to
take care not to leak any oph.
Instead, use a much easier and much more leakage proof memory model,
inspired by fixeria:
- on rx, dispatch RANAP msgb that live in OTC_SELECT.
- no code path needs to msgb_free() -- the msgb is discarded via
OTC_SELECT when handling is done, error or no error.
- any code path may also choose to store the msgb for async dispatch,
using talloc_steal(). The user plane mapping via MGW and UPF do that.
- if any code path does msgb_free(), that would be no problem either
(but none do so now, for simplicity).
== Layer separation ==
Dispatch *all* connection-oriented RUA tx via the RUA FSM and SCCP tx
via the SCCP FSM, do not call rua_tx_dt() or osmo_sccp_user_sap_down()
directly.
== Memory model for decoded ranap_message IEs ==
Use a talloc destructor to make sure that the ranap_message IEs are
always implicitly freed upon talloc_free(), so that no code path can
possibly forget to do so.
== Implicit cleanup by talloc ==
Use talloc scoping to remove a bunch of explicit cleanup code. For
example, make a cached message a talloc child of its handler:
talloc_steal(mgw_fsm_priv, message);
mgw_fsm_priv->ranap_rab_ass_req_message = message;
and later implicitly free 'message' by only freeing the handler:
talloc_free(mgw_fsm_priv)
Related: SYS#6297
Change-Id: I6ff7e36532ff57c6f2d3e7e419dd22ef27dafd19
2023-02-12 04:02:48 +00:00
|
|
|
struct msgb *ranap_msg;
|
2022-01-06 16:05:08 +00:00
|
|
|
int rc;
|
|
|
|
|
|
|
|
LOGPFSML(fi, LOGL_DEBUG, "forwarding modified RAB-AssignmentResponse to MSC\n");
|
context map: introduce RUA and SCCP FSMs to fix leaks
Refactor the entire RUA <-> SCCP connection-oriented message forwarding:
- conquer confusion about hnbgw_context_map release behavior, and
- eradicate SCCP connection leaks.
Finer points:
== Context map state ==
So far, we had a single context map state and some flags to keep track
of both the RUA and the SCCP connections. It was easy to miss connection
cleanup steps, especially on the SCCP side.
Instead, the two FSMs clearly define the RUA and SCCP conn states
separately, and each side takes care of its own release needs for all
possible scenarios.
- When both RUA and SCCP are released, the context map is discarded.
- A context map can stay around to wait for proper SCCP release, even if
the RUA side has lost the HNB connection.
- Completely drop the async "context mapper garbage collection", because
the FSMs clarify the release and free steps, synchronously.
- We still keep a (simplified) enum for global context map state, but
this is only used so that VTY reporting remains mostly unchanged.
== Context map cleanup confusion ==
The function context_map_hnb_released() was the general cleanup function
for a context map. Instead, add separate context_map_free().
== Free context maps separately from HNB ==
When a HNB releases, talloc_steal() the context maps out of the HNB
specific hnb_ctx, so that they are not freed along with the HNB state,
possibly leaving SCCP connections afloat.
(It is still nice to normally keep context maps as talloc children of
their respective hnb_ctx, so talloc reports show which belongs to
which.)
So far, context map handling found the global hnb_gw pointer via
map->hnb_ctx->gw. But in fact, a HNB may disappear at any point in time.
Instead, use a separate hnb_gw pointer in map->gw.
== RUA procedure codes vs. SCCP prims ==
So far, the RUA rx side composed SCCP prims to pass on:
RUA rx ---SCCP-prim--> RANAP handling ---SCCP-prim--> SCCP tx
That is a source of confusion: a RUA procedure code should not translate
1:1 to SCCP prims, especially for RUA id-Disconnect (see release charts
below).
Instead, move SCCP prim composition over to the SCCP side, using FSM
events to forward:
RUA rx --event--> RUA FSM --event--> SCCP FSM --SCCP-prim--> SCCP tx
+RANAP +RANAP +RANAP
RUA tx <--RUA---- RUA FSM <--event-- SCCP FSM <--event-- SCCP rx
+RANAP +RANAP +RANAP
Hence choose the correct prim according to the SCCP FSM state.
- in hnbgw_rua.c, use RUA procedure codes, not prim types.
- via the new FSM events' data args, pass msgb containing RANAP PDUs.
== Fix SCCP Release behavior ==
So far, the normal conn release behavior was
HNB HNBGW CN
| --id-Disconnect--> | ---SCCP-Released--> | Iu-ReleaseComplete
| | <--SCCP-RLC-------- | (no data)
Instead, the SCCP release is now in accordance with 3GPP TS 48.006 9.2
'Connection release':
The MSC sends a SCCP released message. This message shall not contain
any user data field.
i.e.:
HNB HNBGW CN
| --id-Disconnect--> | ---Data-Form-1(!)--> | Iu-ReleaseComplete
| | <--SCCP-Released---- | (no data)
| | ---SCCP-RLC--------> | (no data)
(Side note, the final SCCP Release Confirm step is taken care of
implicitly by libosmo-sigtran's sccp_scoc.c FSM.)
If the CN fails to respond with SCCP-Released, on new X31 timeout,
osmo-hnbgw will send an SCCP Released to the CN as fallback.
== Memory model for message dispatch ==
So far, an osmo_scu_prim aka "oph" was passed between RUA and SCCP
handling code, and the final dispatch freed it. Every error path had to
take care not to leak any oph.
Instead, use a much easier and much more leakage proof memory model,
inspired by fixeria:
- on rx, dispatch RANAP msgb that live in OTC_SELECT.
- no code path needs to msgb_free() -- the msgb is discarded via
OTC_SELECT when handling is done, error or no error.
- any code path may also choose to store the msgb for async dispatch,
using talloc_steal(). The user plane mapping via MGW and UPF do that.
- if any code path does msgb_free(), that would be no problem either
(but none do so now, for simplicity).
== Layer separation ==
Dispatch *all* connection-oriented RUA tx via the RUA FSM and SCCP tx
via the SCCP FSM, do not call rua_tx_dt() or osmo_sccp_user_sap_down()
directly.
== Memory model for decoded ranap_message IEs ==
Use a talloc destructor to make sure that the ranap_message IEs are
always implicitly freed upon talloc_free(), so that no code path can
possibly forget to do so.
== Implicit cleanup by talloc ==
Use talloc scoping to remove a bunch of explicit cleanup code. For
example, make a cached message a talloc child of its handler:
talloc_steal(mgw_fsm_priv, message);
mgw_fsm_priv->ranap_rab_ass_req_message = message;
and later implicitly free 'message' by only freeing the handler:
talloc_free(mgw_fsm_priv)
Related: SYS#6297
Change-Id: I6ff7e36532ff57c6f2d3e7e419dd22ef27dafd19
2023-02-12 04:02:48 +00:00
|
|
|
|
|
|
|
ranap_msg = mgw_fsm_priv->ranap_rab_ass_resp_msgb;
|
|
|
|
mgw_fsm_priv->ranap_rab_ass_resp_msgb = NULL;
|
|
|
|
talloc_steal(OTC_SELECT, ranap_msg);
|
|
|
|
|
|
|
|
rc = map_sccp_dispatch(map, MAP_SCCP_EV_TX_DATA_REQUEST, ranap_msg);
|
2022-01-06 16:05:08 +00:00
|
|
|
if (rc < 0) {
|
|
|
|
LOGPFSML(fi, LOGL_DEBUG, "failed to forward RAB-AssignmentResponse message\n");
|
|
|
|
osmo_fsm_inst_state_chg(fi, MGW_ST_FAILURE, 0, 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
LOGPFSML(fi, LOGL_DEBUG, "HNB and MSC side call-legs completed!\n");
|
|
|
|
}
|
|
|
|
|
|
|
|
static void mgw_fsm_release_onenter(struct osmo_fsm_inst *fi, uint32_t prev_state)
|
|
|
|
{
|
|
|
|
osmo_fsm_inst_term(fi, OSMO_FSM_TERM_REGULAR, NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void mgw_fsm_failure_onenter(struct osmo_fsm_inst *fi, uint32_t prev_state)
|
|
|
|
{
|
|
|
|
struct mgw_fsm_priv *mgw_fsm_priv = fi->priv;
|
|
|
|
tx_release_req(mgw_fsm_priv->map);
|
|
|
|
osmo_fsm_inst_term(fi, OSMO_FSM_TERM_ERROR, NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void mgw_fsm_allstate_action(struct osmo_fsm_inst *fi, uint32_t event, void *data)
|
|
|
|
{
|
|
|
|
struct mgw_fsm_priv *mgw_fsm_priv = fi->priv;
|
|
|
|
|
|
|
|
switch (event) {
|
|
|
|
case MGW_EV_MGCP_TERM:
|
2022-10-19 12:19:22 +00:00
|
|
|
/* Put MGCP client back into MGW pool */
|
2023-01-17 22:27:03 +00:00
|
|
|
if (mgw_fsm_priv->mgcpc) {
|
|
|
|
mgcp_client_pool_put(mgw_fsm_priv->mgcpc);
|
|
|
|
mgw_fsm_priv->mgcpc = NULL;
|
|
|
|
}
|
2022-01-06 16:05:08 +00:00
|
|
|
mgw_fsm_priv->mgcpc_ep = NULL;
|
|
|
|
LOGPFSML(fi, LOGL_ERROR, "Media gateway failed\n");
|
|
|
|
osmo_fsm_inst_state_chg(fi, MGW_ST_FAILURE, 0, 0);
|
|
|
|
return;
|
|
|
|
case MGW_EV_MGCP_FAIL:
|
|
|
|
LOGPFSML(fi, LOGL_ERROR, "Media gateway failed to switch RTP streams\n");
|
|
|
|
osmo_fsm_inst_state_chg(fi, MGW_ST_FAILURE, 0, 0);
|
|
|
|
return;
|
|
|
|
case MGW_EV_RELEASE:
|
|
|
|
osmo_fsm_inst_state_chg(fi, MGW_ST_RELEASE, 0, 0);
|
|
|
|
return;
|
|
|
|
default:
|
|
|
|
OSMO_ASSERT(false);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static int mgw_fsm_timer_cb(struct osmo_fsm_inst *fi)
|
|
|
|
{
|
|
|
|
osmo_fsm_inst_term(fi, OSMO_FSM_TERM_ERROR, NULL);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2022-06-14 16:29:56 +00:00
|
|
|
static void mgw_fsm_cleanup(struct osmo_fsm_inst *fi, enum osmo_fsm_term_cause cause)
|
2022-01-06 16:05:08 +00:00
|
|
|
{
|
2022-06-14 16:29:56 +00:00
|
|
|
struct mgw_fsm_priv *mgw_fsm_priv = fi->priv;
|
2022-01-06 16:05:08 +00:00
|
|
|
talloc_free(mgw_fsm_priv);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void mgw_fsm_pre_term(struct osmo_fsm_inst *fi, enum osmo_fsm_term_cause cause)
|
|
|
|
{
|
|
|
|
struct mgw_fsm_priv *mgw_fsm_priv = fi->priv;
|
|
|
|
struct hnbgw_context_map *map = mgw_fsm_priv->map;
|
|
|
|
|
|
|
|
if (mgw_fsm_priv->mgcpc_ep) {
|
2022-10-19 12:19:22 +00:00
|
|
|
/* Put MGCP client back into MGW pool */
|
|
|
|
struct mgcp_client *mgcp_client = osmo_mgcpc_ep_client(mgw_fsm_priv->mgcpc_ep);
|
|
|
|
mgcp_client_pool_put(mgcp_client);
|
|
|
|
|
2022-01-06 16:05:08 +00:00
|
|
|
osmo_mgcpc_ep_clear(mgw_fsm_priv->mgcpc_ep);
|
|
|
|
mgw_fsm_priv->mgcpc_ep = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Remove FSM from the context map. This will make this FSM unreachable for events coming from outside */
|
|
|
|
map->mgw_fi = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
static const struct osmo_fsm_state mgw_fsm_states[] = {
|
|
|
|
[MGW_ST_CRCX_HNB] = {
|
|
|
|
.name = "MGW_ST_CRCX_HNB",
|
|
|
|
.onenter = mgw_fsm_crcx_hnb_onenter,
|
|
|
|
.action = mgw_fsm_crcx_hnb,
|
|
|
|
.in_event_mask =
|
|
|
|
S(MGW_EV_MGCP_OK),
|
|
|
|
.out_state_mask =
|
|
|
|
S(MGW_ST_ASSIGN) |
|
|
|
|
S(MGW_ST_FAILURE) |
|
|
|
|
S(MGW_ST_RELEASE) |
|
|
|
|
S(MGW_ST_CRCX_HNB),
|
|
|
|
},
|
|
|
|
[MGW_ST_ASSIGN] = {
|
|
|
|
.name = "MGW_ST_ASSIGN",
|
|
|
|
.onenter = mgw_fsm_assign_onenter,
|
|
|
|
.action = mgw_fsm_assign,
|
|
|
|
.in_event_mask = S(MGW_EV_RAB_ASS_RESP),
|
|
|
|
.out_state_mask =
|
|
|
|
S(MGW_ST_MDCX_HNB) |
|
|
|
|
S(MGW_ST_FAILURE) |
|
|
|
|
S(MGW_ST_RELEASE),
|
|
|
|
},
|
|
|
|
[MGW_ST_MDCX_HNB] = {
|
|
|
|
.name = "MGW_ST_MDCX_HNB",
|
|
|
|
.onenter = mgw_fsm_mdcx_hnb_onenter,
|
|
|
|
.action = mgw_fsm_mdcx_hnb,
|
|
|
|
.in_event_mask =
|
|
|
|
S(MGW_EV_MGCP_OK),
|
|
|
|
.out_state_mask =
|
|
|
|
S(MGW_ST_CRCX_MSC) |
|
|
|
|
S(MGW_ST_FAILURE) |
|
|
|
|
S(MGW_ST_RELEASE),
|
|
|
|
},
|
|
|
|
[MGW_ST_CRCX_MSC] = {
|
|
|
|
.name = "MGW_ST_CRCX_MSC",
|
|
|
|
.onenter = mgw_fsm_crcx_msc_onenter,
|
|
|
|
.action = mgw_fsm_crcx_msc,
|
|
|
|
.in_event_mask =
|
|
|
|
S(MGW_EV_MGCP_OK),
|
|
|
|
.out_state_mask =
|
|
|
|
S(MGW_ST_ESTABLISHED) |
|
|
|
|
S(MGW_ST_FAILURE) |
|
|
|
|
S(MGW_ST_RELEASE),
|
|
|
|
},
|
|
|
|
[MGW_ST_ESTABLISHED] = {
|
|
|
|
.name = "MGW_ST_ESTABLISHED",
|
|
|
|
.onenter = mgw_fsm_established_onenter,
|
|
|
|
.in_event_mask = 0,
|
|
|
|
.out_state_mask =
|
|
|
|
S(MGW_ST_FAILURE) |
|
|
|
|
S(MGW_ST_RELEASE),
|
|
|
|
},
|
|
|
|
[MGW_ST_RELEASE] = {
|
|
|
|
.name = "MGW_ST_RELEASE",
|
|
|
|
.onenter = mgw_fsm_release_onenter,
|
|
|
|
.in_event_mask = 0,
|
|
|
|
.out_state_mask = 0,
|
|
|
|
},
|
|
|
|
[MGW_ST_FAILURE] = {
|
|
|
|
.name = "MGW_ST_FAILURE",
|
|
|
|
.onenter = mgw_fsm_failure_onenter,
|
|
|
|
.in_event_mask = 0,
|
|
|
|
.out_state_mask = 0,
|
|
|
|
},
|
|
|
|
};
|
|
|
|
|
|
|
|
static struct osmo_fsm mgw_fsm = {
|
|
|
|
.name = "mgw",
|
|
|
|
.states = mgw_fsm_states,
|
|
|
|
.num_states = ARRAY_SIZE(mgw_fsm_states),
|
|
|
|
.log_subsys = DMGW,
|
|
|
|
.event_names = mgw_fsm_event_names,
|
|
|
|
.allstate_action = mgw_fsm_allstate_action,
|
|
|
|
.allstate_event_mask = S(MGW_EV_MGCP_TERM) | S(MGW_EV_RELEASE) | S(MGW_EV_MGCP_FAIL),
|
|
|
|
.timer_cb = mgw_fsm_timer_cb,
|
|
|
|
.cleanup = mgw_fsm_cleanup,
|
|
|
|
.pre_term = mgw_fsm_pre_term,
|
|
|
|
};
|
|
|
|
|
|
|
|
/* The MSC may ask to release a specific RAB within a RAB-AssignmentRequest */
|
2023-03-20 14:47:28 +00:00
|
|
|
static int release_mgw_fsm(struct hnbgw_context_map *map, struct msgb *ranap_msg)
|
2022-01-06 16:05:08 +00:00
|
|
|
{
|
|
|
|
struct osmo_fsm_inst *fi = map->mgw_fi;
|
|
|
|
int rc;
|
|
|
|
|
|
|
|
/* Forward the unmodifed RAB-AssignmentRequest to HNB, so that the HNB is informed about the RAB release as
|
|
|
|
* well */
|
|
|
|
LOGPFSML(fi, LOGL_DEBUG, "forwarding unmodified RAB-AssignmentRequest to HNB\n");
|
context map: introduce RUA and SCCP FSMs to fix leaks
Refactor the entire RUA <-> SCCP connection-oriented message forwarding:
- conquer confusion about hnbgw_context_map release behavior, and
- eradicate SCCP connection leaks.
Finer points:
== Context map state ==
So far, we had a single context map state and some flags to keep track
of both the RUA and the SCCP connections. It was easy to miss connection
cleanup steps, especially on the SCCP side.
Instead, the two FSMs clearly define the RUA and SCCP conn states
separately, and each side takes care of its own release needs for all
possible scenarios.
- When both RUA and SCCP are released, the context map is discarded.
- A context map can stay around to wait for proper SCCP release, even if
the RUA side has lost the HNB connection.
- Completely drop the async "context mapper garbage collection", because
the FSMs clarify the release and free steps, synchronously.
- We still keep a (simplified) enum for global context map state, but
this is only used so that VTY reporting remains mostly unchanged.
== Context map cleanup confusion ==
The function context_map_hnb_released() was the general cleanup function
for a context map. Instead, add separate context_map_free().
== Free context maps separately from HNB ==
When a HNB releases, talloc_steal() the context maps out of the HNB
specific hnb_ctx, so that they are not freed along with the HNB state,
possibly leaving SCCP connections afloat.
(It is still nice to normally keep context maps as talloc children of
their respective hnb_ctx, so talloc reports show which belongs to
which.)
So far, context map handling found the global hnb_gw pointer via
map->hnb_ctx->gw. But in fact, a HNB may disappear at any point in time.
Instead, use a separate hnb_gw pointer in map->gw.
== RUA procedure codes vs. SCCP prims ==
So far, the RUA rx side composed SCCP prims to pass on:
RUA rx ---SCCP-prim--> RANAP handling ---SCCP-prim--> SCCP tx
That is a source of confusion: a RUA procedure code should not translate
1:1 to SCCP prims, especially for RUA id-Disconnect (see release charts
below).
Instead, move SCCP prim composition over to the SCCP side, using FSM
events to forward:
RUA rx --event--> RUA FSM --event--> SCCP FSM --SCCP-prim--> SCCP tx
+RANAP +RANAP +RANAP
RUA tx <--RUA---- RUA FSM <--event-- SCCP FSM <--event-- SCCP rx
+RANAP +RANAP +RANAP
Hence choose the correct prim according to the SCCP FSM state.
- in hnbgw_rua.c, use RUA procedure codes, not prim types.
- via the new FSM events' data args, pass msgb containing RANAP PDUs.
== Fix SCCP Release behavior ==
So far, the normal conn release behavior was
HNB HNBGW CN
| --id-Disconnect--> | ---SCCP-Released--> | Iu-ReleaseComplete
| | <--SCCP-RLC-------- | (no data)
Instead, the SCCP release is now in accordance with 3GPP TS 48.006 9.2
'Connection release':
The MSC sends a SCCP released message. This message shall not contain
any user data field.
i.e.:
HNB HNBGW CN
| --id-Disconnect--> | ---Data-Form-1(!)--> | Iu-ReleaseComplete
| | <--SCCP-Released---- | (no data)
| | ---SCCP-RLC--------> | (no data)
(Side note, the final SCCP Release Confirm step is taken care of
implicitly by libosmo-sigtran's sccp_scoc.c FSM.)
If the CN fails to respond with SCCP-Released, on new X31 timeout,
osmo-hnbgw will send an SCCP Released to the CN as fallback.
== Memory model for message dispatch ==
So far, an osmo_scu_prim aka "oph" was passed between RUA and SCCP
handling code, and the final dispatch freed it. Every error path had to
take care not to leak any oph.
Instead, use a much easier and much more leakage proof memory model,
inspired by fixeria:
- on rx, dispatch RANAP msgb that live in OTC_SELECT.
- no code path needs to msgb_free() -- the msgb is discarded via
OTC_SELECT when handling is done, error or no error.
- any code path may also choose to store the msgb for async dispatch,
using talloc_steal(). The user plane mapping via MGW and UPF do that.
- if any code path does msgb_free(), that would be no problem either
(but none do so now, for simplicity).
== Layer separation ==
Dispatch *all* connection-oriented RUA tx via the RUA FSM and SCCP tx
via the SCCP FSM, do not call rua_tx_dt() or osmo_sccp_user_sap_down()
directly.
== Memory model for decoded ranap_message IEs ==
Use a talloc destructor to make sure that the ranap_message IEs are
always implicitly freed upon talloc_free(), so that no code path can
possibly forget to do so.
== Implicit cleanup by talloc ==
Use talloc scoping to remove a bunch of explicit cleanup code. For
example, make a chached message a talloc child of its handler:
talloc_steal(mgw_fsm_priv, message);
mgw_fsm_priv->ranap_rab_ass_req_message = message;
and later implicitly free 'message' by only freeing the handler:
talloc_free(mgw_fsm_priv)
Related: SYS#6297
Change-Id: I6ff7e36532ff57c6f2d3e7e419dd22ef27dafd19
2023-02-12 04:02:48 +00:00
|
|
|
rc = map_rua_dispatch(map, MAP_RUA_EV_TX_DIRECT_TRANSFER, ranap_msg);
|
2023-03-20 14:47:28 +00:00
|
|
|
if (rc < 0) {
|
|
|
|
LOGPFSML(fi, LOGL_DEBUG, "cannot forward RAB-AssignmentRequest to HNB\n");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
2022-01-06 16:05:08 +00:00
|
|
|
|
|
|
|
/* Release the FSM normally */
|
|
|
|
osmo_fsm_inst_state_chg(fi, MGW_ST_RELEASE, 0, 0);
|
2023-03-20 14:47:28 +00:00
|
|
|
return 0;
|
|
|
|
}
|
2022-01-06 16:05:08 +00:00
|
|
|
|
2023-03-20 14:47:28 +00:00
|
|
|
/* Check if the message contains a RAB-ReleaseItem that matches the RAB-ID that is managed by the given context map */
|
|
|
|
static bool is_our_rab_release(struct hnbgw_context_map *map, ranap_message *message)
|
|
|
|
{
|
|
|
|
bool rab_release_req;
|
|
|
|
struct osmo_fsm_inst *fi = map->mgw_fi;
|
|
|
|
struct mgw_fsm_priv *mgw_fsm_priv = fi->priv;
|
|
|
|
|
|
|
|
/* Check if the RAB that is handled by this FSM is addressed by the release request */
|
|
|
|
rab_release_req = ranap_rab_ass_req_ies_check_release(&message->msg.raB_AssignmentRequestIEs,
|
|
|
|
mgw_fsm_priv->rab_id);
|
|
|
|
if (!rab_release_req) {
|
|
|
|
LOGPFSML(map->mgw_fi, LOGL_ERROR, "RAB-AssignmentRequest does not contain any RAB-RelaseItem with RAB-ID %u\n", mgw_fsm_priv->rab_id);
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
LOGPFSML(map->mgw_fi, LOGL_NOTICE, "MSC asked to release RAB-ID %u\n", mgw_fsm_priv->rab_id);
|
|
|
|
|
|
|
|
return true;
|
2022-01-06 16:05:08 +00:00
|
|
|
}
|
|
|
|
|
context map: introduce RUA and SCCP FSMs to fix leaks
Refactor the entire RUA <-> SCCP connection-oriented message forwarding:
- conquer confusion about hnbgw_context_map release behavior, and
- eradicate SCCP connection leaks.
Finer points:
== Context map state ==
So far, we had a single context map state and some flags to keep track
of both the RUA and the SCCP connections. It was easy to miss connection
cleanup steps, especially on the SCCP side.
Instead, the two FSMs clearly define the RUA and SCCP conn states
separately, and each side takes care of its own release needs for all
possible scenarios.
- When both RUA and SCCP are released, the context map is discarded.
- A context map can stay around to wait for proper SCCP release, even if
the RUA side has lost the HNB connection.
- Completely drop the async "context mapper garbage collection", because
the FSMs clarify the release and free steps, synchronously.
- We still keep a (simplified) enum for global context map state, but
this is only used so that VTY reporting remains mostly unchanged.
== Context map cleanup confusion ==
The function context_map_hnb_released() was the general cleanup function
for a context map. Instead, add separate context_map_free().
== Free context maps separately from HNB ==
When a HNB releases, talloc_steal() the context maps out of the HNB
specific hnb_ctx, so that they are not freed along with the HNB state,
possibly leaving SCCP connections afloat.
(It is still nice to normally keep context maps as talloc children of
their respective hnb_ctx, so talloc reports show which belongs to
which.)
So far, context map handling found the global hnb_gw pointer via
map->hnb_ctx->gw. But in fact, a HNB may disappear at any point in time.
Instead, use a separate hnb_gw pointer in map->gw.
== RUA procedure codes vs. SCCP prims ==
So far, the RUA rx side composed SCCP prims to pass on:
RUA rx ---SCCP-prim--> RANAP handling ---SCCP-prim--> SCCP tx
That is a source of confusion: a RUA procedure code should not translate
1:1 to SCCP prims, especially for RUA id-Disconnect (see release charts
below).
Instead, move SCCP prim composition over to the SCCP side, using FSM
events to forward:
RUA rx --event--> RUA FSM --event--> SCCP FSM --SCCP-prim--> SCCP tx
+RANAP +RANAP +RANAP
RUA tx <--RUA---- RUA FSM <--event-- SCCP FSM <--event-- SCCP rx
+RANAP +RANAP +RANAP
Hence choose the correct prim according to the SCCP FSM state.
- in hnbgw_rua.c, use RUA procedure codes, not prim types.
- via the new FSM events' data args, pass msgb containing RANAP PDUs.
== Fix SCCP Release behavior ==
So far, the normal conn release behavior was
HNB HNBGW CN
| --id-Disconnect--> | ---SCCP-Released--> | Iu-ReleaseComplete
| | <--SCCP-RLC-------- | (no data)
Instead, the SCCP release is now in accordance with 3GPP TS 48.006 9.2
'Connection release':
The MSC sends a SCCP released message. This message shall not contain
any user data field.
i.e.:
HNB HNBGW CN
| --id-Disconnect--> | ---Data-Form-1(!)--> | Iu-ReleaseComplete
| | <--SCCP-Released---- | (no data)
| | ---SCCP-RLC--------> | (no data)
(Side note, the final SCCP Release Confirm step is taken care of
implicitly by libosmo-sigtran's sccp_scoc.c FSM.)
If the CN fails to respond with SCCP-Released, on new X31 timeout,
osmo-hnbgw will send an SCCP Released to the CN as fallback.
== Memory model for message dispatch ==
So far, an osmo_scu_prim aka "oph" was passed between RUA and SCCP
handling code, and the final dispatch freed it. Every error path had to
take care not to leak any oph.
Instead, use a much easier and much more leakage proof memory model,
inspired by fixeria:
- on rx, dispatch RANAP msgb that live in OTC_SELECT.
- no code path needs to msgb_free() -- the msgb is discarded via
OTC_SELECT when handling is done, error or no error.
- any code path may also choose to store the msgb for async dispatch,
using talloc_steal(). The user plane mapping via MGW and UPF do that.
- if any code path does msgb_free(), that would be no problem either
(but none do so now, for simplicity).
== Layer separation ==
Dispatch *all* connection-oriented RUA tx via the RUA FSM and SCCP tx
via the SCCP FSM, do not call rua_tx_dt() or osmo_sccp_user_sap_down()
directly.
== Memory model for decoded ranap_message IEs ==
Use a talloc destructor to make sure that the ranap_message IEs are
always implicitly freed upon talloc_free(), so that no code path can
possibly forget to do so.
== Implicit cleanup by talloc ==
Use talloc scoping to remove a bunch of explicit cleanup code. For
example, make a cached message a talloc child of its handler:
talloc_steal(mgw_fsm_priv, message);
mgw_fsm_priv->ranap_rab_ass_req_message = message;
and later implicitly free 'message' by only freeing the handler:
talloc_free(mgw_fsm_priv)
Related: SYS#6297
Change-Id: I6ff7e36532ff57c6f2d3e7e419dd22ef27dafd19
2023-02-12 04:02:48 +00:00
|
|
|
/*! Allocate MGW FSM and handle RANAP RAB AssignmentRequest.
|
|
|
|
* \param[in] map hnbgw context map that is responsible for this call.
|
|
|
|
* \param[in] ranap_msg msgb containing RANAP RAB AssignmentRequest at msgb_l2(), allocated in OTC_SELECT.
|
|
|
|
* This function may talloc_steal(ranap_msg) to keep it for later.
|
|
|
|
* \param[in] message decoded RANAP message container, allocated in OTC_SELECT.
|
|
|
|
* This function may talloc_steal(message) to keep it for later.
|
2022-01-06 16:05:08 +00:00
|
|
|
* \returns 0 on success; negative on error. */
|
context map: introduce RUA and SCCP FSMs to fix leaks
Refactor the entire RUA <-> SCCP connection-oriented message forwarding:
- conquer confusion about hnbgw_context_map release behavior, and
- eradicate SCCP connection leaks.
Finer points:
== Context map state ==
So far, we had a single context map state and some flags to keep track
of both the RUA and the SCCP connections. It was easy to miss connection
cleanup steps, especially on the SCCP side.
Instead, the two FSMs clearly define the RUA and SCCP conn states
separately, and each side takes care of its own release needs for all
possible scenarios.
- When both RUA and SCCP are released, the context map is discarded.
- A context map can stay around to wait for proper SCCP release, even if
the RUA side has lost the HNB connection.
- Completely drop the async "context mapper garbage collection", because
the FSMs clarify the release and free steps, synchronously.
- We still keep a (simplified) enum for global context map state, but
this is only used so that VTY reporting remains mostly unchanged.
== Context map cleanup confusion ==
The function context_map_hnb_released() was the general cleanup function
for a context map. Instead, add separate context_map_free().
== Free context maps separately from HNB ==
When a HNB releases, talloc_steal() the context maps out of the HNB
specific hnb_ctx, so that they are not freed along with the HNB state,
possibly leaving SCCP connections afloat.
(It is still nice to normally keep context maps as talloc children of
their respective hnb_ctx, so talloc reports show which belongs to
which.)
So far, context map handling found the global hnb_gw pointer via
map->hnb_ctx->gw. But in fact, a HNB may disappear at any point in time.
Instead, use a separate hnb_gw pointer in map->gw.
== RUA procedure codes vs. SCCP prims ==
So far, the RUA rx side composed SCCP prims to pass on:
RUA rx ---SCCP-prim--> RANAP handling ---SCCP-prim--> SCCP tx
That is a source of confusion: a RUA procedure code should not translate
1:1 to SCCP prims, especially for RUA id-Disconnect (see release charts
below).
Instead, move SCCP prim composition over to the SCCP side, using FSM
events to forward:
RUA rx --event--> RUA FSM --event--> SCCP FSM --SCCP-prim--> SCCP tx
+RANAP +RANAP +RANAP
RUA tx <--RUA---- RUA FSM <--event-- SCCP FSM <--event-- SCCP rx
+RANAP +RANAP +RANAP
Hence choose the correct prim according to the SCCP FSM state.
- in hnbgw_rua.c, use RUA procedure codes, not prim types.
- via the new FSM events' data args, pass msgb containing RANAP PDUs.
== Fix SCCP Release behavior ==
So far, the normal conn release behavior was
HNB HNBGW CN
| --id-Disconnect--> | ---SCCP-Released--> | Iu-ReleaseComplete
| | <--SCCP-RLC-------- | (no data)
Instead, the SCCP release is now in accordance with 3GPP TS 48.006 9.2
'Connection release':
The MSC sends a SCCP released message. This message shall not contain
any user data field.
i.e.:
HNB HNBGW CN
| --id-Disconnect--> | ---Data-Form-1(!)--> | Iu-ReleaseComplete
| | <--SCCP-Released---- | (no data)
| | ---SCCP-RLC--------> | (no data)
(Side note, the final SCCP Release Confirm step is taken care of
implicitly by libosmo-sigtran's sccp_scoc.c FSM.)
If the CN fails to respond with SCCP-Released, on new X31 timeout,
osmo-hnbgw will send an SCCP Released to the CN as fallback.
== Memory model for message dispatch ==
So far, an osmo_scu_prim aka "oph" was passed between RUA and SCCP
handling code, and the final dispatch freed it. Every error path had to
take care not to leak any oph.
Instead, use a much easier and much more leakage proof memory model,
inspired by fixeria:
- on rx, dispatch RANAP msgb that live in OTC_SELECT.
- no code path needs to msgb_free() -- the msgb is discarded via
OTC_SELECT when handling is done, error or no error.
- any code path may also choose to store the msgb for async dispatch,
using talloc_steal(). The user plane mapping via MGW and UPF do that.
- if any code path does msgb_free(), that would be no problem either
(but none do so now, for simplicity).
== Layer separation ==
Dispatch *all* connection-oriented RUA tx via the RUA FSM and SCCP tx
via the SCCP FSM, do not call rua_tx_dt() or osmo_sccp_user_sap_down()
directly.
== Memory model for decoded ranap_message IEs ==
Use a talloc destructor to make sure that the ranap_message IEs are
always implicitly freed upon talloc_free(), so that no code path can
possibly forget to do so.
== Implicit cleanup by talloc ==
Use talloc scoping to remove a bunch of explicit cleanup code. For
example, make a chached message a talloc child of its handler:
talloc_steal(mgw_fsm_priv, message);
mgw_fsm_priv->ranap_rab_ass_req_message = message;
and later implicitly free 'message' by only freeing the handler:
talloc_free(mgw_fsm_priv)
Related: SYS#6297
Change-Id: I6ff7e36532ff57c6f2d3e7e419dd22ef27dafd19
2023-02-12 04:02:48 +00:00
|
|
|
int handle_rab_ass_req(struct hnbgw_context_map *map, struct msgb *ranap_msg, ranap_message *message)
|
2022-01-06 16:05:08 +00:00
|
|
|
{
|
|
|
|
static bool initialized = false;
|
|
|
|
struct mgw_fsm_priv *mgw_fsm_priv;
|
|
|
|
char fsm_name[255];
|
|
|
|
|
|
|
|
/* Initialize FSM if not done yet */
|
|
|
|
if (!initialized) {
|
|
|
|
OSMO_ASSERT(osmo_fsm_register(&mgw_fsm) == 0);
|
|
|
|
initialized = true;
|
|
|
|
}
|
|
|
|
|
2022-06-14 16:07:52 +00:00
|
|
|
/* The RTP stream negotiation usually begins with a RAB-AssignmentRequest and ends with an IU-Release, however
|
2023-03-20 13:32:42 +00:00
|
|
|
* it may also be that the MSC decides to release the RAB with a dedicated RAB-AssignmentRequest that contains
|
2022-01-06 16:05:08 +00:00
|
|
|
* a ReleaseList. In this case an FSM will already be present. */
|
|
|
|
if (map->mgw_fi) {
|
2023-03-20 14:47:28 +00:00
|
|
|
/* Check if the RAB-AssignmentRequest contains a RAB-ReleaseItem that matches the RAB-ID we are
|
|
|
|
* managing in this HNBGW context map. */
|
|
|
|
if (is_our_rab_release(map, message))
|
|
|
|
return release_mgw_fsm(map, ranap_msg);
|
|
|
|
|
|
|
|
/* The RAB-ReleaseItem in the incoming message should match the RAB ID we are managing. A mismatch may
|
|
|
|
* mean that there is an inconsistency between the HNBGW and the MSC state and the MGW FSM on the HNBGW
|
|
|
|
* side may serve an abandonned connection, which we will now close. However we must also assume that
|
|
|
|
* the incoming message may still contain a RAB-Assignment for a new RTP stream, so we still must
|
|
|
|
* continue with the message evaluation. */
|
2022-06-14 16:11:22 +00:00
|
|
|
osmo_fsm_inst_state_chg(map->mgw_fi, MGW_ST_FAILURE, 0, 0);
|
|
|
|
OSMO_ASSERT(map->mgw_fi == NULL);
|
2022-01-06 16:05:08 +00:00
|
|
|
}
|
|
|
|
|
2022-02-24 13:41:03 +00:00
|
|
|
/* This FSM only supports RAB assignments with a single RAB assignment only. This limitation has been taken
|
|
|
|
* into account under the assumption that voice calls typically require a single RAB only. Nevertheless, we
|
|
|
|
* will block all incoming RAB assignments that try to assign more (or less) than one RAB. */
|
|
|
|
if (ranap_rab_ass_req_ies_get_count(&message->msg.raB_AssignmentRequestIEs) != 1) {
|
|
|
|
LOGP(DMGW, LOGL_ERROR,
|
2023-03-20 14:51:22 +00:00
|
|
|
"%s() rua_ctx_id=%d, RAB-AssignmentRequest with more than one RAB assignment -- abort!\n",
|
|
|
|
__func__, map->rua_ctx_id);
|
2022-06-14 16:29:56 +00:00
|
|
|
tx_release_req(map);
|
|
|
|
return -1;
|
2022-01-06 16:05:08 +00:00
|
|
|
}
|
|
|
|
|
2022-06-14 16:29:56 +00:00
|
|
|
mgw_fsm_priv = talloc_zero(map, struct mgw_fsm_priv);
|
2022-01-06 16:05:08 +00:00
|
|
|
mgw_fsm_priv->map = map;
|
context map: introduce RUA and SCCP FSMs to fix leaks
Refactor the entire RUA <-> SCCP connection-oriented message forwarding:
- conquer confusion about hnbgw_context_map release behavior, and
- eradicate SCCP connection leaks.
Finer points:
== Context map state ==
So far, we had a single context map state and some flags to keep track
of both the RUA and the SCCP connections. It was easy to miss connection
cleanup steps, especially on the SCCP side.
Instead, the two FSMs clearly define the RUA and SCCP conn states
separately, and each side takes care of its own release needs for all
possible scenarios.
- When both RUA and SCCP are released, the context map is discarded.
- A context map can stay around to wait for proper SCCP release, even if
the RUA side has lost the HNB connection.
- Completely drop the async "context mapper garbage collection", because
the FSMs clarify the release and free steps, synchronously.
- We still keep a (simplified) enum for global context map state, but
this is only used so that VTY reporting remains mostly unchanged.
== Context map cleanup confusion ==
The function context_map_hnb_released() was the general cleanup function
for a context map. Instead, add separate context_map_free().
== Free context maps separately from HNB ==
When a HNB releases, talloc_steal() the context maps out of the HNB
specific hnb_ctx, so that they are not freed along with the HNB state,
possibly leaving SCCP connections afloat.
(It is still nice to normally keep context maps as talloc children of
their respective hnb_ctx, so talloc reports show which belongs to
which.)
So far, context map handling found the global hnb_gw pointer via
map->hnb_ctx->gw. But in fact, a HNB may disappear at any point in time.
Instead, use a separate hnb_gw pointer in map->gw.
== RUA procedure codes vs. SCCP prims ==
So far, the RUA rx side composed SCCP prims to pass on:
RUA rx ---SCCP-prim--> RANAP handling ---SCCP-prim--> SCCP tx
That is a source of confusion: a RUA procedure code should not translate
1:1 to SCCP prims, especially for RUA id-Disconnect (see release charts
below).
Instead, move SCCP prim composition over to the SCCP side, using FSM
events to forward:
RUA rx --event--> RUA FSM --event--> SCCP FSM --SCCP-prim--> SCCP tx
+RANAP +RANAP +RANAP
RUA tx <--RUA---- RUA FSM <--event-- SCCP FSM <--event-- SCCP rx
+RANAP +RANAP +RANAP
Hence choose the correct prim according to the SCCP FSM state.
- in hnbgw_rua.c, use RUA procedure codes, not prim types.
- via the new FSM events' data args, pass msgb containing RANAP PDUs.
== Fix SCCP Release behavior ==
So far, the normal conn release behavior was
HNB HNBGW CN
| --id-Disconnect--> | ---SCCP-Released--> | Iu-ReleaseComplete
| | <--SCCP-RLC-------- | (no data)
Instead, the SCCP release is now in accordance with 3GPP TS 48.006 9.2
'Connection release':
The MSC sends a SCCP released message. This message shall not contain
any user data field.
i.e.:
HNB HNBGW CN
| --id-Disconnect--> | ---Data-Form-1(!)--> | Iu-ReleaseComplete
| | <--SCCP-Released---- | (no data)
| | ---SCCP-RLC--------> | (no data)
(Side note, the final SCCP Release Confirm step is taken care of
implicitly by libosmo-sigtran's sccp_scoc.c FSM.)
If the CN fails to respond with SCCP-Released, on new X31 timeout,
osmo-hnbgw will send an SCCP Released to the CN as fallback.
== Memory model for message dispatch ==
So far, an osmo_scu_prim aka "oph" was passed between RUA and SCCP
handling code, and the final dispatch freed it. Every error path had to
take care not to leak any oph.
Instead, use a much easier and much more leakage proof memory model,
inspired by fixeria:
- on rx, dispatch RANAP msgb that live in OTC_SELECT.
- no code path needs to msgb_free() -- the msgb is discarded via
OTC_SELECT when handling is done, error or no error.
- any code path may also choose to store the msgb for async dispatch,
using talloc_steal(). The user plane mapping via MGW and UPF do that.
- if any code path does msgb_free(), that would be no problem either
(but none do so now, for simplicity).
== Layer separation ==
Dispatch *all* connection-oriented RUA tx via the RUA FSM and SCCP tx
via the SCCP FSM, do not call rua_tx_dt() or osmo_sccp_user_sap_down()
directly.
== Memory model for decoded ranap_message IEs ==
Use a talloc destructor to make sure that the ranap_message IEs are
always implicitly freed upon talloc_free(), so that no code path can
possibly forget to do so.
== Implicit cleanup by talloc ==
Use talloc scoping to remove a bunch of explicit cleanup code. For
example, make a cached message a talloc child of its handler:
talloc_steal(mgw_fsm_priv, message);
mgw_fsm_priv->ranap_rab_ass_req_message = message;
and later implicitly free 'message' by only freeing the handler:
talloc_free(mgw_fsm_priv)
Related: SYS#6297
Change-Id: I6ff7e36532ff57c6f2d3e7e419dd22ef27dafd19
2023-02-12 04:02:48 +00:00
|
|
|
|
|
|
|
talloc_steal(mgw_fsm_priv, message);
|
2022-06-14 16:29:56 +00:00
|
|
|
mgw_fsm_priv->ranap_rab_ass_req_message = message;
|
|
|
|
|
|
|
|
/* Allocate FSM */
|
2022-01-06 16:05:08 +00:00
|
|
|
snprintf(fsm_name, sizeof(fsm_name), "mgw-fsm-%u-%u", map->rua_ctx_id, mgw_fsm_priv->rab_id);
|
2022-06-14 16:11:22 +00:00
|
|
|
map->mgw_fi = osmo_fsm_inst_alloc(&mgw_fsm, map, mgw_fsm_priv, LOGL_DEBUG, fsm_name);
|
2022-01-06 16:05:08 +00:00
|
|
|
|
2022-06-14 16:29:56 +00:00
|
|
|
/* Start the FSM */
|
|
|
|
mgw_fsm_state_chg(map->mgw_fi, MGW_ST_CRCX_HNB);
|
2022-01-06 16:05:08 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*! Handlie RANAP RAB AssignmentResponse (deliver message, complete RTP stream switching).
|
context map: introduce RUA and SCCP FSMs to fix leaks
Refactor the entire RUA <-> SCCP connection-oriented message forwarding:
- conquer confusion about hnbgw_context_map release behavior, and
- eradicate SCCP connection leaks.
Finer points:
== Context map state ==
So far, we had a single context map state and some flags to keep track
of both the RUA and the SCCP connections. It was easy to miss connection
cleanup steps, especially on the SCCP side.
Instead, the two FSMs clearly define the RUA and SCCP conn states
separately, and each side takes care of its own release needs for all
possible scenarios.
- When both RUA and SCCP are released, the context map is discarded.
- A context map can stay around to wait for proper SCCP release, even if
the RUA side has lost the HNB connection.
- Completely drop the async "context mapper garbage collection", because
the FSMs clarify the release and free steps, synchronously.
- We still keep a (simplified) enum for global context map state, but
this is only used so that VTY reporting remains mostly unchanged.
== Context map cleanup confusion ==
The function context_map_hnb_released() was the general cleanup function
for a context map. Instead, add separate context_map_free().
== Free context maps separately from HNB ==
When a HNB releases, talloc_steal() the context maps out of the HNB
specific hnb_ctx, so that they are not freed along with the HNB state,
possibly leaving SCCP connections afloat.
(It is still nice to normally keep context maps as talloc children of
their respective hnb_ctx, so talloc reports show which belongs to
which.)
So far, context map handling found the global hnb_gw pointer via
map->hnb_ctx->gw. But in fact, a HNB may disappear at any point in time.
Instead, use a separate hnb_gw pointer in map->gw.
== RUA procedure codes vs. SCCP prims ==
So far, the RUA rx side composed SCCP prims to pass on:
RUA rx ---SCCP-prim--> RANAP handling ---SCCP-prim--> SCCP tx
That is a source of confusion: a RUA procedure code should not translate
1:1 to SCCP prims, especially for RUA id-Disconnect (see release charts
below).
Instead, move SCCP prim composition over to the SCCP side, using FSM
events to forward:
RUA rx --event--> RUA FSM --event--> SCCP FSM --SCCP-prim--> SCCP tx
+RANAP +RANAP +RANAP
RUA tx <--RUA---- RUA FSM <--event-- SCCP FSM <--event-- SCCP rx
+RANAP +RANAP +RANAP
Hence choose the correct prim according to the SCCP FSM state.
- in hnbgw_rua.c, use RUA procedure codes, not prim types.
- via the new FSM events' data args, pass msgb containing RANAP PDUs.
== Fix SCCP Release behavior ==
So far, the normal conn release behavior was
HNB HNBGW CN
| --id-Disconnect--> | ---SCCP-Released--> | Iu-ReleaseComplete
| | <--SCCP-RLC-------- | (no data)
Instead, the SCCP release is now in accordance with 3GPP TS 48.006 9.2
'Connection release':
The MSC sends a SCCP released message. This message shall not contain
any user data field.
i.e.:
HNB HNBGW CN
| --id-Disconnect--> | ---Data-Form-1(!)--> | Iu-ReleaseComplete
| | <--SCCP-Released---- | (no data)
| | ---SCCP-RLC--------> | (no data)
(Side note, the final SCCP Release Confirm step is taken care of
implicitly by libosmo-sigtran's sccp_scoc.c FSM.)
If the CN fails to respond with SCCP-Released, on new X31 timeout,
osmo-hnbgw will send an SCCP Released to the CN as fallback.
== Memory model for message dispatch ==
So far, an osmo_scu_prim aka "oph" was passed between RUA and SCCP
handling code, and the final dispatch freed it. Every error path had to
take care not to leak any oph.
Instead, use a much easier and much more leakage proof memory model,
inspired by fixeria:
- on rx, dispatch RANAP msgb that live in OTC_SELECT.
- no code path needs to msgb_free() -- the msgb is discarded via
OTC_SELECT when handling is done, error or no error.
- any code path may also choose to store the msgb for async dispatch,
using talloc_steal(). The user plane mapping via MGW and UPF do that.
- if any code path does msgb_free(), that would be no problem either
(but none do so now, for simplicity).
== Layer separation ==
Dispatch *all* connection-oriented RUA tx via the RUA FSM and SCCP tx
via the SCCP FSM, do not call rua_tx_dt() or osmo_sccp_user_sap_down()
directly.
== Memory model for decoded ranap_message IEs ==
Use a talloc destructor to make sure that the ranap_message IEs are
always implicitly freed upon talloc_free(), so that no code path can
possibly forget to do so.
== Implicit cleanup by talloc ==
Use talloc scoping to remove a bunch of explicit cleanup code. For
example, make a chached message a talloc child of its handler:
talloc_steal(mgw_fsm_priv, message);
mgw_fsm_priv->ranap_rab_ass_req_message = message;
and later implicitly free 'message' by only freeing the handler:
talloc_free(mgw_fsm_priv)
Related: SYS#6297
Change-Id: I6ff7e36532ff57c6f2d3e7e419dd22ef27dafd19
2023-02-12 04:02:48 +00:00
|
|
|
* \param[in] map hnbgw context map that is responsible for this call.
|
|
|
|
* \param[in] ranap_msg msgb containing RANAP RAB AssignmentResponse at msgb_l2(), allocated in OTC_SELECT.
|
|
|
|
* This function may talloc_steal(ranap_msg) to keep it for later.
|
|
|
|
* \param[in] message decoded RANAP message container, allocated in OTC_SELECT.
|
|
|
|
* This function may talloc_steal(message) to keep it for later.
|
2022-01-06 16:05:08 +00:00
|
|
|
* \returns 0 on success; negative on error. */
|
context map: introduce RUA and SCCP FSMs to fix leaks
Refactor the entire RUA <-> SCCP connection-oriented message forwarding:
- conquer confusion about hnbgw_context_map release behavior, and
- eradicate SCCP connection leaks.
Finer points:
== Context map state ==
So far, we had a single context map state and some flags to keep track
of both the RUA and the SCCP connections. It was easy to miss connection
cleanup steps, especially on the SCCP side.
Instead, the two FSMs clearly define the RUA and SCCP conn states
separately, and each side takes care of its own release needs for all
possible scenarios.
- When both RUA and SCCP are released, the context map is discarded.
- A context map can stay around to wait for proper SCCP release, even if
the RUA side has lost the HNB connection.
- Completely drop the async "context mapper garbage collection", because
the FSMs clarify the release and free steps, synchronously.
- We still keep a (simplified) enum for global context map state, but
this is only used so that VTY reporting remains mostly unchanged.
== Context map cleanup confusion ==
The function context_map_hnb_released() was the general cleanup function
for a context map. Instead, add separate context_map_free().
== Free context maps separately from HNB ==
When a HNB releases, talloc_steal() the context maps out of the HNB
specific hnb_ctx, so that they are not freed along with the HNB state,
possibly leaving SCCP connections afloat.
(It is still nice to normally keep context maps as talloc children of
their respective hnb_ctx, so talloc reports show which belongs to
which.)
So far, context map handling found the global hnb_gw pointer via
map->hnb_ctx->gw. But in fact, a HNB may disappear at any point in time.
Instead, use a separate hnb_gw pointer in map->gw.
== RUA procedure codes vs. SCCP prims ==
So far, the RUA rx side composed SCCP prims to pass on:
RUA rx ---SCCP-prim--> RANAP handling ---SCCP-prim--> SCCP tx
That is a source of confusion: a RUA procedure code should not translate
1:1 to SCCP prims, especially for RUA id-Disconnect (see release charts
below).
Instead, move SCCP prim composition over to the SCCP side, using FSM
events to forward:
RUA rx --event--> RUA FSM --event--> SCCP FSM --SCCP-prim--> SCCP tx
+RANAP +RANAP +RANAP
RUA tx <--RUA---- RUA FSM <--event-- SCCP FSM <--event-- SCCP rx
+RANAP +RANAP +RANAP
Hence choose the correct prim according to the SCCP FSM state.
- in hnbgw_rua.c, use RUA procedure codes, not prim types.
- via the new FSM events' data args, pass msgb containing RANAP PDUs.
== Fix SCCP Release behavior ==
So far, the normal conn release behavior was
HNB HNBGW CN
| --id-Disconnect--> | ---SCCP-Released--> | Iu-ReleaseComplete
| | <--SCCP-RLC-------- | (no data)
Instead, the SCCP release is now in accordance with 3GPP TS 48.006 9.2
'Connection release':
The MSC sends a SCCP released message. This message shall not contain
any user data field.
i.e.:
HNB HNBGW CN
| --id-Disconnect--> | ---Data-Form-1(!)--> | Iu-ReleaseComplete
| | <--SCCP-Released---- | (no data)
| | ---SCCP-RLC--------> | (no data)
(Side note, the final SCCP Release Confirm step is taken care of
implicitly by libosmo-sigtran's sccp_scoc.c FSM.)
If the CN fails to respond with SCCP-Released, on new X31 timeout,
osmo-hnbgw will send an SCCP Released to the CN as fallback.
== Memory model for message dispatch ==
So far, an osmo_scu_prim aka "oph" was passed between RUA and SCCP
handling code, and the final dispatch freed it. Every error path had to
take care not to leak any oph.
Instead, use a much easier and much more leakage proof memory model,
inspired by fixeria:
- on rx, dispatch RANAP msgb that live in OTC_SELECT.
- no code path needs to msgb_free() -- the msgb is discarded via
OTC_SELECT when handling is done, error or no error.
- any code path may also choose to store the msgb for async dispatch,
using talloc_steal(). The user plane mapping via MGW and UPF do that.
- if any code path does msgb_free(), that would be no problem either
(but none do so now, for simplicity).
== Layer separation ==
Dispatch *all* connection-oriented RUA tx via the RUA FSM and SCCP tx
via the SCCP FSM, do not call rua_tx_dt() or osmo_sccp_user_sap_down()
directly.
== Memory model for decoded ranap_message IEs ==
Use a talloc destructor to make sure that the ranap_message IEs are
always implicitly freed upon talloc_free(), so that no code path can
possibly forget to do so.
== Implicit cleanup by talloc ==
Use talloc scoping to remove a bunch of explicit cleanup code. For
example, make a chached message a talloc child of its handler:
talloc_steal(mgw_fsm_priv, message);
mgw_fsm_priv->ranap_rab_ass_req_message = message;
and later implicitly free 'message' by only freeing the handler:
talloc_free(mgw_fsm_priv)
Related: SYS#6297
Change-Id: I6ff7e36532ff57c6f2d3e7e419dd22ef27dafd19
2023-02-12 04:02:48 +00:00
|
|
|
int mgw_fsm_handle_rab_ass_resp(struct hnbgw_context_map *map, struct msgb *ranap_msg, ranap_message *message)
|
2022-01-06 16:05:08 +00:00
|
|
|
{
|
|
|
|
struct mgw_fsm_priv *mgw_fsm_priv;
|
|
|
|
|
context map: introduce RUA and SCCP FSMs to fix leaks
Refactor the entire RUA <-> SCCP connection-oriented message forwarding:
- conquer confusion about hnbgw_context_map release behavior, and
- eradicate SCCP connection leaks.
Finer points:
== Context map state ==
So far, we had a single context map state and some flags to keep track
of both the RUA and the SCCP connections. It was easy to miss connection
cleanup steps, especially on the SCCP side.
Instead, the two FSMs clearly define the RUA and SCCP conn states
separately, and each side takes care of its own release needs for all
possible scenarios.
- When both RUA and SCCP are released, the context map is discarded.
- A context map can stay around to wait for proper SCCP release, even if
the RUA side has lost the HNB connection.
- Completely drop the async "context mapper garbage collection", because
the FSMs clarify the release and free steps, synchronously.
- We still keep a (simplified) enum for global context map state, but
this is only used so that VTY reporting remains mostly unchanged.
== Context map cleanup confusion ==
The function context_map_hnb_released() was the general cleanup function
for a context map. Instead, add separate context_map_free().
== Free context maps separately from HNB ==
When a HNB releases, talloc_steal() the context maps out of the HNB
specific hnb_ctx, so that they are not freed along with the HNB state,
possibly leaving SCCP connections afloat.
(It is still nice to normally keep context maps as talloc children of
their respective hnb_ctx, so talloc reports show which belongs to
which.)
So far, context map handling found the global hnb_gw pointer via
map->hnb_ctx->gw. But in fact, a HNB may disappear at any point in time.
Instead, use a separate hnb_gw pointer in map->gw.
== RUA procedure codes vs. SCCP prims ==
So far, the RUA rx side composed SCCP prims to pass on:
RUA rx ---SCCP-prim--> RANAP handling ---SCCP-prim--> SCCP tx
That is a source of confusion: a RUA procedure code should not translate
1:1 to SCCP prims, especially for RUA id-Disconnect (see release charts
below).
Instead, move SCCP prim composition over to the SCCP side, using FSM
events to forward:
RUA rx --event--> RUA FSM --event--> SCCP FSM --SCCP-prim--> SCCP tx
+RANAP +RANAP +RANAP
RUA tx <--RUA---- RUA FSM <--event-- SCCP FSM <--event-- SCCP rx
+RANAP +RANAP +RANAP
Hence choose the correct prim according to the SCCP FSM state.
- in hnbgw_rua.c, use RUA procedure codes, not prim types.
- via the new FSM events' data args, pass msgb containing RANAP PDUs.
== Fix SCCP Release behavior ==
So far, the normal conn release behavior was
HNB HNBGW CN
| --id-Disconnect--> | ---SCCP-Released--> | Iu-ReleaseComplete
| | <--SCCP-RLC-------- | (no data)
Instead, the SCCP release is now in accordance with 3GPP TS 48.006 9.2
'Connection release':
The MSC sends a SCCP released message. This message shall not contain
any user data field.
i.e.:
HNB HNBGW CN
| --id-Disconnect--> | ---Data-Form-1(!)--> | Iu-ReleaseComplete
| | <--SCCP-Released---- | (no data)
| | ---SCCP-RLC--------> | (no data)
(Side note, the final SCCP Release Confirm step is taken care of
implicitly by libosmo-sigtran's sccp_scoc.c FSM.)
If the CN fails to respond with SCCP-Released, on new X31 timeout,
osmo-hnbgw will send an SCCP Released to the CN as fallback.
== Memory model for message dispatch ==
So far, an osmo_scu_prim aka "oph" was passed between RUA and SCCP
handling code, and the final dispatch freed it. Every error path had to
take care not to leak any oph.
Instead, use a much easier and much more leakage proof memory model,
inspired by fixeria:
- on rx, dispatch RANAP msgb that live in OTC_SELECT.
- no code path needs to msgb_free() -- the msgb is discarded via
OTC_SELECT when handling is done, error or no error.
- any code path may also choose to store the msgb for async dispatch,
using talloc_steal(). The user plane mapping via MGW and UPF do that.
- if any code path does msgb_free(), that would be no problem either
(but none do so now, for simplicity).
== Layer separation ==
Dispatch *all* connection-oriented RUA tx via the RUA FSM and SCCP tx
via the SCCP FSM, do not call rua_tx_dt() or osmo_sccp_user_sap_down()
directly.
== Memory model for decoded ranap_message IEs ==
Use a talloc destructor to make sure that the ranap_message IEs are
always implicitly freed upon talloc_free(), so that no code path can
possibly forget to do so.
== Implicit cleanup by talloc ==
Use talloc scoping to remove a bunch of explicit cleanup code. For
example, make a chached message a talloc child of its handler:
talloc_steal(mgw_fsm_priv, message);
mgw_fsm_priv->ranap_rab_ass_req_message = message;
and later implicitly free 'message' by only freeing the handler:
talloc_free(mgw_fsm_priv)
Related: SYS#6297
Change-Id: I6ff7e36532ff57c6f2d3e7e419dd22ef27dafd19
2023-02-12 04:02:48 +00:00
|
|
|
OSMO_ASSERT(ranap_msg);
|
2022-01-06 16:05:08 +00:00
|
|
|
|
|
|
|
if (!map->mgw_fi) {
|
|
|
|
/* NOTE: This situation is a corner-case. We may end up here when the co-located MGW caused a problem
|
|
|
|
* on the way between RANAP RAB Assignment Request and RANAP RAB Assignment Response. */
|
|
|
|
|
|
|
|
LOGP(DMGW, LOGL_ERROR,
|
2023-03-21 08:45:32 +00:00
|
|
|
"%s() rua_ctx_id=%d, no MGW fsm -- sending Iu-Release-Request!\n",
|
|
|
|
__func__, map->rua_ctx_id);
|
2022-01-06 16:05:08 +00:00
|
|
|
|
|
|
|
/* Send a release request, to make sure that the MSC is aware of the problem. */
|
|
|
|
tx_release_req(map);
|
2022-06-10 09:39:55 +00:00
|
|
|
return -1;
|
2022-01-06 16:05:08 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
mgw_fsm_priv = map->mgw_fi->priv;
|
context map: introduce RUA and SCCP FSMs to fix leaks
Refactor the entire RUA <-> SCCP connection-oriented message forwarding:
- conquer confusion about hnbgw_context_map release behavior, and
- eradicate SCCP connection leaks.
Finer points:
== Context map state ==
So far, we had a single context map state and some flags to keep track
of both the RUA and the SCCP connections. It was easy to miss connection
cleanup steps, especially on the SCCP side.
Instead, the two FSMs clearly define the RUA and SCCP conn states
separately, and each side takes care of its own release needs for all
possible scenarios.
- When both RUA and SCCP are released, the context map is discarded.
- A context map can stay around to wait for proper SCCP release, even if
the RUA side has lost the HNB connection.
- Completely drop the async "context mapper garbage collection", because
the FSMs clarify the release and free steps, synchronously.
- We still keep a (simplified) enum for global context map state, but
this is only used so that VTY reporting remains mostly unchanged.
== Context map cleanup confusion ==
The function context_map_hnb_released() was the general cleanup function
for a context map. Instead, add separate context_map_free().
== Free context maps separately from HNB ==
When a HNB releases, talloc_steal() the context maps out of the HNB
specific hnb_ctx, so that they are not freed along with the HNB state,
possibly leaving SCCP connections afloat.
(It is still nice to normally keep context maps as talloc children of
their respective hnb_ctx, so talloc reports show which belongs to
which.)
So far, context map handling found the global hnb_gw pointer via
map->hnb_ctx->gw. But in fact, a HNB may disappear at any point in time.
Instead, use a separate hnb_gw pointer in map->gw.
== RUA procedure codes vs. SCCP prims ==
So far, the RUA rx side composed SCCP prims to pass on:
RUA rx ---SCCP-prim--> RANAP handling ---SCCP-prim--> SCCP tx
That is a source of confusion: a RUA procedure code should not translate
1:1 to SCCP prims, especially for RUA id-Disconnect (see release charts
below).
Instead, move SCCP prim composition over to the SCCP side, using FSM
events to forward:
RUA rx --event--> RUA FSM --event--> SCCP FSM --SCCP-prim--> SCCP tx
+RANAP +RANAP +RANAP
RUA tx <--RUA---- RUA FSM <--event-- SCCP FSM <--event-- SCCP rx
+RANAP +RANAP +RANAP
Hence choose the correct prim according to the SCCP FSM state.
- in hnbgw_rua.c, use RUA procedure codes, not prim types.
- via the new FSM events' data args, pass msgb containing RANAP PDUs.
== Fix SCCP Release behavior ==
So far, the normal conn release behavior was
HNB HNBGW CN
| --id-Disconnect--> | ---SCCP-Released--> | Iu-ReleaseComplete
| | <--SCCP-RLC-------- | (no data)
Instead, the SCCP release is now in accordance with 3GPP TS 48.006 9.2
'Connection release':
The MSC sends a SCCP released message. This message shall not contain
any user data field.
i.e.:
HNB HNBGW CN
| --id-Disconnect--> | ---Data-Form-1(!)--> | Iu-ReleaseComplete
| | <--SCCP-Released---- | (no data)
| | ---SCCP-RLC--------> | (no data)
(Side note, the final SCCP Release Confirm step is taken care of
implicitly by libosmo-sigtran's sccp_scoc.c FSM.)
If the CN fails to respond with SCCP-Released, on new X31 timeout,
osmo-hnbgw will send an SCCP Released to the CN as fallback.
== Memory model for message dispatch ==
So far, an osmo_scu_prim aka "oph" was passed between RUA and SCCP
handling code, and the final dispatch freed it. Every error path had to
take care not to leak any oph.
Instead, use a much easier and much more leakage proof memory model,
inspired by fixeria:
- on rx, dispatch RANAP msgb that live in OTC_SELECT.
- no code path needs to msgb_free() -- the msgb is discarded via
OTC_SELECT when handling is done, error or no error.
- any code path may also choose to store the msgb for async dispatch,
using talloc_steal(). The user plane mapping via MGW and UPF do that.
- if any code path does msgb_free(), that would be no problem either
(but none do so now, for simplicity).
== Layer separation ==
Dispatch *all* connection-oriented RUA tx via the RUA FSM and SCCP tx
via the SCCP FSM, do not call rua_tx_dt() or osmo_sccp_user_sap_down()
directly.
== Memory model for decoded ranap_message IEs ==
Use a talloc destructor to make sure that the ranap_message IEs are
always implicitly freed upon talloc_free(), so that no code path can
possibly forget to do so.
== Implicit cleanup by talloc ==
Use talloc scoping to remove a bunch of explicit cleanup code. For
example, make a chached message a talloc child of its handler:
talloc_steal(mgw_fsm_priv, message);
mgw_fsm_priv->ranap_rab_ass_req_message = message;
and later implicitly free 'message' by only freeing the handler:
talloc_free(mgw_fsm_priv)
Related: SYS#6297
Change-Id: I6ff7e36532ff57c6f2d3e7e419dd22ef27dafd19
2023-02-12 04:02:48 +00:00
|
|
|
|
|
|
|
talloc_steal(mgw_fsm_priv, ranap_msg);
|
|
|
|
mgw_fsm_priv->ranap_rab_ass_resp_msgb = ranap_msg;
|
|
|
|
|
|
|
|
talloc_steal(mgw_fsm_priv, message);
|
2022-01-06 16:05:08 +00:00
|
|
|
mgw_fsm_priv->ranap_rab_ass_resp_message = message;
|
context map: introduce RUA and SCCP FSMs to fix leaks
Refactor the entire RUA <-> SCCP connection-oriented message forwarding:
- conquer confusion about hnbgw_context_map release behavior, and
- eradicate SCCP connection leaks.
Finer points:
== Context map state ==
So far, we had a single context map state and some flags to keep track
of both the RUA and the SCCP connections. It was easy to miss connection
cleanup steps, especially on the SCCP side.
Instead, the two FSMs clearly define the RUA and SCCP conn states
separately, and each side takes care of its own release needs for all
possible scenarios.
- When both RUA and SCCP are released, the context map is discarded.
- A context map can stay around to wait for proper SCCP release, even if
the RUA side has lost the HNB connection.
- Completely drop the async "context mapper garbage collection", because
the FSMs clarify the release and free steps, synchronously.
- We still keep a (simplified) enum for global context map state, but
this is only used so that VTY reporting remains mostly unchanged.
== Context map cleanup confusion ==
The function context_map_hnb_released() was the general cleanup function
for a context map. Instead, add separate context_map_free().
== Free context maps separately from HNB ==
When a HNB releases, talloc_steal() the context maps out of the HNB
specific hnb_ctx, so that they are not freed along with the HNB state,
possibly leaving SCCP connections afloat.
(It is still nice to normally keep context maps as talloc children of
their respective hnb_ctx, so talloc reports show which belongs to
which.)
So far, context map handling found the global hnb_gw pointer via
map->hnb_ctx->gw. But in fact, a HNB may disappear at any point in time.
Instead, use a separate hnb_gw pointer in map->gw.
== RUA procedure codes vs. SCCP prims ==
So far, the RUA rx side composed SCCP prims to pass on:
RUA rx ---SCCP-prim--> RANAP handling ---SCCP-prim--> SCCP tx
That is a source of confusion: a RUA procedure code should not translate
1:1 to SCCP prims, especially for RUA id-Disconnect (see release charts
below).
Instead, move SCCP prim composition over to the SCCP side, using FSM
events to forward:
RUA rx --event--> RUA FSM --event--> SCCP FSM --SCCP-prim--> SCCP tx
+RANAP +RANAP +RANAP
RUA tx <--RUA---- RUA FSM <--event-- SCCP FSM <--event-- SCCP rx
+RANAP +RANAP +RANAP
Hence choose the correct prim according to the SCCP FSM state.
- in hnbgw_rua.c, use RUA procedure codes, not prim types.
- via the new FSM events' data args, pass msgb containing RANAP PDUs.
== Fix SCCP Release behavior ==
So far, the normal conn release behavior was
HNB HNBGW CN
| --id-Disconnect--> | ---SCCP-Released--> | Iu-ReleaseComplete
| | <--SCCP-RLC-------- | (no data)
Instead, the SCCP release is now in accordance with 3GPP TS 48.006 9.2
'Connection release':
The MSC sends a SCCP released message. This message shall not contain
any user data field.
i.e.:
HNB HNBGW CN
| --id-Disconnect--> | ---Data-Form-1(!)--> | Iu-ReleaseComplete
| | <--SCCP-Released---- | (no data)
| | ---SCCP-RLC--------> | (no data)
(Side note, the final SCCP Release Confirm step is taken care of
implicitly by libosmo-sigtran's sccp_scoc.c FSM.)
If the CN fails to respond with SCCP-Released, on new X31 timeout,
osmo-hnbgw will send an SCCP Released to the CN as fallback.
== Memory model for message dispatch ==
So far, an osmo_scu_prim aka "oph" was passed between RUA and SCCP
handling code, and the final dispatch freed it. Every error path had to
take care not to leak any oph.
Instead, use a much easier and much more leakage proof memory model,
inspired by fixeria:
- on rx, dispatch RANAP msgb that live in OTC_SELECT.
- no code path needs to msgb_free() -- the msgb is discarded via
OTC_SELECT when handling is done, error or no error.
- any code path may also choose to store the msgb for async dispatch,
using talloc_steal(). The user plane mapping via MGW and UPF do that.
- if any code path does msgb_free(), that would be no problem either
(but none do so now, for simplicity).
== Layer separation ==
Dispatch *all* connection-oriented RUA tx via the RUA FSM and SCCP tx
via the SCCP FSM, do not call rua_tx_dt() or osmo_sccp_user_sap_down()
directly.
== Memory model for decoded ranap_message IEs ==
Use a talloc destructor to make sure that the ranap_message IEs are
always implicitly freed upon talloc_free(), so that no code path can
possibly forget to do so.
== Implicit cleanup by talloc ==
Use talloc scoping to remove a bunch of explicit cleanup code. For
example, make a chached message a talloc child of its handler:
talloc_steal(mgw_fsm_priv, message);
mgw_fsm_priv->ranap_rab_ass_req_message = message;
and later implicitly free 'message' by only freeing the handler:
talloc_free(mgw_fsm_priv)
Related: SYS#6297
Change-Id: I6ff7e36532ff57c6f2d3e7e419dd22ef27dafd19
2023-02-12 04:02:48 +00:00
|
|
|
|
2022-01-06 16:05:08 +00:00
|
|
|
osmo_fsm_inst_dispatch(map->mgw_fi, MGW_EV_RAB_ASS_RESP, NULL);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*! Release the FSM and clear its associated RTP streams.
|
2023-02-20 15:41:08 +00:00
|
|
|
* \ptmap[in] map hnbgw context map that is responsible for this call.
|
2022-01-06 16:05:08 +00:00
|
|
|
* \returns 0 on success; negative on error. */
|
|
|
|
int mgw_fsm_release(struct hnbgw_context_map *map)
|
|
|
|
{
|
2022-12-19 14:13:00 +00:00
|
|
|
if (!map->mgw_fi)
|
2022-01-06 16:05:08 +00:00
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
osmo_fsm_inst_dispatch(map->mgw_fi, MGW_EV_RELEASE, NULL);
|
|
|
|
return 0;
|
|
|
|
}
|