add osmo_fsm_set_dealloc_ctx(), to help with use-after-free

This is a simpler and more general solution to the problem so far solved by
osmo_fsm_term_safely(true). This extends use-after-free fixes to arbitrary
functions, not only FSM instances during termination.

The aim is to defer talloc_free() until back in the main loop.

Rationale: I discovered an osmo-msc use-after-free crash from an invalid
message, caused by this pattern:

void event_action()
{
       osmo_fsm_inst_dispatch(foo, FOO_EVENT, NULL);
       osmo_fsm_inst_dispatch(bar, BAR_EVENT, NULL);
}

Usually, FOO_EVENT takes successful action, and afterwards we also notify bar.
However, in this particular case, FOO_EVENT caused failure, and the immediate
error handling directly terminated and deallocated bar. In such a case,
dispatching BAR_EVENT causes a use-after-free; this constituted a DoS vector
just from sending messages that cause *any* failure during the first event
dispatch.

Instead, when this is enabled, we do not deallocate 'foo' until event_action()
has returned back to the main loop.

Test: duplicate fsm_dealloc_test.c using this, and print the number of items
deallocated in each test loop, to ensure the feature works. We also verify that
the deallocation safety works simply by fsm_dealloc_test.c not crashing.

We should probably follow up by refusing event dispatch and state transitions
for FSM instances that are terminating or already terminated:
see I0adc13a1a998e953b6c850efa2761350dd07e03a.

Change-Id: Ief4dba9ea587c9b4aea69993e965fbb20fb80e78
This commit is contained in:
Neels Hofmeyr 2019-10-04 20:37:17 +02:00
parent d4b4edd316
commit 988f6d72c5
4 changed files with 3511 additions and 29 deletions

View File

@ -122,6 +122,7 @@ struct osmo_fsm_inst {
void osmo_fsm_log_addr(bool log_addr);
void osmo_fsm_log_timeouts(bool log_timeouts);
void osmo_fsm_term_safely(bool term_safely);
void osmo_fsm_set_dealloc_ctx(void *ctx);
/*! Log using FSM instance's context, on explicit logging subsystem and level.
* \param fi An osmo_fsm_inst.

View File

@ -102,8 +102,20 @@ static __thread struct {
unsigned int depth;
/*! Talloc context to collect all deferred deallocations (FSM instances, and talloc objects if any). */
void *collect_ctx;
/*! See osmo_fsm_set_dealloc_ctx() */
void *fsm_dealloc_ctx;
} fsm_term_safely;
/*! Internal call to free an FSM instance, which redirects to the context set by osmo_fsm_set_dealloc_ctx() if any.
*/
static void fsm_free_or_steal(void *talloc_object)
{
if (fsm_term_safely.fsm_dealloc_ctx)
talloc_steal(fsm_term_safely.fsm_dealloc_ctx, talloc_object);
else
talloc_free(talloc_object);
}
/*! specify if FSM instance addresses should be logged or not
*
* By default, the FSM name includes the pointer address of the \ref
@ -139,11 +151,9 @@ void osmo_fsm_log_timeouts(bool log_timeouts)
/*! Enable safer way to deallocate cascades of terminating FSM instances.
*
* For legacy compatibility, this is disabled by default. In newer programs / releases, it is recommended to enable this
* feature during main() startup, since it greatly simplifies deallocating child, parent and other FSM instances without
* running into double-free or use-after-free scenarios. When enabled, this feature changes the order of logging, which
* may break legacy unit test expectations, and changes the order of deallocation to after the parent term event is
* dispatched.
* Note, using osmo_fsm_set_dealloc_ctx() is a more general solution to this same problem.
* Particularly, in a program using osmo_select_main_ctx(), the simplest solution to avoid most use-after-free problems
* from FSM instance deallocation is using osmo_fsm_set_dealloc_ctx(OTC_SELECT).
*
* When enabled, an FSM instance termination detects whether another FSM instance is already terminating, and instead of
* deallocating immediately, collects all terminating FSM instances in a talloc context, to be bulk deallocated once all
@ -155,6 +165,9 @@ void osmo_fsm_log_timeouts(bool log_timeouts)
*
* For illustration, see fsm_dealloc_test.c.
*
* When enabled, this feature changes the order of logging, which may break legacy unit test expectations, and changes
* the order of deallocation to after the parent term event is dispatched.
*
* \param[in] term_safely Pass true to switch to safer FSM instance termination behavior.
*/
void osmo_fsm_term_safely(bool term_safely)
@ -162,6 +175,31 @@ void osmo_fsm_term_safely(bool term_safely)
fsm_term_safely_enabled = term_safely;
}
/*! Instead of deallocating FSM instances, move them to the given talloc context.
*
* It is the caller's responsibility to clear this context to actually free the memory of terminated FSM instances.
* Make sure to not talloc_free(ctx) itself before setting a different osmo_fsm_set_dealloc_ctx(). To clear a ctx
* without the need to call osmo_fsm_set_dealloc_ctx() again, rather use talloc_free_children(ctx).
*
* For example, to defer deallocation to the next osmo_select_main_ctx() iteration, set this to OTC_SELECT.
*
* Deferring deallocation is the simplest solution to avoid most use-after-free problems from FSM instance deallocation.
* This is a simpler and more general solution than osmo_fsm_term_safely().
*
* To disable the feature again, pass NULL as ctx.
*
* Both osmo_fsm_term_safely() and osmo_fsm_set_dealloc_ctx() can be enabled at the same time, which will result in
* first collecting deallocated FSM instances in fsm_term_safely.collect_ctx, and finally reparenting that to the ctx
* passed here. However, in practice, it does not really make sense to enable both at the same time.
*
* \param ctx[in] Instead of talloc_free()int, talloc_steal() all future deallocated osmo_fsm_inst instances to this
* ctx. If NULL, go back to talloc_free() as usual.
*/
void osmo_fsm_set_dealloc_ctx(void *ctx)
{
fsm_term_safely.fsm_dealloc_ctx = ctx;
}
/*! talloc_free() the given object immediately, or once ongoing FSM terminations are done.
*
* If an FSM deallocation cascade is ongoing, talloc_steal() the given talloc_object into the talloc context that is
@ -185,7 +223,7 @@ void osmo_fsm_term_safely(bool term_safely)
static void osmo_fsm_defer_free(void *talloc_object)
{
if (!fsm_term_safely.depth) {
talloc_free(talloc_object);
fsm_free_or_steal(talloc_object);
return;
}
@ -412,7 +450,7 @@ struct osmo_fsm_inst *osmo_fsm_inst_alloc(struct osmo_fsm *fsm, void *ctx, void
osmo_timer_setup(&fi->timer, fsm_tmr_cb, fi);
if (osmo_fsm_inst_update_id(fi, id) < 0) {
talloc_free(fi);
fsm_free_or_steal(fi);
return NULL;
}
@ -529,11 +567,11 @@ void osmo_fsm_inst_free(struct osmo_fsm_inst *fi)
* deallocate separately to avoid use-after-free errors, put it in there and deallocate all at once. */
LOGPFSM(fi, "Deallocated, including all deferred deallocations\n");
osmo_fsm_defer_free(fi);
talloc_free(fsm_term_safely.collect_ctx);
fsm_free_or_steal(fsm_term_safely.collect_ctx);
fsm_term_safely.collect_ctx = NULL;
} else {
LOGPFSM(fi, "Deallocated\n");
talloc_free(fi);
fsm_free_or_steal(fi);
}
fsm_term_safely.root_fi = NULL;
}

View File

@ -355,7 +355,7 @@ void obj_term(struct obj *obj)
osmo_fsm_inst_term(obj->fi, OSMO_FSM_TERM_REGULAR, NULL);
}
void test_dealloc(enum objname trigger, bool by_destroy_event)
void test_dealloc(enum objname trigger, bool by_destroy_event, void *loop_ctx)
{
struct scene *s = scene_alloc();
const char *label = by_destroy_event ? "destroy-event" : "term";
@ -383,16 +383,63 @@ void test_dealloc(enum objname trigger, bool by_destroy_event)
LOGP(DLGLOBAL, LOGL_DEBUG, "--- %d objects remain. cleaning up\n", remain);
} else
LOGP(DLGLOBAL, LOGL_DEBUG, "--- all deallocated.\n");
if (loop_ctx) {
fprintf(stderr, "*** loop_ctx contains %zu blocks, deallocating.\n",
talloc_total_blocks(loop_ctx));
talloc_free_children(loop_ctx);
}
/* Silently free the remaining objects. */
scene_clean(s);
if (loop_ctx)
talloc_free_children(loop_ctx);
}
static void trigger_tests(void *loop_ctx)
{
size_t ctx_blocks;
size_t ctx_size;
enum objname trigger;
int by_destroy_event;
ctx_blocks = talloc_total_blocks(ctx);
ctx_size = talloc_total_size(ctx);
for (trigger = 0; trigger < scene_size; trigger++) {
for (by_destroy_event = 0; by_destroy_event < 2; by_destroy_event++) {
test_dealloc(trigger, (bool)by_destroy_event, loop_ctx);
if (ctx_blocks != talloc_total_blocks(ctx)
|| ctx_size != talloc_total_size(ctx)) {
talloc_report_full(ctx, stderr);
OSMO_ASSERT(false);
}
}
}
}
void test_osmo_fsm_term_safely()
{
fprintf(stderr, "\n\n%s()\n", __func__);
osmo_fsm_term_safely(true);
trigger_tests(NULL);
osmo_fsm_term_safely(false);
fprintf(stderr, "\n\n%s() done\n", __func__);
}
void test_osmo_fsm_set_dealloc_ctx()
{
fprintf(stderr, "\n\n%s()\n", __func__);
void *dealloc_ctx = talloc_named_const(ctx, 0, "fsm_dealloc");
osmo_fsm_set_dealloc_ctx(dealloc_ctx);
trigger_tests(dealloc_ctx);
osmo_fsm_set_dealloc_ctx(NULL);
fprintf(stderr, "\n\n%s() done\n", __func__);
}
int main(void)
{
enum objname trigger;
size_t ctx_blocks;
size_t ctx_size;
int by_destroy_event;
ctx = talloc_named_const(NULL, 0, "main");
osmo_init_logging2(ctx, NULL);
@ -405,22 +452,10 @@ int main(void)
log_set_category_filter(osmo_stderr_target, DLGLOBAL, 1, LOGL_DEBUG);
osmo_fsm_term_safely(true);
osmo_fsm_register(&test_fsm);
ctx_blocks = talloc_total_blocks(ctx);
ctx_size = talloc_total_size(ctx);
for (trigger = 0; trigger < scene_size; trigger++) {
for (by_destroy_event = 0; by_destroy_event < 2; by_destroy_event++) {
test_dealloc(trigger, (bool)by_destroy_event);
if (ctx_blocks != talloc_total_blocks(ctx)
|| ctx_size != talloc_total_size(ctx)) {
talloc_report_full(ctx, stderr);
OSMO_ASSERT(false);
}
}
}
test_osmo_fsm_term_safely();
test_osmo_fsm_set_dealloc_ctx();
talloc_free(ctx);
return 0;

File diff suppressed because it is too large Load Diff