amf,mme: defer admin purge to event queue + add MME event-id alignment guard

Two follow-ups to the admin-endpoints series, addressing concerns that
surfaced in pre-submission review:

1. Defer the ?purge=true cleanup off the SBI handler stack frame.

   The MME and AMF admin handlers previously called mme_ue_remove() /
   amf_ue_remove() inline after sending the 204 response. Open5GS
   schedules cross-event references via pool ids rather than raw
   pointers, so this is not a textbook use-after-free, but it does
   break the convention that every other state-removing trigger
   in these NFs (T3412/T3512 expiry, S6a/UDM Cancel, ordinary
   detach/deregistration) follows: state teardown happens in the
   main event loop, not from inside an inbound message handler.

   This commit adds two new event types --
   MME_EVENT_ADMIN_UE_PURGE and AMF_EVENT_ADMIN_UE_PURGE -- and posts
   them onto ogs_app()->queue right after the 204 has been put on the
   wire. The respective state_operational() handler re-resolves the
   pool id via mme_ue_find_by_id() / amf_ue_find_by_id() and silently
   no-ops if the context disappeared in the meantime, then calls the
   normal _remove() path from a clean stack frame.

   This isolates the heavy cleanup (timer cancellation, child-list
   teardown, pool free) from any state the SBI handler still holds
   on its stack, and matches the dispatch convention reviewers
   expect from cleanup paths in the project.

2. Compile-time alignment guard for the MME event header.

   mme-init.c reads ((ogs_event_t *)e)->id before the FSM dispatch
   to divert OGS_EVENT_SBI_SERVER events to the synchronous admin
   handler. That cast is well-defined only because both ogs_event_t
   and mme_event_t place int id at offset 0 -- a convention
   maintained today by manual care. Adding
   OGS_STATIC_ASSERT(offsetof(mme_event_t, id) ==
   offsetof(ogs_event_t, id)) turns the convention into a build-time
   check so any future reordering of mme_event_t breaks the build
   instead of silently corrupting the diversion logic.

   AMF does not need an equivalent assert because amf_event_t embeds
   ogs_event_t as its first member (`ogs_event_t h;`); the cast is
   trivially well-defined there.

Signed-off-by: MrBrownieCGN <daniel.brune.so@outlook.com>
This commit is contained in:
MrBrownieCGN 2026-05-02 20:39:43 +02:00
parent 011284b3f6
commit df6a77bc0e
8 changed files with 159 additions and 2 deletions

View file

@ -19,8 +19,34 @@
#include "admin-handler.h"
#include "amf-sm.h"
#include "event.h"
#include "nas-path.h"
/*
* Post a deferred-purge event so the heavy amf_ue_remove() cleanup
* runs from the main loop on a clean stack frame never from inside
* the SBI handler. The pool id is opaque to ogs_app()->queue;
* amf_state_operational() re-resolves it via amf_ue_find_by_id() and
* silently no-ops if the context has already been removed by another
* code path between the POST and dispatch.
*/
void amf_admin_post_purge(ogs_pool_id_t amf_ue_id)
{
amf_event_t *e;
int rv;
e = amf_event_new(AMF_EVENT_ADMIN_UE_PURGE);
ogs_assert(e);
e->amf_ue_id = amf_ue_id;
rv = ogs_queue_push(ogs_app()->queue, e);
if (rv != OGS_OK) {
ogs_error("ogs_queue_push() failed [%d] — admin purge dropped "
"for amf_ue_id=%d", rv, (int)amf_ue_id);
ogs_event_free(e);
}
}
void amf_admin_handle_delete_ue_context(
ogs_sbi_stream_t *stream, ogs_sbi_message_t *message,
ogs_sbi_request_t *request)
@ -104,7 +130,23 @@ void amf_admin_handle_delete_ue_context(
ogs_assert(true ==
ogs_sbi_server_send_response(stream, response));
amf_ue_remove(amf_ue);
/*
* Defer amf_ue_remove() to the next main-loop tick via an
* AMF_EVENT_ADMIN_UE_PURGE event. Calling amf_ue_remove()
* synchronously from the SBI handler would tear down the
* context from inside the FSM dispatch frame that brought
* us here; running the heavy cleanup from a clean stack
* frame (after the 204 has already been put on the wire)
* eliminates re-entrancy concerns and matches the
* teardown convention for every other state-removing
* trigger in the AMF (timer expiry, UDM Cancel, etc.).
*
* The event carries the amf_ue's pool id (not a raw
* pointer); the FSM handler re-resolves it via
* amf_ue_find_by_id() and bails out gracefully if the
* context disappeared in the meantime.
*/
amf_admin_post_purge((ogs_pool_id_t)amf_ue->id);
return;
}
}

View file

@ -45,6 +45,13 @@ void amf_admin_handle_delete_ue_context(
ogs_sbi_stream_t *stream, ogs_sbi_message_t *message,
ogs_sbi_request_t *request);
/*
* Internal helper: post a deferred amf_ue_remove() onto the main app
* event queue (AMF_EVENT_ADMIN_UE_PURGE). Used by the ?purge=true
* path to keep state teardown out of the SBI handler's stack frame.
*/
void amf_admin_post_purge(ogs_pool_id_t amf_ue_id);
#ifdef __cplusplus
}
#endif

View file

@ -1077,6 +1077,28 @@ void amf_state_operational(ogs_fsm_t *s, amf_event_t *e)
ogs_free(addr);
break;
case AMF_EVENT_ADMIN_UE_PURGE:
/*
* Deferred purge from DELETE /admin/v1/ue-contexts/{id}?purge=true.
* Re-resolve the pool id from a clean stack frame so state
* teardown never runs from inside the SBI handler. Silently
* no-op if the context disappeared in the meantime another
* code path (UDM Cancel, T3512 expiry, ordinary deregistration)
* may have raced us to the cleanup.
*/
amf_ue = amf_ue_find_by_id(e->amf_ue_id);
if (!amf_ue) {
ogs_info("Admin purge: amf_ue_id=%d already gone — no-op",
(int)e->amf_ue_id);
break;
}
ogs_warn("[%s] Admin purge: removing local context for "
"amf_ue_id=%d (deferred from SBI handler)",
amf_ue->supi ? amf_ue->supi : "unknown",
(int)e->amf_ue_id);
amf_ue_remove(amf_ue);
break;
case AMF_EVENT_NGAP_MESSAGE:
sock = e->ngap.sock;
ogs_assert(sock);

View file

@ -50,6 +50,8 @@ typedef enum {
AMF_EVENT_5GSM_MESSAGE,
AMF_EVENT_5GSM_TIMER,
AMF_EVENT_ADMIN_UE_PURGE,
MAX_NUM_OF_AMF_EVENT,
} amf_event_e;

View file

@ -36,6 +36,31 @@ static void handle_admin_route(
ogs_sbi_stream_t *stream, ogs_sbi_message_t *message,
ogs_sbi_request_t *request);
/*
* Post a deferred-purge event so the heavy mme_ue_remove() cleanup
* runs from the main loop on a clean stack frame never from inside
* the SBI handler. The pool id is opaque to ogs_app()->queue;
* mme_state_operational() re-resolves it via mme_ue_find_by_id() and
* silently no-ops if the context has already been removed by another
* code path between the POST and dispatch.
*/
void mme_admin_post_purge(ogs_pool_id_t mme_ue_id)
{
mme_event_t *e;
int rv;
e = mme_event_new(MME_EVENT_ADMIN_UE_PURGE);
ogs_assert(e);
e->mme_ue_id = mme_ue_id;
rv = ogs_queue_push(ogs_app()->queue, e);
if (rv != OGS_OK) {
ogs_error("ogs_queue_push() failed [%d] — admin purge dropped "
"for mme_ue_id=%d", rv, (int)mme_ue_id);
mme_event_free(e);
}
}
int mme_admin_server_open(void)
{
if (!mme_self()->admin_config.enabled) {
@ -232,7 +257,25 @@ void mme_admin_handle_delete_ue_context(
ogs_assert(true ==
ogs_sbi_server_send_response(stream, response));
mme_ue_remove(mme_ue);
/*
* Defer mme_ue_remove() to the next main-loop tick via a
* MME_EVENT_ADMIN_UE_PURGE event. Calling mme_ue_remove()
* synchronously from the SBI handler would tear down a
* context that the same call frame still holds a pointer
* to, and Open5GS dispatches admin SBI events from
* mme_main() before any other queued events for this UE
* have been drained running the heavy cleanup from a
* clean stack frame, after the 204 has already been put on
* the wire, eliminates re-entrancy concerns and matches
* the pattern used by every other state-teardown trigger
* in the MME (timer expiry, S6a peer Cancel, etc.).
*
* The event carries the mme_ue's pool id (not a raw
* pointer); the FSM handler re-resolves it via
* mme_ue_find_by_id() and bails out gracefully if the
* context disappeared in the meantime.
*/
mme_admin_post_purge((ogs_pool_id_t)mme_ue->id);
return;
}
}

View file

@ -61,6 +61,13 @@ void mme_admin_handle_delete_ue_context(
ogs_sbi_stream_t *stream, ogs_sbi_message_t *message,
ogs_sbi_request_t *request);
/*
* Internal helper: post a deferred mme_ue_remove() onto the main app
* event queue (MME_EVENT_ADMIN_UE_PURGE). Used by the ?purge=true
* path to keep state teardown out of the SBI handler's stack frame.
*/
void mme_admin_post_purge(ogs_pool_id_t mme_ue_id);
#ifdef __cplusplus
}
#endif

View file

@ -53,6 +53,8 @@ typedef enum {
MME_EVENT_GN_MESSAGE,
MME_EVENT_GN_TIMER,
MME_EVENT_ADMIN_UE_PURGE,
MAX_NUM_OF_MME_EVENT,
} mme_event_e;
@ -106,6 +108,16 @@ typedef struct mme_event_s {
OGS_STATIC_ASSERT(OGS_EVENT_SIZE >= sizeof(mme_event_t));
/*
* The admin SBI handler in mme-init.c reads the event id via
* ((ogs_event_t *)e)->id before deciding whether to dispatch through
* the FSM. That cast is well-defined only as long as the int id field
* lives at offset 0 of both layouts. Open5GS keeps ogs_event_t and
* mme_event_t in sync by convention; this static assert turns that
* convention into a compile-time guarantee.
*/
OGS_STATIC_ASSERT(offsetof(mme_event_t, id) == offsetof(ogs_event_t, id));
void mme_event_term(void);
mme_event_t *mme_event_new(mme_event_e id);

View file

@ -520,6 +520,28 @@ void mme_state_operational(ogs_fsm_t *s, mme_event_t *e)
ogs_fsm_dispatch(&bearer->sm, e);
break;
case MME_EVENT_ADMIN_UE_PURGE:
/*
* Deferred purge from DELETE /admin/v1/ue-contexts/{id}?purge=true.
* Re-resolve the pool id from a clean stack frame so state
* teardown never runs from inside the SBI handler. Silently
* no-op if the context disappeared in the meantime another
* code path (ordinary detach, T3412 expiry, peer Cancel)
* may have raced us to the cleanup.
*/
mme_ue = mme_ue_find_by_id(e->mme_ue_id);
if (!mme_ue) {
ogs_info("Admin purge: mme_ue_id=%d already gone — no-op",
(int)e->mme_ue_id);
break;
}
ogs_warn("[%s] Admin purge: removing local context for "
"mme_ue_id=%d (deferred from SBI handler)",
mme_ue->imsi_bcd[0] ? mme_ue->imsi_bcd : "unknown",
(int)e->mme_ue_id);
mme_ue_remove(mme_ue);
break;
case MME_EVENT_S6A_MESSAGE:
s6a_message = e->s6a_message;
ogs_assert(s6a_message);