sbi: cleanup NRF subscriptions before NF re-registration

When an NF loses heartbeat and enters re-registration, existing
NRF subscription states tied to the previous NF instance remain
both remotely (NRF) and locally (subscription_data pool).

In environments with repeated heartbeat loss or timing races
(e.g., docker-compose deployments), this leads to continuous
re-subscription loops and unbounded growth of
subscription_data entries, eventually exhausting the pool and
triggering assertion failures in ogs_sbi_subscription_data_add().

This patch introduces a pre-registration cleanup mechanism:

- Send DELETE requests for all subscriptions associated with
  the NF instance before re-registration.
- Perform asynchronous local cleanup in the unsubscribe
  response handler (avoiding use-after-free and double free).
- Add duplicate DELETE guard using subscription flags.
- Improve logging visibility for subscription cleanup flow.

This ensures that stale NRF subscription states are removed
and prevents subscription_data pool exhaustion during
re-registration loops.

Issues: #4207
This commit is contained in:
Sukchan Lee 2026-02-12 16:14:59 +09:00
parent 51f2655333
commit b3169c8ee9
4 changed files with 112 additions and 0 deletions

View file

@ -2805,6 +2805,92 @@ void ogs_sbi_subscription_data_remove_all_by_nf_instance_id(
}
}
/*
 * Ask the NRF to delete every subscription that belongs to the given
 * NF instance, ahead of NF re-registration.
 *
 * This prevents subscription accumulation during repeated
 * re-registration loops (e.g., heartbeat flapping).
 *
 * IMPORTANT:
 * Despite the "_and_remove_" in its name, this function does NOT free
 * the local subscription_data entries. It only transmits the DELETE
 * requests; local cleanup is left to the unsubscribe response handler,
 * once the NRF has answered. Removing an entry here while its request
 * is still in flight could lead to use-after-free, double free, or a
 * dangling transaction context.
 *
 * Entries are skipped (and left untouched) when they lack an id,
 * a req_nf_instance_id or a resource_uri, when they belong to another
 * NF instance, or when a DELETE has already been sent for them.
 *
 * nf_instance_id: NF instance whose subscriptions are to be deleted.
 *                 Must not be NULL (asserted).
 */
void ogs_sbi_subscription_data_delete_and_remove_all_by_nf_instance_id(
        const char *nf_instance_id)
{
    ogs_sbi_subscription_data_t *subscription_data = NULL;

    ogs_assert(nf_instance_id);

    ogs_list_for_each(
            &ogs_sbi_self()->subscription_data_list, subscription_data) {

        /* Without these fields the entry cannot be matched or addressed */
        if (!subscription_data->id) {
            ogs_error("Skip subscription delete: id is NULL");
            continue;
        }
        if (!subscription_data->req_nf_instance_id) {
            ogs_error("Skip subscription delete: req_nf_instance_id is NULL");
            continue;
        }
        if (!subscription_data->resource_uri) {
            ogs_error("Skip subscription delete: resource_uri is NULL");
            continue;
        }

        /* Only subscriptions requested by this NF instance are deleted */
        if (strcmp(subscription_data->req_nf_instance_id,
                    nf_instance_id) != 0) {
            ogs_error("Skip subscription delete: nf_instance_id mismatch "
                    "[target:%s, current:%s]",
                    subscription_data->req_nf_instance_id, nf_instance_id);
            continue;
        }

        /*
         * Prevent duplicate DELETE transmissions: the lifecycle flag is
         * set once a DELETE has been issued and stays set while the
         * response is pending.
         */
        if (subscription_data->flags & OGS_SBI_SUBSCRIPTION_DELETE_SENT) {
            ogs_debug("[%s] Skip subscription delete: DELETE already sent",
                    subscription_data->id);
            continue;
        }
        subscription_data->flags |= OGS_SBI_SUBSCRIPTION_DELETE_SENT;

        /*
         * Send DELETE to NRF to cleanup remote subscription state.
         *
         * Typical resource:
         *   /nnrf-nfm/v1/subscriptions/{subscriptionId}
         */
        ogs_info("[%s] Sending NRF subscription DELETE before "
                "NF re-registration", subscription_data->id);

        if (!ogs_nnrf_nfm_send_nf_status_unsubscribe(subscription_data)) {
            /*
             * Nothing was transmitted, so no response will ever arrive to
             * trigger the local cleanup. Clear the guard again; otherwise
             * this entry would be skipped as "already sent" on every later
             * pass and could never be deleted.
             */
            ogs_error("[%s] Failed to send NRF subscription DELETE",
                    subscription_data->id);
            subscription_data->flags &=
                ~(uint32_t)OGS_SBI_SUBSCRIPTION_DELETE_SENT;
            continue;
        }

        /*
         * NOTE:
         * Do NOT remove subscription_data here; see the function header.
         */
    }
}
void ogs_sbi_subscription_data_remove_all(void)
{
ogs_sbi_subscription_data_t *subscription_data = NULL;

View file

@ -335,6 +335,9 @@ typedef struct ogs_sbi_subscription_data_s {
ogs_timer_t *t_validity; /* check validation */
ogs_timer_t *t_patch; /* for sending PATCH */
#define OGS_SBI_SUBSCRIPTION_DELETE_SENT (1 << 0)
uint32_t flags; /* Subscription lifecycle flags */
char *id; /* SubscriptionId */
char *req_nf_instance_id; /* reqNfInstanceId */
OpenAPI_nf_type_e req_nf_type; /* reqNfType */
@ -640,6 +643,8 @@ void ogs_sbi_subscription_data_remove(
ogs_sbi_subscription_data_t *subscription_data);
void ogs_sbi_subscription_data_remove_all_by_nf_instance_id(
char *nf_instance_id);
/*
 * Send an NRF subscription DELETE for each subscription whose
 * req_nf_instance_id equals nf_instance_id (must not be NULL).
 * The local subscription_data entries are not freed by this call;
 * their removal is deferred to the unsubscribe response handling.
 */
void ogs_sbi_subscription_data_delete_and_remove_all_by_nf_instance_id(
const char *nf_instance_id);
void ogs_sbi_subscription_data_remove_all(void);
ogs_sbi_subscription_data_t *ogs_sbi_subscription_data_find(char *id);

View file

@ -432,6 +432,20 @@ void ogs_sbi_nf_state_registered(ogs_fsm_t *s, ogs_event_t *e)
NF_INSTANCE_ID(ogs_sbi_self()->nf_instance),
OpenAPI_nf_type_ToString(
NF_INSTANCE_TYPE(ogs_sbi_self()->nf_instance)));
/*
* In case of NF re-registration due to heartbeat loss, ask the NRF to
* delete the subscriptions tied to the current NF instance id. The local
* subscription_data entries are not freed by this call; their removal is
* deferred until the unsubscribe response is handled.
*
* This prevents unbounded growth of subscription_data entries when
* re-registration and re-subscription loops happen (e.g., docker-compose
* timing/race), which could otherwise exhaust subscription_data_pool.
*/
if (ogs_sbi_self()->nf_instance && ogs_sbi_self()->nf_instance->id)
ogs_sbi_subscription_data_delete_and_remove_all_by_nf_instance_id(
ogs_sbi_self()->nf_instance->id);
OGS_FSM_TRAN(s, &ogs_sbi_nf_state_will_register);
break;