speculative: free sockets and send stop signal when inference ends

This commit is contained in:
Li, Zonghang 2025-06-13 13:30:29 +04:00
parent dc875bbef9
commit c9cae626cf

View file

@@ -628,6 +628,9 @@ int main(int argc, char ** argv) {
LOG_INF("target:\n\n");
gpt_perf_print(ctx_tgt, smpl);
char * stop_signal = nullptr;
llama_free_sockets(ctx_tgt, &stop_signal);
gpt_sampler_free(smpl);
for (int s = 0; s < n_seq_dft; ++s) {
gpt_sampler_free(drafts[s].smpl);