speculative: free sockets and send stop signal when inference ends

This commit is contained in:
Li, Zonghang 2025-06-13 11:25:42 +04:00
parent dc875bbef9
commit 2687ef3126

View file

@@ -628,6 +628,8 @@ int main(int argc, char ** argv) {
LOG_INF("target:\n\n");
gpt_perf_print(ctx_tgt, smpl);
llama_free_sockets(ctx_tgt, nullptr);
gpt_sampler_free(smpl);
for (int s = 0; s < n_seq_dft; ++s) {
gpt_sampler_free(drafts[s].smpl);