diff --git a/examples/perplexity/perplexity.cpp b/examples/perplexity/perplexity.cpp index 0120246d..7d188b56 100644 --- a/examples/perplexity/perplexity.cpp +++ b/examples/perplexity/perplexity.cpp @@ -691,23 +691,24 @@ static results_perplexity perplexity(llama_context * ctx, const gpt_params & par } } + if (my_rank == 0) { + // Required batch info: Operation scale, KV cache location, Logits calculation location + meta.n_ctx = n_ctx; + meta.n_tokens = batch.n_tokens; + meta.pos = batch.pos; + meta.logits = batch.logits; + meta.all_pos_0 = batch.all_pos_0; + meta.all_pos_1 = batch.all_pos_1; + meta.n_outputs = n_outputs; + meta.chunk_start_pos = start; + } + // other ranks need to know batch info { if (n_world > 1) { meta.n_ctx = n_ctx; if (my_rank == 0) { - // Required batch info: Operation scale, KV cache location, Logits calculation location - meta.n_tokens = batch.n_tokens; - meta.pos = batch.pos; - meta.logits = batch.logits; - - meta.all_pos_0 = batch.all_pos_0; - meta.all_pos_1 = batch.all_pos_1; - - meta.n_outputs = n_outputs; - meta.chunk_start_pos = start; - llama_send_meta(ctx, &meta); } else { if (llama_recv_meta(ctx, &meta) == -1) { diff --git a/src/llama.cpp b/src/llama.cpp index 9428e3bd..cc4e8ab2 100644 --- a/src/llama.cpp +++ b/src/llama.cpp @@ -18316,8 +18316,8 @@ static int llama_decode_internal( /* logits_all */ n_outputs == n_tokens_all); // reserve output buffer - if (my_rank == 0 && llama_output_reserve(lctx, n_outputs) < n_outputs) { - LLAMA_LOG_ERROR("%s: could not reserve space for batch with %u outputs\n", __func__, n_outputs); + if (my_rank == 0 && llama_output_reserve(lctx, n_outputs) < (size_t)n_outputs) { + LLAMA_LOG_ERROR("%s: could not reserve space for batch with %u outputs\n", __func__, n_outputs); return -2; };