mirror of
https://github.com/Lizonghang/prima.cpp.git
synced 2025-09-10 17:44:33 +00:00
llama_profile_device: add arg n_predict
This commit is contained in:
parent
a46d56cc60
commit
cd823546dd
3 changed files with 4 additions and 3 deletions
|
@@ -896,7 +896,7 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) {
|
||||||
|
|
||||||
device_info dev_info;
|
device_info dev_info;
|
||||||
dev_info.rank = params.rank;
|
dev_info.rank = params.rank;
|
||||||
llama_profile_device(&dev_info, model, ml, params.cpuparams.n_threads);
|
llama_profile_device(&dev_info, model, ml, params.n_predict, params.cpuparams.n_threads);
|
||||||
|
|
||||||
// create llama context
|
// create llama context
|
||||||
struct llama_context_params cparams = llama_context_params_from_gpt_params(params);
|
struct llama_context_params cparams = llama_context_params_from_gpt_params(params);
|
||||||
|
|
|
@@ -415,6 +415,7 @@ extern "C" {
|
||||||
struct device_info * dev_info,
|
struct device_info * dev_info,
|
||||||
struct llama_model * model,
|
struct llama_model * model,
|
||||||
struct llama_model_loader * ml,
|
struct llama_model_loader * ml,
|
||||||
|
int n_predict,
|
||||||
int n_threads);
|
int n_threads);
|
||||||
|
|
||||||
LLAMA_API ggml_backend_buffer_type_t llama_dev_buffer_type(struct llama_model * model, int device);
|
LLAMA_API ggml_backend_buffer_type_t llama_dev_buffer_type(struct llama_model * model, int device);
|
||||||
|
|
|
@@ -3570,7 +3570,7 @@ static bool is_dtype_exist(struct model_params * n_params, enum ggml_type dtype)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void llama_profile_device(device_info * dev_info, struct llama_model * model, llama_model_loader * ml, int n_threads) {
|
void llama_profile_device(device_info * dev_info, struct llama_model * model, llama_model_loader * ml, int n_predict, int n_threads) {
|
||||||
dev_info->device_name = device_name();
|
dev_info->device_name = device_name();
|
||||||
dev_info->cpu_props.cores = device_cpu_cores();
|
dev_info->cpu_props.cores = device_cpu_cores();
|
||||||
|
|
||||||
|
@@ -3584,7 +3584,7 @@ void llama_profile_device(device_info * dev_info, struct llama_model * model, ll
|
||||||
struct model_params * n_params = &dev_info->model_params;
|
struct model_params * n_params = &dev_info->model_params;
|
||||||
if (dev_info->rank == 0) {
|
if (dev_info->rank == 0) {
|
||||||
enum ggml_type inp_embd_dtype = GGML_TYPE_F32;
|
enum ggml_type inp_embd_dtype = GGML_TYPE_F32;
|
||||||
llama_model_n_flops(model, ml, n_flops, n_params, 1, 32, &inp_embd_dtype);
|
llama_model_n_flops(model, ml, n_flops, n_params, 1, n_predict, &inp_embd_dtype);
|
||||||
n_flops->inp_embd_ms = device_inp_embd_delay(model, inp_embd_dtype, 1, n_threads);
|
n_flops->inp_embd_ms = device_inp_embd_delay(model, inp_embd_dtype, 1, n_threads);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue