Mirror of https://github.com/Lizonghang/prima.cpp.git, synced 2025-09-11 12:14:34 +00:00
llama_model_n_flops: remove ctxs

commit a46d56cc60
parent f1c1d1b929
1 changed file with 122 additions and 127 deletions
@@ -3571,23 +3571,23 @@ static bool is_dtype_exist(struct model_params * n_params, enum ggml_type dtype)
 }
 
 void llama_profile_device(device_info * dev_info, struct llama_model * model, llama_model_loader * ml, int n_threads) {
-    struct model_flops * n_flops = &dev_info->model_flops;
-    struct model_params * n_params = &dev_info->model_params;
-
-    if (dev_info->rank == 0) {
-        enum ggml_type inp_embd_dtype = GGML_TYPE_F32;
-        llama_model_n_flops(model, ml, n_flops, n_params, 1, 32, &inp_embd_dtype);
-        n_flops->inp_embd_ms = device_inp_embd_delay(model, inp_embd_dtype, 1, n_threads);
-    }
-
     dev_info->device_name = device_name();
     dev_info->cpu_props.cores = device_cpu_cores();
 
     dev_info->memory.total_physical = round(device_physical_memory(false) / (double)(1 << 30) * 100) / 100;
     dev_info->memory.available_physical = round(device_physical_memory(true) / (double)(1 << 30) * 100) / 100;
     dev_info->memory.total_swap = round(device_swap_memory(false) / (double)(1 << 30) * 100) / 100;
     dev_info->memory.available_swap = round(device_swap_memory(true) / (double)(1 << 30) * 100) / 100;
     dev_info->memory.cpu_read_ram_bw = device_memory_bw(n_threads);
 
+    struct model_flops * n_flops = &dev_info->model_flops;
+    struct model_params * n_params = &dev_info->model_params;
+
+    if (dev_info->rank == 0) {
+        enum ggml_type inp_embd_dtype = GGML_TYPE_F32;
+        llama_model_n_flops(model, ml, n_flops, n_params, 1, 32, &inp_embd_dtype);
+        n_flops->inp_embd_ms = device_inp_embd_delay(model, inp_embd_dtype, 1, n_threads);
+    }
+
     device_disk_seq_bw(&dev_info->disk.read_seq_bw, &dev_info->disk.write_seq_bw, n_threads);
     device_disk_rnd_bw(&dev_info->disk.read_rnd_bw, &dev_info->disk.write_rnd_bw, n_threads);
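The memory fields above convert bytes to GiB and keep two decimal places via the expression round(x / (double)(1 << 30) * 100) / 100. A small standalone illustration of just that arithmetic (not project code; the helper name is made up):

#include <cmath>
#include <cstdio>

// bytes -> GiB, rounded to two decimal places, mirroring the expression
// used for the dev_info->memory fields in the hunk above.
static double to_gib_2dp(double bytes) {
    return std::round(bytes / (double)(1 << 30) * 100) / 100;
}

int main() {
    std::printf("%.2f\n", to_gib_2dp(17179869184.0)); // exactly 16 GiB -> 16.00
    std::printf("%.2f\n", to_gib_2dp(8000000000.0));  // about 7.45 GiB -> 7.45
    return 0;
}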
@@ -20966,6 +20966,7 @@ void llama_model_n_flops(
         buft_layer_count[model->buft_layer[i].buft]++;
         buft_layer_count[model->buft_layer[i].buft_matrix]++;
     }
+    GGML_ASSERT(buft_layer_count.size() == 1);
 
     // create one context per buffer type
     size_t ctx_size = ggml_tensor_overhead() * (ml->n_tensors + 1);
@@ -20974,19 +20975,18 @@ void llama_model_n_flops(
     ctx_size += ggml_tensor_overhead() * n_layer * 3;
 
     std::map<ggml_backend_buffer_type_t, ggml_context *> ctx_map;
-    std::vector<struct ggml_context *> ctxs;
+    struct ggml_context * ctx = nullptr;
     for (auto & it : buft_layer_count) {
         struct ggml_init_params params = {
             /*.mem_size =*/ ctx_size,
             /*.mem_buffer =*/ NULL,
             /*.no_alloc =*/ true,
         };
-        ggml_context * ctx = ggml_init(params);
+        ctx = ggml_init(params);
         if (!ctx) {
             throw std::runtime_error(format("failed to create context\n"));
         }
         ctx_map[it.first] = ctx;
-        ctxs.push_back(ctx);
     }
 
     const uint32_t n_layer_window[32] = {(uint32_t)n_layer};
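Together with the GGML_ASSERT added in the previous hunk, the loop above now yields exactly one ggml_context instead of filling a ctxs vector. A minimal, self-contained sketch of that single-context pattern (it assumes only ggml.h and uses invented tensor names, nothing from prima.cpp itself):

#include <cstdio>
#include "ggml.h"

int main() {
    const int n_tensors = 4;

    // no_alloc keeps tensor data unallocated, so the context only has to be
    // large enough for per-tensor metadata, as in llama_model_n_flops.
    struct ggml_init_params params = {
        /*.mem_size =*/ ggml_tensor_overhead() * (n_tensors + 1),
        /*.mem_buffer =*/ NULL,
        /*.no_alloc =*/ true,
    };
    struct ggml_context * ctx = ggml_init(params);
    if (!ctx) {
        return 1;
    }

    // Register a few dummy tensors (placeholder names, not real model tensors).
    for (int i = 0; i < n_tensors; i++) {
        char name[64];
        std::snprintf(name, sizeof(name), "blk.%d.dummy.weight", i);
        ggml_set_name(ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 16, 16), name);
    }

    // Walk every tensor in the single context, as the next hunk does with
    // ggml_get_first_tensor / ggml_get_next_tensor.
    for (struct ggml_tensor * cur = ggml_get_first_tensor(ctx); cur != NULL;
         cur = ggml_get_next_tensor(ctx, cur)) {
        std::printf("%s: %lld elements\n", ggml_get_name(cur), (long long) ggml_nelements(cur));
    }

    // A single ggml_free replaces the old loop over ctxs.
    ggml_free(ctx);
    return 0;
}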
@@ -21035,7 +21035,6 @@ void llama_model_n_flops(
         {"blk.0.ffn_up_exps.weight", 24},
     };
 
-    for (ggml_context * ctx : ctxs) {
         for (auto * cur = ggml_get_first_tensor(ctx); cur != NULL; cur = ggml_get_next_tensor(ctx, cur)) {
             auto it = tensor_name_map.find(ggml_get_name(cur));
             if (it != tensor_name_map.end()) {
@@ -21149,16 +21148,12 @@ void llama_model_n_flops(
                 }
             }
         }
-    }
 
     // reset ml, model, and clear contexts
     ml->n_created = 0;
     ml->size_data = 0;
     llama_model_reset_tensors(model);
-    for (ggml_context * ctx : ctxs) {
         ggml_free(ctx);
-    }
-    ctxs.clear();
     ctx_map.clear();
 }
 
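With a single context, the cleanup path above reduces to one ggml_free plus ctx_map.clear(). If exception safety around the intervening work mattered, a scope guard would be one way to get the same effect automatically; a hedged sketch of that alternative (not what this commit does, and make_meta_ctx is an invented helper):

#include <memory>
#include "ggml.h"

// Bind ggml_free to the context's lifetime with a function-pointer deleter.
using ggml_ctx_ptr = std::unique_ptr<ggml_context, decltype(&ggml_free)>;

static ggml_ctx_ptr make_meta_ctx(size_t n_tensors) {
    struct ggml_init_params params = {
        /*.mem_size =*/ ggml_tensor_overhead() * (n_tensors + 1),
        /*.mem_buffer =*/ NULL,
        /*.no_alloc =*/ true,
    };
    return ggml_ctx_ptr(ggml_init(params), &ggml_free);
}

The raw pointer is still available for calls such as ggml_get_first_tensor(ctx.get()), and the context is released when the guard goes out of scope, including on the throw path.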