mirror of
https://github.com/Lizonghang/prima.cpp.git
synced 2025-09-08 15:49:03 +00:00
remove device_flops from profiler api
This commit is contained in:
parent
477ecf2084
commit
80f6b72e71
2 changed files with 30 additions and 31 deletions
|
@ -81,31 +81,7 @@ uint32_t device_cpu_cores() {
|
||||||
return core_count;
|
return core_count;
|
||||||
}
|
}
|
||||||
|
|
||||||
float device_cpu_flops(struct llama_model * model, enum ggml_type dtype, int n_threads) {
|
static float device_flops(struct llama_model * model, enum ggml_type dtype, profiler_backend_type btype, int n_threads) {
|
||||||
return device_flops(model, dtype, PROFILER_BACKEND_TYPE_CPU, n_threads);
|
|
||||||
}
|
|
||||||
|
|
||||||
float device_metal_flops(struct llama_model * model, enum ggml_type dtype) {
|
|
||||||
#ifdef GGML_USE_METAL
|
|
||||||
return device_flops(model, dtype, PROFILER_BACKEND_TYPE_METAL, 4);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
(void)model;
|
|
||||||
(void)dtype;
|
|
||||||
return 0.0f;
|
|
||||||
}
|
|
||||||
|
|
||||||
float device_cuda_flops(struct llama_model * model, enum ggml_type dtype) {
|
|
||||||
#ifdef GGML_USE_CUDA
|
|
||||||
return device_flops(model, dtype, PROFILER_BACKEND_TYPE_CUDA, 4);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
(void)model;
|
|
||||||
(void)dtype;
|
|
||||||
return 0.0f;
|
|
||||||
}
|
|
||||||
|
|
||||||
float device_flops(struct llama_model * model, enum ggml_type dtype, profiler_backend_type btype, int n_threads) {
|
|
||||||
const int n_embd = llama_n_embd(model);
|
const int n_embd = llama_n_embd(model);
|
||||||
const int n_ff_hidden = llama_n_ff_hidden(model);
|
const int n_ff_hidden = llama_n_ff_hidden(model);
|
||||||
const int rows_A = n_embd, cols_A = n_ff_hidden;
|
const int rows_A = n_embd, cols_A = n_ff_hidden;
|
||||||
|
@ -197,6 +173,30 @@ float device_flops(struct llama_model * model, enum ggml_type dtype, profiler_ba
|
||||||
return (float)flops;
|
return (float)flops;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
float device_cpu_flops(struct llama_model * model, enum ggml_type dtype, int n_threads) {
|
||||||
|
return device_flops(model, dtype, PROFILER_BACKEND_TYPE_CPU, n_threads);
|
||||||
|
}
|
||||||
|
|
||||||
|
float device_metal_flops(struct llama_model * model, enum ggml_type dtype) {
|
||||||
|
#ifdef GGML_USE_METAL
|
||||||
|
return device_flops(model, dtype, PROFILER_BACKEND_TYPE_METAL, 4);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
(void)model;
|
||||||
|
(void)dtype;
|
||||||
|
return 0.0f;
|
||||||
|
}
|
||||||
|
|
||||||
|
float device_cuda_flops(struct llama_model * model, enum ggml_type dtype) {
|
||||||
|
#ifdef GGML_USE_CUDA
|
||||||
|
return device_flops(model, dtype, PROFILER_BACKEND_TYPE_CUDA, 4);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
(void)model;
|
||||||
|
(void)dtype;
|
||||||
|
return 0.0f;
|
||||||
|
}
|
||||||
|
|
||||||
uint64_t device_physical_memory(bool available) {
|
uint64_t device_physical_memory(bool available) {
|
||||||
uint64_t memory = 0;
|
uint64_t memory = 0;
|
||||||
|
|
||||||
|
@ -726,7 +726,7 @@ void deserialize(const char * buffer, struct device_info * dev_info) {
|
||||||
memcpy(&device_name_len, ptr, sizeof(size_t));
|
memcpy(&device_name_len, ptr, sizeof(size_t));
|
||||||
ptr += sizeof(size_t);
|
ptr += sizeof(size_t);
|
||||||
dev_info->device_name = (char *)malloc(device_name_len);
|
dev_info->device_name = (char *)malloc(device_name_len);
|
||||||
memcpy((void *)dev_info->device_name, ptr, device_name_len);
|
memcpy(const_cast<void*>(static_cast<const void*>(dev_info->device_name)), ptr, device_name_len);
|
||||||
ptr += device_name_len;
|
ptr += device_name_len;
|
||||||
|
|
||||||
// cpu_props.name
|
// cpu_props.name
|
||||||
|
@ -734,7 +734,7 @@ void deserialize(const char * buffer, struct device_info * dev_info) {
|
||||||
memcpy(&cpu_name_len, ptr, sizeof(size_t));
|
memcpy(&cpu_name_len, ptr, sizeof(size_t));
|
||||||
ptr += sizeof(size_t);
|
ptr += sizeof(size_t);
|
||||||
dev_info->cpu_props.name = (char *)malloc(cpu_name_len);
|
dev_info->cpu_props.name = (char *)malloc(cpu_name_len);
|
||||||
memcpy((void *)dev_info->cpu_props.name, ptr, cpu_name_len);
|
memcpy(const_cast<void*>(static_cast<const void*>(dev_info->cpu_props.name)), ptr, cpu_name_len);
|
||||||
ptr += cpu_name_len;
|
ptr += cpu_name_len;
|
||||||
|
|
||||||
// cpu_props.description
|
// cpu_props.description
|
||||||
|
@ -742,7 +742,7 @@ void deserialize(const char * buffer, struct device_info * dev_info) {
|
||||||
memcpy(&cpu_description_len, ptr, sizeof(size_t));
|
memcpy(&cpu_description_len, ptr, sizeof(size_t));
|
||||||
ptr += sizeof(size_t);
|
ptr += sizeof(size_t);
|
||||||
dev_info->cpu_props.description = (char *)malloc(cpu_description_len);
|
dev_info->cpu_props.description = (char *)malloc(cpu_description_len);
|
||||||
memcpy((void *)dev_info->cpu_props.description, ptr, cpu_description_len);
|
memcpy(const_cast<void*>(static_cast<const void*>(dev_info->cpu_props.description)), ptr, cpu_description_len);
|
||||||
ptr += cpu_description_len;
|
ptr += cpu_description_len;
|
||||||
|
|
||||||
// gpu_props.name
|
// gpu_props.name
|
||||||
|
@ -750,7 +750,7 @@ void deserialize(const char * buffer, struct device_info * dev_info) {
|
||||||
memcpy(&gpu_name_len, ptr, sizeof(size_t));
|
memcpy(&gpu_name_len, ptr, sizeof(size_t));
|
||||||
ptr += sizeof(size_t);
|
ptr += sizeof(size_t);
|
||||||
dev_info->gpu_props.name = (char *)malloc(gpu_name_len);
|
dev_info->gpu_props.name = (char *)malloc(gpu_name_len);
|
||||||
memcpy((void *)dev_info->gpu_props.name, ptr, gpu_name_len);
|
memcpy(const_cast<void*>(static_cast<const void*>(dev_info->gpu_props.name)), ptr, gpu_name_len);
|
||||||
ptr += gpu_name_len;
|
ptr += gpu_name_len;
|
||||||
|
|
||||||
// gpu_props.description
|
// gpu_props.description
|
||||||
|
@ -758,7 +758,7 @@ void deserialize(const char * buffer, struct device_info * dev_info) {
|
||||||
memcpy(&gpu_description_len, ptr, sizeof(size_t));
|
memcpy(&gpu_description_len, ptr, sizeof(size_t));
|
||||||
ptr += sizeof(size_t);
|
ptr += sizeof(size_t);
|
||||||
dev_info->gpu_props.description = (char *)malloc(gpu_description_len);
|
dev_info->gpu_props.description = (char *)malloc(gpu_description_len);
|
||||||
memcpy((void *)dev_info->gpu_props.description, ptr, gpu_description_len);
|
memcpy(const_cast<void*>(static_cast<const void*>(dev_info->gpu_props.description)), ptr, gpu_description_len);
|
||||||
ptr += gpu_description_len;
|
ptr += gpu_description_len;
|
||||||
|
|
||||||
// other non-string members
|
// other non-string members
|
||||||
|
|
|
@ -91,7 +91,6 @@ enum profiler_backend_type {
|
||||||
const char * device_name(void);
|
const char * device_name(void);
|
||||||
|
|
||||||
uint32_t device_cpu_cores (void);
|
uint32_t device_cpu_cores (void);
|
||||||
float device_flops (struct llama_model * model, enum ggml_type dtype, profiler_backend_type btype, int n_threads);
|
|
||||||
float device_cpu_flops (struct llama_model * model, enum ggml_type dtype, int n_threads);
|
float device_cpu_flops (struct llama_model * model, enum ggml_type dtype, int n_threads);
|
||||||
float device_metal_flops (struct llama_model * model, enum ggml_type dtype);
|
float device_metal_flops (struct llama_model * model, enum ggml_type dtype);
|
||||||
float device_cuda_flops (struct llama_model * model, enum ggml_type dtype);
|
float device_cuda_flops (struct llama_model * model, enum ggml_type dtype);
|
||||||
|
|
Loading…
Add table
Reference in a new issue