mirror of
https://github.com/Lizonghang/prima.cpp.git
synced 2025-09-06 18:39:04 +00:00
add f32, f16, q8, q4k speed test for cuda
This commit is contained in:
parent
f4260bb346
commit
10f6f92c7e
3 changed files with 59 additions and 12 deletions
|
@ -90,6 +90,8 @@ float device_metal_flops(struct llama_model * model, enum ggml_type dtype) {
|
||||||
return device_flops(model, dtype, PROFILER_BACKEND_TYPE_METAL, 4);
|
return device_flops(model, dtype, PROFILER_BACKEND_TYPE_METAL, 4);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
(void)model;
|
||||||
|
(void)dtype;
|
||||||
return 0.0f;
|
return 0.0f;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -98,6 +100,8 @@ float device_cuda_flops(struct llama_model * model, enum ggml_type dtype) {
|
||||||
return device_flops(model, dtype, PROFILER_BACKEND_TYPE_CUDA, 4);
|
return device_flops(model, dtype, PROFILER_BACKEND_TYPE_CUDA, 4);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
(void)model;
|
||||||
|
(void)dtype;
|
||||||
return 0.0f;
|
return 0.0f;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -578,15 +582,33 @@ void device_print_props(struct device_info * dev_info_set, int n) {
|
||||||
}
|
}
|
||||||
LOG_INF("\n");
|
LOG_INF("\n");
|
||||||
|
|
||||||
LOG_INF("| GPU Metal flops (GFLOPS) ");
|
LOG_INF("| Metal flops (F32, GFLOPS) ");
|
||||||
for (int i = 0; i < n; ++i) {
|
for (int i = 0; i < n; ++i) {
|
||||||
LOG_INF("| %-10.1f ", dev_info_set[i].gpu_props.metal_flops);
|
LOG_INF("| %-10.1f ", dev_info_set[i].gpu_props.metal_flops);
|
||||||
}
|
}
|
||||||
LOG_INF("\n");
|
LOG_INF("\n");
|
||||||
|
|
||||||
LOG_INF("| GPU CUDA flops (GFLOPS) ");
|
LOG_INF("| CUDA flops (F32, GFLOPS) ");
|
||||||
for (int i = 0; i < n; ++i) {
|
for (int i = 0; i < n; ++i) {
|
||||||
LOG_INF("| %-10.1f ", dev_info_set[i].gpu_props.cuda_flops);
|
LOG_INF("| %-10.1f ", dev_info_set[i].gpu_props.cuda_flops_f32);
|
||||||
|
}
|
||||||
|
LOG_INF("\n");
|
||||||
|
|
||||||
|
LOG_INF("| CUDA flops (F16, GFLOPS) ");
|
||||||
|
for (int i = 0; i < n; ++i) {
|
||||||
|
LOG_INF("| %-10.1f ", dev_info_set[i].gpu_props.cuda_flops_f16);
|
||||||
|
}
|
||||||
|
LOG_INF("\n");
|
||||||
|
|
||||||
|
LOG_INF("| CUDA flops (Q8_0, GFLOPS) ");
|
||||||
|
for (int i = 0; i < n; ++i) {
|
||||||
|
LOG_INF("| %-10.1f ", dev_info_set[i].gpu_props.cuda_flops_q8);
|
||||||
|
}
|
||||||
|
LOG_INF("\n");
|
||||||
|
|
||||||
|
LOG_INF("| CUDA flops (Q4_K, GFLOPS) ");
|
||||||
|
for (int i = 0; i < n; ++i) {
|
||||||
|
LOG_INF("| %-10.1f ", dev_info_set[i].gpu_props.cuda_flops_q4k);
|
||||||
}
|
}
|
||||||
LOG_INF("\n");
|
LOG_INF("\n");
|
||||||
|
|
||||||
|
@ -614,7 +636,8 @@ size_t serialize(const struct device_info * dev_info, char ** buffer) {
|
||||||
+ sizeof(float) * 2 // cpu_props.flops_f32 and cpu_props.flops_f16
|
+ sizeof(float) * 2 // cpu_props.flops_f32 and cpu_props.flops_f16
|
||||||
+ sizeof(struct memory_info)
|
+ sizeof(struct memory_info)
|
||||||
+ sizeof(struct gpu_support)
|
+ sizeof(struct gpu_support)
|
||||||
+ sizeof(float) * 4; // gpu_props.memory_free, gpu_props.memory_total, gpu_props.metal_flops, and gpu_props.cuda_flops
|
+ sizeof(float) * 7; // gpu_props.memory_free, gpu_props.memory_total, gpu_props.metal_flops,
|
||||||
|
// gpu_props.cuda_flops_f32, gpu_props.cuda_flops_f16, gpu_props.cuda_flops_q8, and gpu_props.cuda_flops_q4k
|
||||||
|
|
||||||
*buffer = (char *)malloc(total_size);
|
*buffer = (char *)malloc(total_size);
|
||||||
char * ptr = *buffer;
|
char * ptr = *buffer;
|
||||||
|
@ -677,7 +700,16 @@ size_t serialize(const struct device_info * dev_info, char ** buffer) {
|
||||||
memcpy(ptr, &dev_info->gpu_props.metal_flops, sizeof(float));
|
memcpy(ptr, &dev_info->gpu_props.metal_flops, sizeof(float));
|
||||||
ptr += sizeof(float);
|
ptr += sizeof(float);
|
||||||
|
|
||||||
memcpy(ptr, &dev_info->gpu_props.cuda_flops, sizeof(float));
|
memcpy(ptr, &dev_info->gpu_props.cuda_flops_f32, sizeof(float));
|
||||||
|
ptr += sizeof(float);
|
||||||
|
|
||||||
|
memcpy(ptr, &dev_info->gpu_props.cuda_flops_f16, sizeof(float));
|
||||||
|
ptr += sizeof(float);
|
||||||
|
|
||||||
|
memcpy(ptr, &dev_info->gpu_props.cuda_flops_q8, sizeof(float));
|
||||||
|
ptr += sizeof(float);
|
||||||
|
|
||||||
|
memcpy(ptr, &dev_info->gpu_props.cuda_flops_q4k, sizeof(float));
|
||||||
|
|
||||||
return total_size;
|
return total_size;
|
||||||
}
|
}
|
||||||
|
@ -757,5 +789,14 @@ void deserialize(const char * buffer, struct device_info * dev_info) {
|
||||||
memcpy(&dev_info->gpu_props.metal_flops, ptr, sizeof(float));
|
memcpy(&dev_info->gpu_props.metal_flops, ptr, sizeof(float));
|
||||||
ptr += sizeof(float);
|
ptr += sizeof(float);
|
||||||
|
|
||||||
memcpy(&dev_info->gpu_props.cuda_flops, ptr, sizeof(float));
|
memcpy(&dev_info->gpu_props.cuda_flops_f32, ptr, sizeof(float));
|
||||||
|
ptr += sizeof(float);
|
||||||
|
|
||||||
|
memcpy(&dev_info->gpu_props.cuda_flops_f16, ptr, sizeof(float));
|
||||||
|
ptr += sizeof(float);
|
||||||
|
|
||||||
|
memcpy(&dev_info->gpu_props.cuda_flops_q8, ptr, sizeof(float));
|
||||||
|
ptr += sizeof(float);
|
||||||
|
|
||||||
|
memcpy(&dev_info->gpu_props.cuda_flops_q4k, ptr, sizeof(float));
|
||||||
}
|
}
|
||||||
|
|
|
@ -42,13 +42,16 @@ struct gpu_support {
|
||||||
struct gpu_props {
|
struct gpu_props {
|
||||||
const char * name;
|
const char * name;
|
||||||
const char * description;
|
const char * description;
|
||||||
float memory_free; // in GB
|
float memory_free; // in GB
|
||||||
float memory_total; // in GB
|
float memory_total; // in GB
|
||||||
float metal_flops; // in GFLOPS
|
float metal_flops; // in GFLOPS
|
||||||
float cuda_flops; // in GFLOPS
|
float cuda_flops_f32; // in GFLOPS
|
||||||
|
float cuda_flops_f16; // in GFLOPS
|
||||||
|
float cuda_flops_q8; // in GFLOPS
|
||||||
|
float cuda_flops_q4k; // in GFLOPS
|
||||||
|
|
||||||
gpu_props()
|
gpu_props()
|
||||||
: name(""), description(""), memory_free(0.0f), memory_total(0.0f), metal_flops(0.0f), cuda_flops(0.0f) {}
|
: name(""), description(""), memory_free(0.0f), memory_total(0.0f), metal_flops(0.0f), cuda_flops_f32(0.0f), cuda_flops_f16(0.0f), cuda_flops_q8(0.0f), cuda_flops_q4k(0.0f) {}
|
||||||
};
|
};
|
||||||
|
|
||||||
struct device_info {
|
struct device_info {
|
||||||
|
|
|
@ -3582,7 +3582,10 @@ void llama_profile_device(device_info * dev_info, struct llama_model * model, co
|
||||||
dev_info->gpu_props.memory_free = round(gpu_props.memory_free / (double)(1 << 30) * 100) / 100;
|
dev_info->gpu_props.memory_free = round(gpu_props.memory_free / (double)(1 << 30) * 100) / 100;
|
||||||
dev_info->gpu_props.memory_total = round(gpu_props.memory_total / (double)(1 << 30) * 100) / 100;
|
dev_info->gpu_props.memory_total = round(gpu_props.memory_total / (double)(1 << 30) * 100) / 100;
|
||||||
dev_info->gpu_props.metal_flops = device_metal_flops(model, GGML_TYPE_F32);
|
dev_info->gpu_props.metal_flops = device_metal_flops(model, GGML_TYPE_F32);
|
||||||
dev_info->gpu_props.cuda_flops = device_cuda_flops(model, GGML_TYPE_F32);
|
dev_info->gpu_props.cuda_flops_f32 = device_cuda_flops(model, GGML_TYPE_F32);
|
||||||
|
dev_info->gpu_props.cuda_flops_f16 = device_cuda_flops(model, GGML_TYPE_F16);
|
||||||
|
dev_info->gpu_props.cuda_flops_q8 = device_cuda_flops(model, GGML_TYPE_Q8_0);
|
||||||
|
dev_info->gpu_props.cuda_flops_q4k = device_cuda_flops(model, GGML_TYPE_Q4_K);
|
||||||
}
|
}
|
||||||
|
|
||||||
ggml_backend_buffer_type_t llama_dev_buffer_type(struct llama_model * model, int device) {
|
ggml_backend_buffer_type_t llama_dev_buffer_type(struct llama_model * model, int device) {
|
||||||
|
|
Loading…
Add table
Reference in a new issue