Mirror of https://github.com/Lizonghang/prima.cpp.git (synced 2025-09-10 16:44:40 +00:00)

Commit 740f7f0b95: use multithread disk r/w test
Parent: f7507ec20b

3 changed files with 44 additions and 21 deletions
Changed file 1 of 3:

@@ -441,7 +441,21 @@ uint64_t device_swap_memory(bool available) {
     return swap_memory;
 }
 
-static void external_fio_impl(float * read_bw, float * write_bw, bool op_rand) {
+static size_t get_page_size() {
+    size_t page_size = 0;
+
+#ifdef _WIN32
+    SYSTEM_INFO si;
+    GetSystemInfo(&si);
+    page_size = si.dwPageSize;
+#elif defined(__APPLE__) || defined(__linux__)
+    page_size = sysconf(_SC_PAGESIZE);
+#endif
+
+    return page_size;
+}
+
+static void external_fio_impl(float * read_bw, float * write_bw, bool op_rand, int n_threads) {
     const char * test_file = "fio_test";
     const char * fio_conf_template = R"(
 [global]
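For reference, the same page-size lookup as a standalone sketch (not part of the commit): the #ifdef ladder mirrors the hunk above, while the includes, the main() wrapper, and the printed values are assumptions added for illustration. sysconf(_SC_PAGESIZE) typically reports 4096 on x86-64 Linux and 16384 on Apple Silicon macOS, which is why the block size is later rendered as "4k" or "16k".

// Standalone sketch, not repository code: portable page-size lookup.
#include <cstdio>
#ifdef _WIN32
#include <windows.h>
#else
#include <unistd.h>
#endif

static size_t get_page_size() {
    size_t page_size = 0;
#ifdef _WIN32
    SYSTEM_INFO si;
    GetSystemInfo(&si);
    page_size = si.dwPageSize;       // Windows: page size from SYSTEM_INFO
#elif defined(__APPLE__) || defined(__linux__)
    page_size = sysconf(_SC_PAGESIZE); // POSIX: page size from sysconf
#endif
    return page_size;
}

int main() {
    // Typically 4096 bytes on x86-64 Linux, 16384 bytes on Apple Silicon macOS.
    printf("page size: %zu bytes\n", get_page_size());
    return 0;
}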
@@ -456,22 +470,37 @@ group_reporting=1
 rw=%s
 bs=%s
 filename=%s
+numjobs=%d
 
 [write-job]
 rw=%s
 bs=%s
 filename=%s
+numjobs=%d
 )";
 
+    size_t page_size = get_page_size();
+    if (page_size == 0) {
+        LOG_INF("Error: Unable to get system page size\n");
+        return;
+    }
+    // format the page size as a readable string (e.g., "16k" or "4k")
+    char page_size_str[8];
+    if (page_size >= 1024) {
+        snprintf(page_size_str, sizeof(page_size_str), "%zuk", page_size / 1024);
+    } else {
+        snprintf(page_size_str, sizeof(page_size_str), "%zu", page_size);
+    }
+
     const char * read_type = op_rand ? "randread" : "read";
     const char * write_type = op_rand ? "randwrite" : "write";
-    const char * block_size = op_rand ? "4k" : "1M";
+    const char * block_size = op_rand ? page_size_str : "1M";
 
     // write config to a file
     char fio_conf[1024];
     snprintf(fio_conf, sizeof(fio_conf), fio_conf_template,
-             read_type, block_size, test_file,
-             write_type, block_size, test_file);
+             read_type, block_size, test_file, n_threads,
+             write_type, block_size, test_file, n_threads);
     const char * conf_file = "config.fio";
     std::ofstream conf(conf_file);
     if (!conf) {
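To show what the new numjobs field does to the generated job file, here is a hedged sketch (not code from the repository) that fills a cut-down copy of the template with hypothetical values: random I/O, a 16 KiB page size, and n_threads = 4. The real [global] section carries more options than reproduced here.

// Illustrative only: fills a reduced fio job template the same way the
// profiler does, with hypothetical values for rw, bs, filename, numjobs.
#include <cstdio>

int main() {
    const char * fio_conf_template = R"(
[global]
group_reporting=1

[read-job]
rw=%s
bs=%s
filename=%s
numjobs=%d

[write-job]
rw=%s
bs=%s
filename=%s
numjobs=%d
)";

    char fio_conf[1024];
    snprintf(fio_conf, sizeof(fio_conf), fio_conf_template,
             "randread",  "16k", "fio_test", 4,   // read job:  rw, bs, filename, numjobs
             "randwrite", "16k", "fio_test", 4);  // write job: rw, bs, filename, numjobs
    fputs(fio_conf, stdout);                       // prints the config.fio that would be written
    return 0;
}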
@@ -529,12 +558,12 @@ filename=%s
     std::remove(output_file);
 }
 
-void device_disk_rnd_bw(float * read_rnd_bw, float * write_rnd_bw) {
-    external_fio_impl(read_rnd_bw, write_rnd_bw, true);
+void device_disk_rnd_bw(float * read_rnd_bw, float * write_rnd_bw, int n_threads) {
+    external_fio_impl(read_rnd_bw, write_rnd_bw, true, n_threads);
 }
 
-void device_disk_seq_bw(float * read_seq_bw, float * write_seq_bw) {
-    external_fio_impl(read_seq_bw, write_seq_bw, false);
+void device_disk_seq_bw(float * read_seq_bw, float * write_seq_bw, int n_threads) {
+    external_fio_impl(read_seq_bw, write_seq_bw, false, n_threads);
 }
 
 float device_memory_bw(int n_thread) {
@@ -762,16 +791,11 @@ static float device_disk_access_delay(struct device_info & dev_info, int n_layers) {
 
     float total_gbytes = (double)total_bytes / 1e9; // convert to GB
     float mem_avail = dev_info.memory.available_physical * 1024.0f * 1024.0f * 1024.0f / 1e9; // convert to GB
-    float disk_read_bw = dev_info.disk.read_seq_bw; // GB/s
+    // todo: consider activations which also consumes the available memory
+    float disk_read_bw = dev_info.disk.read_rnd_bw; // GB/s
     return std::max(0.0, static_cast<double>(total_gbytes - mem_avail) / disk_read_bw * 1000); // convert to ms
 }
 
-static float device_swap_access_delay(struct device_info & dev_info, int n_layers) {
-    (void)dev_info;
-    (void)n_layers;
-    return 0.0f;
-}
-
 void device_print_props(struct device_info * dev_info_set, int n, struct llama_model * model) {
     LOG_INF("\n-------------------------------------------------------------------------------------------\n");
     LOG_INF("| Property ");
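A quick worked example of the delay model changed above, using made-up numbers: if 14 GB of weights must be streamed but only 8 GB of physical memory is available, the remaining 6 GB is re-read through mmap page faults, which hit the disk as random reads, hence read_rnd_bw rather than read_seq_bw.

// Illustrative only: the same delay formula as above, with hypothetical inputs.
#include <algorithm>
#include <cstdio>

int main() {
    double total_gbytes = 14.0; // hypothetical total weight bytes to load, in GB
    double mem_avail    =  8.0; // hypothetical available physical memory, in GB
    double read_rnd_bw  =  0.5; // hypothetical random-read bandwidth, in GB/s

    // max(0, bytes that do not fit in memory / bandwidth), converted to ms
    double delay_ms = std::max(0.0, (total_gbytes - mem_avail) / read_rnd_bw * 1000.0);
    printf("disk access delay: %.1f ms\n", delay_ms); // 12000.0 ms with these numbers
    return 0;
}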
@@ -1127,7 +1151,6 @@ void device_print_props(struct device_info * dev_info_set, int n, struct llama_model * model) {
     latency += device_compute_delay(dev_info_set[0], n_layers);
     latency += device_memory_access_delay(dev_info_set[0], n_layers);
     latency += device_disk_access_delay(dev_info_set[0], n_layers); // if physical memory is not enough, some tensor weights will be released from memory and reloaded by mmap later
-    latency += device_swap_access_delay(dev_info_set[0], n_layers); // if physical memory is not enough, activations will be stored in swap, which causes additional disk io with random access
 
     LOG_INF("| Token latency (ms) ");
     LOG_INF("| %-10.2f ", latency);
Changed file 2 of 3:

@@ -217,8 +217,8 @@ float device_cuda_flops (struct llama_model * model, enum ggml_type src0
 float    device_inp_embd_delay  (struct llama_model * model, enum ggml_type src0t, int n_tokens, int n_threads);
 uint64_t device_physical_memory (bool available);
 uint64_t device_swap_memory     (bool available);
-void     device_disk_seq_bw     (float * read_seq_bw, float * write_seq_bw);
-void     device_disk_rnd_bw     (float * read_rnd_bw, float * write_rnd_bw);
+void     device_disk_seq_bw     (float * read_seq_bw, float * write_seq_bw, int n_threads);
+void     device_disk_rnd_bw     (float * read_rnd_bw, float * write_rnd_bw, int n_threads);
 float    device_memory_bw       (int n_thread);
 float    device_cuda_memory_bw  (struct llama_model * model);
 void     device_get_props       (struct llama_model * model, int device, struct ggml_backend_dev_props * props);
Changed file 3 of 3:

@@ -3562,8 +3562,8 @@ void llama_profile_device(device_info * dev_info, struct llama_model * model, ll
     dev_info->memory.available_swap = round(device_swap_memory(true) / (double)(1 << 30) * 100) / 100;
     dev_info->memory.read_bandwidth = device_memory_bw(n_threads);
 
-    device_disk_seq_bw(&dev_info->disk.read_seq_bw, &dev_info->disk.write_seq_bw);
-    device_disk_rnd_bw(&dev_info->disk.read_rnd_bw, &dev_info->disk.write_rnd_bw);
+    device_disk_seq_bw(&dev_info->disk.read_seq_bw, &dev_info->disk.write_seq_bw, n_threads);
+    device_disk_rnd_bw(&dev_info->disk.read_rnd_bw, &dev_info->disk.write_rnd_bw, n_threads);
 
     dev_info->gpu_support.metal = device_has_metal();
     dev_info->gpu_support.cuda = device_has_cuda();