From 740f7f0b950896fb8cf8043302a13f5fa33c2233 Mon Sep 17 00:00:00 2001 From: Lizonghang <870644199@qq.com> Date: Wed, 27 Nov 2024 22:14:17 +0400 Subject: [PATCH] use multithread disk r/w test --- common/profiler.cpp | 57 +++++++++++++++++++++++++++++++-------------- common/profiler.h | 4 ++-- src/llama.cpp | 4 ++-- 3 files changed, 44 insertions(+), 21 deletions(-) diff --git a/common/profiler.cpp b/common/profiler.cpp index 9e7094b0..72bf68dc 100644 --- a/common/profiler.cpp +++ b/common/profiler.cpp @@ -441,7 +441,21 @@ uint64_t device_swap_memory(bool available) { return swap_memory; } -static void external_fio_impl(float * read_bw, float * write_bw, bool op_rand) { +static size_t get_page_size() { + size_t page_size = 0; + +#ifdef _WIN32 + SYSTEM_INFO si; + GetSystemInfo(&si); + page_size = si.dwPageSize; +#elif defined(__APPLE__) || defined(__linux__) + page_size = sysconf(_SC_PAGESIZE); +#endif + + return page_size; +} + +static void external_fio_impl(float * read_bw, float * write_bw, bool op_rand, int n_threads) { const char * test_file = "fio_test"; const char * fio_conf_template = R"( [global] @@ -456,22 +470,37 @@ group_reporting=1 rw=%s bs=%s filename=%s +numjobs=%d [write-job] rw=%s bs=%s filename=%s +numjobs=%d )"; + size_t page_size = get_page_size(); + if (page_size == 0) { + LOG_INF("Error: Unable to get system page size\n"); + return; + } + // format the page size as a readable string (e.g., "16k" or "4k") + char page_size_str[8]; + if (page_size >= 1024) { + snprintf(page_size_str, sizeof(page_size_str), "%zuk", page_size / 1024); + } else { + snprintf(page_size_str, sizeof(page_size_str), "%zu", page_size); + } + const char * read_type = op_rand ? "randread" : "read"; const char * write_type = op_rand ? "randwrite" : "write"; - const char * block_size = op_rand ? "4k" : "1M"; + const char * block_size = op_rand ? page_size_str : "1M"; // write config to a file char fio_conf[1024]; snprintf(fio_conf, sizeof(fio_conf), fio_conf_template, - read_type, block_size, test_file, - write_type, block_size, test_file); + read_type, block_size, test_file, n_threads, + write_type, block_size, test_file, n_threads); const char * conf_file = "config.fio"; std::ofstream conf(conf_file); if (!conf) { @@ -529,12 +558,12 @@ filename=%s std::remove(output_file); } -void device_disk_rnd_bw(float * read_rnd_bw, float * write_rnd_bw) { - external_fio_impl(read_rnd_bw, write_rnd_bw, true); +void device_disk_rnd_bw(float * read_rnd_bw, float * write_rnd_bw, int n_threads) { + external_fio_impl(read_rnd_bw, write_rnd_bw, true, n_threads); } -void device_disk_seq_bw(float * read_seq_bw, float * write_seq_bw) { - external_fio_impl(read_seq_bw, write_seq_bw, false); +void device_disk_seq_bw(float * read_seq_bw, float * write_seq_bw, int n_threads) { + external_fio_impl(read_seq_bw, write_seq_bw, false, n_threads); } float device_memory_bw(int n_thread) { @@ -762,16 +791,11 @@ static float device_disk_access_delay(struct device_info & dev_info, int n_layer float total_gbytes = (double)total_bytes / 1e9; // convert to GB float mem_avail = dev_info.memory.available_physical * 1024.0f * 1024.0f * 1024.0f / 1e9; // convert to GB - float disk_read_bw = dev_info.disk.read_seq_bw; // GB/s + // todo: consider activations which also consumes the available memory + float disk_read_bw = dev_info.disk.read_rnd_bw; // GB/s return std::max(0.0, static_cast(total_gbytes - mem_avail) / disk_read_bw * 1000); // convert to ms } -static float device_swap_access_delay(struct device_info & dev_info, int n_layers) { - (void)dev_info; - (void)n_layers; - return 0.0f; -} - void device_print_props(struct device_info * dev_info_set, int n, struct llama_model * model) { LOG_INF("\n-------------------------------------------------------------------------------------------\n"); LOG_INF("| Property "); @@ -1127,8 +1151,7 @@ void device_print_props(struct device_info * dev_info_set, int n, struct llama_m latency += device_compute_delay(dev_info_set[0], n_layers); latency += device_memory_access_delay(dev_info_set[0], n_layers); latency += device_disk_access_delay(dev_info_set[0], n_layers); // if physical memory is not enough, some tensor weights will be released from memory and reloaded by mmap later - latency += device_swap_access_delay(dev_info_set[0], n_layers); // if physical memory is not enough, activations will be stored in swap, which causes additional disk io with random access - + LOG_INF("| Token latency (ms) "); LOG_INF("| %-10.2f ", latency); LOG_INF("\n"); diff --git a/common/profiler.h b/common/profiler.h index 3f7453ea..43a5fc81 100644 --- a/common/profiler.h +++ b/common/profiler.h @@ -217,8 +217,8 @@ float device_cuda_flops (struct llama_model * model, enum ggml_type src0 float device_inp_embd_delay (struct llama_model * model, enum ggml_type src0t, int n_tokens, int n_threads); uint64_t device_physical_memory (bool available); uint64_t device_swap_memory (bool available); -void device_disk_seq_bw (float * read_seq_bw, float * write_seq_bw); -void device_disk_rnd_bw (float * read_rnd_bw, float * write_rnd_bw); +void device_disk_seq_bw (float * read_seq_bw, float * write_seq_bw, int n_threads); +void device_disk_rnd_bw (float * read_rnd_bw, float * write_rnd_bw, int n_threads); float device_memory_bw (int n_thread); float device_cuda_memory_bw (struct llama_model * model); void device_get_props (struct llama_model * model, int device, struct ggml_backend_dev_props * props); diff --git a/src/llama.cpp b/src/llama.cpp index c05621f6..5817d3c2 100644 --- a/src/llama.cpp +++ b/src/llama.cpp @@ -3562,8 +3562,8 @@ void llama_profile_device(device_info * dev_info, struct llama_model * model, ll dev_info->memory.available_swap = round(device_swap_memory(true) / (double)(1 << 30) * 100) / 100; dev_info->memory.read_bandwidth = device_memory_bw(n_threads); - device_disk_seq_bw(&dev_info->disk.read_seq_bw, &dev_info->disk.write_seq_bw); - device_disk_rnd_bw(&dev_info->disk.read_rnd_bw, &dev_info->disk.write_rnd_bw); + device_disk_seq_bw(&dev_info->disk.read_seq_bw, &dev_info->disk.write_seq_bw, n_threads); + device_disk_rnd_bw(&dev_info->disk.read_rnd_bw, &dev_info->disk.write_rnd_bw, n_threads); dev_info->gpu_support.metal = device_has_metal(); dev_info->gpu_support.cuda = device_has_cuda();