From 4e1be1065d8aa05937aae1d1ee845fad1ddaab1d Mon Sep 17 00:00:00 2001
From: Lizonghang <870644199@qq.com>
Date: Wed, 6 Nov 2024 10:57:30 +0400
Subject: [PATCH] add memory speed test

---
Review notes (this section is ignored by git-am):

* This patch was reconstructed from a copy whose line breaks had been
  collapsed and whose angle-bracket spans had been stripped. Line structure
  was rebuilt against the hunk headers and the diffstat. Indentation, the
  restored template arguments (std::vector<char>, std::chrono::duration<double>,
  static_cast<double>, static_cast<uint64_t>) and the header name in the
  "#include <string>" context line of common/profiler.h are inferred, so
  confirm them against the tree before applying.
* device_memory_bw() differs from the original commit in one respect: both
  throughput divisions are now guarded against a zero elapsed time, matching
  the guard device_disk_read_bw() already has. Without it a zero interval
  yields inf/NaN and the conversion to uint64_t is undefined behaviour.
  Line counts are unchanged, so the hunk headers and diffstat still hold.
* Not changed, but worth a follow-up: both call sites pass 500 explicitly, so
  the new BUFFER_SIZE_MB default (1024) is never used, and the
  buffer.clear()/shrink_to_fit() calls are redundant because the vector is
  destroyed at the end of the try block anyway.

 common/common.cpp   |  4 +++-
 common/profiler.cpp | 45 +++++++++++++++++++++++++++++++++++++++++++--
 common/profiler.h   |  5 ++++-
 3 files changed, 50 insertions(+), 4 deletions(-)

diff --git a/common/common.cpp b/common/common.cpp
index c37ef4cc..a491449c 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -841,7 +841,8 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) {
     uint64_t available_memory = profiler::device_physical_memory(true);
     uint64_t total_swap = profiler::device_swap_memory(false);
     uint64_t available_swap = profiler::device_swap_memory(true);
-    uint64_t disk_read_bw = profiler::get_disk_read_speed(params.model.c_str(), 500);
+    uint64_t disk_read_bw = profiler::device_disk_read_bw(params.model.c_str(), 500);
+    uint64_t memory_bw = profiler::device_memory_bw(500);
 
     LOG_INF("Device Name: %s\n", dev_name);
     LOG_INF("Number of CPU cores: %u\n", n_cpu_cores);
@@ -850,6 +851,7 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) {
     LOG_INF("Total Swap Memory: %.2f GB\n", total_swap / (double)(1 << 30));
     LOG_INF("Available Swap Memory: %.2f GB\n", available_swap / (double)(1 << 30));
     LOG_INF("Disk Read Bandwidth: %.2f GB/s\n", disk_read_bw / (double)(1 << 30));
+    LOG_INF("Memory Bandwidth: %.2f GB/s\n", memory_bw / (double)(1 << 30));
 
     if (model == NULL) {
         LOG_ERR("%s: failed to load model '%s'\n", __func__, params.model.c_str());
diff --git a/common/profiler.cpp b/common/profiler.cpp
index 1454cea2..0cbf96e5 100644
--- a/common/profiler.cpp
+++ b/common/profiler.cpp
@@ -193,7 +193,7 @@ uint64_t device_swap_memory(bool available) {
     return swap_memory;
 }
 
-uint64_t get_disk_read_speed(const char * test_file, size_t buffer_size_mb) {
+uint64_t device_disk_read_bw(const char * test_file, size_t buffer_size_mb) {
     uint64_t speed = 0;
     size_t buffer_size = buffer_size_mb * 1024 * 1024; // buffer size in bytes
 
@@ -220,14 +220,55 @@ uint64_t get_disk_read_speed(const char * test_file, size_t buffer_size_mb) {
         auto end_time = std::chrono::high_resolution_clock::now();
         std::chrono::duration<double> elapsed_time = end_time - start_time;
 
-        // Calculate speed in bytes per second
+        // speed in bytes per second
         if (elapsed_time.count() > 0) {
             speed = static_cast<uint64_t>(buffer.size() / elapsed_time.count());
         }
+
+        buffer.clear();
+        buffer.shrink_to_fit();
     } catch (const std::exception &e) {
         LOG_ERR("Exception while calculating disk read speed: %s\n", e.what());
     }
 
     return speed;
 }
+
+uint64_t device_memory_bw(size_t buffer_size_mb) {
+    uint64_t speed = 0;
+    size_t test_size = buffer_size_mb * 1024 * 1024; // convert MB to bytes
+
+    try {
+        // allocate memory for speed test
+        std::vector<char> buffer(test_size, 1);
+
+        // measure write speed
+        auto start_time = std::chrono::high_resolution_clock::now();
+        memset(buffer.data(), 0xAB, buffer.size());
+        auto end_time = std::chrono::high_resolution_clock::now();
+        std::chrono::duration<double> elapsed_time = end_time - start_time;
+        double write_speed = elapsed_time.count() > 0 ? static_cast<double>(test_size) / elapsed_time.count() : 0.0;
+
+        // measure read speed
+        start_time = std::chrono::high_resolution_clock::now();
+        volatile char temp = 0;
+        for (size_t i = 0; i < buffer.size(); i += 64) {
+            temp += buffer[i]; // one byte per 64-byte stride (assumed cache line size); volatile keeps the loads alive
+        }
+        end_time = std::chrono::high_resolution_clock::now();
+        elapsed_time = end_time - start_time;
+        double read_speed = elapsed_time.count() > 0 ? static_cast<double>(test_size) / elapsed_time.count() : 0.0;
+
+        // average speed in bytes per second; stays 0 if either phase could not be timed
+        speed = (write_speed > 0 && read_speed > 0) ? static_cast<uint64_t>((write_speed + read_speed) / 2.0) : 0;
+
+        buffer.clear();
+        buffer.shrink_to_fit();
+    } catch (const std::exception &e) {
+        LOG_ERR("Exception while calculating memory speed: %s\n", e.what());
+    }
+
+    return speed;
+}
+
 } // namespace profiler
\ No newline at end of file
diff --git a/common/profiler.h b/common/profiler.h
index 95dfcb09..e81141ec 100644
--- a/common/profiler.h
+++ b/common/profiler.h
@@ -3,12 +3,15 @@
 
 #include <string>
 
+#define BUFFER_SIZE_MB 1024
+
 namespace profiler {
     const char * device_name();
     uint32_t device_cpu_cores();
     uint64_t device_physical_memory(bool available = true);
     uint64_t device_swap_memory(bool available = true);
-    uint64_t get_disk_read_speed(const char * test_file, size_t buffer_size_mb = 500);
+    uint64_t device_disk_read_bw(const char * test_file, size_t buffer_size_mb = BUFFER_SIZE_MB);
+    uint64_t device_memory_bw(size_t buffer_size_mb = BUFFER_SIZE_MB);
 } // namespace profiler
 
 #endif // PROFILER_H
\ No newline at end of file