add profiler

This commit is contained in:
Lizonghang 2024-11-05 20:29:09 +04:00
parent 766ec7862b
commit 9cd66f2145
4 changed files with 104 additions and 13 deletions

View file

@ -937,6 +937,7 @@ OBJ_LLAMA = \
src/unicode-data.o
OBJ_COMMON = \
common/profiler.o \
common/common.o \
common/arg.o \
common/log.o \
@ -945,8 +946,8 @@ OBJ_COMMON = \
common/sampling.o \
common/train.o \
common/build-info.o \
common/json-schema-to-grammar.o \
common/profiler.o
common/json-schema-to-grammar.o
OBJ_ALL = $(OBJ_GGML) $(OBJ_LLAMA) $(OBJ_COMMON)
@ -1172,6 +1173,11 @@ $(LIB_LLAMA_S): \
# common
common/profiler.o: \
common/profiler.cpp \
common/profiler.h
$(CXX) $(CXXFLAGS) -c $< -o $@
common/common.o: \
common/common.cpp \
common/common.h \
@ -1179,6 +1185,7 @@ common/common.o: \
common/sampling.h \
common/json.hpp \
common/json-schema-to-grammar.h \
common/profiler.h \
include/llama.h
$(CXX) $(CXXFLAGS) -c $< -o $@
@ -1187,11 +1194,6 @@ common/arg.o: \
common/arg.h
$(CXX) $(CXXFLAGS) -c $< -o $@
common/profiler.o: \
common/profiler.cpp \
common/profiler.h
$(CXX) $(CXXFLAGS) -c $< -o $@
common/log.o: \
common/log.cpp \
common/log.h

View file

@ -9,7 +9,7 @@
#include "json.hpp"
#include "json-schema-to-grammar.h"
#include "llama.h"
#include "profile.h"
#include "profiler.h"
#include <algorithm>
#include <cinttypes>
@ -835,8 +835,12 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) {
}
// profile devices and determine the best setup
uint32_t n_cpu_cores = profiler::get_cpu_core_count();
LOG_INF("Number of CPU cores on this device: %i\n", n_cpu_cores);
uint32_t n_cpu_cores = profiler::device_cpu_cores();
uint64_t total_memory = profiler::device_physical_memory(false);
uint64_t available_memory = profiler::device_physical_memory(true);
LOG_INF("Number of CPU cores: %u\n", n_cpu_cores);
LOG_INF("Total Physical Memory: %.2f GB\n", total_memory / (double)(1 << 30));
LOG_INF("Available Physical Memory: %.2f GB\n", available_memory / (double)(1 << 30));
if (model == NULL) {
LOG_ERR("%s: failed to load model '%s'\n", __func__, params.model.c_str());

View file

@ -1,7 +1,91 @@
#include "log.h"
#include "profiler.h"
#if defined(_WIN32) || defined(_WIN64)
#include <windows.h>
#elif defined(__linux__)
#include <unistd.h>
#include <sys/sysinfo.h>
#elif defined(__APPLE__) && defined(__MACH__)
#include <sys/sysctl.h>
#include <mach/mach.h>
#include <unistd.h>
#endif
#include <sys/types.h>
namespace profiler {
unsigned int get_cpu_core_count() {
return 4;
// Returns the number of CPU cores reported by the operating system.
//
// The value comes from GetSystemInfo on Windows, sysconf(_SC_NPROCESSORS_ONLN)
// on Linux, and sysctl (HW_AVAILCPU, falling back to HW_NCPU) on macOS.
// If the platform is unsupported or the query fails, 1 is returned so callers
// always get a usable, non-zero count.
uint32_t device_cpu_cores() {
    uint32_t core_count = 1; // default to 1 in case of failure

#if defined(_WIN32) || defined(_WIN64)
    // Windows implementation
    SYSTEM_INFO sysinfo;
    GetSystemInfo(&sysinfo);
    if (sysinfo.dwNumberOfProcessors > 0) {
        core_count = static_cast<uint32_t>(sysinfo.dwNumberOfProcessors);
    }
#elif defined(__linux__)
    // Linux implementation
    // sysconf returns a long and reports errors as -1; assigning that directly
    // to an unsigned value would wrap to a huge core count, so validate first.
    const long online = sysconf(_SC_NPROCESSORS_ONLN);
    if (online > 0) {
        core_count = static_cast<uint32_t>(online);
    }
#elif defined(__APPLE__) && defined(__MACH__)
    // macOS implementation
    int mib[2] = { CTL_HW, HW_AVAILCPU }; // number of available cpus
    int ncpu = 0;                         // these sysctl values are plain ints
    size_t len = sizeof(ncpu);

    if (sysctl(mib, 2, &ncpu, &len, NULL, 0) != 0 || ncpu < 1) {
        mib[1] = HW_NCPU;   // total number of cpus
        len = sizeof(ncpu); // len is in/out: reset it before the retry
        if (sysctl(mib, 2, &ncpu, &len, NULL, 0) != 0 || ncpu < 1) {
            ncpu = 1; // default to 1 if sysctl fails
        }
    }
    core_count = static_cast<uint32_t>(ncpu);
#endif

    return core_count;
}
// Returns the amount of physical memory on this device, in bytes.
//
// available == true  -> memory currently reported as free by the OS
//                       (ullAvailPhys on Windows, sysinfo.freeram on Linux,
//                       free + inactive pages on macOS).
// available == false -> total installed physical memory.
//
// Returns 0 when the platform is unsupported or the underlying query fails.
uint64_t device_physical_memory(bool available) {
    uint64_t memory = 0;

#if defined(_WIN32) || defined(_WIN64)
    MEMORYSTATUSEX status;
    status.dwLength = sizeof(status);
    // Only read the struct when the call succeeded; on failure its fields
    // are left uninitialized.
    if (GlobalMemoryStatusEx(&status)) {
        memory = available ? status.ullAvailPhys : status.ullTotalPhys;
    }
#elif defined(__linux__)
    struct sysinfo info;
    if (sysinfo(&info) == 0) {
        // freeram/totalram are expressed in units of mem_unit bytes; widen to
        // 64 bits before scaling so the product cannot overflow.
        memory = available ? info.freeram : info.totalram;
        memory *= info.mem_unit;
    }
#elif defined(__APPLE__) && defined(__MACH__)
    if (available) {
        const long page_size = sysconf(_SC_PAGESIZE); // -1 on error
        mach_port_t host = mach_host_self();
        vm_statistics64_data_t vm_stats;
        mach_msg_type_number_t count = HOST_VM_INFO64_COUNT;

        if (page_size > 0 &&
            host_statistics64(host, HOST_VM_INFO64, (host_info64_t)&vm_stats, &count) == KERN_SUCCESS) {
            // Page counters are 32-bit; sum and scale them in 64 bits.
            const uint64_t pages = static_cast<uint64_t>(vm_stats.free_count) +
                                   static_cast<uint64_t>(vm_stats.inactive_count);
            memory = pages * static_cast<uint64_t>(page_size);
        }
    } else {
        int mib[2] = { CTL_HW, HW_MEMSIZE };
        uint64_t total = 0;
        size_t len = sizeof(total);

        // Keep the 0 default if the query fails.
        if (sysctl(mib, 2, &total, &len, NULL, 0) == 0) {
            memory = total;
        }
    }
#endif

    return memory;
}
} // namespace profiler

View file

@ -2,7 +2,8 @@
#define PROFILER_H
// Device profiling helpers: queries about the hardware of the local machine.
namespace profiler {
// NOTE(review): in this diff view this appears to be the pre-change
// declaration that device_cpu_cores() replaces — confirm it is not meant to
// remain in the header.
unsigned int get_cpu_core_count();
// Returns the number of CPU cores reported by the OS; the implementation
// defaults to 1 when the count cannot be determined.
uint32_t device_cpu_cores();
// Returns physical memory in bytes: the currently available amount when
// `available` is true (the default), otherwise the total amount.
// The implementation starts from 0 and leaves it unchanged on unsupported
// platforms.
uint64_t device_physical_memory(bool available = true);
} // namespace profiler
#endif // PROFILER_H