mirror of
https://github.com/Lizonghang/prima.cpp.git
synced 2025-09-07 05:29:03 +00:00
add profiler
This commit is contained in:
parent
766ec7862b
commit
9cd66f2145
4 changed files with 104 additions and 13 deletions
16
Makefile
16
Makefile
|
@ -937,6 +937,7 @@ OBJ_LLAMA = \
|
|||
src/unicode-data.o
|
||||
|
||||
OBJ_COMMON = \
|
||||
common/profiler.o \
|
||||
common/common.o \
|
||||
common/arg.o \
|
||||
common/log.o \
|
||||
|
@ -945,8 +946,8 @@ OBJ_COMMON = \
|
|||
common/sampling.o \
|
||||
common/train.o \
|
||||
common/build-info.o \
|
||||
common/json-schema-to-grammar.o \
|
||||
common/profiler.o
|
||||
common/json-schema-to-grammar.o
|
||||
|
||||
|
||||
OBJ_ALL = $(OBJ_GGML) $(OBJ_LLAMA) $(OBJ_COMMON)
|
||||
|
||||
|
@ -1172,6 +1173,11 @@ $(LIB_LLAMA_S): \
|
|||
|
||||
# common
|
||||
|
||||
common/profiler.o: \
|
||||
common/profiler.cpp \
|
||||
common/profiler.h
|
||||
$(CXX) $(CXXFLAGS) -c $< -o $@
|
||||
|
||||
common/common.o: \
|
||||
common/common.cpp \
|
||||
common/common.h \
|
||||
|
@ -1179,6 +1185,7 @@ common/common.o: \
|
|||
common/sampling.h \
|
||||
common/json.hpp \
|
||||
common/json-schema-to-grammar.h \
|
||||
common/profiler.h \
|
||||
include/llama.h
|
||||
$(CXX) $(CXXFLAGS) -c $< -o $@
|
||||
|
||||
|
@ -1187,11 +1194,6 @@ common/arg.o: \
|
|||
common/arg.h
|
||||
$(CXX) $(CXXFLAGS) -c $< -o $@
|
||||
|
||||
common/profiler.o: \
|
||||
common/profiler.cpp \
|
||||
common/profiler.h
|
||||
$(CXX) $(CXXFLAGS) -c $< -o $@
|
||||
|
||||
common/log.o: \
|
||||
common/log.cpp \
|
||||
common/log.h
|
||||
|
|
|
@ -9,7 +9,7 @@
|
|||
#include "json.hpp"
|
||||
#include "json-schema-to-grammar.h"
|
||||
#include "llama.h"
|
||||
#include "profile.h"
|
||||
#include "profiler.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cinttypes>
|
||||
|
@ -835,8 +835,12 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) {
|
|||
}
|
||||
|
||||
// profile devices and determine the best setup
|
||||
uint32_t n_cpu_cores = profiler::get_cpu_core_count();
|
||||
LOG_INF("Number of CPU cores on this device: %i\n", n_cpu_cores);
|
||||
uint32_t n_cpu_cores = profiler::device_cpu_cores();
|
||||
uint64_t total_memory = profiler::device_physical_memory(false);
|
||||
uint64_t available_memory = profiler::device_physical_memory(true);
|
||||
LOG_INF("Number of CPU cores: %u\n", n_cpu_cores);
|
||||
LOG_INF("Total Physical Memory: %.2f GB\n", total_memory / (double)(1 << 30));
|
||||
LOG_INF("Available Physical Memory: %.2f GB\n", available_memory / (double)(1 << 30));
|
||||
|
||||
if (model == NULL) {
|
||||
LOG_ERR("%s: failed to load model '%s'\n", __func__, params.model.c_str());
|
||||
|
|
|
@ -1,7 +1,91 @@
|
|||
#include "log.h"
|
||||
#include "profiler.h"
|
||||
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
#include <windows.h>
|
||||
#elif defined(__linux__)
|
||||
#include <unistd.h>
|
||||
#include <sys/sysinfo.h>
|
||||
#elif defined(__APPLE__) && defined(__MACH__)
|
||||
#include <sys/sysctl.h>
|
||||
#include <mach/mach.h>
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
#include <sys/types.h>
|
||||
|
||||
namespace profiler {
|
||||
unsigned int get_cpu_core_count() {
|
||||
return 4;
|
||||
|
||||
// Returns the number of CPU cores reported by the operating system.
//
//   Windows: SYSTEM_INFO::dwNumberOfProcessors (GetSystemInfo)
//   Linux:   sysconf(_SC_NPROCESSORS_ONLN)
//   macOS:   sysctl {CTL_HW, HW_AVAILCPU}, falling back to {CTL_HW, HW_NCPU}
//
// The result is never less than 1: if the platform query fails, reports a
// non-positive value, or the platform is not one of the above, 1 is returned.
uint32_t device_cpu_cores() {
    uint32_t core_count = 1; // fallback used whenever the query below does not yield a valid count

#if defined(_WIN32) || defined(_WIN64)
    SYSTEM_INFO sysinfo;
    GetSystemInfo(&sysinfo);
    if (sysinfo.dwNumberOfProcessors >= 1) {
        core_count = static_cast<uint32_t>(sysinfo.dwNumberOfProcessors);
    }
#elif defined(__linux__)
    // sysconf() returns a long and signals failure with -1; assigning that
    // directly to an unsigned would wrap to a huge bogus core count.
    const long online = sysconf(_SC_NPROCESSORS_ONLN);
    if (online >= 1) {
        core_count = static_cast<uint32_t>(online);
    }
#elif defined(__APPLE__) && defined(__MACH__)
    // Both sysctl values are plain ints, so read into an int and validate
    // before converting to the unsigned return type.
    int mib[2] = { CTL_HW, HW_AVAILCPU }; // try the available-CPU count first
    int ncpu = 0;
    size_t len = sizeof(ncpu);

    if (sysctl(mib, 2, &ncpu, &len, NULL, 0) != 0 || ncpu < 1) {
        mib[1] = HW_NCPU; // fall back to the total CPU count
        len = sizeof(ncpu);
        if (sysctl(mib, 2, &ncpu, &len, NULL, 0) != 0 || ncpu < 1) {
            ncpu = 1; // both queries failed
        }
    }
    core_count = static_cast<uint32_t>(ncpu);
#endif

    return core_count;
}
|
||||
|
||||
// Reports the physical memory of the local device, in bytes.
//
//   available == true  -> memory currently reported as free:
//       Windows: MEMORYSTATUSEX::ullAvailPhys
//       Linux:   sysinfo.freeram * sysinfo.mem_unit
//       macOS:   (free pages + inactive pages) * page size
//   available == false -> total physical memory:
//       Windows: MEMORYSTATUSEX::ullTotalPhys
//       Linux:   sysinfo.totalram * sysinfo.mem_unit
//       macOS:   sysctl {CTL_HW, HW_MEMSIZE}
//
// Returns 0 if the platform query fails or the platform is not supported.
uint64_t device_physical_memory(bool available) {
    uint64_t memory = 0;

#if defined(_WIN32) || defined(_WIN64)
    MEMORYSTATUSEX status;
    status.dwLength = sizeof(status);
    // Only trust the struct when the call succeeds; on failure its fields
    // are not filled in.
    if (GlobalMemoryStatusEx(&status)) {
        memory = available ? status.ullAvailPhys : status.ullTotalPhys;
    }

#elif defined(__linux__)
    struct sysinfo info;
    if (sysinfo(&info) == 0) {
        // NOTE(review): freeram presumably excludes reclaimable cache, so the
        // "available" figure may be conservative compared to MemAvailable in
        // /proc/meminfo -- confirm whether that is the intended semantics.
        memory = available ? info.freeram : info.totalram;
        memory *= info.mem_unit; // counts are expressed in units of mem_unit bytes
    }

#elif defined(__APPLE__) && defined(__MACH__)
    if (available) {
        mach_port_t host = mach_host_self();
        vm_statistics64_data_t vm_stats;
        mach_msg_type_number_t count = HOST_VM_INFO64_COUNT;

        if (host_statistics64(host, HOST_VM_INFO64,
                              reinterpret_cast<host_info64_t>(&vm_stats), &count) == KERN_SUCCESS) {
            const long page_size = sysconf(_SC_PAGESIZE);
            if (page_size > 0) {
                // widen before adding/multiplying so the arithmetic is done in 64 bits
                const uint64_t pages = static_cast<uint64_t>(vm_stats.free_count) +
                                       static_cast<uint64_t>(vm_stats.inactive_count);
                memory = pages * static_cast<uint64_t>(page_size);
            }
        }

        // mach_host_self() hands out a send right that the caller must release
        mach_port_deallocate(mach_task_self(), host);
    } else {
        int mib[2] = { CTL_HW, HW_MEMSIZE };
        uint64_t total = 0;
        size_t len = sizeof(total);
        if (sysctl(mib, 2, &total, &len, NULL, 0) == 0) {
            memory = total;
        }
    }
#endif

    return memory;
}
|
||||
} // namespace profiler
|
|
@ -2,7 +2,8 @@
|
|||
#define PROFILER_H
|
||||
|
||||
// Device profiling helpers: query basic hardware facts about the local machine.
// NOTE(review): uint32_t / uint64_t are used below but no <cstdint> include is
// visible in this header -- confirm it is included before these declarations.
namespace profiler {
|
||||
unsigned int get_cpu_core_count();
|
||||
// Returns the CPU core count reported by the OS (at least 1 is the intended fallback).
uint32_t device_cpu_cores();
|
||||
// Returns physical memory in bytes: currently free memory when `available`
// is true (the default), total physical memory when it is false.
uint64_t device_physical_memory(bool available = true);
|
||||
} // namespace profiler
|
||||
|
||||
#endif // PROFILER_H
|
Loading…
Add table
Reference in a new issue