mirror of
https://github.com/Lizonghang/prima.cpp.git
synced 2025-09-08 01:39:03 +00:00
add profiler
This commit is contained in:
parent
766ec7862b
commit
9cd66f2145
4 changed files with 104 additions and 13 deletions
16
Makefile
16
Makefile
|
@ -937,6 +937,7 @@ OBJ_LLAMA = \
|
||||||
src/unicode-data.o
|
src/unicode-data.o
|
||||||
|
|
||||||
OBJ_COMMON = \
|
OBJ_COMMON = \
|
||||||
|
common/profiler.o \
|
||||||
common/common.o \
|
common/common.o \
|
||||||
common/arg.o \
|
common/arg.o \
|
||||||
common/log.o \
|
common/log.o \
|
||||||
|
@ -945,8 +946,8 @@ OBJ_COMMON = \
|
||||||
common/sampling.o \
|
common/sampling.o \
|
||||||
common/train.o \
|
common/train.o \
|
||||||
common/build-info.o \
|
common/build-info.o \
|
||||||
common/json-schema-to-grammar.o \
|
common/json-schema-to-grammar.o
|
||||||
common/profiler.o
|
|
||||||
|
|
||||||
OBJ_ALL = $(OBJ_GGML) $(OBJ_LLAMA) $(OBJ_COMMON)
|
OBJ_ALL = $(OBJ_GGML) $(OBJ_LLAMA) $(OBJ_COMMON)
|
||||||
|
|
||||||
|
@ -1172,6 +1173,11 @@ $(LIB_LLAMA_S): \
|
||||||
|
|
||||||
# common
|
# common
|
||||||
|
|
||||||
|
common/profiler.o: \
|
||||||
|
common/profiler.cpp \
|
||||||
|
common/profiler.h
|
||||||
|
$(CXX) $(CXXFLAGS) -c $< -o $@
|
||||||
|
|
||||||
common/common.o: \
|
common/common.o: \
|
||||||
common/common.cpp \
|
common/common.cpp \
|
||||||
common/common.h \
|
common/common.h \
|
||||||
|
@ -1179,6 +1185,7 @@ common/common.o: \
|
||||||
common/sampling.h \
|
common/sampling.h \
|
||||||
common/json.hpp \
|
common/json.hpp \
|
||||||
common/json-schema-to-grammar.h \
|
common/json-schema-to-grammar.h \
|
||||||
|
common/profiler.h \
|
||||||
include/llama.h
|
include/llama.h
|
||||||
$(CXX) $(CXXFLAGS) -c $< -o $@
|
$(CXX) $(CXXFLAGS) -c $< -o $@
|
||||||
|
|
||||||
|
@ -1187,11 +1194,6 @@ common/arg.o: \
|
||||||
common/arg.h
|
common/arg.h
|
||||||
$(CXX) $(CXXFLAGS) -c $< -o $@
|
$(CXX) $(CXXFLAGS) -c $< -o $@
|
||||||
|
|
||||||
common/profiler.o: \
|
|
||||||
common/profiler.cpp \
|
|
||||||
common/profiler.h
|
|
||||||
$(CXX) $(CXXFLAGS) -c $< -o $@
|
|
||||||
|
|
||||||
common/log.o: \
|
common/log.o: \
|
||||||
common/log.cpp \
|
common/log.cpp \
|
||||||
common/log.h
|
common/log.h
|
||||||
|
|
|
@ -9,7 +9,7 @@
|
||||||
#include "json.hpp"
|
#include "json.hpp"
|
||||||
#include "json-schema-to-grammar.h"
|
#include "json-schema-to-grammar.h"
|
||||||
#include "llama.h"
|
#include "llama.h"
|
||||||
#include "profile.h"
|
#include "profiler.h"
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <cinttypes>
|
#include <cinttypes>
|
||||||
|
@ -835,8 +835,12 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// profile devices and determine the best setup
|
// profile devices and determine the best setup
|
||||||
uint32_t n_cpu_cores = profiler::get_cpu_core_count();
|
uint32_t n_cpu_cores = profiler::device_cpu_cores();
|
||||||
LOG_INF("Number of CPU cores on this device: %i\n", n_cpu_cores);
|
uint64_t total_memory = profiler::device_physical_memory(false);
|
||||||
|
uint64_t available_memory = profiler::device_physical_memory(true);
|
||||||
|
LOG_INF("Number of CPU cores: %u\n", n_cpu_cores);
|
||||||
|
LOG_INF("Total Physical Memory: %.2f GB\n", total_memory / (double)(1 << 30));
|
||||||
|
LOG_INF("Available Physical Memory: %.2f GB\n", available_memory / (double)(1 << 30));
|
||||||
|
|
||||||
if (model == NULL) {
|
if (model == NULL) {
|
||||||
LOG_ERR("%s: failed to load model '%s'\n", __func__, params.model.c_str());
|
LOG_ERR("%s: failed to load model '%s'\n", __func__, params.model.c_str());
|
||||||
|
|
|
@ -1,7 +1,91 @@
|
||||||
|
#include "log.h"
|
||||||
#include "profiler.h"
|
#include "profiler.h"
|
||||||
|
|
||||||
|
#if defined(_WIN32) || defined(_WIN64)
|
||||||
|
#include <windows.h>
|
||||||
|
#elif defined(__linux__)
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <sys/sysinfo.h>
|
||||||
|
#elif defined(__APPLE__) && defined(__MACH__)
|
||||||
|
#include <sys/sysctl.h>
|
||||||
|
#include <mach/mach.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <sys/types.h>
|
||||||
|
|
||||||
namespace profiler {
|
namespace profiler {
|
||||||
unsigned int get_cpu_core_count() {
|
|
||||||
return 4;
|
// Returns the number of CPU cores reported by the operating system.
//
// Every platform branch falls back to 1 when the underlying query fails or
// reports a non-positive value, so callers always receive a count >= 1.
uint32_t device_cpu_cores() {
    unsigned int core_count = 1; // default to 1 in case of failure

#if defined(_WIN32) || defined(_WIN64)
    // Windows implementation
    SYSTEM_INFO sysinfo;
    GetSystemInfo(&sysinfo);
    if (sysinfo.dwNumberOfProcessors > 0) {
        core_count = sysinfo.dwNumberOfProcessors;
    }
#elif defined(__linux__)
    // Linux implementation
    // sysconf() returns a long and signals failure with -1; storing that
    // directly in an unsigned variable would wrap around to a huge count.
    const long online = sysconf(_SC_NPROCESSORS_ONLN);
    if (online > 0) {
        core_count = static_cast<unsigned int>(online);
    }
#elif defined(__APPLE__) && defined(__MACH__)
    // macOS implementation
    int mib[2];
    size_t len = sizeof(core_count);

    // first try the number of available cpus
    mib[0] = CTL_HW;
    mib[1] = HW_AVAILCPU;

    if (sysctl(mib, 2, &core_count, &len, NULL, 0) != 0 || core_count < 1) {
        // fall back to the total number of cpus
        mib[1] = HW_NCPU;
        len = sizeof(core_count); // sysctl may have modified len; reset it before retrying
        if (sysctl(mib, 2, &core_count, &len, NULL, 0) != 0 || core_count < 1) {
            core_count = 1; // default to 1 if sysctl fails
        }
    }
#endif

    return core_count;
}
|
||||||
|
|
||||||
|
// Returns the amount of physical memory on this device, in bytes.
//
// available: when true, report the memory the OS currently considers
//            free/available; when false, report the total installed memory.
//
// Returns 0 if the platform query fails or the platform is not supported.
uint64_t device_physical_memory(bool available) {
    uint64_t memory = 0;

#if defined(_WIN32) || defined(_WIN64)
    MEMORYSTATUSEX status;
    status.dwLength = sizeof(status);
    // Only read the struct when the call succeeds; otherwise its fields are
    // uninitialised and would be returned as garbage.
    if (GlobalMemoryStatusEx(&status)) {
        memory = available ? status.ullAvailPhys : status.ullTotalPhys;
    }

#elif defined(__linux__)
    struct sysinfo info;
    if (sysinfo(&info) == 0) {
        // freeram/totalram are expressed in units of mem_unit bytes
        memory = available ? info.freeram : info.totalram;
        memory *= info.mem_unit;
    }

#elif defined(__APPLE__) && defined(__MACH__)
    if (available) {
        mach_port_t host = mach_host_self();
        vm_statistics64_data_t vm_stats;
        mach_msg_type_number_t count = HOST_VM_INFO64_COUNT;

        if (host_statistics64(host, HOST_VM_INFO64, (host_info64_t)&vm_stats, &count) == KERN_SUCCESS) {
            const long page_size = sysconf(_SC_PAGESIZE);
            if (page_size > 0) {
                // widen before adding/multiplying so the page counts cannot overflow 32 bits
                const uint64_t pages = static_cast<uint64_t>(vm_stats.free_count)
                                     + static_cast<uint64_t>(vm_stats.inactive_count);
                memory = pages * static_cast<uint64_t>(page_size);
            }
        }

        // mach_host_self() hands out a send right; release it to avoid leaking a port reference
        mach_port_deallocate(mach_task_self(), host);
    } else {
        int mib[2];
        size_t len = sizeof(memory);
        mib[0] = CTL_HW;
        mib[1] = HW_MEMSIZE;
        if (sysctl(mib, 2, &memory, &len, NULL, 0) != 0) {
            memory = 0; // report failure as 0 rather than a partially written value
        }
    }
#endif

    return memory;
}
|
||||||
} // namespace profiler
|
} // namespace profiler
|
|
@ -2,7 +2,8 @@
|
||||||
#define PROFILER_H
|
#define PROFILER_H
|
||||||
|
|
||||||
namespace profiler {
|
namespace profiler {
|
||||||
unsigned int get_cpu_core_count();
|
uint32_t device_cpu_cores();
|
||||||
|
uint64_t device_physical_memory(bool available = true);
|
||||||
} // namespace profiler
|
} // namespace profiler
|
||||||
|
|
||||||
#endif // PROFILER_H
|
#endif // PROFILER_H
|
Loading…
Add table
Reference in a new issue