diff --git a/Makefile b/Makefile index d7fd072c..ba931f2b 100644 --- a/Makefile +++ b/Makefile @@ -937,6 +937,7 @@ OBJ_LLAMA = \ src/unicode-data.o OBJ_COMMON = \ + common/profiler.o \ common/common.o \ common/arg.o \ common/log.o \ @@ -945,8 +946,8 @@ OBJ_COMMON = \ common/sampling.o \ common/train.o \ common/build-info.o \ - common/json-schema-to-grammar.o \ - common/profiler.o + common/json-schema-to-grammar.o + OBJ_ALL = $(OBJ_GGML) $(OBJ_LLAMA) $(OBJ_COMMON) @@ -1172,6 +1173,11 @@ $(LIB_LLAMA_S): \ # common +common/profiler.o: \ + common/profiler.cpp \ + common/profiler.h + $(CXX) $(CXXFLAGS) -c $< -o $@ + common/common.o: \ common/common.cpp \ common/common.h \ @@ -1179,6 +1185,7 @@ common/common.o: \ common/sampling.h \ common/json.hpp \ common/json-schema-to-grammar.h \ + common/profiler.h \ include/llama.h $(CXX) $(CXXFLAGS) -c $< -o $@ @@ -1187,11 +1194,6 @@ common/arg.o: \ common/arg.h $(CXX) $(CXXFLAGS) -c $< -o $@ -common/profiler.o: \ - common/profiler.cpp \ - common/profiler.h - $(CXX) $(CXXFLAGS) -c $< -o $@ - common/log.o: \ common/log.cpp \ common/log.h diff --git a/common/common.cpp b/common/common.cpp index 3fe33ff3..72a968ab 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -9,7 +9,7 @@ #include "json.hpp" #include "json-schema-to-grammar.h" #include "llama.h" -#include "profile.h" +#include "profiler.h" #include #include @@ -835,8 +835,12 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) { } // profile devices and determine the best setup - uint32_t n_cpu_cores = profiler::get_cpu_core_count(); - LOG_INF("Number of CPU cores on this device: %i\n", n_cpu_cores); + uint32_t n_cpu_cores = profiler::device_cpu_cores(); + uint64_t total_memory = profiler::device_physical_memory(false); + uint64_t available_memory = profiler::device_physical_memory(true); + LOG_INF("Number of CPU cores: %u\n", n_cpu_cores); + LOG_INF("Total Physical Memory: %.2f GB\n", total_memory / (double)(1 << 30)); + LOG_INF("Available Physical 
namespace profiler {

/// Returns the number of CPU cores reported by the operating system.
///
/// Uses GetSystemInfo on Windows, sysconf(_SC_NPROCESSORS_ONLN) on Linux and
/// sysctl (HW_AVAILCPU, then HW_NCPU) on macOS. On any other platform, or when
/// the query fails or reports a non-positive value, the result is 1, so the
/// return value is never 0.
uint32_t device_cpu_cores() {
    uint32_t core_count = 1; // fallback used whenever the OS query is unusable

#if defined(_WIN32) || defined(_WIN64)
    SYSTEM_INFO sys_info;
    GetSystemInfo(&sys_info);
    if (sys_info.dwNumberOfProcessors > 0) {
        core_count = static_cast<uint32_t>(sys_info.dwNumberOfProcessors);
    }
#elif defined(__linux__)
    // sysconf() returns a long and signals failure with -1; storing that
    // straight into an unsigned would report ~4 billion cores.
    const long online = sysconf(_SC_NPROCESSORS_ONLN);
    if (online > 0) {
        core_count = static_cast<uint32_t>(online);
    }
#elif defined(__APPLE__) && defined(__MACH__)
    int          mib[2] = { CTL_HW, HW_AVAILCPU }; // available cpus first
    unsigned int ncpu   = 0;
    size_t       len    = sizeof(ncpu);

    if (sysctl(mib, 2, &ncpu, &len, nullptr, 0) != 0 || ncpu < 1) {
        mib[1] = HW_NCPU; // fall back to the total number of cpus
        len    = sizeof(ncpu);
        if (sysctl(mib, 2, &ncpu, &len, nullptr, 0) != 0 || ncpu < 1) {
            ncpu = 1;
        }
    }
    core_count = ncpu;
#endif

    return core_count;
}

/// Returns the amount of physical memory on this device, in bytes.
///
/// @param available  when true, return the memory the OS currently reports as
///                   free/available; when false, return the total installed
///                   physical memory.
/// @return number of bytes, or 0 when the platform is unsupported or the
///         underlying OS query fails.
uint64_t device_physical_memory(bool available) {
    uint64_t memory = 0;

#if defined(_WIN32) || defined(_WIN64)
    MEMORYSTATUSEX status;
    status.dwLength = sizeof(status);
    // The struct is only filled in on success; report 0 rather than reading
    // uninitialized fields when the call fails.
    if (GlobalMemoryStatusEx(&status)) {
        memory = available ? status.ullAvailPhys : status.ullTotalPhys;
    }

#elif defined(__linux__)
    struct sysinfo info;
    if (sysinfo(&info) == 0) {
        // NOTE(review): freeram presumably excludes reclaimable page cache, so
        // the "available" figure may under-report; MemAvailable from
        // /proc/meminfo would likely be closer - confirm before relying on it.
        memory  = available ? info.freeram : info.totalram;
        memory *= info.mem_unit; // counters are expressed in mem_unit-byte units
    }

#elif defined(__APPLE__) && defined(__MACH__)
    if (available) {
        mach_port_t            host  = mach_host_self();
        vm_statistics64_data_t vm_stats;
        mach_msg_type_number_t count = HOST_VM_INFO64_COUNT;
        const long             page_size = sysconf(_SC_PAGESIZE); // -1 on error

        if (page_size > 0 &&
            host_statistics64(host, HOST_VM_INFO64,
                              reinterpret_cast<host_info64_t>(&vm_stats), &count) == KERN_SUCCESS) {
            // Widen before adding/multiplying so the page arithmetic is done in 64 bits.
            const uint64_t pages = static_cast<uint64_t>(vm_stats.free_count) +
                                   static_cast<uint64_t>(vm_stats.inactive_count);
            memory = pages * static_cast<uint64_t>(page_size);
        }
    } else {
        int      mib[2] = { CTL_HW, HW_MEMSIZE };
        uint64_t total  = 0;
        size_t   len    = sizeof(total);
        if (sysctl(mib, 2, &total, &len, nullptr, 0) == 0) {
            memory = total;
        }
    }
#endif

    return memory;
}

} // namespace profiler