From 18c96e804211fe9bac92474518b925f978cf4a25 Mon Sep 17 00:00:00 2001 From: Lizonghang <870644199@qq.com> Date: Wed, 15 Jan 2025 20:05:49 +0400 Subject: [PATCH] add option USE_HIGHS --- Makefile | 26 ++++++++++++++++++-------- common/common.cpp | 15 +++++++++++---- 2 files changed, 29 insertions(+), 12 deletions(-) diff --git a/Makefile b/Makefile index f613193b..8c34694b 100644 --- a/Makefile +++ b/Makefile @@ -264,15 +264,25 @@ MK_CXXFLAGS = -std=c++11 -fPIC MK_NVCCFLAGS = -std=c++11 ifeq ($(UNAME_S),Darwin) - MK_CPPFLAGS += -isystem /opt/homebrew/include -isystem /opt/homebrew/include/highs - MK_LDFLAGS += -L/opt/homebrew/lib -lzmq -lhighs + MK_CPPFLAGS += -isystem /opt/homebrew/include + MK_LDFLAGS += -L/opt/homebrew/lib -lzmq else ifeq ($(UNAME_S),Linux) - MK_CPPFLAGS += -isystem /usr/local/include -isystem /usr/local/include/highs - MK_LDFLAGS += -L/usr/local/lib -lzmq -lhighs - - ifneq ($(CONDA_PREFIX),) - MK_CPPFLAGS += -isystem $(CONDA_PREFIX)/include -isystem $(CONDA_PREFIX)/include/highs - MK_LDFLAGS += -L$(CONDA_PREFIX)/lib -Wl,-rpath,$(CONDA_PREFIX)/lib + MK_CPPFLAGS += -isystem /usr/local/include + MK_LDFLAGS += -L/usr/local/lib -lzmq +endif + +ifdef USE_HIGHS + ifeq ($(UNAME_S),Darwin) + MK_CPPFLAGS += -isystem /opt/homebrew/include/highs + MK_LDFLAGS += -L/opt/homebrew/lib -lhighs + else ifeq ($(UNAME_S),Linux) + MK_CPPFLAGS += -isystem /usr/local/include/highs + MK_LDFLAGS += -L/usr/local/lib -lhighs + + ifneq ($(CONDA_PREFIX),) + MK_CPPFLAGS += -isystem $(CONDA_PREFIX)/include -isystem $(CONDA_PREFIX)/include/highs + MK_LDFLAGS += -L$(CONDA_PREFIX)/lib -Wl,-rpath,$(CONDA_PREFIX)/lib + endif endif endif diff --git a/common/common.cpp b/common/common.cpp index 98791e97..a835709f 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -9,7 +9,6 @@ #include "json.hpp" #include "json-schema-to-grammar.h" #include "llama.h" -#include "Highs.h" #include #include @@ -69,6 +68,10 @@ #define LLAMA_CURL_MAX_URL_LENGTH 2084 // Maximum URL Length in Chrome: 2083 #endif // LLAMA_USE_CURL +#if defined(USE_HIGHS) +#include "Highs.h" +#endif + using json = nlohmann::ordered_json; constexpr int GIGABYTE = 1024 * 1024 * 1024; @@ -944,6 +947,8 @@ static void assign_device( n[m] = 0; } +#if defined(USE_HIGHS) + // stores the actual read bandwidth (GB/s) for each device std::vector disk_speed(n_world, 0.0f); for (uint32_t m = 0; m < n_world; ++m) { @@ -1326,13 +1331,15 @@ static void assign_device( final_solution = best_solution; } - LOG_INF("Global best solution found for k = %d, W = %d\n", final_k, n_layer / final_k); + LOG_INF("Solution found for k = %d, W = %d\n", final_k, n_layer / final_k); for (uint32_t m = 0; m < n_world; ++m) { const char * device_name = dev_info_set[m].device_name; GGML_ASSERT(final_solution[m] == w[m] && final_solution[m + n_world] == n[m]); - LOG_INF("Device %s (m = %d): w = %d, n = %d\n", device_name, m, w[m], n[m]); + LOG_INF("Device %s (m = %d): n_layer_window = %d, n_gpu_layers = %d\n", device_name, m, w[m], n[m]); } - LOG_INF("Objective value: %.3f\n", final_objective); + LOG_INF("Total latency: %.3f\n", final_objective); + +#endif // copy value from w and n to n_layer_window and n_gpu_layers, respectively std::copy(w.begin(), w.end(), n_layer_window);