Add USE_HIGHS build option to make the HiGHS solver dependency optional

2025-09-06 08:29:02 +00:00 · 2025-01-15 20:05:49 +04:00 · 2025-01-15 20:05:49 +04:00 · 18c96e8042
commit 18c96e8042
parent 790f702d0c
2 changed files with 29 additions and 12 deletions
--- a/26
+++ b/26
@ -264,15 +264,25 @@ MK_CXXFLAGS  = -std=c++11 -fPIC
 MK_NVCCFLAGS = -std=c++11

 ifeq ($(UNAME_S),Darwin)
-    MK_CPPFLAGS += -isystem /opt/homebrew/include -isystem /opt/homebrew/include/highs
-    MK_LDFLAGS  += -L/opt/homebrew/lib -lzmq -lhighs
+    MK_CPPFLAGS += -isystem /opt/homebrew/include
+    MK_LDFLAGS  += -L/opt/homebrew/lib -lzmq
 else ifeq ($(UNAME_S),Linux)
-    MK_CPPFLAGS += -isystem /usr/local/include -isystem /usr/local/include/highs
-    MK_LDFLAGS  += -L/usr/local/lib -lzmq -lhighs
-	
-	ifneq ($(CONDA_PREFIX),)
-		MK_CPPFLAGS += -isystem $(CONDA_PREFIX)/include -isystem $(CONDA_PREFIX)/include/highs
-		MK_LDFLAGS  += -L$(CONDA_PREFIX)/lib -Wl,-rpath,$(CONDA_PREFIX)/lib
+    MK_CPPFLAGS += -isystem /usr/local/include
+    MK_LDFLAGS  += -L/usr/local/lib -lzmq
+endif
+
+ifdef USE_HIGHS
+	ifeq ($(UNAME_S),Darwin)
+		MK_CPPFLAGS += -isystem /opt/homebrew/include/highs
+		MK_LDFLAGS  += -L/opt/homebrew/lib -lhighs
+	else ifeq ($(UNAME_S),Linux)
+		MK_CPPFLAGS += -isystem /usr/local/include/highs
+		MK_LDFLAGS  += -L/usr/local/lib -lhighs
+
+		ifneq ($(CONDA_PREFIX),)
+			MK_CPPFLAGS += -isystem $(CONDA_PREFIX)/include -isystem $(CONDA_PREFIX)/include/highs
+			MK_LDFLAGS  += -L$(CONDA_PREFIX)/lib -Wl,-rpath,$(CONDA_PREFIX)/lib
+		endif
 	endif
 endif

--- a/common/common.cpp
+++ b/common/common.cpp
@ -9,7 +9,6 @@
 #include "json.hpp"
 #include "json-schema-to-grammar.h"
 #include "llama.h"
-#include "Highs.h"

 #include <algorithm>
 #include <cinttypes>
@ -69,6 +68,10 @@
 #define LLAMA_CURL_MAX_URL_LENGTH 2084 // Maximum URL Length in Chrome: 2083
 #endif // LLAMA_USE_CURL

+#if defined(USE_HIGHS)
+#include "Highs.h"
+#endif
+
 using json = nlohmann::ordered_json;

 constexpr int GIGABYTE = 1024 * 1024 * 1024;
@ -944,6 +947,8 @@ static void assign_device(
        n[m] = 0;
    }

+#if defined(USE_HIGHS)
+
    // stores the actual read bandwidth (GB/s) for each device
    std::vector<float> disk_speed(n_world, 0.0f);
    for (uint32_t m = 0; m < n_world; ++m) {
@ -1326,13 +1331,15 @@ static void assign_device(
        final_solution = best_solution;
    }

-    LOG_INF("Global best solution found for k = %d, W = %d\n", final_k, n_layer / final_k);
+    LOG_INF("Solution found for k = %d, W = %d\n", final_k, n_layer / final_k);
    for (uint32_t m = 0; m < n_world; ++m) {
        const char * device_name = dev_info_set[m].device_name;
        GGML_ASSERT(final_solution[m] == w[m] && final_solution[m + n_world] == n[m]);
-        LOG_INF("Device %s (m = %d): w = %d, n = %d\n", device_name, m, w[m], n[m]);
+        LOG_INF("Device %s (m = %d): n_layer_window = %d, n_gpu_layers = %d\n", device_name, m, w[m], n[m]);
    }
-    LOG_INF("Objective value: %.3f\n", final_objective);
+    LOG_INF("Total latency: %.3f\n", final_objective);
+
+#endif

    // copy value from w and n to n_layer_window and n_gpu_layers, respectively
    std::copy(w.begin(), w.end(), n_layer_window);