mirror of
https://github.com/Lizonghang/prima.cpp.git
synced 2025-09-06 08:29:02 +00:00
add option USE_HIGHS
This commit is contained in:
parent
790f702d0c
commit
18c96e8042
2 changed files with 29 additions and 12 deletions
26
Makefile
26
Makefile
|
@ -264,15 +264,25 @@ MK_CXXFLAGS = -std=c++11 -fPIC
|
|||
MK_NVCCFLAGS = -std=c++11
|
||||
|
||||
ifeq ($(UNAME_S),Darwin)
|
||||
MK_CPPFLAGS += -isystem /opt/homebrew/include -isystem /opt/homebrew/include/highs
|
||||
MK_LDFLAGS += -L/opt/homebrew/lib -lzmq -lhighs
|
||||
MK_CPPFLAGS += -isystem /opt/homebrew/include
|
||||
MK_LDFLAGS += -L/opt/homebrew/lib -lzmq
|
||||
else ifeq ($(UNAME_S),Linux)
|
||||
MK_CPPFLAGS += -isystem /usr/local/include -isystem /usr/local/include/highs
|
||||
MK_LDFLAGS += -L/usr/local/lib -lzmq -lhighs
|
||||
|
||||
ifneq ($(CONDA_PREFIX),)
|
||||
MK_CPPFLAGS += -isystem $(CONDA_PREFIX)/include -isystem $(CONDA_PREFIX)/include/highs
|
||||
MK_LDFLAGS += -L$(CONDA_PREFIX)/lib -Wl,-rpath,$(CONDA_PREFIX)/lib
|
||||
MK_CPPFLAGS += -isystem /usr/local/include
|
||||
MK_LDFLAGS += -L/usr/local/lib -lzmq
|
||||
endif
|
||||
|
||||
ifdef USE_HIGHS
|
||||
ifeq ($(UNAME_S),Darwin)
|
||||
MK_CPPFLAGS += -isystem /opt/homebrew/include/highs
|
||||
MK_LDFLAGS += -L/opt/homebrew/lib -lhighs
|
||||
else ifeq ($(UNAME_S),Linux)
|
||||
MK_CPPFLAGS += -isystem /usr/local/include/highs
|
||||
MK_LDFLAGS += -L/usr/local/lib -lhighs
|
||||
|
||||
ifneq ($(CONDA_PREFIX),)
|
||||
MK_CPPFLAGS += -isystem $(CONDA_PREFIX)/include -isystem $(CONDA_PREFIX)/include/highs
|
||||
MK_LDFLAGS += -L$(CONDA_PREFIX)/lib -Wl,-rpath,$(CONDA_PREFIX)/lib
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
|
|
|
@ -9,7 +9,6 @@
|
|||
#include "json.hpp"
|
||||
#include "json-schema-to-grammar.h"
|
||||
#include "llama.h"
|
||||
#include "Highs.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cinttypes>
|
||||
|
@ -69,6 +68,10 @@
|
|||
#define LLAMA_CURL_MAX_URL_LENGTH 2084 // Maximum URL Length in Chrome: 2083
|
||||
#endif // LLAMA_USE_CURL
|
||||
|
||||
#if defined(USE_HIGHS)
|
||||
#include "Highs.h"
|
||||
#endif
|
||||
|
||||
using json = nlohmann::ordered_json;
|
||||
|
||||
constexpr int GIGABYTE = 1024 * 1024 * 1024;
|
||||
|
@ -944,6 +947,8 @@ static void assign_device(
|
|||
n[m] = 0;
|
||||
}
|
||||
|
||||
#if defined(USE_HIGHS)
|
||||
|
||||
// stores the actual read bandwidth (GB/s) for each device
|
||||
std::vector<float> disk_speed(n_world, 0.0f);
|
||||
for (uint32_t m = 0; m < n_world; ++m) {
|
||||
|
@ -1326,13 +1331,15 @@ static void assign_device(
|
|||
final_solution = best_solution;
|
||||
}
|
||||
|
||||
LOG_INF("Global best solution found for k = %d, W = %d\n", final_k, n_layer / final_k);
|
||||
LOG_INF("Solution found for k = %d, W = %d\n", final_k, n_layer / final_k);
|
||||
for (uint32_t m = 0; m < n_world; ++m) {
|
||||
const char * device_name = dev_info_set[m].device_name;
|
||||
GGML_ASSERT(final_solution[m] == w[m] && final_solution[m + n_world] == n[m]);
|
||||
LOG_INF("Device %s (m = %d): w = %d, n = %d\n", device_name, m, w[m], n[m]);
|
||||
LOG_INF("Device %s (m = %d): n_layer_window = %d, n_gpu_layers = %d\n", device_name, m, w[m], n[m]);
|
||||
}
|
||||
LOG_INF("Objective value: %.3f\n", final_objective);
|
||||
LOG_INF("Total latency: %.3f\n", final_objective);
|
||||
|
||||
#endif
|
||||
|
||||
// copy the values from w and n into n_layer_window and n_gpu_layers, respectively
|
||||
std::copy(w.begin(), w.end(), n_layer_window);
|
||||
|
|
Loading…
Add table
Reference in a new issue