mirror of https://github.com/Lizonghang/prima.cpp.git
synced 2025-09-04 00:09:08 +00:00
Add an independent profile tool
parent 168c14f4e8
commit cf4fa04980

3 changed files with 69 additions and 1 deletion
.gitignore (vendored): 1 change

@@ -67,6 +67,7 @@ autogen-*.md
 /main
 /server
+/profile-tool
 
 # CI
Makefile: 7 changes

@@ -1,5 +1,5 @@
 # Define the default target now so that it is always the first target
-BUILD_TARGETS = llama-cli
+BUILD_TARGETS = llama-cli profile-tool
 # BUILD_TARGETS = \
 #   libllava.a \
 #   llama-baby-llama \

@@ -1528,6 +1528,11 @@ llama-minicpmv-cli: examples/llava/minicpmv-cli.cpp \
     $(OBJ_ALL)
     $(CXX) $(CXXFLAGS) $< $(filter-out %.h $<,$^) -o $@ $(LDFLAGS) -Wno-cast-qual
+
+profile-tool: tools/profile_tool.cpp \
+    $(OBJ_ALL)
+    $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
+    $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
 
 ifeq ($(UNAME_S),Darwin)
 swift: examples/batched.swift
     (cd examples/batched.swift; make build)
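The new target follows the same two-step pattern as the other single-source targets in this Makefile: the source file is first compiled to an object via GET_OBJ_FILE and then linked against the common objects in OBJ_ALL, producing a profile-tool binary in the repository root (which is why /profile-tool is added to .gitignore above). Assuming a default build setup, it can be built on its own with:

    make profile-tool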
tools/profile_tool.cpp: 62 changes (new file)

@@ -0,0 +1,62 @@
+#include "arg.h"
+#include "common.h"
+#include "console.h"
+#include "log.h"
+#include "llama.h"
+
+static void print_usage(int argc, char ** argv) {
+    (void) argc;
+
+    LOG("\nexample usage:\n");
+    LOG("\n  text generation:     %s -m your_model.gguf -p \"I believe the meaning of life is\" -n 128\n", argv[0]);
+    LOG("\n  chat (conversation): %s -m your_model.gguf -p \"You are a helpful assistant\" -cnv\n", argv[0]);
+    LOG("\n");
+}
+
+int main(int argc, char ** argv) {
+    gpt_params params;
+    if (!gpt_params_parse(argc, argv, params, LLAMA_EXAMPLE_MAIN, print_usage)) {
+        return 1;
+    }
+
+    if (params.n_ctx != 0 && params.n_ctx < 8) {
+        LOG_WRN("%s: warning: minimum context size is 8, using minimum size.\n", __func__);
+        params.n_ctx = 8;
+    }
+
+    if (params.rope_freq_base != 0.0) {
+        LOG_WRN("%s: warning: changing RoPE frequency base to %g.\n", __func__, params.rope_freq_base);
+    }
+
+    if (params.rope_freq_scale != 0.0) {
+        LOG_WRN("%s: warning: scaling RoPE frequency by %g.\n", __func__, params.rope_freq_scale);
+    }
+
+    // load the model and apply lora adapter, if any
+    auto mparams = llama_model_params_from_gpt_params(params);
+    struct llama_context_params cparams = llama_context_params_from_gpt_params(params);
+
+    struct llama_model * model = nullptr;
+
+    if (!params.hf_repo.empty() && !params.hf_file.empty()) {
+        model = llama_load_model_from_hf(params.hf_repo.c_str(), params.hf_file.c_str(), params.model.c_str(), params.hf_token.c_str(), mparams);
+    } else if (!params.model_url.empty()) {
+        model = llama_load_model_from_url(params.model_url.c_str(), params.model.c_str(), params.hf_token.c_str(), mparams);
+    } else {
+        model = llama_load_model_from_file(params.model.c_str(), mparams);
+    }
+
+    if (model == NULL) {
+        LOG_ERR("%s: failed to load model '%s'\n", __func__, params.model.c_str());
+        return -1;
+    }
+
+    llama_model_loader * ml = llama_model_load(params.model.c_str(), model, &mparams);
+
+    device_info dev_info;
+    llama_profile_device(&dev_info, model, ml, params.gpu_mem, params.n_predict, params.n_ctx, params.cpuparams.n_threads, params.flash_attn);
+    device_print_props(&dev_info, 1, model, cparams);
+
+    llama_free_model(model);
+    return 0;
+}
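The tool itself is essentially a stripped-down variant of the main CLI entry point: it parses the usual gpt_params (reusing LLAMA_EXAMPLE_MAIN and its example usage strings), loads the model from a Hugging Face repo, a URL, or a local file, then calls llama_profile_device and device_print_props to collect and print the device profile before freeing the model and exiting, without running any generation. A minimal usage sketch (the model path is a placeholder; any option accepted by gpt_params_parse should behave as it does for llama-cli):

    ./profile-tool -m your_model.gguf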