From cf4fa04980ee37a7c609dcdc52410895fe839dd4 Mon Sep 17 00:00:00 2001
From: DeEMO
Date: Fri, 18 Apr 2025 03:27:20 +0000
Subject: [PATCH] Add an independent profile tool

---
 .gitignore             |  1 +
 Makefile               |  7 ++++-
 tools/profile_tool.cpp | 62 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 69 insertions(+), 1 deletion(-)
 create mode 100644 tools/profile_tool.cpp

diff --git a/.gitignore b/.gitignore
index 87c0aa4f..6f1e6062 100644
--- a/.gitignore
+++ b/.gitignore
@@ -67,6 +67,7 @@ autogen-*.md
 
 /main
 /server
+/profile-tool
 
 # CI
 
diff --git a/Makefile b/Makefile
index a3f1bf04..bcbaf01c 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,5 @@
 # Define the default target now so that it is always the first target
-BUILD_TARGETS = llama-cli
+BUILD_TARGETS = llama-cli profile-tool
 # BUILD_TARGETS = \
 # 	libllava.a \
 # 	llama-baby-llama \
@@ -1528,6 +1528,11 @@ llama-minicpmv-cli: examples/llava/minicpmv-cli.cpp \
 	$(OBJ_ALL)
 	$(CXX) $(CXXFLAGS) $< $(filter-out %.h $<,$^) -o $@ $(LDFLAGS) -Wno-cast-qual
 
+profile-tool: tools/profile_tool.cpp \
+	$(OBJ_ALL)
+	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
+	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
+
 ifeq ($(UNAME_S),Darwin)
 swift: examples/batched.swift
 	(cd examples/batched.swift; make build)
diff --git a/tools/profile_tool.cpp b/tools/profile_tool.cpp
new file mode 100644
index 00000000..328df2e3
--- /dev/null
+++ b/tools/profile_tool.cpp
@@ -0,0 +1,62 @@
+#include "arg.h"
+#include "common.h"
+#include "console.h"
+#include "log.h"
+#include "llama.h"
+
+static void print_usage(int argc, char ** argv) {
+    (void) argc;
+
+    LOG("\nexample usage:\n");
+    LOG("\n  text generation:     %s -m your_model.gguf -p \"I believe the meaning of life is\" -n 128\n", argv[0]);
+    LOG("\n  chat (conversation): %s -m your_model.gguf -p \"You are a helpful assistant\" -cnv\n", argv[0]);
+    LOG("\n");
+}
+
+int main(int argc, char ** argv) {
+    gpt_params params;
+    if (!gpt_params_parse(argc, argv, params, LLAMA_EXAMPLE_MAIN, print_usage)) {
+        return 1;
+    }
+
+    if (params.n_ctx != 0 && params.n_ctx < 8) {
+        LOG_WRN("%s: warning: minimum context size is 8, using minimum size.\n", __func__);
+        params.n_ctx = 8;
+    }
+
+    if (params.rope_freq_base != 0.0) {
+        LOG_WRN("%s: warning: changing RoPE frequency base to %g.\n", __func__, params.rope_freq_base);
+    }
+
+    if (params.rope_freq_scale != 0.0) {
+        LOG_WRN("%s: warning: scaling RoPE frequency by %g.\n", __func__, params.rope_freq_scale);
+    }
+
+    // load the model and apply lora adapter, if any
+    auto mparams = llama_model_params_from_gpt_params(params);
+    struct llama_context_params cparams = llama_context_params_from_gpt_params(params);
+
+    struct llama_model * model = nullptr;
+
+    if (!params.hf_repo.empty() && !params.hf_file.empty()) {
+        model = llama_load_model_from_hf(params.hf_repo.c_str(), params.hf_file.c_str(), params.model.c_str(), params.hf_token.c_str(), mparams);
+    } else if (!params.model_url.empty()) {
+        model = llama_load_model_from_url(params.model_url.c_str(), params.model.c_str(), params.hf_token.c_str(), mparams);
+    } else {
+        model = llama_load_model_from_file(params.model.c_str(), mparams);
+    }
+
+    if (model == NULL) {
+        LOG_ERR("%s: failed to load model '%s'\n", __func__, params.model.c_str());
+        return -1;
+    }
+
+    llama_model_loader * ml = llama_model_load(params.model.c_str(), model, &mparams);
+
+    device_info dev_info;
+    llama_profile_device(&dev_info, model, ml, params.gpu_mem, params.n_predict, params.n_ctx, params.cpuparams.n_threads, params.flash_attn);
+    device_print_props(&dev_info, 1, model, cparams);
+
+    llama_free_model(model);
+    return 0;
+}
\ No newline at end of file
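
---
Usage note (not part of the commit; a minimal sketch assuming the Makefile target above and a local GGUF model, with flags coming from the common gpt_params parser):

    make profile-tool
    ./profile-tool -m your_model.gguf

The tool loads the model, profiles the local device via llama_profile_device(), and prints the collected properties with device_print_props().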