From cf4fa04980ee37a7c609dcdc52410895fe839dd4 Mon Sep 17 00:00:00 2001
From: DeEMO
Date: Fri, 18 Apr 2025 03:27:20 +0000
Subject: [PATCH] Add an independent profile tool

---
 .gitignore             |  1 +
 Makefile               |  7 ++++-
 tools/profile_tool.cpp | 62 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 69 insertions(+), 1 deletion(-)
 create mode 100644 tools/profile_tool.cpp

diff --git a/.gitignore b/.gitignore
index 87c0aa4f..6f1e6062 100644
--- a/.gitignore
+++ b/.gitignore
@@ -67,6 +67,7 @@ autogen-*.md
 
 /main
 /server
+/profile-tool
 
 # CI
 
diff --git a/Makefile b/Makefile
index a3f1bf04..bcbaf01c 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,5 @@
 # Define the default target now so that it is always the first target
-BUILD_TARGETS = llama-cli
+BUILD_TARGETS = llama-cli profile-tool
 # BUILD_TARGETS = \
 # 	libllava.a \
 # 	llama-baby-llama \
@@ -1528,6 +1528,11 @@ llama-minicpmv-cli: examples/llava/minicpmv-cli.cpp \
 	$(OBJ_ALL)
 	$(CXX) $(CXXFLAGS) $< $(filter-out %.h $<,$^) -o $@ $(LDFLAGS) -Wno-cast-qual
 
+profile-tool: tools/profile_tool.cpp \
+	$(OBJ_ALL)
+	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
+	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
+
 ifeq ($(UNAME_S),Darwin)
 swift: examples/batched.swift
 	(cd examples/batched.swift; make build)
diff --git a/tools/profile_tool.cpp b/tools/profile_tool.cpp
new file mode 100644
index 00000000..328df2e3
--- /dev/null
+++ b/tools/profile_tool.cpp
@@ -0,0 +1,62 @@
+#include "arg.h"
+#include "common.h"
+#include "console.h"
+#include "log.h"
+#include "llama.h"
+
+static void print_usage(int argc, char ** argv) {
+    (void) argc;
+
+    LOG("\nexample usage:\n");
+    LOG("\n  text generation:     %s -m your_model.gguf -p \"I believe the meaning of life is\" -n 128\n", argv[0]);
+    LOG("\n  chat (conversation): %s -m your_model.gguf -p \"You are a helpful assistant\" -cnv\n", argv[0]);
+    LOG("\n");
+}
+
+int main(int argc, char ** argv) {
+    gpt_params params;
+    if (!gpt_params_parse(argc, argv, params, LLAMA_EXAMPLE_MAIN, print_usage)) {
+        return 1;
+    }
+
+    if (params.n_ctx != 0 && params.n_ctx < 8) {
+        LOG_WRN("%s: warning: minimum context size is 8, using minimum size.\n", __func__);
+        params.n_ctx = 8;
+    }
+
+    if (params.rope_freq_base != 0.0) {
+        LOG_WRN("%s: warning: changing RoPE frequency base to %g.\n", __func__, params.rope_freq_base);
+    }
+
+    if (params.rope_freq_scale != 0.0) {
+        LOG_WRN("%s: warning: scaling RoPE frequency by %g.\n", __func__, params.rope_freq_scale);
+    }
+
+    // load the model and apply lora adapter, if any
+    auto mparams = llama_model_params_from_gpt_params(params);
+    struct llama_context_params cparams = llama_context_params_from_gpt_params(params);
+
+    struct llama_model * model = nullptr;
+
+    if (!params.hf_repo.empty() && !params.hf_file.empty()) {
+        model = llama_load_model_from_hf(params.hf_repo.c_str(), params.hf_file.c_str(), params.model.c_str(), params.hf_token.c_str(), mparams);
+    } else if (!params.model_url.empty()) {
+        model = llama_load_model_from_url(params.model_url.c_str(), params.model.c_str(), params.hf_token.c_str(), mparams);
+    } else {
+        model = llama_load_model_from_file(params.model.c_str(), mparams);
+    }
+
+    if (model == NULL) {
+        LOG_ERR("%s: failed to load model '%s'\n", __func__, params.model.c_str());
+        return -1;
+    }
+
+    llama_model_loader * ml = llama_model_load(params.model.c_str(), model, &mparams);
+
+    device_info dev_info;
+    llama_profile_device(&dev_info, model, ml, params.gpu_mem, params.n_predict, params.n_ctx, params.cpuparams.n_threads, params.flash_attn);
+    device_print_props(&dev_info, 1, model, cparams);
+
+    llama_free_model(model);
+    return 0;
+}
\ No newline at end of file
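
---
Usage note (not part of the commit; a minimal sketch assuming the Makefile target above and a local GGUF model, with flags coming from the common gpt_params parser):

    make profile-tool
    ./profile-tool -m your_model.gguf

The tool loads the model, profiles the local device via llama_profile_device(), and prints the collected properties with device_print_props().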