cleanup, fix native build for arm (+28 squashed commit)

Squashed commit:

[d1f6a4154] bundle library

[947ab84b7] undo

[0f9aba8d8] test

[e9ac93873] test

[920438202] test

[1c6d98804] Revert "quick test"

This reverts commit acf8ec8940.

[acf8ec894] quick test

[6a9937233] undo

[5a263a5bd] test

[ddfd82bca] test

[0b30e45da] test

[c3bfece55] messed up

[2a4b37fe0] Revert "test"

This reverts commit 80a1fcaeaf.

[80a1fcaea] test

[e2aa7d944] test

[264d80200] test

[f5b123173] undo

[1ffacc484] test

[63c0be926] undo

[510e0377e] ofast try fix

[4ac199b20] try fix sigill

[1bc987ba2] try fix illegal instruction

[7697252b1] edit

[f87087b28] check gcc ver

[e9dfe2cef] try using qemu to do the pyinstaller

[b411192db] revert

[25b5301e5] try using qemu to do the pyinstaller

[58038cddc] try using qemu to do the pyinstaller
This commit is contained in:
Concedo 2024-12-07 00:19:13 +08:00
parent e9d2332dd8
commit a11bba5893
28 changed files with 69 additions and 10898 deletions

View file

@ -3,7 +3,6 @@ name: Koboldcpp Linux ARM64
on: workflow_dispatch
env:
BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
NOAVX2: 1
jobs:
linux-arm:
@ -15,35 +14,68 @@ jobs:
with:
ref: ${{ github.head_ref || github.ref_name }}
- name: Build Dependencies
id: depends1
- name: Install Dependencies
id: depends
run: |
sudo apt-get update
sudo apt-get install -y python3 python3-pip python3-dev build-essential \
sudo apt-get install -y python3-tk python3-pip python3-dev build-essential \
libffi-dev libssl-dev libbz2-dev libreadline-dev libsqlite3-dev \
crossbuild-essential-arm64 qemu qemu-user qemu-user-static \
gcc-aarch64-linux-gnu g++-aarch64-linux-gnu
crossbuild-essential-arm64 gcc-aarch64-linux-gnu g++-aarch64-linux-gnu
- name: Python Dependencies
id: depends2
- name: Install New GCC for Cross-Compilation
run: |
pip install customtkinter pyinstaller tk
- name: Build with ARM NEON Support
id: build_binary
run: |
# Enable cross-compilation for ARM
export QEMU_LD_PREFIX=/usr/aarch64-linux-gnu
export CC=aarch64-linux-gnu-gcc
export CXX=aarch64-linux-gnu-g++
sudo apt-get install -y software-properties-common
sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test
sudo apt-get update
sudo apt-get install -y gcc-12 g++-12 gcc-12-aarch64-linux-gnu g++-12-aarch64-linux-gnu
export CC=/usr/bin/aarch64-linux-gnu-gcc-12
export CXX=/usr/bin/aarch64-linux-gnu-g++-12
export AR=aarch64-linux-gnu-ar
export UNAME_M=aarch64
export UNAME_S=Linux
export PATH=/usr/bin:$PATH
make LLAMA_PORTABLE=1
chmod +x './create_ver_file.sh'
. create_ver_file.sh
pyinstaller --noconfirm --onefile --collect-all customtkinter --collect-all psutil --add-data './koboldcpp_default.so:.' --add-data './kcpp_adapters:./kcpp_adapters' --add-data './koboldcpp.py:.' --add-data './klite.embd:.' --add-data './kcpp_docs.embd:.' --add-data './kcpp_sdui.embd:.' --add-data './taesd.embd:.' --add-data './taesd_xl.embd:.' --add-data './rwkv_vocab.embd:.' --add-data './rwkv_world_vocab.embd:.' --version-file './version.txt' --clean --console koboldcpp.py -n "koboldcpp-linux-arm64"
mkdir -p dist
cp './koboldcpp_default.so' dist
ls
- name: Install QEMU
run: |
sudo apt-get update
sudo apt-get install -y qemu-user-static binfmt-support
- name: Setup QEMU for ARM64
run: |
docker run --rm --privileged multiarch/qemu-user-static --reset -p yes
- name: Build ARM64 PyInstaller
run: |
docker run --rm \
--platform linux/arm64 \
-v "${PWD}:/src" \
python:3.9-slim \
/bin/bash -c "
apt-get update && apt-get install -y build-essential && \
apt-get update && apt-get install -y gcc-12 g++-12 && \
export LD_LIBRARY_PATH=/usr/lib/gcc/x86_64-linux-gnu/12:$LD_LIBRARY_PATH && \
pip install customtkinter pyinstaller tk && \
cd /src && \
pyinstaller --noconfirm --onefile --collect-all customtkinter --collect-all psutil \
--add-data './koboldcpp_default.so:.' \
--add-data './kcpp_adapters:./kcpp_adapters' \
--add-data './koboldcpp.py:.' \
--add-data './klite.embd:.' \
--add-data './kcpp_docs.embd:.' \
--add-data './kcpp_sdui.embd:.' \
--add-data './taesd.embd:.' \
--add-data './taesd_xl.embd:.' \
--add-data './rwkv_vocab.embd:.' \
--add-data './rwkv_world_vocab.embd:.' \
--version-file './version.txt' \
--clean --console koboldcpp.py -n 'koboldcpp-linux-arm64'
"
- name: Save artifact
uses: actions/upload-artifact@v3

View file

@ -3,7 +3,6 @@ name: Koboldcpp Mac
on: workflow_dispatch
env:
BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
NOAVX2: 1
jobs:
osx:

View file

@ -312,9 +312,12 @@ ifneq ($(filter aarch64%,$(UNAME_M)),)
# Apple M1, M2, etc.
# Raspberry Pi 3, 4, Zero 2 (64-bit)
ifdef LLAMA_PORTABLE
CFLAGS +=
CXXFLAGS +=
else
CFLAGS += -mcpu=native
CXXFLAGS += -mcpu=native
# sve is cooked so we are disabling it
CFLAGS += -mcpu=native -DLLAMA_NOSVE
CXXFLAGS += -mcpu=native -DLLAMA_NOSVE
endif
endif
@ -395,7 +398,7 @@ else
ifndef LLAMA_HIPBLAS
ifndef LLAMA_VULKAN
ifndef LLAMA_METAL
NOTIFY_MSG = @echo -e '\nYou did a basic CPU build. For faster speeds, install and link a BLAS library. \nSet LLAMA_VULKAN=1 to compile with Vulkan support. This is just a reminder, not an error.'
NOTIFY_MSG = @echo -e '\n***\nYou did a basic CPU build. For faster speeds, consider installing and linking a GPU BLAS library. For example, set LLAMA_VULKAN=1 to compile with Vulkan support. Read the KoboldCpp Wiki for more information. This is just a reminder, not an error.\n***\n'
endif
endif
endif

View file

@ -1,243 +0,0 @@
#include "arg.h"
#include "common.h"
#include "log.h"
#include "llama.h"
#include <algorithm>
#include <cstdio>
#include <string>
#include <vector>
static void print_usage(int, char ** argv) {
LOG("\nexample usage:\n");
LOG("\n %s -m model.gguf -p \"Hello my name is\" -n 32 -np 4\n", argv[0]);
LOG("\n");
}
int main(int argc, char ** argv) {
common_params params;
params.prompt = "Hello my name is";
params.n_predict = 32;
if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_COMMON, print_usage)) {
return 1;
}
common_init();
// number of parallel batches
int n_parallel = params.n_parallel;
// total length of the sequences including the prompt
int n_predict = params.n_predict;
// init LLM
llama_backend_init();
llama_numa_init(params.numa);
// initialize the model
llama_model_params model_params = common_model_params_to_llama(params);
llama_model * model = llama_load_model_from_file(params.model.c_str(), model_params);
if (model == NULL) {
LOG_ERR("%s: error: unable to load model\n" , __func__);
return 1;
}
// tokenize the prompt
std::vector<llama_token> tokens_list;
tokens_list = common_tokenize(model, params.prompt, true);
const int n_kv_req = tokens_list.size() + (n_predict - tokens_list.size())*n_parallel;
// initialize the context
llama_context_params ctx_params = common_context_params_to_llama(params);
ctx_params.n_ctx = n_kv_req;
ctx_params.n_batch = std::max(n_predict, n_parallel);
llama_context * ctx = llama_new_context_with_model(model, ctx_params);
auto sparams = llama_sampler_chain_default_params();
llama_sampler * smpl = llama_sampler_chain_init(sparams);
llama_sampler_chain_add(smpl, llama_sampler_init_top_k(params.sampling.top_k));
llama_sampler_chain_add(smpl, llama_sampler_init_top_p(params.sampling.top_p, params.sampling.min_keep));
llama_sampler_chain_add(smpl, llama_sampler_init_temp (params.sampling.temp));
llama_sampler_chain_add(smpl, llama_sampler_init_dist (params.sampling.seed));
if (ctx == NULL) {
LOG_ERR("%s: error: failed to create the llama_context\n" , __func__);
return 1;
}
const int n_ctx = llama_n_ctx(ctx);
LOG_INF("\n%s: n_predict = %d, n_ctx = %d, n_batch = %u, n_parallel = %d, n_kv_req = %d\n", __func__, n_predict, n_ctx, ctx_params.n_batch, n_parallel, n_kv_req);
// make sure the KV cache is big enough to hold all the prompt and generated tokens
if (n_kv_req > n_ctx) {
LOG_ERR("%s: error: n_kv_req (%d) > n_ctx, the required KV cache size is not big enough\n", __func__, n_kv_req);
LOG_ERR("%s: either reduce n_parallel or increase n_ctx\n", __func__);
return 1;
}
// print the prompt token-by-token
LOG("\n");
for (auto id : tokens_list) {
LOG("%s", common_token_to_piece(ctx, id).c_str());
}
// create a llama_batch
// we use this object to submit token data for decoding
llama_batch batch = llama_batch_init(std::max(tokens_list.size(), (size_t) n_parallel), 0, n_parallel);
std::vector<llama_seq_id> seq_ids(n_parallel, 0);
for (int32_t i = 0; i < n_parallel; ++i) {
seq_ids[i] = i;
}
// evaluate the initial prompt
for (size_t i = 0; i < tokens_list.size(); ++i) {
common_batch_add(batch, tokens_list[i], i, seq_ids, false);
}
GGML_ASSERT(batch.n_tokens == (int) tokens_list.size());
if (llama_model_has_encoder(model)) {
if (llama_encode(ctx, batch)) {
LOG_ERR("%s : failed to eval\n", __func__);
return 1;
}
llama_token decoder_start_token_id = llama_model_decoder_start_token(model);
if (decoder_start_token_id == -1) {
decoder_start_token_id = llama_token_bos(model);
}
common_batch_clear(batch);
common_batch_add(batch, decoder_start_token_id, 0, seq_ids, false);
}
// llama_decode will output logits only for the last token of the prompt
batch.logits[batch.n_tokens - 1] = true;
if (llama_decode(ctx, batch) != 0) {
LOG_ERR("%s: llama_decode() failed\n", __func__);
return 1;
}
//// assign the system KV cache to all parallel sequences
//// this way, the parallel sequences will "reuse" the prompt tokens without having to copy them
//for (int32_t i = 1; i < n_parallel; ++i) {
// llama_kv_cache_seq_cp(ctx, 0, i, -1, -1);
//}
if (n_parallel > 1) {
LOG("\n\n%s: generating %d sequences ...\n", __func__, n_parallel);
}
// main loop
// we will store the parallel decoded sequences in this vector
std::vector<std::string> streams(n_parallel);
// remember the batch index of the last token for each parallel sequence
// we need this to determine which logits to sample from
std::vector<int32_t> i_batch(n_parallel, batch.n_tokens - 1);
int n_cur = batch.n_tokens;
int n_decode = 0;
const auto t_main_start = ggml_time_us();
while (n_cur <= n_predict) {
// prepare the next batch
common_batch_clear(batch);
// sample the next token for each parallel sequence / stream
for (int32_t i = 0; i < n_parallel; ++i) {
if (i_batch[i] < 0) {
// the stream has already finished
continue;
}
const llama_token new_token_id = llama_sampler_sample(smpl, ctx, i_batch[i]);
// is it an end of generation? -> mark the stream as finished
if (llama_token_is_eog(model, new_token_id) || n_cur == n_predict) {
i_batch[i] = -1;
LOG("\n");
if (n_parallel > 1) {
LOG_INF("%s: stream %d finished at n_cur = %d", __func__, i, n_cur);
}
continue;
}
// if there is only one stream, we print immediately to stdout
if (n_parallel == 1) {
LOG("%s", common_token_to_piece(ctx, new_token_id).c_str());
}
streams[i] += common_token_to_piece(ctx, new_token_id);
i_batch[i] = batch.n_tokens;
// push this new token for next evaluation
common_batch_add(batch, new_token_id, n_cur, { i }, true);
n_decode += 1;
}
// all streams are finished
if (batch.n_tokens == 0) {
break;
}
n_cur += 1;
// evaluate the current batch with the transformer model
if (llama_decode(ctx, batch)) {
LOG_ERR("%s : failed to eval, return code %d\n", __func__, 1);
return 1;
}
}
if (n_parallel > 1) {
LOG("\n");
for (int32_t i = 0; i < n_parallel; ++i) {
LOG("sequence %d:\n\n%s%s\n\n", i, params.prompt.c_str(), streams[i].c_str());
}
}
const auto t_main_end = ggml_time_us();
LOG_INF("%s: decoded %d tokens in %.2f s, speed: %.2f t/s\n",
__func__, n_decode, (t_main_end - t_main_start) / 1000000.0f, n_decode / ((t_main_end - t_main_start) / 1000000.0f));
LOG("\n");
llama_perf_sampler_print(smpl);
llama_perf_context_print(ctx);
fprintf(stderr, "\n");
llama_batch_free(batch);
llama_sampler_free(smpl);
llama_free(ctx);
llama_free_model(model);
llama_backend_free();
return 0;
}

File diff suppressed because it is too large Load diff

View file

@ -1,421 +0,0 @@
#include "arg.h"
#include "common.h"
#include "ggml.h"
#include "ggml-alloc.h"
#include <map>
#include <vector>
#include <string>
#include <thread>
#include <fstream>
static bool g_verbose = false;
struct tensor_transformation {
struct ggml_tensor * in;
struct ggml_tensor * out;
bool is_copy;
};
static std::string get_kv_str(struct gguf_context * ctx_gguf, const std::string & key){
int id = gguf_find_key(ctx_gguf, key.c_str());
return id < 0 ? "" : std::string(gguf_get_val_str(ctx_gguf, id));
}
static float get_kv_f32(struct gguf_context * ctx_gguf, const std::string & key) {
int id = gguf_find_key(ctx_gguf, key.c_str());
return id < 0 ? 0.0f : gguf_get_val_f32(ctx_gguf, id);
}
static void zeros(std::ofstream & file, size_t n) {
char zero = 0;
for (size_t i = 0; i < n; ++i) {
file.write(&zero, 1);
}
}
static std::string ggml_ne_string(const ggml_tensor * t) {
std::string str;
for (int i = 0; i < GGML_MAX_DIMS; ++i) {
str += std::to_string(t->ne[i]);
if (i + 1 < GGML_MAX_DIMS) {
str += ", ";
}
}
return str;
}
static struct gguf_context * load_gguf(std::string & fname, struct ggml_context ** ctx_ggml) {
struct gguf_init_params params = {
/*.no_alloc = */ true,
/*.ctx = */ ctx_ggml,
};
struct gguf_context * ctx_gguf = gguf_init_from_file(fname.c_str(), params);
if (!ctx_gguf) {
throw std::runtime_error("failed to load input GGUF from " + fname);
}
return ctx_gguf;
}
struct file_input {
struct ggml_context * ctx_meta = nullptr;
struct gguf_context * ctx_gguf = nullptr;
std::ifstream f_in;
std::map<std::string, ggml_tensor *> tensors;
float alpha;
float scale;
file_input(std::string & fname, float scale): f_in(fname, std::ios::binary), scale(scale) {
if (!f_in.is_open()) {
throw std::runtime_error("failed to open input gguf from " + fname);
}
ctx_gguf = load_gguf(fname, &ctx_meta);
alpha = get_kv_f32(ctx_gguf, "adapter.lora.alpha");
printf("%s: loaded gguf from %s\n", __func__, fname.c_str());
for (ggml_tensor * cur = ggml_get_first_tensor(ctx_meta); cur; cur = ggml_get_next_tensor(ctx_meta, cur)) {
std::string name(cur->name);
tensors[name] = cur;
if (g_verbose) {
printf("%s: %s\n", __func__, cur->name);
}
}
}
ggml_tensor * get_tensor(std::string name) {
if (tensors.find(name) == tensors.end()) {
return nullptr;
}
return tensors[name];
}
void read_tensor_data(std::string name, std::vector<uint8_t> & buf) {
if (tensors.find(name) == tensors.end()) {
throw std::runtime_error("cannot find tensor with name: " + name);
}
auto len = ggml_nbytes(tensors[name]);
if (buf.size() < len) {
buf.resize(len);
}
auto i_tensor_in = gguf_find_tensor(ctx_gguf, name.c_str()); // idx of tensor in the input file
auto offset = gguf_get_data_offset(ctx_gguf) + gguf_get_tensor_offset(ctx_gguf, i_tensor_in);
f_in.seekg(offset);
f_in.read((char* )buf.data(), len);
}
~file_input() {
gguf_free(ctx_gguf);
ggml_free(ctx_meta);
}
};
struct lora_merge_ctx {
// input base model + adapters
file_input base_model;
std::vector<std::unique_ptr<file_input>> adapters;
// for computing merged tensor
int n_threads;
ggml_backend_t backend = nullptr;
ggml_gallocr_t allocr = nullptr;
std::vector<uint8_t> read_buf;
// output file
struct gguf_context * ctx_out;
struct ggml_context * ctx_out_ggml;
std::ofstream fout;
lora_merge_ctx(
std::string & base_fname,
std::vector<common_lora_adapter_info> & lora_files,
std::string & outfile,
int n_threads) : base_model(base_fname, 0), n_threads(n_threads), fout(outfile, std::ios::binary) {
fout.exceptions(std::ofstream::failbit); // fail fast on write errors
if (gguf_find_key(base_model.ctx_gguf, LLM_KV_SPLIT_COUNT) >= 0) {
throw std::runtime_error("split model is not yet supported");
}
for (auto & lora_inp : lora_files) {
auto fname = lora_inp.path;
auto scale = lora_inp.scale;
std::unique_ptr<file_input> adapter(new file_input(fname, scale));
check_metadata_lora(adapter.get());
adapters.push_back(std::move(adapter));
}
ctx_out = gguf_init_empty();
struct ggml_init_params params = {
/*.mem_size =*/ gguf_get_n_tensors(base_model.ctx_gguf)*ggml_tensor_overhead(),
/*.mem_buffer =*/ NULL,
/*.no_alloc =*/ true,
};
ctx_out_ggml = ggml_init(params);
backend = ggml_backend_cpu_init();
allocr = ggml_gallocr_new(ggml_backend_get_default_buffer_type(backend));
}
void check_metadata_lora(file_input * adapter) {
auto general_type = get_kv_str(adapter->ctx_gguf, "general.type");
if (general_type != "adapter") {
throw std::runtime_error("expect general.type to be 'adapter', but got: " + general_type);
}
auto adapter_type = get_kv_str(adapter->ctx_gguf, "adapter.type");
if (adapter_type != "lora") {
throw std::runtime_error("expect adapter.type to be 'lora', but got: " + adapter_type);
}
auto general_arch_base = get_kv_str(base_model.ctx_gguf, "general.architecture");
auto general_arch_lora = get_kv_str(adapter->ctx_gguf, "general.architecture");
if (general_arch_base != general_arch_lora) {
throw std::runtime_error("model arch and LoRA arch mismatch");
}
}
ggml_type get_out_tensor_type(struct ggml_tensor * t) {
if (t->type == GGML_TYPE_F32) {
return GGML_TYPE_F32;
} else {
return GGML_TYPE_F16;
}
}
void run_merge() {
// prepare metadata
gguf_set_kv(ctx_out, base_model.ctx_gguf);
// output is forced to f16 for now
gguf_set_val_u32(ctx_out, "general.file_type", LLAMA_FTYPE_MOSTLY_F16);
// check if all lora adapters have the same tensors
// TODO: remove this when we can support merging subset of adapters. Ref: https://github.com/ggerganov/llama.cpp/pull/8607#discussion_r1686027777
static const char * err_no_subset_adapter = "Input adapters do not have the same list of tensors. This is not yet supported. Please merge the adapter one-by-one instead of merging all at once.";
if (adapters.size() > 1) {
for (size_t i = 1; i < adapters.size(); ++i) {
if (adapters[0]->tensors.size() != adapters[i]->tensors.size()) {
throw std::runtime_error(err_no_subset_adapter);
}
for (auto & it : adapters[i]->tensors) {
if (adapters[0]->get_tensor(it.first) == nullptr) {
throw std::runtime_error(err_no_subset_adapter);
}
}
}
}
// mapping base tensor to out tensor (same shape with base, but different type)
std::vector<tensor_transformation> trans;
for (auto & it : base_model.tensors) {
bool t_a = true;
bool t_b = true;
for (auto & adapter : adapters) {
t_a &= nullptr != adapter->get_tensor(it.first + ".lora_a");
t_b &= nullptr != adapter->get_tensor(it.first + ".lora_b");
}
auto base_tensor = it.second;
if (!t_a && !t_b) {
// only copy
struct ggml_tensor * cpy_tensor = ggml_dup_tensor(ctx_out_ggml, base_tensor);
ggml_set_name(cpy_tensor, base_tensor->name);
trans.push_back({
cpy_tensor,
cpy_tensor,
true,
});
gguf_add_tensor(ctx_out, cpy_tensor);
} else if (t_a && t_b) {
// need merging
struct ggml_tensor * out_tensor = ggml_new_tensor(
ctx_out_ggml, get_out_tensor_type(base_tensor), GGML_MAX_DIMS, base_tensor->ne);
ggml_set_name(out_tensor, base_tensor->name);
trans.push_back({
base_tensor,
out_tensor,
false,
});
gguf_add_tensor(ctx_out, out_tensor);
} else {
throw std::runtime_error("tensor " + it.first + " missing either lora_a or lora_b");
}
}
// placeholder for the meta data
{
size_t meta_size = gguf_get_meta_size(ctx_out);
zeros(fout, meta_size);
}
// process base model tensors
size_t n_merged = 0;
for (auto & it : trans) {
if (!it.is_copy) {
merge_tensor(it.in, it.out);
n_merged++;
} else {
copy_tensor(it.in);
}
}
// write output metadata
{
std::vector<uint8_t> data(gguf_get_meta_size(ctx_out));
gguf_get_meta_data(ctx_out, data.data());
fout.seekp(0);
fout.write((const char *)data.data(), data.size());
}
printf("%s : merged %ld tensors with lora adapters\n", __func__, n_merged);
printf("%s : wrote %ld tensors to output file\n", __func__, trans.size());
}
void copy_tensor(struct ggml_tensor * base) {
printf("%s : %s [%s]\n", __func__, base->name, ggml_ne_string(base).c_str());
size_t len = ggml_nbytes(base);
base_model.read_tensor_data(base->name, read_buf);
fout.write((char* )read_buf.data(), len);
zeros(fout, GGML_PAD(len, GGUF_DEFAULT_ALIGNMENT) - len);
}
void merge_tensor(struct ggml_tensor * base, struct ggml_tensor * out) {
std::string name_base(base->name);
std::string name_lora_a = name_base + ".lora_a";
std::string name_lora_b = name_base + ".lora_b";
printf("%s : %s [%s]\n", __func__, base->name, ggml_ne_string(base).c_str());
// context for input tensor
std::vector<struct ggml_tensor *> inp_a(adapters.size());
std::vector<struct ggml_tensor *> inp_b(adapters.size());
struct ggml_init_params params {
/*.mem_size =*/ ggml_tensor_overhead()*(2+adapters.size()*2),
/*.mem_buffer =*/ NULL,
/*.no_alloc =*/ true,
};
struct ggml_context * ctx = ggml_init(params);
// alloc tensors
struct ggml_tensor * inp_base = ggml_new_tensor(ctx, GGML_TYPE_F32, GGML_MAX_DIMS, base->ne);
for (size_t i = 0; i < adapters.size(); ++i) {
auto t_a = adapters[i]->get_tensor(name_lora_a);
auto t_b = adapters[i]->get_tensor(name_lora_b);
// TODO: add support for quantized lora
if (ggml_is_quantized(t_a->type) || ggml_is_quantized(t_b->type)) {
throw std::runtime_error("quantized LoRA adapters is not supported, please retry with f16 or f32");
}
inp_a[i] = ggml_dup_tensor(ctx, t_a);
inp_b[i] = ggml_dup_tensor(ctx, t_b);
}
ggml_backend_buffer_t buffer = ggml_backend_alloc_ctx_tensors(ctx, backend);
// load base tensor to backend buffer
base_model.read_tensor_data(name_base, read_buf);
if (base->type != GGML_TYPE_F32) {
// optionally dequantize it
printf("%s : + dequantize base tensor from %s to F32\n", __func__, ggml_type_name(base->type));
auto nels = ggml_nelements(inp_base);
const auto * qtype = ggml_get_type_traits(base->type);
std::vector<uint8_t> dequant_buf(nels * sizeof(float));
qtype->to_float(read_buf.data(), (float *)dequant_buf.data(), nels);
ggml_backend_tensor_set(inp_base, dequant_buf.data(), 0, dequant_buf.size());
} else {
ggml_backend_tensor_set(inp_base, read_buf.data(), 0, ggml_nbytes(inp_base));
}
// load lora tensors to backend buffer
for (size_t i = 0; i < adapters.size(); ++i) {
adapters[i]->read_tensor_data(name_lora_a, read_buf);
ggml_backend_tensor_set(inp_a[i], read_buf.data(), 0, ggml_nbytes(inp_a[i]));
adapters[i]->read_tensor_data(name_lora_b, read_buf);
ggml_backend_tensor_set(inp_b[i], read_buf.data(), 0, ggml_nbytes(inp_b[i]));
}
// build graph
struct ggml_cgraph * gf;
{
static size_t buf_size = ggml_tensor_overhead()*GGML_DEFAULT_GRAPH_SIZE + ggml_graph_overhead();
static std::vector<uint8_t> buf(buf_size);
struct ggml_init_params params0 = {
/*.mem_size =*/ buf_size,
/*.mem_buffer =*/ buf.data(),
/*.no_alloc =*/ true,
};
struct ggml_context * ctx0 = ggml_init(params0);
gf = ggml_new_graph(ctx0);
struct ggml_tensor * cur = inp_base;
for (size_t i = 0; i < adapters.size(); ++i) {
struct ggml_tensor * a_T = ggml_cont(ctx0, ggml_transpose(ctx0, ggml_cast(ctx0, inp_a[i], GGML_TYPE_F32)));
struct ggml_tensor * delta = ggml_mul_mat(ctx0, a_T, ggml_cast(ctx0, inp_b[i], GGML_TYPE_F32));
// scale
const float alpha = adapters[i]->alpha;
const float rank = (float) inp_b[i]->ne[0];
const float scale = alpha ? adapters[i]->scale * alpha / rank : adapters[i]->scale;
delta = ggml_scale(ctx0, delta, scale);
cur = ggml_add(ctx0, delta, cur);
printf("%s : + merging from adapter[%ld] type=%s\n", __func__, i, ggml_type_name(inp_a[i]->type));
printf("%s : input_scale=%f calculated_scale=%f rank=%d\n", __func__, adapters[i]->scale, scale, (int) inp_b[i]->ne[0]);
}
cur = ggml_cast(ctx0, cur, out->type);
printf("%s : + output type is %s\n", __func__, ggml_type_name(out->type));
ggml_build_forward_expand(gf, cur);
ggml_free(ctx0);
}
// compute
{
ggml_gallocr_alloc_graph(allocr, gf);
ggml_backend_cpu_set_n_threads(backend, n_threads);
ggml_backend_graph_compute(backend, gf);
}
// write data to output file
{
auto * result = ggml_graph_node(gf, -1);
size_t len = ggml_nbytes(result);
if (read_buf.size() < len) {
read_buf.resize(len);
}
ggml_backend_tensor_get(result, read_buf.data(), 0, len);
fout.write((char* )read_buf.data(), len);
zeros(fout, GGML_PAD(len, GGUF_DEFAULT_ALIGNMENT) - len);
}
ggml_free(ctx);
ggml_backend_buffer_free(buffer);
}
~lora_merge_ctx() {
ggml_gallocr_free(allocr);
ggml_backend_free(backend);
gguf_free(ctx_out);
ggml_free(ctx_out_ggml);
}
};
static void print_usage(int, char ** argv) {
printf("\nexample usage:\n");
printf("\n %s -m base-model.gguf --lora lora-file.gguf -o merged-model-f16.gguf\n", argv[0]);
printf("\nNOTE: output model is F16\n");
printf("\n");
}
int main(int argc, char ** argv) {
common_params params;
if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_EXPORT_LORA, print_usage)) {
return 1;
}
g_verbose = (params.verbosity > 1);
try {
lora_merge_ctx ctx(params.model, params.lora_adapters, params.lora_outfile, params.cpuparams.n_threads);
ctx.run_merge();
} catch (const std::exception & err) {
fprintf(stderr, "%s\n", err.what());
exit(EXIT_FAILURE);
}
printf("done, output file is %s\n", params.lora_outfile.c_str());
return 0;
}

View file

@ -1,112 +0,0 @@
#include "unicode.h"
#include "llama-grammar.h"
#include <cstdio>
#include <cstdlib>
#include <sstream>
#include <fstream>
#include <string>
#include <vector>
static bool llama_grammar_validate(struct llama_grammar * grammar, const std::string & input_str, size_t & error_pos, std::string & error_msg) {
const auto cpts = unicode_cpts_from_utf8(input_str);
const llama_grammar_rules & rules = llama_grammar_get_rules (grammar);
llama_grammar_stacks & stacks_cur = llama_grammar_get_stacks(grammar);
size_t pos = 0;
for (const auto & cpt : cpts) {
const llama_grammar_stacks stacks_prev = llama_grammar_get_stacks(grammar); // copy
llama_grammar_accept(rules, stacks_prev, cpt, stacks_cur);
if (stacks_cur.empty()) {
error_pos = pos;
error_msg = "Unexpected character '" + unicode_cpt_to_utf8(cpt) + "'";
stacks_cur = stacks_prev;
return false;
}
++pos;
}
for (const auto & stack : stacks_cur) {
if (stack.empty()) {
return true;
}
}
error_pos = pos;
error_msg = "Unexpected end of input";
return false;
}
static void print_error_message(const std::string & input_str, size_t error_pos, const std::string & error_msg) {
fprintf(stdout, "Input string is invalid according to the grammar.\n");
fprintf(stdout, "Error: %s at position %zu\n", error_msg.c_str(), error_pos);
fprintf(stdout, "\n");
fprintf(stdout, "Input string:\n");
fprintf(stdout, "%s", input_str.substr(0, error_pos).c_str());
if (error_pos < input_str.size()) {
fprintf(stdout, "\033[1;31m%c", input_str[error_pos]);
if (error_pos+1 < input_str.size()) {
fprintf(stdout, "\033[0;31m%s", input_str.substr(error_pos+1).c_str());
}
fprintf(stdout, "\033[0m\n");
}
}
int main(int argc, char** argv) {
if (argc != 3) {
fprintf(stdout, "Usage: %s <grammar_filename> <input_filename>\n", argv[0]);
return 1;
}
const std::string grammar_filename = argv[1];
const std::string input_filename = argv[2];
// Read the GBNF grammar file
FILE* grammar_file = fopen(grammar_filename.c_str(), "r");
if (!grammar_file) {
fprintf(stdout, "Failed to open grammar file: %s\n", grammar_filename.c_str());
return 1;
}
std::string grammar_str;
{
std::ifstream grammar_file(grammar_filename);
GGML_ASSERT(grammar_file.is_open() && "Failed to open grammar file");
std::stringstream buffer;
buffer << grammar_file.rdbuf();
grammar_str = buffer.str();
}
llama_grammar * grammar = llama_grammar_init_impl(nullptr, grammar_str.c_str(), "root");
if (grammar == nullptr) {
throw std::runtime_error("Failed to initialize llama_grammar");
}
// Read the input file
std::string input_str;
{
std::ifstream input_file(input_filename);
GGML_ASSERT(input_file.is_open() && "Failed to open input file");
std::stringstream buffer;
buffer << input_file.rdbuf();
input_str = buffer.str();
}
// Validate the input string against the grammar
size_t error_pos;
std::string error_msg;
bool is_valid = llama_grammar_validate(grammar, input_str, error_pos, error_msg);
if (is_valid) {
fprintf(stdout, "Input string is valid according to the grammar.\n");
} else {
print_error_message(input_str, error_pos, error_msg);
}
// Clean up
llama_grammar_free_impl(grammar);
return 0;
}

View file

@ -1,5 +0,0 @@
set(TARGET llama-gen-docs)
add_executable(${TARGET} gen-docs.cpp)
install(TARGETS ${TARGET} RUNTIME)
target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
target_compile_features(${TARGET} PRIVATE cxx_std_17)

View file

@ -1,83 +0,0 @@
#include "arg.h"
#include "common.h"
#include <fstream>
#include <string>
// Export usage message (-h) to markdown format
static void write_table_header(std::ofstream & file) {
file << "| Argument | Explanation |\n";
file << "| -------- | ----------- |\n";
}
static void write_table_entry(std::ofstream & file, const common_arg & opt) {
file << "| `";
// args
for (const auto & arg : opt.args) {
if (arg == opt.args.front()) {
file << arg;
if (opt.args.size() > 1) file << ", ";
} else {
file << arg << (arg != opt.args.back() ? ", " : "");
}
}
// value hint
if (opt.value_hint) {
std::string md_value_hint(opt.value_hint);
string_replace_all(md_value_hint, "|", "\\|");
file << " " << md_value_hint;
}
if (opt.value_hint_2) {
std::string md_value_hint_2(opt.value_hint_2);
string_replace_all(md_value_hint_2, "|", "\\|");
file << " " << md_value_hint_2;
}
// help text
std::string md_help(opt.help);
string_replace_all(md_help, "\n", "<br/>");
string_replace_all(md_help, "|", "\\|");
file << "` | " << md_help << " |\n";
}
static void write_table(std::ofstream & file, std::vector<common_arg *> & opts) {
write_table_header(file);
for (const auto & opt : opts) {
write_table_entry(file, *opt);
}
}
static void export_md(std::string fname, llama_example ex) {
std::ofstream file(fname, std::ofstream::out | std::ofstream::trunc);
common_params params;
auto ctx_arg = common_params_parser_init(params, ex);
std::vector<common_arg *> common_options;
std::vector<common_arg *> sparam_options;
std::vector<common_arg *> specific_options;
for (auto & opt : ctx_arg.options) {
// in case multiple LLAMA_EXAMPLE_* are set, we prioritize the LLAMA_EXAMPLE_* matching current example
if (opt.is_sparam) {
sparam_options.push_back(&opt);
} else if (opt.in_example(ctx_arg.ex)) {
specific_options.push_back(&opt);
} else {
common_options.push_back(&opt);
}
}
file << "**Common params**\n\n";
write_table(file, common_options);
file << "\n\n**Sampling params**\n\n";
write_table(file, sparam_options);
file << "\n\n**Example-specific params**\n\n";
write_table(file, specific_options);
}
int main(int, char **) {
export_md("autogen-main.md", LLAMA_EXAMPLE_MAIN);
export_md("autogen-server.md", LLAMA_EXAMPLE_SERVER);
return 0;
}

View file

@ -1,22 +0,0 @@
set(TARGET llama-gguf-hash)
add_executable(${TARGET} gguf-hash.cpp)
install(TARGETS ${TARGET} RUNTIME)
# clibs dependencies
include_directories(deps/)
add_library(xxhash OBJECT deps/xxhash/xxhash.c deps/xxhash/xxhash.h)
target_link_libraries(${TARGET} PRIVATE xxhash)
add_library(sha1 OBJECT deps/sha1/sha1.c deps/sha1/sha1.h)
target_link_libraries(${TARGET} PRIVATE sha1)
if (NOT MSVC)
# disable warnings in 3rd party code
target_compile_options(sha1 PRIVATE -w)
endif()
add_library(sha256 OBJECT deps/sha256/sha256.c deps/sha256/sha256.h)
target_link_libraries(${TARGET} PRIVATE sha256)
target_link_libraries(${TARGET} PRIVATE ggml ${CMAKE_THREAD_LIBS_INIT})
target_compile_features(${TARGET} PRIVATE cxx_std_17)

View file

@ -1,13 +0,0 @@
{
"name": "rotate-bits",
"version": "0.1.1",
"repo": "jb55/rotate-bits.h",
"description": "rotate bits",
"keywords": ["rotl", "rotr"],
"src": ["rotate-bits.h"],
"license": "Public Domain",
"development": {
"thlorenz/tap.c": "*"
}
}

View file

@ -1,46 +0,0 @@
#ifndef __ROTATE_DEFS_H
#define __ROTATE_DEFS_H
#ifdef _MSC_VER
#include <stdlib.h>
#define ROTL32(v, n) _rotl((v), (n))
#define ROTL64(v, n) _rotl64((v), (n))
#define ROTR32(v, n) _rotr((v), (n))
#define ROTR64(v, n) _rotr64((v), (n))
#else
#include <stdint.h>
#define U8V(v) ((uint8_t)(v) & 0xFFU)
#define U16V(v) ((uint16_t)(v) & 0xFFFFU)
#define U32V(v) ((uint32_t)(v) & 0xFFFFFFFFU)
#define U64V(v) ((uint64_t)(v) & 0xFFFFFFFFFFFFFFFFU)
#define ROTL32(v, n) \
(U32V((uint32_t)(v) << (n)) | ((uint32_t)(v) >> (32 - (n))))
// tests fail if we don't have this cast...
#define ROTL64(v, n) \
(U64V((uint64_t)(v) << (n)) | ((uint64_t)(v) >> (64 - (n))))
#define ROTR32(v, n) ROTL32(v, 32 - (n))
#define ROTR64(v, n) ROTL64(v, 64 - (n))
#endif
#define ROTL8(v, n) \
(U8V((uint8_t)(v) << (n)) | ((uint8_t)(v) >> (8 - (n))))
#define ROTL16(v, n) \
(U16V((uint16_t)(v) << (n)) | ((uint16_t)(v) >> (16 - (n))))
#define ROTR8(v, n) ROTL8(v, 8 - (n))
#define ROTR16(v, n) ROTL16(v, 16 - (n))
#endif

View file

@ -1,9 +0,0 @@
{
"name": "sha1",
"version": "0.0.1",
"repo": "clibs/sha1",
"description": "sha1 hash algorithm",
"keywords": ["sha1", "hash"],
"license": "public domain",
"src": ["sha1.c", "sha1.h"]
}

View file

@ -1,295 +0,0 @@
/*
SHA-1 in C
By Steve Reid <steve@edmweb.com>
100% Public Domain
Test Vectors (from FIPS PUB 180-1)
"abc"
A9993E36 4706816A BA3E2571 7850C26C 9CD0D89D
"abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq"
84983E44 1C3BD26E BAAE4AA1 F95129E5 E54670F1
A million repetitions of "a"
34AA973C D4C4DAA4 F61EEB2B DBAD2731 6534016F
*/
/* #define LITTLE_ENDIAN * This should be #define'd already, if true. */
/* #define SHA1HANDSOFF * Copies data before messing with it. */
#define SHA1HANDSOFF
#include <stdio.h>
#include <string.h>
/* for uint32_t */
#include <stdint.h>
#include "sha1.h"
#define rol(value, bits) (((value) << (bits)) | ((value) >> (32 - (bits))))
/* blk0() and blk() perform the initial expand. */
/* I got the idea of expanding during the round function from SSLeay */
#if BYTE_ORDER == LITTLE_ENDIAN
#define blk0(i) (block->l[i] = (rol(block->l[i],24)&0xFF00FF00) \
|(rol(block->l[i],8)&0x00FF00FF))
#elif BYTE_ORDER == BIG_ENDIAN
#define blk0(i) block->l[i]
#else
#error "Endianness not defined!"
#endif
#define blk(i) (block->l[i&15] = rol(block->l[(i+13)&15]^block->l[(i+8)&15] \
^block->l[(i+2)&15]^block->l[i&15],1))
/* (R0+R1), R2, R3, R4 are the different operations used in SHA1 */
#define R0(v,w,x,y,z,i) z+=((w&(x^y))^y)+blk0(i)+0x5A827999+rol(v,5);w=rol(w,30);
#define R1(v,w,x,y,z,i) z+=((w&(x^y))^y)+blk(i)+0x5A827999+rol(v,5);w=rol(w,30);
#define R2(v,w,x,y,z,i) z+=(w^x^y)+blk(i)+0x6ED9EBA1+rol(v,5);w=rol(w,30);
#define R3(v,w,x,y,z,i) z+=(((w|x)&y)|(w&x))+blk(i)+0x8F1BBCDC+rol(v,5);w=rol(w,30);
#define R4(v,w,x,y,z,i) z+=(w^x^y)+blk(i)+0xCA62C1D6+rol(v,5);w=rol(w,30);
/* Hash a single 512-bit block. This is the core of the algorithm. */
void SHA1Transform(
uint32_t state[5],
const unsigned char buffer[64]
)
{
uint32_t a, b, c, d, e;
typedef union
{
unsigned char c[64];
uint32_t l[16];
} CHAR64LONG16;
#ifdef SHA1HANDSOFF
CHAR64LONG16 block[1]; /* use array to appear as a pointer */
memcpy(block, buffer, 64);
#else
/* The following had better never be used because it causes the
* pointer-to-const buffer to be cast into a pointer to non-const.
* And the result is written through. I threw a "const" in, hoping
* this will cause a diagnostic.
*/
CHAR64LONG16 *block = (const CHAR64LONG16 *) buffer;
#endif
/* Copy context->state[] to working vars */
a = state[0];
b = state[1];
c = state[2];
d = state[3];
e = state[4];
/* 4 rounds of 20 operations each. Loop unrolled. */
R0(a, b, c, d, e, 0);
R0(e, a, b, c, d, 1);
R0(d, e, a, b, c, 2);
R0(c, d, e, a, b, 3);
R0(b, c, d, e, a, 4);
R0(a, b, c, d, e, 5);
R0(e, a, b, c, d, 6);
R0(d, e, a, b, c, 7);
R0(c, d, e, a, b, 8);
R0(b, c, d, e, a, 9);
R0(a, b, c, d, e, 10);
R0(e, a, b, c, d, 11);
R0(d, e, a, b, c, 12);
R0(c, d, e, a, b, 13);
R0(b, c, d, e, a, 14);
R0(a, b, c, d, e, 15);
R1(e, a, b, c, d, 16);
R1(d, e, a, b, c, 17);
R1(c, d, e, a, b, 18);
R1(b, c, d, e, a, 19);
R2(a, b, c, d, e, 20);
R2(e, a, b, c, d, 21);
R2(d, e, a, b, c, 22);
R2(c, d, e, a, b, 23);
R2(b, c, d, e, a, 24);
R2(a, b, c, d, e, 25);
R2(e, a, b, c, d, 26);
R2(d, e, a, b, c, 27);
R2(c, d, e, a, b, 28);
R2(b, c, d, e, a, 29);
R2(a, b, c, d, e, 30);
R2(e, a, b, c, d, 31);
R2(d, e, a, b, c, 32);
R2(c, d, e, a, b, 33);
R2(b, c, d, e, a, 34);
R2(a, b, c, d, e, 35);
R2(e, a, b, c, d, 36);
R2(d, e, a, b, c, 37);
R2(c, d, e, a, b, 38);
R2(b, c, d, e, a, 39);
R3(a, b, c, d, e, 40);
R3(e, a, b, c, d, 41);
R3(d, e, a, b, c, 42);
R3(c, d, e, a, b, 43);
R3(b, c, d, e, a, 44);
R3(a, b, c, d, e, 45);
R3(e, a, b, c, d, 46);
R3(d, e, a, b, c, 47);
R3(c, d, e, a, b, 48);
R3(b, c, d, e, a, 49);
R3(a, b, c, d, e, 50);
R3(e, a, b, c, d, 51);
R3(d, e, a, b, c, 52);
R3(c, d, e, a, b, 53);
R3(b, c, d, e, a, 54);
R3(a, b, c, d, e, 55);
R3(e, a, b, c, d, 56);
R3(d, e, a, b, c, 57);
R3(c, d, e, a, b, 58);
R3(b, c, d, e, a, 59);
R4(a, b, c, d, e, 60);
R4(e, a, b, c, d, 61);
R4(d, e, a, b, c, 62);
R4(c, d, e, a, b, 63);
R4(b, c, d, e, a, 64);
R4(a, b, c, d, e, 65);
R4(e, a, b, c, d, 66);
R4(d, e, a, b, c, 67);
R4(c, d, e, a, b, 68);
R4(b, c, d, e, a, 69);
R4(a, b, c, d, e, 70);
R4(e, a, b, c, d, 71);
R4(d, e, a, b, c, 72);
R4(c, d, e, a, b, 73);
R4(b, c, d, e, a, 74);
R4(a, b, c, d, e, 75);
R4(e, a, b, c, d, 76);
R4(d, e, a, b, c, 77);
R4(c, d, e, a, b, 78);
R4(b, c, d, e, a, 79);
/* Add the working vars back into context.state[] */
state[0] += a;
state[1] += b;
state[2] += c;
state[3] += d;
state[4] += e;
/* Wipe variables */
a = b = c = d = e = 0;
#ifdef SHA1HANDSOFF
memset(block, '\0', sizeof(block));
#endif
}
/* SHA1Init - Initialize new context */
void SHA1Init(
SHA1_CTX * context
)
{
/* SHA1 initialization constants */
context->state[0] = 0x67452301;
context->state[1] = 0xEFCDAB89;
context->state[2] = 0x98BADCFE;
context->state[3] = 0x10325476;
context->state[4] = 0xC3D2E1F0;
context->count[0] = context->count[1] = 0;
}
/* Run your data through this. */
void SHA1Update(
SHA1_CTX * context,
const unsigned char *data,
uint32_t len
)
{
uint32_t i;
uint32_t j;
j = context->count[0];
if ((context->count[0] += len << 3) < j)
context->count[1]++;
context->count[1] += (len >> 29);
j = (j >> 3) & 63;
if ((j + len) > 63)
{
memcpy(&context->buffer[j], data, (i = 64 - j));
SHA1Transform(context->state, context->buffer);
for (; i + 63 < len; i += 64)
{
SHA1Transform(context->state, &data[i]);
}
j = 0;
}
else
i = 0;
memcpy(&context->buffer[j], &data[i], len - i);
}
/* Add padding and return the message digest. */
void SHA1Final(
unsigned char digest[20],
SHA1_CTX * context
)
{
unsigned i;
unsigned char finalcount[8];
unsigned char c;
#if 0 /* untested "improvement" by DHR */
/* Convert context->count to a sequence of bytes
* in finalcount. Second element first, but
* big-endian order within element.
* But we do it all backwards.
*/
unsigned char *fcp = &finalcount[8];
for (i = 0; i < 2; i++)
{
uint32_t t = context->count[i];
int j;
for (j = 0; j < 4; t >>= 8, j++)
*--fcp = (unsigned char) t}
#else
for (i = 0; i < 8; i++)
{
finalcount[i] = (unsigned char) ((context->count[(i >= 4 ? 0 : 1)] >> ((3 - (i & 3)) * 8)) & 255); /* Endian independent */
}
#endif
c = 0200;
SHA1Update(context, &c, 1);
while ((context->count[0] & 504) != 448)
{
c = 0000;
SHA1Update(context, &c, 1);
}
SHA1Update(context, finalcount, 8); /* Should cause a SHA1Transform() */
for (i = 0; i < 20; i++)
{
digest[i] = (unsigned char)
((context->state[i >> 2] >> ((3 - (i & 3)) * 8)) & 255);
}
/* Wipe variables */
memset(context, '\0', sizeof(*context));
memset(&finalcount, '\0', sizeof(finalcount));
}
void SHA1(
char *hash_out,
const char *str,
uint32_t len)
{
SHA1_CTX ctx;
unsigned int ii;
SHA1Init(&ctx);
for (ii=0; ii<len; ii+=1)
SHA1Update(&ctx, (const unsigned char*)str + ii, 1);
SHA1Final((unsigned char *)hash_out, &ctx);
}

View file

@ -1,52 +0,0 @@
#ifndef SHA1_H
#define SHA1_H
/*
SHA-1 in C
By Steve Reid <steve@edmweb.com>
100% Public Domain
*/
#include "stdint.h"
#if defined(__cplusplus)
extern "C" {
#endif
typedef struct
{
uint32_t state[5];
uint32_t count[2];
unsigned char buffer[64];
} SHA1_CTX;
void SHA1Transform(
uint32_t state[5],
const unsigned char buffer[64]
);
void SHA1Init(
SHA1_CTX * context
);
void SHA1Update(
SHA1_CTX * context,
const unsigned char *data,
uint32_t len
);
void SHA1Final(
unsigned char digest[20],
SHA1_CTX * context
);
void SHA1(
char *hash_out,
const char *str,
uint32_t len);
#if defined(__cplusplus)
}
#endif
#endif /* SHA1_H */

View file

@ -1,15 +0,0 @@
{
"name": "sha256",
"version": "0.0.2",
"repo": "jb55/sha256.c",
"description": "sha256 in c",
"keywords": ["sha256", "sha2"],
"src": ["sha256.c", "sha256.h"],
"dependencies": {
"jb55/rotate-bits.h": "0.1.1"
},
"development": {
"thlorenz/tap.c": "*"
}
}

View file

@ -1,221 +0,0 @@
/* Crypto/Sha256.c -- SHA-256 Hash
2010-06-11 : Igor Pavlov : Public domain
This code is based on public domain code from Wei Dai's Crypto++ library. */
#include "rotate-bits/rotate-bits.h"
#include "sha256.h"
/* define it for speed optimization */
#define _SHA256_UNROLL
#define _SHA256_UNROLL2
void
sha256_init(sha256_t *p)
{
p->state[0] = 0x6a09e667;
p->state[1] = 0xbb67ae85;
p->state[2] = 0x3c6ef372;
p->state[3] = 0xa54ff53a;
p->state[4] = 0x510e527f;
p->state[5] = 0x9b05688c;
p->state[6] = 0x1f83d9ab;
p->state[7] = 0x5be0cd19;
p->count = 0;
}
#define S0(x) (ROTR32(x, 2) ^ ROTR32(x,13) ^ ROTR32(x, 22))
#define S1(x) (ROTR32(x, 6) ^ ROTR32(x,11) ^ ROTR32(x, 25))
#define s0(x) (ROTR32(x, 7) ^ ROTR32(x,18) ^ (x >> 3))
#define s1(x) (ROTR32(x,17) ^ ROTR32(x,19) ^ (x >> 10))
#define blk0(i) (W[i] = data[i])
#define blk2(i) (W[i&15] += s1(W[(i-2)&15]) + W[(i-7)&15] + s0(W[(i-15)&15]))
#define Ch(x,y,z) (z^(x&(y^z)))
#define Maj(x,y,z) ((x&y)|(z&(x|y)))
#define a(i) T[(0-(i))&7]
#define b(i) T[(1-(i))&7]
#define c(i) T[(2-(i))&7]
#define d(i) T[(3-(i))&7]
#define e(i) T[(4-(i))&7]
#define f(i) T[(5-(i))&7]
#define g(i) T[(6-(i))&7]
#define h(i) T[(7-(i))&7]
#ifdef _SHA256_UNROLL2
#define R(a,b,c,d,e,f,g,h, i) h += S1(e) + Ch(e,f,g) + K[i+j] + (j?blk2(i):blk0(i));\
d += h; h += S0(a) + Maj(a, b, c)
#define RX_8(i) \
R(a,b,c,d,e,f,g,h, i); \
R(h,a,b,c,d,e,f,g, (i+1)); \
R(g,h,a,b,c,d,e,f, (i+2)); \
R(f,g,h,a,b,c,d,e, (i+3)); \
R(e,f,g,h,a,b,c,d, (i+4)); \
R(d,e,f,g,h,a,b,c, (i+5)); \
R(c,d,e,f,g,h,a,b, (i+6)); \
R(b,c,d,e,f,g,h,a, (i+7))
#else
#define R(i) h(i) += S1(e(i)) + Ch(e(i),f(i),g(i)) + K[i+j] + (j?blk2(i):blk0(i));\
d(i) += h(i); h(i) += S0(a(i)) + Maj(a(i), b(i), c(i))
#ifdef _SHA256_UNROLL
#define RX_8(i) R(i+0); R(i+1); R(i+2); R(i+3); R(i+4); R(i+5); R(i+6); R(i+7);
#endif
#endif
static const uint32_t K[64] = {
0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
};
static void
sha256_transform(uint32_t *state, const uint32_t *data)
{
uint32_t W[16] = {0};
unsigned j;
#ifdef _SHA256_UNROLL2
uint32_t a,b,c,d,e,f,g,h;
a = state[0];
b = state[1];
c = state[2];
d = state[3];
e = state[4];
f = state[5];
g = state[6];
h = state[7];
#else
uint32_t T[8];
for (j = 0; j < 8; j++)
T[j] = state[j];
#endif
for (j = 0; j < 64; j += 16)
{
#if defined(_SHA256_UNROLL) || defined(_SHA256_UNROLL2)
RX_8(0); RX_8(8);
#else
unsigned i;
for (i = 0; i < 16; i++) { R(i); }
#endif
}
#ifdef _SHA256_UNROLL2
state[0] += a;
state[1] += b;
state[2] += c;
state[3] += d;
state[4] += e;
state[5] += f;
state[6] += g;
state[7] += h;
#else
for (j = 0; j < 8; j++)
state[j] += T[j];
#endif
/* Wipe variables */
/* memset(W, 0, sizeof(W)); */
/* memset(T, 0, sizeof(T)); */
}
#undef S0
#undef S1
#undef s0
#undef s1
static void
sha256_write_byte_block(sha256_t *p)
{
uint32_t data32[16];
unsigned i;
for (i = 0; i < 16; i++)
data32[i] =
((uint32_t)(p->buffer[i * 4 ]) << 24) +
((uint32_t)(p->buffer[i * 4 + 1]) << 16) +
((uint32_t)(p->buffer[i * 4 + 2]) << 8) +
((uint32_t)(p->buffer[i * 4 + 3]));
sha256_transform(p->state, data32);
}
void
sha256_hash(unsigned char *buf, const unsigned char *data, size_t size)
{
sha256_t hash;
sha256_init(&hash);
sha256_update(&hash, data, size);
sha256_final(&hash, buf);
}
void
sha256_update(sha256_t *p, const unsigned char *data, size_t size)
{
uint32_t curBufferPos = (uint32_t)p->count & 0x3F;
while (size > 0)
{
p->buffer[curBufferPos++] = *data++;
p->count++;
size--;
if (curBufferPos == 64)
{
curBufferPos = 0;
sha256_write_byte_block(p);
}
}
}
void
sha256_final(sha256_t *p, unsigned char *digest)
{
uint64_t lenInBits = (p->count << 3);
uint32_t curBufferPos = (uint32_t)p->count & 0x3F;
unsigned i;
p->buffer[curBufferPos++] = 0x80;
while (curBufferPos != (64 - 8))
{
curBufferPos &= 0x3F;
if (curBufferPos == 0)
sha256_write_byte_block(p);
p->buffer[curBufferPos++] = 0;
}
for (i = 0; i < 8; i++)
{
p->buffer[curBufferPos++] = (unsigned char)(lenInBits >> 56);
lenInBits <<= 8;
}
sha256_write_byte_block(p);
for (i = 0; i < 8; i++)
{
*digest++ = (unsigned char)(p->state[i] >> 24);
*digest++ = (unsigned char)(p->state[i] >> 16);
*digest++ = (unsigned char)(p->state[i] >> 8);
*digest++ = (unsigned char)(p->state[i]);
}
sha256_init(p);
}

View file

@ -1,24 +0,0 @@
/* Sha256.h -- SHA-256 Hash
2010-06-11 : Igor Pavlov : Public domain */
#ifndef __CRYPTO_SHA256_H
#define __CRYPTO_SHA256_H
#include <stdlib.h>
#include <stdint.h>
#define SHA256_DIGEST_SIZE 32
typedef struct sha256_t
{
uint32_t state[8];
uint64_t count;
unsigned char buffer[64];
} sha256_t;
void sha256_init(sha256_t *p);
void sha256_update(sha256_t *p, const unsigned char *data, size_t size);
void sha256_final(sha256_t *p, unsigned char *digest);
void sha256_hash(unsigned char *buf, const unsigned char *data, size_t size);
#endif

View file

@ -1,12 +0,0 @@
{
"name": "xxhash",
"version": "0.8.2",
"repo": "Cyan4973/xxhash",
"description": "Extremely fast non-cryptographic hash algorithm",
"keywords": ["xxhash", "hashing"],
"license": "BSD-2-Clause",
"src": [
"xxhash.c",
"xxhash.h"
]
}

View file

@ -1,42 +0,0 @@
/*
* xxHash - Extremely Fast Hash algorithm
* Copyright (C) 2012-2023 Yann Collet
*
* BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following disclaimer
* in the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* You can contact the author at:
* - xxHash homepage: https://www.xxhash.com
* - xxHash source repository: https://github.com/Cyan4973/xxHash
*/
/*
* xxhash.c instantiates functions defined in xxhash.h
*/
#define XXH_STATIC_LINKING_ONLY /* access advanced declarations */
#define XXH_IMPLEMENTATION /* access definitions */
#include "xxhash.h"

File diff suppressed because it is too large Load diff

View file

@ -1,693 +0,0 @@
#include "ggml.h"
#include <cstdlib> /* abort() */
#include <cstddef>
#include <cstdio>
#include <string>
#include <stdexcept>
#include <algorithm>
#include <cstring>
#include <sstream>
#include <fstream>
#ifdef __cplusplus
extern "C" {
#endif
#include "xxhash/xxhash.h"
#include "sha1/sha1.h"
#include "sha256/sha256.h"
#ifdef __cplusplus
}
#endif
// uuid.uuid5(uuid.NAMESPACE_URL, 'en.wikipedia.org/wiki/Llama.cpp')
#define UUID_NAMESPACE_LLAMA_CPP "ef001206-dadc-5f6d-a15f-3359e577d4e5"
#define UUID_NAMESPACE_LLAMA_CPP_HEX 0xef, 0x00, 0x12, 0x06, 0xda, 0xdc, 0x5f, 0x6d, 0xa1, 0x5f, 0x33, 0x59, 0xe5, 0x77, 0xd4, 0xe5
#define HASH_TYPE_SHA256_STR "sha256"
#define HASH_TYPE_SHA1_STR "sha1"
#define HASH_TYPE_XXH64_STR "xxh64"
#define HASH_TYPE_UUID_STR "uuid"
typedef enum {
HASH_EXIT_SUCCESS = 0, // All hash has been generated or validated
HASH_EXIT_FAILURE = 1, // Generic Failure
HASH_EXIT_MISMATCH = 2, // Hash mismatched during validation
HASH_EXIT_MANIFEST_MISSING_ENTRY = 3, // Hash attempted validation but missing entry in manifest
HASH_EXIT_MANIFEST_UNKNOWN_HASH = 4, // Manifest is present, but we do not know any hash format within it
HASH_EXIT_MANIFEST_FILE_ERROR = 5 // Manifest is either missing or not a known format
} hash_exit_code_t;
typedef enum {
HASH_MANIFEST_NOT_FOUND,
HASH_MANIFEST_MISMATCH,
HASH_MANIFEST_OK,
} hash_manifest_result_t;
struct hash_params {
std::string input;
bool xxh64 = false;
bool sha1 = false;
bool sha256 = false;
bool uuid = false;
bool no_layer = false;
bool manifest_is_usable = false;
std::string manifest_file;
};
struct manifest_check_params {
bool xxh64 = false;
bool sha1 = false;
bool sha256 = false;
bool uuid = false;
};
static char const * hash_manifest_result_to_str(hash_manifest_result_t value) {
switch (value) {
case HASH_MANIFEST_NOT_FOUND: return "Not Found";
case HASH_MANIFEST_MISMATCH: return "Mismatch";
case HASH_MANIFEST_OK: return "Ok";
}
return "?";
}
static char const * hash_exit_code_to_str(hash_exit_code_t value) {
switch (value) {
case HASH_EXIT_SUCCESS: return "Success";
case HASH_EXIT_FAILURE: return "Failure";
case HASH_EXIT_MISMATCH: return "Mismatch";
case HASH_EXIT_MANIFEST_MISSING_ENTRY: return "Manifest Missing Entry";
case HASH_EXIT_MANIFEST_UNKNOWN_HASH: return "Manifest Unknown Hash";
case HASH_EXIT_MANIFEST_FILE_ERROR: return "Manifest File Error";
}
return "?";
}
static void hash_print_usage(const char * executable) {
const hash_params default_params;
printf("\n");
printf("usage: %s [options] GGUF_IN\n", executable);
printf("\n");
printf("Hash a GGUF file");
printf("\n");
printf("options:\n");
printf(" -h, --help show this help message and exit\n");
printf(" --xxh64 use xxh64 hash\n");
printf(" --sha1 use sha1 hash\n");
printf(" --sha256 use sha256 hash\n");
printf(" --all use all hash\n");
printf(" --no-layer exclude per layer hash\n");
printf(" --uuid generate UUIDv5 ID\n");
printf(" -c, --check <manifest> verify against a manifest\n");
printf("\n");
}
static void hash_params_parse_ex(int argc, const char ** argv, hash_params & params) {
std::string arg;
bool invalid_param = false;
const std::string arg_prefix = "--";
int arg_idx = 1;
for (; arg_idx < argc && strncmp(argv[arg_idx], "--", 2) == 0; arg_idx++) {
arg = argv[arg_idx];
if (arg.compare(0, arg_prefix.size(), arg_prefix) == 0) {
std::replace(arg.begin(), arg.end(), '_', '-');
}
bool arg_found = false;
if (arg == "-h" || arg == "--help") {
hash_print_usage(argv[0]);
exit(0);
}
if (arg == "--xxh64") {
arg_found = true;
params.xxh64 = true;
}
if (arg == "--sha1") {
arg_found = true;
params.sha1 = true;
}
if (arg == "--uuid") {
arg_found = true;
params.uuid = true;
}
if (arg == "--sha256") {
arg_found = true;
params.sha256 = true;
}
if (arg == "--all") {
arg_found = true;
params.sha256 = true;
params.sha1 = true;
params.xxh64 = true;
}
if (arg == "--no-layer") {
arg_found = true;
params.no_layer = true;
}
if (arg == "-c" || arg == "--check") {
if (++arg_idx >= argc) {
invalid_param = true;
break;
}
arg_found = true;
params.manifest_file = argv[arg_idx];
}
if (!arg_found) {
throw std::invalid_argument("error: unknown argument: " + arg);
}
}
if (invalid_param) {
throw std::invalid_argument("error: invalid parameter for argument:" + arg);
}
if (argc - arg_idx < 1) {
throw std::invalid_argument("error: bad arguments");
}
params.input = argv[arg_idx++];
}
static bool hash_params_parse(int argc, const char ** argv, hash_params & params) {
bool result = true;
try {
hash_params_parse_ex(argc, argv, params);
}
catch (const std::invalid_argument & ex) {
fprintf(stderr, "%s\n", ex.what());
hash_print_usage(argv[0]);
exit(EXIT_FAILURE);
}
return result;
}
static bool manifest_type(const std::string & manifest_file, manifest_check_params & manifest_check) {
if (manifest_file.empty()) {
return false;
}
std::ifstream file(manifest_file);
if (!file.is_open()) {
return false;
}
std::string manifest_entry_line;
while (getline(file, manifest_entry_line)) {
// hash_type_str hash_str tensor_name
// e.g. 'xxh64 f66e9cd66a4396a0 test.gguf:tensor_0'
std::istringstream line_stream(manifest_entry_line);
std::string file_hash_type;
if (line_stream >> file_hash_type) {
if (file_hash_type == HASH_TYPE_SHA256_STR) {
manifest_check.sha256 = true;
} else if (file_hash_type == HASH_TYPE_SHA1_STR) {
manifest_check.sha1 = true;
} else if (file_hash_type == HASH_TYPE_XXH64_STR) {
manifest_check.xxh64 = true;
} else if (file_hash_type == HASH_TYPE_UUID_STR) {
manifest_check.uuid = true;
}
}
}
return true;
}
static hash_manifest_result_t manifest_verify(const std::string& manifest_file, const std::string& hash_type_str, const std::string& hash_str, const std::string& tensor_name) {
if (manifest_file.empty()) {
return HASH_MANIFEST_NOT_FOUND;
}
std::ifstream file(manifest_file);
if (!file.is_open()) {
return HASH_MANIFEST_NOT_FOUND;
}
std::string manifest_entry_line;
while (getline(file, manifest_entry_line)) {
std::istringstream line_stream(manifest_entry_line);
std::string file_hash_type;
std::string file_hash;
std::string file_tensor_name;
if (line_stream >> file_hash_type >> file_hash >> file_tensor_name) {
// Line parsed. Check hash validity
if (file_hash_type != hash_type_str) {
continue;
}
if (file_tensor_name != tensor_name) {
continue;
}
return (file_hash == hash_str) ? HASH_MANIFEST_OK : HASH_MANIFEST_MISMATCH;
}
}
return HASH_MANIFEST_NOT_FOUND;
}
static void generate_uuidv5(const unsigned char sha1_digest[20], unsigned char uuid[16]) {
// Ref: https://www.rfc-editor.org/rfc/rfc9562.html#section-5.5
// Assumes that digest was processed correctly with the expected namespace
for (int i = 0; i < 16; i++) {
uuid[i] = sha1_digest[i];
}
// Set bits corresponding to UUID ver 5
uuid[ 6] &= ~(0xF << 4);
uuid[ 6] |= (5 << 4);
// Set bits corresponding to UUID variant 0b10XX
uuid[ 8] &= ~(0xc << 4);
uuid[ 8] |= (0x8 << 4);
}
static hash_exit_code_t gguf_hash(const hash_params & hash_params) {
const std::string & fname = hash_params.input;
struct ggml_context * ctx_data = NULL;
struct gguf_init_params params = {
/*.no_alloc = */ false,
/*.ctx = */ &ctx_data,
};
// xxh64 init
XXH64_state_t* xxh64_model_hash_state = NULL;
if (hash_params.xxh64) {
xxh64_model_hash_state = XXH64_createState();
if (xxh64_model_hash_state==NULL) {
abort();
}
XXH64_hash_t const seed = 0;
if (XXH64_reset(xxh64_model_hash_state, seed) == XXH_ERROR) {
abort();
}
}
// sha1 init
SHA1_CTX sha1_model_hash_ctx;
if (hash_params.sha1) {
SHA1Init(&sha1_model_hash_ctx);
}
// sha256 init
sha256_t sha256_model_hash_ctx;
if (hash_params.sha256) {
sha256_init(&sha256_model_hash_ctx);
}
// sha1 for uuid init
SHA1_CTX sha1_for_uuid_ctx;
if (hash_params.uuid) {
unsigned char const uuidv5_namespace[] = {UUID_NAMESPACE_LLAMA_CPP_HEX};
SHA1Init(&sha1_for_uuid_ctx);
SHA1Update( &sha1_for_uuid_ctx, (unsigned char const *)uuidv5_namespace, sizeof(uuidv5_namespace));
}
struct gguf_context * ctx = gguf_init_from_file(fname.c_str(), params);
const int n_tensors = gguf_get_n_tensors(ctx);
bool tensor_layer_in_manifest = false;
bool model_in_manifest = false;
bool tensor_layer_has_mismatch = false;
bool model_has_mismatch = false;
for (int i = 0; i < n_tensors; ++i) {
const char * name = gguf_get_tensor_name(ctx, i);
struct ggml_tensor * cur = ggml_get_tensor(ctx_data, name);
auto n_bytes = ggml_nbytes(cur);
auto *raw_data = cur->data;
const std::string tensor_layer_name = fname + ":" + name;
if (hash_params.xxh64) {
if (!hash_params.no_layer) {
// Per Layer Hash
XXH64_hash_t hash = XXH64(raw_data, n_bytes, 0);
char hex_result[17];
for (int offset = 0; offset < 8; offset++) {
unsigned int shift_bits_by = (8 * (8 - offset - 1));
snprintf( ( hex_result + (2*offset)), sizeof(hex_result) - (2*offset), "%02x", (unsigned char) (hash >> shift_bits_by)&0xff);
}
if (hash_params.manifest_is_usable) {
hash_manifest_result_t verify_result = manifest_verify(hash_params.manifest_file, HASH_TYPE_XXH64_STR, hex_result, tensor_layer_name);
switch (verify_result) {
case HASH_MANIFEST_NOT_FOUND:
break;
case HASH_MANIFEST_MISMATCH:
tensor_layer_in_manifest = true;
tensor_layer_has_mismatch = true;
break;
case HASH_MANIFEST_OK:
tensor_layer_in_manifest = true;
break;
}
printf("%-8s %-s %s - %s\n", HASH_TYPE_XXH64_STR, hex_result, tensor_layer_name.c_str(), hash_manifest_result_to_str(verify_result));
} else {
printf("%-8s %-s %s\n", HASH_TYPE_XXH64_STR, hex_result, tensor_layer_name.c_str());
}
}
// Overall Model Hash
if (XXH64_update(xxh64_model_hash_state, raw_data, n_bytes) == XXH_ERROR) abort();
}
if (hash_params.sha1) {
if (!hash_params.no_layer) {
// Per Layer Hash
char result[21]; // sha1 outputs 20 bytes
SHA1( result, (const char *)raw_data, n_bytes);
char hex_result[41] = {0};
for (int offset = 0; offset < 20; offset++) {
snprintf( ( hex_result + (2*offset)), sizeof(hex_result) - (2*offset), "%02x", result[offset]&0xff);
}
if (hash_params.manifest_is_usable) {
hash_manifest_result_t verify_result = manifest_verify(hash_params.manifest_file, HASH_TYPE_SHA1_STR, hex_result, tensor_layer_name);
switch (verify_result) {
case HASH_MANIFEST_NOT_FOUND:
break;
case HASH_MANIFEST_MISMATCH:
tensor_layer_in_manifest = true;
tensor_layer_has_mismatch = true;
break;
case HASH_MANIFEST_OK:
tensor_layer_in_manifest = true;
break;
}
printf("%-8s %-s %s - %s\n", HASH_TYPE_SHA1_STR, hex_result, tensor_layer_name.c_str(), hash_manifest_result_to_str(verify_result));
} else {
printf("%-8s %-s %s\n", HASH_TYPE_SHA1_STR, hex_result, tensor_layer_name.c_str());
}
}
// Overall Model Hash
SHA1Update( &sha1_model_hash_ctx, (unsigned char const *)raw_data, n_bytes);
}
if (hash_params.sha256) {
if (!hash_params.no_layer) {
// Per Layer Hash
unsigned char result[SHA256_DIGEST_SIZE]; // sha256 outputs 32 bytes
sha256_hash((unsigned char*) result, (const unsigned char *)raw_data, n_bytes);
char hex_result[SHA256_DIGEST_SIZE * 2 + 1] = {0};
for (int offset = 0; offset < SHA256_DIGEST_SIZE; offset++) {
snprintf( ( hex_result + (2*offset)), sizeof(hex_result) - (2*offset), "%02x", result[offset]&0xff);
}
if (hash_params.manifest_is_usable) {
hash_manifest_result_t verify_result = manifest_verify(hash_params.manifest_file, HASH_TYPE_SHA256_STR, hex_result, tensor_layer_name);
switch (verify_result) {
case HASH_MANIFEST_NOT_FOUND:
break;
case HASH_MANIFEST_MISMATCH:
tensor_layer_in_manifest = true;
tensor_layer_has_mismatch = true;
break;
case HASH_MANIFEST_OK:
tensor_layer_in_manifest = true;
break;
}
printf("%-8s %-s %s - %s\n", HASH_TYPE_SHA256_STR, hex_result, tensor_layer_name.c_str(), hash_manifest_result_to_str(verify_result));
} else {
printf("%-8s %-s %s\n", HASH_TYPE_SHA256_STR, hex_result, tensor_layer_name.c_str());
}
}
// Overall Model Hash
sha256_update( &sha256_model_hash_ctx, (unsigned char const *)raw_data, n_bytes);
}
if (hash_params.uuid) {
SHA1Update( &sha1_for_uuid_ctx, (unsigned char const *)raw_data, n_bytes);
}
}
if (hash_params.xxh64) {
XXH64_hash_t const hash = XXH64_digest(xxh64_model_hash_state);
char hex_result[17];
for (int offset = 0; offset < 8; offset++) {
unsigned int shift_bits_by = (8 * (8 - offset - 1));
snprintf( ( hex_result + (2*offset)), sizeof(hex_result) - (2*offset), "%02x", (unsigned char) (hash >> shift_bits_by)&0xff);
}
if (hash_params.manifest_is_usable) {
hash_manifest_result_t verify_result = manifest_verify(hash_params.manifest_file, HASH_TYPE_XXH64_STR, hex_result, fname);
switch (verify_result) {
case HASH_MANIFEST_NOT_FOUND:
break;
case HASH_MANIFEST_MISMATCH:
model_in_manifest = true;
model_has_mismatch = true;
break;
case HASH_MANIFEST_OK:
model_in_manifest = true;
break;
}
printf("%-8s %-s %s - %s\n", HASH_TYPE_XXH64_STR, hex_result, fname.c_str(), hash_manifest_result_to_str(verify_result));
} else {
printf("%-8s %-s %s\n", HASH_TYPE_XXH64_STR, hex_result, fname.c_str());
}
}
if (hash_params.sha1) {
unsigned char result[21];
SHA1Final(result, &sha1_model_hash_ctx);
char hex_result[41];
for (int offset = 0; offset < 20; offset++) {
snprintf( ( hex_result + (2*offset)), sizeof(hex_result) - (2*offset), "%02x", result[offset]&0xff);
}
if (hash_params.manifest_is_usable) {
hash_manifest_result_t verify_result = manifest_verify(hash_params.manifest_file, HASH_TYPE_SHA1_STR, hex_result, fname);
switch (verify_result) {
case HASH_MANIFEST_NOT_FOUND:
break;
case HASH_MANIFEST_MISMATCH:
model_in_manifest = true;
model_has_mismatch = true;
break;
case HASH_MANIFEST_OK:
model_in_manifest = true;
break;
}
printf("%-8s %-s %s - %s\n", HASH_TYPE_SHA1_STR, hex_result, fname.c_str(), hash_manifest_result_to_str(verify_result));
} else {
printf("%-8s %-s %s\n", HASH_TYPE_SHA1_STR, hex_result, fname.c_str());
}
}
if (hash_params.sha256) {
unsigned char result[SHA256_DIGEST_SIZE]; // sha256 outputs 32 bytes
sha256_final( &sha256_model_hash_ctx, result);
char hex_result[SHA256_DIGEST_SIZE * 2 + 1] = {0};
for (int offset = 0; offset < SHA256_DIGEST_SIZE; offset++) {
snprintf( ( hex_result + (2*offset)), sizeof(hex_result) - (2*offset), "%02x", result[offset]&0xff);
}
if (hash_params.manifest_is_usable) {
hash_manifest_result_t verify_result = manifest_verify(hash_params.manifest_file, HASH_TYPE_SHA256_STR, hex_result, fname);
switch (verify_result) {
case HASH_MANIFEST_NOT_FOUND:
break;
case HASH_MANIFEST_MISMATCH:
model_in_manifest = true;
model_has_mismatch = true;
break;
case HASH_MANIFEST_OK:
model_in_manifest = true;
break;
}
printf("%-8s %-s %s - %s\n", HASH_TYPE_SHA256_STR, hex_result, fname.c_str(), hash_manifest_result_to_str(verify_result));
} else {
printf("%-8s %-s %s\n", HASH_TYPE_SHA256_STR, hex_result, fname.c_str());
}
}
if (hash_params.uuid) {
unsigned char result[21];
SHA1Final(result, &sha1_for_uuid_ctx);
unsigned char uuid[16];
generate_uuidv5(result, uuid);
char string_buffer[37] = {0};
snprintf(string_buffer, sizeof(string_buffer), "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
uuid[0], uuid[1], uuid[2], uuid[3],
uuid[4], uuid[5], uuid[6], uuid[7],
uuid[8], uuid[9], uuid[10], uuid[11],
uuid[12], uuid[13], uuid[14], uuid[15]);
if (hash_params.manifest_is_usable) {
hash_manifest_result_t verify_result = manifest_verify(hash_params.manifest_file, HASH_TYPE_SHA256_STR, string_buffer, fname);
switch (verify_result) {
case HASH_MANIFEST_NOT_FOUND:
break;
case HASH_MANIFEST_MISMATCH:
model_in_manifest = true;
model_has_mismatch = true;
break;
case HASH_MANIFEST_OK:
model_in_manifest = true;
break;
}
printf("%-8s %-s %s - %s\n", HASH_TYPE_UUID_STR, string_buffer, fname.c_str(), hash_manifest_result_to_str(verify_result));
} else {
printf("%-8s %-s %s\n", HASH_TYPE_UUID_STR, string_buffer, fname.c_str());
}
}
ggml_free(ctx_data);
gguf_free(ctx);
if (hash_params.manifest_is_usable) {
// In hash verification mode
if (!model_in_manifest) {
// model missing in manifest?
// Check tensor layer...
if (!tensor_layer_in_manifest) {
// Still missing? Maybe we are reading the wrong manifest.
return HASH_EXIT_MANIFEST_MISSING_ENTRY;
}
if (tensor_layer_has_mismatch) {
// Per tensor check found error
return HASH_EXIT_FAILURE;
}
// All per tensor layer checks passed? Sounds good enough.
return HASH_EXIT_SUCCESS;
}
// Overall model check passed, but let's check per layer just in case
// If missing, we don't care too much as the overall model checked
if (tensor_layer_in_manifest && tensor_layer_has_mismatch) {
return HASH_EXIT_FAILURE;
}
if (model_has_mismatch) {
// model has failed hash somewhere in the model
return HASH_EXIT_FAILURE;
}
// All checks appears to be fine
return HASH_EXIT_SUCCESS;
}
// In hash generation mode
return HASH_EXIT_SUCCESS;
}
int main(int argc, const char ** argv) {
hash_params params;
manifest_check_params manifest_check;
hash_params_parse(argc, argv, params);
if (!params.manifest_file.empty()) {
if (!manifest_type(params.manifest_file, manifest_check)) {
printf("ERROR cannot open manifest %s", params.manifest_file.c_str());
return HASH_EXIT_MANIFEST_FILE_ERROR;
}
if (!manifest_check.sha256 && !manifest_check.sha1 && !manifest_check.xxh64 && !manifest_check.uuid) {
printf("ERROR manifest does not have any known hash format in %s", params.manifest_file.c_str());
return HASH_EXIT_MANIFEST_UNKNOWN_HASH;
}
printf("manifest %s", params.manifest_file.c_str());
if (manifest_check.sha256) {
printf(" sha256");
}
if (manifest_check.sha1) {
printf(" sha1");
}
if (manifest_check.xxh64) {
printf(" xxh64");
}
if (manifest_check.uuid) {
printf(" uuid");
}
printf("\n");
// Autoselect the highest security hash if manifest is provided but
// the user has not specifically defined the hash they care about
if (!params.xxh64 && !params.sha1 && !params.uuid && !params.sha256) {
// User has not selected a specific value, pick most secure hash
if (manifest_check.sha256) {
params.sha256 = true;
} else if (manifest_check.sha1) {
params.sha1 = true;
} else if (manifest_check.xxh64) {
params.xxh64 = true;
} else if (manifest_check.uuid) {
params.uuid = true;
}
}
params.manifest_is_usable = true;
}
// By default if no swich argument provided, assume xxh64
if (!params.xxh64 && !params.sha1 && !params.uuid && !params.sha256) {
params.xxh64 = true;
}
hash_exit_code_t exit_code = gguf_hash(params);
if (params.manifest_is_usable) {
printf("\nVerification results for %s - %s\n", params.manifest_file.c_str(), hash_exit_code_to_str(exit_code));
}
return exit_code;
}

View file

@ -741,7 +741,7 @@ void ggml_gemv_q4_0_8x8_q8_0(int n, float * restrict s, size_t bs, const void *
UNUSED(blocklen);
#if ! ((defined(_MSC_VER)) && ! defined(__clang__)) && defined(__aarch64__)
#if defined(__ARM_FEATURE_SVE)
#if defined(__ARM_FEATURE_SVE) && ! defined(LLAMA_NOSVE)
if (ggml_cpu_has_sve() && ggml_cpu_get_sve_cnt() == QK8_0) {
const void * b_ptr = vx;
const void * a_ptr = vy;
@ -2081,7 +2081,7 @@ void ggml_gemm_q4_0_8x8_q8_0(int n, float * restrict s, size_t bs, const void *
UNUSED(blocklen);
#if ! ((defined(_MSC_VER)) && ! defined(__clang__)) && defined(__aarch64__)
#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FEATURE_MATMUL_INT8)
#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FEATURE_MATMUL_INT8) && ! defined(LLAMA_NOSVE)
if (ggml_cpu_has_sve() && ggml_cpu_has_matmul_int8() && ggml_cpu_get_sve_cnt() == QK8_0) {
const void * b_ptr = vx;
const void * a_ptr = vy;

View file

@ -59,7 +59,7 @@ struct ggml_compute_params {
#endif
#endif
#if defined(__ARM_FEATURE_SVE)
#if defined(__ARM_FEATURE_SVE) && ! defined(LLAMA_NOSVE)
#include <arm_sve.h>
#include <sys/prctl.h>
#endif

View file

@ -1829,7 +1829,7 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * restrict s, size_t bs, const void * r
int ib = 0;
float sumf = 0;
#if defined(__ARM_FEATURE_SVE)
#if defined(__ARM_FEATURE_SVE) && ! defined(LLAMA_NOSVE)
svfloat32_t sumv0 = svdup_n_f32(0.0f);
svfloat32_t sumv1 = svdup_n_f32(0.0f);
@ -3419,7 +3419,7 @@ void ggml_vec_dot_q8_0_q8_0(int n, float * restrict s, size_t bs, const void * r
int ib = 0;
float sumf = 0;
#if defined(__ARM_FEATURE_SVE)
#if defined(__ARM_FEATURE_SVE) && ! defined(LLAMA_NOSVE)
svfloat32_t sumv0 = svdup_n_f32(0.0f);
svfloat32_t sumv1 = svdup_n_f32(0.0f);

View file

@ -40,7 +40,7 @@
#include <omp.h>
#endif
#if defined(__ARM_FEATURE_SVE) || defined(__ARM_FEATURE_MATMUL_INT8)
#if (defined(__ARM_FEATURE_SVE) && ! defined(LLAMA_NOSVE)) || defined(__ARM_FEATURE_MATMUL_INT8)
#undef GGML_USE_LLAMAFILE
#endif
@ -2442,7 +2442,7 @@ static void ggml_init_arm_arch_features(void) {
ggml_arm_arch_features.has_i8mm = !!(hwcap2 & HWCAP2_I8MM);
ggml_arm_arch_features.has_sve = !!(hwcap & HWCAP_SVE);
#if defined(__ARM_FEATURE_SVE)
#if defined(__ARM_FEATURE_SVE) && ! defined(LLAMA_NOSVE)
ggml_arm_arch_features.sve_cnt = PR_SVE_VL_LEN_MASK & prctl(PR_SVE_GET_VL);
#endif
#elif defined(__APPLE__)
@ -2479,7 +2479,7 @@ static void ggml_init_arm_arch_features(void) {
ggml_arm_arch_features.has_i8mm = 0;
#endif
#if defined(__ARM_FEATURE_SVE)
#if defined(__ARM_FEATURE_SVE) && ! defined(LLAMA_NOSVE)
ggml_arm_arch_features.has_sve = 1;
ggml_arm_arch_features.sve_cnt = 16;
#else
@ -13946,7 +13946,7 @@ int ggml_cpu_has_dotprod(void) {
}
int ggml_cpu_has_sve(void) {
#if defined(__ARM_ARCH) && defined(__ARM_FEATURE_SVE)
#if defined(__ARM_ARCH) && defined(__ARM_FEATURE_SVE) && ! defined(LLAMA_NOSVE)
return ggml_arm_arch_features.has_sve;
#else
return 0;
@ -13962,7 +13962,7 @@ int ggml_cpu_has_matmul_int8(void) {
}
int ggml_cpu_get_sve_cnt(void) {
#if defined(__ARM_ARCH) && defined(__ARM_FEATURE_SVE)
#if defined(__ARM_ARCH) && defined(__ARM_FEATURE_SVE) && ! defined(LLAMA_NOSVE)
return ggml_arm_arch_features.sve_cnt;
#else
return 0;

View file

@ -10,7 +10,7 @@
#include <stdint.h>
#include <string.h>
#ifdef __ARM_FEATURE_SVE
#if defined(__ARM_FEATURE_SVE) && ! defined(LLAMA_NOSVE)
#include <arm_sve.h>
#endif // __ARM_FEATURE_SVE

View file

@ -4344,7 +4344,7 @@ def main(launch_args,start_server=True):
show_gui()
except Exception as ex:
exitcounter = 999
ermsg = "Reason: " + str(ex) + "\nFile selection GUI unsupported.\ncustomtkinter python module required!\n\nPlease check command line options with --help"
ermsg = "Reason: " + str(ex) + "\nFile selection GUI unsupported.\ncustomtkinter python module required!\n\nYou must use the command line instead, e.g. python ./koboldcpp.py --help"
show_gui_msgbox("Warning, GUI failed to start",ermsg)
if args.skiplauncher:
print("Note: In order to use --skiplauncher, you need to specify a model with --model")