mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-11 17:44:38 +00:00
cleanup, fix native build for arm (+28 squashed commit)
Squashed commit:
[d1f6a4154] bundle library
[947ab84b7] undo
[0f9aba8d8] test
[e9ac93873] test
[920438202] test
[1c6d98804] Revert "quick test" This reverts commit acf8ec8940.
[acf8ec894] quick test
[6a9937233] undo
[5a263a5bd] test
[ddfd82bca] test
[0b30e45da] test
[c3bfece55] messed up
[2a4b37fe0] Revert "test" This reverts commit 80a1fcaeaf.
[80a1fcaea] test
[e2aa7d944] test
[264d80200] test
[f5b123173] undo
[1ffacc484] test
[63c0be926] undo
[510e0377e] ofast try fix
[4ac199b20] try fix sigill
[1bc987ba2] try fix illegal instruction
[7697252b1] edit
[f87087b28] check gcc ver
[e9dfe2cef] try using qemu to do the pyinstaller
[b411192db] revert
[25b5301e5] try using qemu to do the pyinstaller
[58038cddc] try using qemu to do the pyinstaller
This commit is contained in:
parent
e9d2332dd8
commit
a11bba5893
28 changed files with 69 additions and 10898 deletions
70
.github/workflows/kcpp-build-release-arm64.yaml
vendored
70
.github/workflows/kcpp-build-release-arm64.yaml
vendored
|
@ -3,7 +3,6 @@ name: Koboldcpp Linux ARM64
|
|||
on: workflow_dispatch
|
||||
env:
|
||||
BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
|
||||
NOAVX2: 1
|
||||
|
||||
jobs:
|
||||
linux-arm:
|
||||
|
@ -15,35 +14,68 @@ jobs:
|
|||
with:
|
||||
ref: ${{ github.head_ref || github.ref_name }}
|
||||
|
||||
- name: Build Dependencies
|
||||
id: depends1
|
||||
- name: Install Dependencies
|
||||
id: depends
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y python3 python3-pip python3-dev build-essential \
|
||||
sudo apt-get install -y python3-tk python3-pip python3-dev build-essential \
|
||||
libffi-dev libssl-dev libbz2-dev libreadline-dev libsqlite3-dev \
|
||||
crossbuild-essential-arm64 qemu qemu-user qemu-user-static \
|
||||
gcc-aarch64-linux-gnu g++-aarch64-linux-gnu
|
||||
crossbuild-essential-arm64 gcc-aarch64-linux-gnu g++-aarch64-linux-gnu
|
||||
|
||||
- name: Python Dependencies
|
||||
id: depends2
|
||||
- name: Install New GCC for Cross-Compilation
|
||||
run: |
|
||||
pip install customtkinter pyinstaller tk
|
||||
|
||||
- name: Build with ARM NEON Support
|
||||
id: build_binary
|
||||
run: |
|
||||
# Enable cross-compilation for ARM
|
||||
export QEMU_LD_PREFIX=/usr/aarch64-linux-gnu
|
||||
export CC=aarch64-linux-gnu-gcc
|
||||
export CXX=aarch64-linux-gnu-g++
|
||||
sudo apt-get install -y software-properties-common
|
||||
sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y gcc-12 g++-12 gcc-12-aarch64-linux-gnu g++-12-aarch64-linux-gnu
|
||||
export CC=/usr/bin/aarch64-linux-gnu-gcc-12
|
||||
export CXX=/usr/bin/aarch64-linux-gnu-g++-12
|
||||
export AR=aarch64-linux-gnu-ar
|
||||
export UNAME_M=aarch64
|
||||
export UNAME_S=Linux
|
||||
|
||||
export PATH=/usr/bin:$PATH
|
||||
make LLAMA_PORTABLE=1
|
||||
chmod +x './create_ver_file.sh'
|
||||
. create_ver_file.sh
|
||||
pyinstaller --noconfirm --onefile --collect-all customtkinter --collect-all psutil --add-data './koboldcpp_default.so:.' --add-data './kcpp_adapters:./kcpp_adapters' --add-data './koboldcpp.py:.' --add-data './klite.embd:.' --add-data './kcpp_docs.embd:.' --add-data './kcpp_sdui.embd:.' --add-data './taesd.embd:.' --add-data './taesd_xl.embd:.' --add-data './rwkv_vocab.embd:.' --add-data './rwkv_world_vocab.embd:.' --version-file './version.txt' --clean --console koboldcpp.py -n "koboldcpp-linux-arm64"
|
||||
mkdir -p dist
|
||||
cp './koboldcpp_default.so' dist
|
||||
ls
|
||||
|
||||
- name: Install QEMU
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y qemu-user-static binfmt-support
|
||||
|
||||
- name: Setup QEMU for ARM64
|
||||
run: |
|
||||
docker run --rm --privileged multiarch/qemu-user-static --reset -p yes
|
||||
|
||||
- name: Build ARM64 PyInstaller
|
||||
run: |
|
||||
docker run --rm \
|
||||
--platform linux/arm64 \
|
||||
-v "${PWD}:/src" \
|
||||
python:3.9-slim \
|
||||
/bin/bash -c "
|
||||
apt-get update && apt-get install -y build-essential && \
|
||||
apt-get update && apt-get install -y gcc-12 g++-12 && \
|
||||
export LD_LIBRARY_PATH=/usr/lib/gcc/x86_64-linux-gnu/12:$LD_LIBRARY_PATH && \
|
||||
pip install customtkinter pyinstaller tk && \
|
||||
cd /src && \
|
||||
pyinstaller --noconfirm --onefile --collect-all customtkinter --collect-all psutil \
|
||||
--add-data './koboldcpp_default.so:.' \
|
||||
--add-data './kcpp_adapters:./kcpp_adapters' \
|
||||
--add-data './koboldcpp.py:.' \
|
||||
--add-data './klite.embd:.' \
|
||||
--add-data './kcpp_docs.embd:.' \
|
||||
--add-data './kcpp_sdui.embd:.' \
|
||||
--add-data './taesd.embd:.' \
|
||||
--add-data './taesd_xl.embd:.' \
|
||||
--add-data './rwkv_vocab.embd:.' \
|
||||
--add-data './rwkv_world_vocab.embd:.' \
|
||||
--version-file './version.txt' \
|
||||
--clean --console koboldcpp.py -n 'koboldcpp-linux-arm64'
|
||||
"
|
||||
|
||||
- name: Save artifact
|
||||
uses: actions/upload-artifact@v3
|
||||
|
|
|
@ -3,7 +3,6 @@ name: Koboldcpp Mac
|
|||
on: workflow_dispatch
|
||||
env:
|
||||
BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
|
||||
NOAVX2: 1
|
||||
|
||||
jobs:
|
||||
osx:
|
||||
|
|
9
Makefile
9
Makefile
|
@ -312,9 +312,12 @@ ifneq ($(filter aarch64%,$(UNAME_M)),)
|
|||
# Apple M1, M2, etc.
|
||||
# Raspberry Pi 3, 4, Zero 2 (64-bit)
|
||||
ifdef LLAMA_PORTABLE
|
||||
CFLAGS +=
|
||||
CXXFLAGS +=
|
||||
else
|
||||
CFLAGS += -mcpu=native
|
||||
CXXFLAGS += -mcpu=native
|
||||
# sve is cooked so we are disabling it
|
||||
CFLAGS += -mcpu=native -DLLAMA_NOSVE
|
||||
CXXFLAGS += -mcpu=native -DLLAMA_NOSVE
|
||||
endif
|
||||
endif
|
||||
|
||||
|
@ -395,7 +398,7 @@ else
|
|||
ifndef LLAMA_HIPBLAS
|
||||
ifndef LLAMA_VULKAN
|
||||
ifndef LLAMA_METAL
|
||||
NOTIFY_MSG = @echo -e '\nYou did a basic CPU build. For faster speeds, install and link a BLAS library. \nSet LLAMA_VULKAN=1 to compile with Vulkan support. This is just a reminder, not an error.'
|
||||
NOTIFY_MSG = @echo -e '\n***\nYou did a basic CPU build. For faster speeds, consider installing and linking a GPU BLAS library. For example, set LLAMA_VULKAN=1 to compile with Vulkan support. Read the KoboldCpp Wiki for more information. This is just a reminder, not an error.\n***\n'
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
|
|
@ -1,243 +0,0 @@
|
|||
#include "arg.h"
|
||||
#include "common.h"
|
||||
#include "log.h"
|
||||
#include "llama.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstdio>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
// Logs a short example command line for this program.
// The argument count is unused; argv[0] supplies the program name.
static void print_usage(int /*argc*/, char ** argv) {
    const char * program = argv[0];

    LOG("\nexample usage:\n");
    LOG("\n %s -m model.gguf -p \"Hello my name is\" -n 32 -np 4\n", program);
    LOG("\n");
}
|
||||
|
||||
int main(int argc, char ** argv) {
|
||||
common_params params;
|
||||
|
||||
params.prompt = "Hello my name is";
|
||||
params.n_predict = 32;
|
||||
|
||||
if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_COMMON, print_usage)) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
common_init();
|
||||
|
||||
// number of parallel batches
|
||||
int n_parallel = params.n_parallel;
|
||||
|
||||
// total length of the sequences including the prompt
|
||||
int n_predict = params.n_predict;
|
||||
|
||||
// init LLM
|
||||
|
||||
llama_backend_init();
|
||||
llama_numa_init(params.numa);
|
||||
|
||||
// initialize the model
|
||||
|
||||
llama_model_params model_params = common_model_params_to_llama(params);
|
||||
|
||||
llama_model * model = llama_load_model_from_file(params.model.c_str(), model_params);
|
||||
|
||||
if (model == NULL) {
|
||||
LOG_ERR("%s: error: unable to load model\n" , __func__);
|
||||
return 1;
|
||||
}
|
||||
|
||||
// tokenize the prompt
|
||||
|
||||
std::vector<llama_token> tokens_list;
|
||||
tokens_list = common_tokenize(model, params.prompt, true);
|
||||
|
||||
const int n_kv_req = tokens_list.size() + (n_predict - tokens_list.size())*n_parallel;
|
||||
|
||||
// initialize the context
|
||||
|
||||
llama_context_params ctx_params = common_context_params_to_llama(params);
|
||||
|
||||
ctx_params.n_ctx = n_kv_req;
|
||||
ctx_params.n_batch = std::max(n_predict, n_parallel);
|
||||
|
||||
llama_context * ctx = llama_new_context_with_model(model, ctx_params);
|
||||
|
||||
auto sparams = llama_sampler_chain_default_params();
|
||||
|
||||
llama_sampler * smpl = llama_sampler_chain_init(sparams);
|
||||
|
||||
llama_sampler_chain_add(smpl, llama_sampler_init_top_k(params.sampling.top_k));
|
||||
llama_sampler_chain_add(smpl, llama_sampler_init_top_p(params.sampling.top_p, params.sampling.min_keep));
|
||||
llama_sampler_chain_add(smpl, llama_sampler_init_temp (params.sampling.temp));
|
||||
llama_sampler_chain_add(smpl, llama_sampler_init_dist (params.sampling.seed));
|
||||
|
||||
if (ctx == NULL) {
|
||||
LOG_ERR("%s: error: failed to create the llama_context\n" , __func__);
|
||||
return 1;
|
||||
}
|
||||
|
||||
const int n_ctx = llama_n_ctx(ctx);
|
||||
|
||||
LOG_INF("\n%s: n_predict = %d, n_ctx = %d, n_batch = %u, n_parallel = %d, n_kv_req = %d\n", __func__, n_predict, n_ctx, ctx_params.n_batch, n_parallel, n_kv_req);
|
||||
|
||||
// make sure the KV cache is big enough to hold all the prompt and generated tokens
|
||||
if (n_kv_req > n_ctx) {
|
||||
LOG_ERR("%s: error: n_kv_req (%d) > n_ctx, the required KV cache size is not big enough\n", __func__, n_kv_req);
|
||||
LOG_ERR("%s: either reduce n_parallel or increase n_ctx\n", __func__);
|
||||
return 1;
|
||||
}
|
||||
|
||||
// print the prompt token-by-token
|
||||
|
||||
LOG("\n");
|
||||
|
||||
for (auto id : tokens_list) {
|
||||
LOG("%s", common_token_to_piece(ctx, id).c_str());
|
||||
}
|
||||
|
||||
// create a llama_batch
|
||||
// we use this object to submit token data for decoding
|
||||
llama_batch batch = llama_batch_init(std::max(tokens_list.size(), (size_t) n_parallel), 0, n_parallel);
|
||||
|
||||
std::vector<llama_seq_id> seq_ids(n_parallel, 0);
|
||||
for (int32_t i = 0; i < n_parallel; ++i) {
|
||||
seq_ids[i] = i;
|
||||
}
|
||||
|
||||
// evaluate the initial prompt
|
||||
for (size_t i = 0; i < tokens_list.size(); ++i) {
|
||||
common_batch_add(batch, tokens_list[i], i, seq_ids, false);
|
||||
}
|
||||
GGML_ASSERT(batch.n_tokens == (int) tokens_list.size());
|
||||
|
||||
if (llama_model_has_encoder(model)) {
|
||||
if (llama_encode(ctx, batch)) {
|
||||
LOG_ERR("%s : failed to eval\n", __func__);
|
||||
return 1;
|
||||
}
|
||||
|
||||
llama_token decoder_start_token_id = llama_model_decoder_start_token(model);
|
||||
if (decoder_start_token_id == -1) {
|
||||
decoder_start_token_id = llama_token_bos(model);
|
||||
}
|
||||
|
||||
common_batch_clear(batch);
|
||||
common_batch_add(batch, decoder_start_token_id, 0, seq_ids, false);
|
||||
}
|
||||
|
||||
// llama_decode will output logits only for the last token of the prompt
|
||||
batch.logits[batch.n_tokens - 1] = true;
|
||||
|
||||
if (llama_decode(ctx, batch) != 0) {
|
||||
LOG_ERR("%s: llama_decode() failed\n", __func__);
|
||||
return 1;
|
||||
}
|
||||
|
||||
//// assign the system KV cache to all parallel sequences
|
||||
//// this way, the parallel sequences will "reuse" the prompt tokens without having to copy them
|
||||
//for (int32_t i = 1; i < n_parallel; ++i) {
|
||||
// llama_kv_cache_seq_cp(ctx, 0, i, -1, -1);
|
||||
//}
|
||||
|
||||
if (n_parallel > 1) {
|
||||
LOG("\n\n%s: generating %d sequences ...\n", __func__, n_parallel);
|
||||
}
|
||||
|
||||
// main loop
|
||||
|
||||
// we will store the parallel decoded sequences in this vector
|
||||
std::vector<std::string> streams(n_parallel);
|
||||
|
||||
// remember the batch index of the last token for each parallel sequence
|
||||
// we need this to determine which logits to sample from
|
||||
std::vector<int32_t> i_batch(n_parallel, batch.n_tokens - 1);
|
||||
|
||||
int n_cur = batch.n_tokens;
|
||||
int n_decode = 0;
|
||||
|
||||
const auto t_main_start = ggml_time_us();
|
||||
|
||||
while (n_cur <= n_predict) {
|
||||
// prepare the next batch
|
||||
common_batch_clear(batch);
|
||||
|
||||
// sample the next token for each parallel sequence / stream
|
||||
for (int32_t i = 0; i < n_parallel; ++i) {
|
||||
if (i_batch[i] < 0) {
|
||||
// the stream has already finished
|
||||
continue;
|
||||
}
|
||||
|
||||
const llama_token new_token_id = llama_sampler_sample(smpl, ctx, i_batch[i]);
|
||||
|
||||
// is it an end of generation? -> mark the stream as finished
|
||||
if (llama_token_is_eog(model, new_token_id) || n_cur == n_predict) {
|
||||
i_batch[i] = -1;
|
||||
LOG("\n");
|
||||
if (n_parallel > 1) {
|
||||
LOG_INF("%s: stream %d finished at n_cur = %d", __func__, i, n_cur);
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
// if there is only one stream, we print immediately to stdout
|
||||
if (n_parallel == 1) {
|
||||
LOG("%s", common_token_to_piece(ctx, new_token_id).c_str());
|
||||
}
|
||||
|
||||
streams[i] += common_token_to_piece(ctx, new_token_id);
|
||||
|
||||
i_batch[i] = batch.n_tokens;
|
||||
|
||||
// push this new token for next evaluation
|
||||
common_batch_add(batch, new_token_id, n_cur, { i }, true);
|
||||
|
||||
n_decode += 1;
|
||||
}
|
||||
|
||||
// all streams are finished
|
||||
if (batch.n_tokens == 0) {
|
||||
break;
|
||||
}
|
||||
|
||||
n_cur += 1;
|
||||
|
||||
// evaluate the current batch with the transformer model
|
||||
if (llama_decode(ctx, batch)) {
|
||||
LOG_ERR("%s : failed to eval, return code %d\n", __func__, 1);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (n_parallel > 1) {
|
||||
LOG("\n");
|
||||
|
||||
for (int32_t i = 0; i < n_parallel; ++i) {
|
||||
LOG("sequence %d:\n\n%s%s\n\n", i, params.prompt.c_str(), streams[i].c_str());
|
||||
}
|
||||
}
|
||||
|
||||
const auto t_main_end = ggml_time_us();
|
||||
|
||||
LOG_INF("%s: decoded %d tokens in %.2f s, speed: %.2f t/s\n",
|
||||
__func__, n_decode, (t_main_end - t_main_start) / 1000000.0f, n_decode / ((t_main_end - t_main_start) / 1000000.0f));
|
||||
|
||||
LOG("\n");
|
||||
llama_perf_sampler_print(smpl);
|
||||
llama_perf_context_print(ctx);
|
||||
|
||||
fprintf(stderr, "\n");
|
||||
|
||||
llama_batch_free(batch);
|
||||
|
||||
llama_sampler_free(smpl);
|
||||
llama_free(ctx);
|
||||
llama_free_model(model);
|
||||
|
||||
llama_backend_free();
|
||||
|
||||
return 0;
|
||||
}
|
File diff suppressed because it is too large
Load diff
|
@ -1,421 +0,0 @@
|
|||
#include "arg.h"
|
||||
#include "common.h"
|
||||
#include "ggml.h"
|
||||
#include "ggml-alloc.h"
|
||||
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <thread>
|
||||
#include <fstream>
|
||||
|
||||
static bool g_verbose = false;
|
||||
|
||||
struct tensor_transformation {
|
||||
struct ggml_tensor * in;
|
||||
struct ggml_tensor * out;
|
||||
bool is_copy;
|
||||
};
|
||||
|
||||
static std::string get_kv_str(struct gguf_context * ctx_gguf, const std::string & key){
|
||||
int id = gguf_find_key(ctx_gguf, key.c_str());
|
||||
return id < 0 ? "" : std::string(gguf_get_val_str(ctx_gguf, id));
|
||||
}
|
||||
|
||||
static float get_kv_f32(struct gguf_context * ctx_gguf, const std::string & key) {
|
||||
int id = gguf_find_key(ctx_gguf, key.c_str());
|
||||
return id < 0 ? 0.0f : gguf_get_val_f32(ctx_gguf, id);
|
||||
}
|
||||
|
||||
// Appends `n` zero bytes to `file`.
// Writes are issued in chunks from a static zero-filled block instead of one
// stream call per byte; the bytes written are the same.
static void zeros(std::ofstream & file, size_t n) {
    static const char zero_block[4096] = {0};

    while (n > 0) {
        const size_t chunk = n < sizeof(zero_block) ? n : sizeof(zero_block);
        file.write(zero_block, (std::streamsize) chunk);
        n -= chunk;
    }
}
|
||||
|
||||
static std::string ggml_ne_string(const ggml_tensor * t) {
|
||||
std::string str;
|
||||
for (int i = 0; i < GGML_MAX_DIMS; ++i) {
|
||||
str += std::to_string(t->ne[i]);
|
||||
if (i + 1 < GGML_MAX_DIMS) {
|
||||
str += ", ";
|
||||
}
|
||||
}
|
||||
return str;
|
||||
}
|
||||
|
||||
static struct gguf_context * load_gguf(std::string & fname, struct ggml_context ** ctx_ggml) {
|
||||
struct gguf_init_params params = {
|
||||
/*.no_alloc = */ true,
|
||||
/*.ctx = */ ctx_ggml,
|
||||
};
|
||||
struct gguf_context * ctx_gguf = gguf_init_from_file(fname.c_str(), params);
|
||||
if (!ctx_gguf) {
|
||||
throw std::runtime_error("failed to load input GGUF from " + fname);
|
||||
}
|
||||
return ctx_gguf;
|
||||
}
|
||||
|
||||
struct file_input {
|
||||
struct ggml_context * ctx_meta = nullptr;
|
||||
struct gguf_context * ctx_gguf = nullptr;
|
||||
std::ifstream f_in;
|
||||
std::map<std::string, ggml_tensor *> tensors;
|
||||
float alpha;
|
||||
float scale;
|
||||
|
||||
file_input(std::string & fname, float scale): f_in(fname, std::ios::binary), scale(scale) {
|
||||
if (!f_in.is_open()) {
|
||||
throw std::runtime_error("failed to open input gguf from " + fname);
|
||||
}
|
||||
|
||||
ctx_gguf = load_gguf(fname, &ctx_meta);
|
||||
alpha = get_kv_f32(ctx_gguf, "adapter.lora.alpha");
|
||||
printf("%s: loaded gguf from %s\n", __func__, fname.c_str());
|
||||
|
||||
for (ggml_tensor * cur = ggml_get_first_tensor(ctx_meta); cur; cur = ggml_get_next_tensor(ctx_meta, cur)) {
|
||||
std::string name(cur->name);
|
||||
tensors[name] = cur;
|
||||
if (g_verbose) {
|
||||
printf("%s: %s\n", __func__, cur->name);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ggml_tensor * get_tensor(std::string name) {
|
||||
if (tensors.find(name) == tensors.end()) {
|
||||
return nullptr;
|
||||
}
|
||||
return tensors[name];
|
||||
}
|
||||
|
||||
void read_tensor_data(std::string name, std::vector<uint8_t> & buf) {
|
||||
if (tensors.find(name) == tensors.end()) {
|
||||
throw std::runtime_error("cannot find tensor with name: " + name);
|
||||
}
|
||||
auto len = ggml_nbytes(tensors[name]);
|
||||
if (buf.size() < len) {
|
||||
buf.resize(len);
|
||||
}
|
||||
auto i_tensor_in = gguf_find_tensor(ctx_gguf, name.c_str()); // idx of tensor in the input file
|
||||
auto offset = gguf_get_data_offset(ctx_gguf) + gguf_get_tensor_offset(ctx_gguf, i_tensor_in);
|
||||
f_in.seekg(offset);
|
||||
f_in.read((char* )buf.data(), len);
|
||||
}
|
||||
|
||||
~file_input() {
|
||||
gguf_free(ctx_gguf);
|
||||
ggml_free(ctx_meta);
|
||||
}
|
||||
};
|
||||
|
||||
struct lora_merge_ctx {
|
||||
// input base model + adapters
|
||||
file_input base_model;
|
||||
std::vector<std::unique_ptr<file_input>> adapters;
|
||||
|
||||
// for computing merged tensor
|
||||
int n_threads;
|
||||
ggml_backend_t backend = nullptr;
|
||||
ggml_gallocr_t allocr = nullptr;
|
||||
std::vector<uint8_t> read_buf;
|
||||
|
||||
// output file
|
||||
struct gguf_context * ctx_out;
|
||||
struct ggml_context * ctx_out_ggml;
|
||||
std::ofstream fout;
|
||||
|
||||
lora_merge_ctx(
|
||||
std::string & base_fname,
|
||||
std::vector<common_lora_adapter_info> & lora_files,
|
||||
std::string & outfile,
|
||||
int n_threads) : base_model(base_fname, 0), n_threads(n_threads), fout(outfile, std::ios::binary) {
|
||||
fout.exceptions(std::ofstream::failbit); // fail fast on write errors
|
||||
|
||||
if (gguf_find_key(base_model.ctx_gguf, LLM_KV_SPLIT_COUNT) >= 0) {
|
||||
throw std::runtime_error("split model is not yet supported");
|
||||
}
|
||||
|
||||
for (auto & lora_inp : lora_files) {
|
||||
auto fname = lora_inp.path;
|
||||
auto scale = lora_inp.scale;
|
||||
std::unique_ptr<file_input> adapter(new file_input(fname, scale));
|
||||
check_metadata_lora(adapter.get());
|
||||
adapters.push_back(std::move(adapter));
|
||||
}
|
||||
|
||||
ctx_out = gguf_init_empty();
|
||||
struct ggml_init_params params = {
|
||||
/*.mem_size =*/ gguf_get_n_tensors(base_model.ctx_gguf)*ggml_tensor_overhead(),
|
||||
/*.mem_buffer =*/ NULL,
|
||||
/*.no_alloc =*/ true,
|
||||
};
|
||||
ctx_out_ggml = ggml_init(params);
|
||||
backend = ggml_backend_cpu_init();
|
||||
allocr = ggml_gallocr_new(ggml_backend_get_default_buffer_type(backend));
|
||||
}
|
||||
|
||||
void check_metadata_lora(file_input * adapter) {
|
||||
auto general_type = get_kv_str(adapter->ctx_gguf, "general.type");
|
||||
if (general_type != "adapter") {
|
||||
throw std::runtime_error("expect general.type to be 'adapter', but got: " + general_type);
|
||||
}
|
||||
|
||||
auto adapter_type = get_kv_str(adapter->ctx_gguf, "adapter.type");
|
||||
if (adapter_type != "lora") {
|
||||
throw std::runtime_error("expect adapter.type to be 'lora', but got: " + adapter_type);
|
||||
}
|
||||
|
||||
auto general_arch_base = get_kv_str(base_model.ctx_gguf, "general.architecture");
|
||||
auto general_arch_lora = get_kv_str(adapter->ctx_gguf, "general.architecture");
|
||||
if (general_arch_base != general_arch_lora) {
|
||||
throw std::runtime_error("model arch and LoRA arch mismatch");
|
||||
}
|
||||
}
|
||||
|
||||
ggml_type get_out_tensor_type(struct ggml_tensor * t) {
|
||||
if (t->type == GGML_TYPE_F32) {
|
||||
return GGML_TYPE_F32;
|
||||
} else {
|
||||
return GGML_TYPE_F16;
|
||||
}
|
||||
}
|
||||
|
||||
void run_merge() {
|
||||
// prepare metadata
|
||||
gguf_set_kv(ctx_out, base_model.ctx_gguf);
|
||||
// output is forced to f16 for now
|
||||
gguf_set_val_u32(ctx_out, "general.file_type", LLAMA_FTYPE_MOSTLY_F16);
|
||||
|
||||
// check if all lora adapters have the same tensors
|
||||
// TODO: remove this when we can support merging subset of adapters. Ref: https://github.com/ggerganov/llama.cpp/pull/8607#discussion_r1686027777
|
||||
static const char * err_no_subset_adapter = "Input adapters do not have the same list of tensors. This is not yet supported. Please merge the adapter one-by-one instead of merging all at once.";
|
||||
if (adapters.size() > 1) {
|
||||
for (size_t i = 1; i < adapters.size(); ++i) {
|
||||
if (adapters[0]->tensors.size() != adapters[i]->tensors.size()) {
|
||||
throw std::runtime_error(err_no_subset_adapter);
|
||||
}
|
||||
for (auto & it : adapters[i]->tensors) {
|
||||
if (adapters[0]->get_tensor(it.first) == nullptr) {
|
||||
throw std::runtime_error(err_no_subset_adapter);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// mapping base tensor to out tensor (same shape with base, but different type)
|
||||
std::vector<tensor_transformation> trans;
|
||||
for (auto & it : base_model.tensors) {
|
||||
bool t_a = true;
|
||||
bool t_b = true;
|
||||
for (auto & adapter : adapters) {
|
||||
t_a &= nullptr != adapter->get_tensor(it.first + ".lora_a");
|
||||
t_b &= nullptr != adapter->get_tensor(it.first + ".lora_b");
|
||||
}
|
||||
auto base_tensor = it.second;
|
||||
if (!t_a && !t_b) {
|
||||
// only copy
|
||||
struct ggml_tensor * cpy_tensor = ggml_dup_tensor(ctx_out_ggml, base_tensor);
|
||||
ggml_set_name(cpy_tensor, base_tensor->name);
|
||||
trans.push_back({
|
||||
cpy_tensor,
|
||||
cpy_tensor,
|
||||
true,
|
||||
});
|
||||
gguf_add_tensor(ctx_out, cpy_tensor);
|
||||
} else if (t_a && t_b) {
|
||||
// need merging
|
||||
struct ggml_tensor * out_tensor = ggml_new_tensor(
|
||||
ctx_out_ggml, get_out_tensor_type(base_tensor), GGML_MAX_DIMS, base_tensor->ne);
|
||||
ggml_set_name(out_tensor, base_tensor->name);
|
||||
trans.push_back({
|
||||
base_tensor,
|
||||
out_tensor,
|
||||
false,
|
||||
});
|
||||
gguf_add_tensor(ctx_out, out_tensor);
|
||||
} else {
|
||||
throw std::runtime_error("tensor " + it.first + " missing either lora_a or lora_b");
|
||||
}
|
||||
}
|
||||
|
||||
// placeholder for the meta data
|
||||
{
|
||||
size_t meta_size = gguf_get_meta_size(ctx_out);
|
||||
zeros(fout, meta_size);
|
||||
}
|
||||
|
||||
// process base model tensors
|
||||
size_t n_merged = 0;
|
||||
for (auto & it : trans) {
|
||||
if (!it.is_copy) {
|
||||
merge_tensor(it.in, it.out);
|
||||
n_merged++;
|
||||
} else {
|
||||
copy_tensor(it.in);
|
||||
}
|
||||
}
|
||||
|
||||
// write output metadata
|
||||
{
|
||||
std::vector<uint8_t> data(gguf_get_meta_size(ctx_out));
|
||||
gguf_get_meta_data(ctx_out, data.data());
|
||||
fout.seekp(0);
|
||||
fout.write((const char *)data.data(), data.size());
|
||||
}
|
||||
|
||||
printf("%s : merged %ld tensors with lora adapters\n", __func__, n_merged);
|
||||
printf("%s : wrote %ld tensors to output file\n", __func__, trans.size());
|
||||
}
|
||||
|
||||
void copy_tensor(struct ggml_tensor * base) {
|
||||
printf("%s : %s [%s]\n", __func__, base->name, ggml_ne_string(base).c_str());
|
||||
size_t len = ggml_nbytes(base);
|
||||
base_model.read_tensor_data(base->name, read_buf);
|
||||
fout.write((char* )read_buf.data(), len);
|
||||
zeros(fout, GGML_PAD(len, GGUF_DEFAULT_ALIGNMENT) - len);
|
||||
}
|
||||
|
||||
void merge_tensor(struct ggml_tensor * base, struct ggml_tensor * out) {
|
||||
std::string name_base(base->name);
|
||||
std::string name_lora_a = name_base + ".lora_a";
|
||||
std::string name_lora_b = name_base + ".lora_b";
|
||||
|
||||
printf("%s : %s [%s]\n", __func__, base->name, ggml_ne_string(base).c_str());
|
||||
|
||||
// context for input tensor
|
||||
std::vector<struct ggml_tensor *> inp_a(adapters.size());
|
||||
std::vector<struct ggml_tensor *> inp_b(adapters.size());
|
||||
struct ggml_init_params params {
|
||||
/*.mem_size =*/ ggml_tensor_overhead()*(2+adapters.size()*2),
|
||||
/*.mem_buffer =*/ NULL,
|
||||
/*.no_alloc =*/ true,
|
||||
};
|
||||
struct ggml_context * ctx = ggml_init(params);
|
||||
|
||||
// alloc tensors
|
||||
struct ggml_tensor * inp_base = ggml_new_tensor(ctx, GGML_TYPE_F32, GGML_MAX_DIMS, base->ne);
|
||||
for (size_t i = 0; i < adapters.size(); ++i) {
|
||||
auto t_a = adapters[i]->get_tensor(name_lora_a);
|
||||
auto t_b = adapters[i]->get_tensor(name_lora_b);
|
||||
// TODO: add support for quantized lora
|
||||
if (ggml_is_quantized(t_a->type) || ggml_is_quantized(t_b->type)) {
|
||||
throw std::runtime_error("quantized LoRA adapters is not supported, please retry with f16 or f32");
|
||||
}
|
||||
inp_a[i] = ggml_dup_tensor(ctx, t_a);
|
||||
inp_b[i] = ggml_dup_tensor(ctx, t_b);
|
||||
}
|
||||
ggml_backend_buffer_t buffer = ggml_backend_alloc_ctx_tensors(ctx, backend);
|
||||
|
||||
// load base tensor to backend buffer
|
||||
base_model.read_tensor_data(name_base, read_buf);
|
||||
if (base->type != GGML_TYPE_F32) {
|
||||
// optionally dequantize it
|
||||
printf("%s : + dequantize base tensor from %s to F32\n", __func__, ggml_type_name(base->type));
|
||||
auto nels = ggml_nelements(inp_base);
|
||||
const auto * qtype = ggml_get_type_traits(base->type);
|
||||
std::vector<uint8_t> dequant_buf(nels * sizeof(float));
|
||||
qtype->to_float(read_buf.data(), (float *)dequant_buf.data(), nels);
|
||||
ggml_backend_tensor_set(inp_base, dequant_buf.data(), 0, dequant_buf.size());
|
||||
} else {
|
||||
ggml_backend_tensor_set(inp_base, read_buf.data(), 0, ggml_nbytes(inp_base));
|
||||
}
|
||||
|
||||
// load lora tensors to backend buffer
|
||||
for (size_t i = 0; i < adapters.size(); ++i) {
|
||||
adapters[i]->read_tensor_data(name_lora_a, read_buf);
|
||||
ggml_backend_tensor_set(inp_a[i], read_buf.data(), 0, ggml_nbytes(inp_a[i]));
|
||||
adapters[i]->read_tensor_data(name_lora_b, read_buf);
|
||||
ggml_backend_tensor_set(inp_b[i], read_buf.data(), 0, ggml_nbytes(inp_b[i]));
|
||||
}
|
||||
|
||||
// build graph
|
||||
struct ggml_cgraph * gf;
|
||||
{
|
||||
static size_t buf_size = ggml_tensor_overhead()*GGML_DEFAULT_GRAPH_SIZE + ggml_graph_overhead();
|
||||
static std::vector<uint8_t> buf(buf_size);
|
||||
struct ggml_init_params params0 = {
|
||||
/*.mem_size =*/ buf_size,
|
||||
/*.mem_buffer =*/ buf.data(),
|
||||
/*.no_alloc =*/ true,
|
||||
};
|
||||
struct ggml_context * ctx0 = ggml_init(params0);
|
||||
gf = ggml_new_graph(ctx0);
|
||||
struct ggml_tensor * cur = inp_base;
|
||||
for (size_t i = 0; i < adapters.size(); ++i) {
|
||||
struct ggml_tensor * a_T = ggml_cont(ctx0, ggml_transpose(ctx0, ggml_cast(ctx0, inp_a[i], GGML_TYPE_F32)));
|
||||
struct ggml_tensor * delta = ggml_mul_mat(ctx0, a_T, ggml_cast(ctx0, inp_b[i], GGML_TYPE_F32));
|
||||
// scale
|
||||
const float alpha = adapters[i]->alpha;
|
||||
const float rank = (float) inp_b[i]->ne[0];
|
||||
const float scale = alpha ? adapters[i]->scale * alpha / rank : adapters[i]->scale;
|
||||
delta = ggml_scale(ctx0, delta, scale);
|
||||
cur = ggml_add(ctx0, delta, cur);
|
||||
printf("%s : + merging from adapter[%ld] type=%s\n", __func__, i, ggml_type_name(inp_a[i]->type));
|
||||
printf("%s : input_scale=%f calculated_scale=%f rank=%d\n", __func__, adapters[i]->scale, scale, (int) inp_b[i]->ne[0]);
|
||||
}
|
||||
cur = ggml_cast(ctx0, cur, out->type);
|
||||
printf("%s : + output type is %s\n", __func__, ggml_type_name(out->type));
|
||||
ggml_build_forward_expand(gf, cur);
|
||||
ggml_free(ctx0);
|
||||
}
|
||||
|
||||
// compute
|
||||
{
|
||||
ggml_gallocr_alloc_graph(allocr, gf);
|
||||
ggml_backend_cpu_set_n_threads(backend, n_threads);
|
||||
ggml_backend_graph_compute(backend, gf);
|
||||
}
|
||||
|
||||
// write data to output file
|
||||
{
|
||||
auto * result = ggml_graph_node(gf, -1);
|
||||
size_t len = ggml_nbytes(result);
|
||||
if (read_buf.size() < len) {
|
||||
read_buf.resize(len);
|
||||
}
|
||||
ggml_backend_tensor_get(result, read_buf.data(), 0, len);
|
||||
fout.write((char* )read_buf.data(), len);
|
||||
zeros(fout, GGML_PAD(len, GGUF_DEFAULT_ALIGNMENT) - len);
|
||||
}
|
||||
|
||||
ggml_free(ctx);
|
||||
ggml_backend_buffer_free(buffer);
|
||||
}
|
||||
|
||||
~lora_merge_ctx() {
|
||||
ggml_gallocr_free(allocr);
|
||||
ggml_backend_free(backend);
|
||||
gguf_free(ctx_out);
|
||||
ggml_free(ctx_out_ggml);
|
||||
}
|
||||
};
|
||||
|
||||
// Prints an example command line and a note that the output model is F16.
// The argument count is unused; argv[0] supplies the program name.
static void print_usage(int /*argc*/, char ** argv) {
    printf(
        "\nexample usage:\n"
        "\n %s -m base-model.gguf --lora lora-file.gguf -o merged-model-f16.gguf\n"
        "\nNOTE: output model is F16\n"
        "\n",
        argv[0]);
}
|
||||
|
||||
int main(int argc, char ** argv) {
|
||||
common_params params;
|
||||
|
||||
if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_EXPORT_LORA, print_usage)) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
g_verbose = (params.verbosity > 1);
|
||||
try {
|
||||
lora_merge_ctx ctx(params.model, params.lora_adapters, params.lora_outfile, params.cpuparams.n_threads);
|
||||
ctx.run_merge();
|
||||
} catch (const std::exception & err) {
|
||||
fprintf(stderr, "%s\n", err.what());
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
printf("done, output file is %s\n", params.lora_outfile.c_str());
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -1,112 +0,0 @@
|
|||
#include "unicode.h"
|
||||
#include "llama-grammar.h"
|
||||
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <sstream>
|
||||
#include <fstream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
static bool llama_grammar_validate(struct llama_grammar * grammar, const std::string & input_str, size_t & error_pos, std::string & error_msg) {
|
||||
const auto cpts = unicode_cpts_from_utf8(input_str);
|
||||
|
||||
const llama_grammar_rules & rules = llama_grammar_get_rules (grammar);
|
||||
llama_grammar_stacks & stacks_cur = llama_grammar_get_stacks(grammar);
|
||||
|
||||
size_t pos = 0;
|
||||
for (const auto & cpt : cpts) {
|
||||
const llama_grammar_stacks stacks_prev = llama_grammar_get_stacks(grammar); // copy
|
||||
|
||||
llama_grammar_accept(rules, stacks_prev, cpt, stacks_cur);
|
||||
|
||||
if (stacks_cur.empty()) {
|
||||
error_pos = pos;
|
||||
error_msg = "Unexpected character '" + unicode_cpt_to_utf8(cpt) + "'";
|
||||
stacks_cur = stacks_prev;
|
||||
return false;
|
||||
}
|
||||
++pos;
|
||||
}
|
||||
|
||||
for (const auto & stack : stacks_cur) {
|
||||
if (stack.empty()) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
error_pos = pos;
|
||||
error_msg = "Unexpected end of input";
|
||||
return false;
|
||||
}
|
||||
|
||||
// Prints a validation failure report to stdout: the error message and position,
// followed by the input with the offending character in bold red and the
// remainder of the input in red (ANSI escape sequences).
//
//   input_str - the text that was validated (UTF-8)
//   error_pos - index of the offending *code point*; llama_grammar_validate
//               counts code points, not bytes
//   error_msg - human readable reason
//
// The code point index is converted to a byte offset before slicing, so that
// multi-byte characters before or at the error position are highlighted
// correctly instead of being split in the middle.
// NOTE(review): the conversion assumes well-formed UTF-8 (continuation bytes
// are 10xxxxxx); confirm how unicode_cpts_from_utf8 counts malformed bytes.
static void print_error_message(const std::string & input_str, size_t error_pos, const std::string & error_msg) {
    const size_t n_bytes = input_str.size();

    // byte offset of the first byte after the code point starting at offset i
    const auto next_boundary = [&](size_t i) -> size_t {
        ++i;
        while (i < n_bytes && (static_cast<unsigned char>(input_str[i]) & 0xC0) == 0x80) {
            ++i;
        }
        return i;
    };

    // walk error_pos code points forward to find the offending byte range
    size_t err_begin = 0;
    for (size_t seen = 0; seen < error_pos && err_begin < n_bytes; ++seen) {
        err_begin = next_boundary(err_begin);
    }
    const size_t err_end = err_begin < n_bytes ? next_boundary(err_begin) : n_bytes;

    fprintf(stdout, "Input string is invalid according to the grammar.\n");
    fprintf(stdout, "Error: %s at position %zu\n", error_msg.c_str(), error_pos);
    fprintf(stdout, "\n");
    fprintf(stdout, "Input string:\n");
    fprintf(stdout, "%s", input_str.substr(0, err_begin).c_str());
    if (err_begin < n_bytes) {
        // offending character in bold red
        fprintf(stdout, "\033[1;31m%s", input_str.substr(err_begin, err_end - err_begin).c_str());
        if (err_end < n_bytes) {
            // everything after it in plain red
            fprintf(stdout, "\033[0;31m%s", input_str.substr(err_end).c_str());
        }
        fprintf(stdout, "\033[0m\n");
    }
}
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
if (argc != 3) {
|
||||
fprintf(stdout, "Usage: %s <grammar_filename> <input_filename>\n", argv[0]);
|
||||
return 1;
|
||||
}
|
||||
|
||||
const std::string grammar_filename = argv[1];
|
||||
const std::string input_filename = argv[2];
|
||||
|
||||
// Read the GBNF grammar file
|
||||
FILE* grammar_file = fopen(grammar_filename.c_str(), "r");
|
||||
if (!grammar_file) {
|
||||
fprintf(stdout, "Failed to open grammar file: %s\n", grammar_filename.c_str());
|
||||
return 1;
|
||||
}
|
||||
|
||||
std::string grammar_str;
|
||||
{
|
||||
std::ifstream grammar_file(grammar_filename);
|
||||
GGML_ASSERT(grammar_file.is_open() && "Failed to open grammar file");
|
||||
std::stringstream buffer;
|
||||
buffer << grammar_file.rdbuf();
|
||||
grammar_str = buffer.str();
|
||||
}
|
||||
|
||||
llama_grammar * grammar = llama_grammar_init_impl(nullptr, grammar_str.c_str(), "root");
|
||||
if (grammar == nullptr) {
|
||||
throw std::runtime_error("Failed to initialize llama_grammar");
|
||||
}
|
||||
// Read the input file
|
||||
std::string input_str;
|
||||
{
|
||||
std::ifstream input_file(input_filename);
|
||||
GGML_ASSERT(input_file.is_open() && "Failed to open input file");
|
||||
std::stringstream buffer;
|
||||
buffer << input_file.rdbuf();
|
||||
input_str = buffer.str();
|
||||
}
|
||||
|
||||
// Validate the input string against the grammar
|
||||
size_t error_pos;
|
||||
std::string error_msg;
|
||||
bool is_valid = llama_grammar_validate(grammar, input_str, error_pos, error_msg);
|
||||
|
||||
if (is_valid) {
|
||||
fprintf(stdout, "Input string is valid according to the grammar.\n");
|
||||
} else {
|
||||
print_error_message(input_str, error_pos, error_msg);
|
||||
}
|
||||
|
||||
// Clean up
|
||||
llama_grammar_free_impl(grammar);
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -1,5 +0,0 @@
|
|||
set(TARGET llama-gen-docs)
|
||||
add_executable(${TARGET} gen-docs.cpp)
|
||||
install(TARGETS ${TARGET} RUNTIME)
|
||||
target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
|
||||
target_compile_features(${TARGET} PRIVATE cxx_std_17)
|
|
@ -1,83 +0,0 @@
|
|||
#include "arg.h"
|
||||
#include "common.h"
|
||||
|
||||
#include <fstream>
|
||||
#include <string>
|
||||
|
||||
// Export usage message (-h) to markdown format
|
||||
|
||||
// Writes the two header rows of the markdown argument table
// (column titles followed by the separator row).
static void write_table_header(std::ofstream & file) {
    static const char * const header_rows =
        "| Argument | Explanation |\n"
        "| -------- | ----------- |\n";
    file << header_rows;
}
|
||||
|
||||
static void write_table_entry(std::ofstream & file, const common_arg & opt) {
|
||||
file << "| `";
|
||||
// args
|
||||
for (const auto & arg : opt.args) {
|
||||
if (arg == opt.args.front()) {
|
||||
file << arg;
|
||||
if (opt.args.size() > 1) file << ", ";
|
||||
} else {
|
||||
file << arg << (arg != opt.args.back() ? ", " : "");
|
||||
}
|
||||
}
|
||||
// value hint
|
||||
if (opt.value_hint) {
|
||||
std::string md_value_hint(opt.value_hint);
|
||||
string_replace_all(md_value_hint, "|", "\\|");
|
||||
file << " " << md_value_hint;
|
||||
}
|
||||
if (opt.value_hint_2) {
|
||||
std::string md_value_hint_2(opt.value_hint_2);
|
||||
string_replace_all(md_value_hint_2, "|", "\\|");
|
||||
file << " " << md_value_hint_2;
|
||||
}
|
||||
// help text
|
||||
std::string md_help(opt.help);
|
||||
string_replace_all(md_help, "\n", "<br/>");
|
||||
string_replace_all(md_help, "|", "\\|");
|
||||
file << "` | " << md_help << " |\n";
|
||||
}
|
||||
|
||||
static void write_table(std::ofstream & file, std::vector<common_arg *> & opts) {
|
||||
write_table_header(file);
|
||||
for (const auto & opt : opts) {
|
||||
write_table_entry(file, *opt);
|
||||
}
|
||||
}
|
||||
|
||||
static void export_md(std::string fname, llama_example ex) {
|
||||
std::ofstream file(fname, std::ofstream::out | std::ofstream::trunc);
|
||||
|
||||
common_params params;
|
||||
auto ctx_arg = common_params_parser_init(params, ex);
|
||||
|
||||
std::vector<common_arg *> common_options;
|
||||
std::vector<common_arg *> sparam_options;
|
||||
std::vector<common_arg *> specific_options;
|
||||
for (auto & opt : ctx_arg.options) {
|
||||
// in case multiple LLAMA_EXAMPLE_* are set, we prioritize the LLAMA_EXAMPLE_* matching current example
|
||||
if (opt.is_sparam) {
|
||||
sparam_options.push_back(&opt);
|
||||
} else if (opt.in_example(ctx_arg.ex)) {
|
||||
specific_options.push_back(&opt);
|
||||
} else {
|
||||
common_options.push_back(&opt);
|
||||
}
|
||||
}
|
||||
|
||||
file << "**Common params**\n\n";
|
||||
write_table(file, common_options);
|
||||
file << "\n\n**Sampling params**\n\n";
|
||||
write_table(file, sparam_options);
|
||||
file << "\n\n**Example-specific params**\n\n";
|
||||
write_table(file, specific_options);
|
||||
}
|
||||
|
||||
int main(int, char **) {
|
||||
export_md("autogen-main.md", LLAMA_EXAMPLE_MAIN);
|
||||
export_md("autogen-server.md", LLAMA_EXAMPLE_SERVER);
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -1,22 +0,0 @@
|
|||
set(TARGET llama-gguf-hash)
|
||||
add_executable(${TARGET} gguf-hash.cpp)
|
||||
install(TARGETS ${TARGET} RUNTIME)
|
||||
|
||||
# clibs dependencies
|
||||
include_directories(deps/)
|
||||
|
||||
add_library(xxhash OBJECT deps/xxhash/xxhash.c deps/xxhash/xxhash.h)
|
||||
target_link_libraries(${TARGET} PRIVATE xxhash)
|
||||
|
||||
add_library(sha1 OBJECT deps/sha1/sha1.c deps/sha1/sha1.h)
|
||||
target_link_libraries(${TARGET} PRIVATE sha1)
|
||||
if (NOT MSVC)
|
||||
# disable warnings in 3rd party code
|
||||
target_compile_options(sha1 PRIVATE -w)
|
||||
endif()
|
||||
|
||||
add_library(sha256 OBJECT deps/sha256/sha256.c deps/sha256/sha256.h)
|
||||
target_link_libraries(${TARGET} PRIVATE sha256)
|
||||
|
||||
target_link_libraries(${TARGET} PRIVATE ggml ${CMAKE_THREAD_LIBS_INIT})
|
||||
target_compile_features(${TARGET} PRIVATE cxx_std_17)
|
|
@ -1,13 +0,0 @@
|
|||
{
|
||||
"name": "rotate-bits",
|
||||
"version": "0.1.1",
|
||||
"repo": "jb55/rotate-bits.h",
|
||||
"description": "rotate bits",
|
||||
"keywords": ["rotl", "rotr"],
|
||||
"src": ["rotate-bits.h"],
|
||||
"license": "Public Domain",
|
||||
"development": {
|
||||
"thlorenz/tap.c": "*"
|
||||
}
|
||||
}
|
||||
|
|
@ -1,46 +0,0 @@
|
|||
|
||||
|
||||
#ifndef __ROTATE_DEFS_H
#define __ROTATE_DEFS_H

/*
 * Bit rotation helpers: ROTL<w>(v, n) rotates the w-bit value v left by n bits,
 * ROTR<w>(v, n) rotates it right. All of them are macros, so v and n may be
 * evaluated more than once; do not pass expressions with side effects.
 */

#include <stdint.h>

/*
 * Truncate a value to exactly 8/16/32/64 bits. Defined for every compiler
 * because ROTL8/ROTL16 below need them on MSVC as well.
 */
#define U8V(v) ((uint8_t)(v) & 0xFFU)
#define U16V(v) ((uint16_t)(v) & 0xFFFFU)
#define U32V(v) ((uint32_t)(v) & 0xFFFFFFFFU)
#define U64V(v) ((uint64_t)(v) & 0xFFFFFFFFFFFFFFFFU)

#ifdef _MSC_VER

#include <stdlib.h>

/* MSVC provides rotate intrinsics for 32 and 64 bits. */
#define ROTL32(v, n) _rotl((v), (n))
#define ROTL64(v, n) _rotl64((v), (n))

#define ROTR32(v, n) _rotr((v), (n))
#define ROTR64(v, n) _rotr64((v), (n))

#else

/*
 * Both shift counts are reduced modulo the width, so rotating by 0 or by the
 * full width never shifts by the type's width (which would be undefined).
 */
#define ROTL32(v, n) \
  (U32V((uint32_t)(v) << ((unsigned)(n) & 31U)) | \
   ((uint32_t)(v) >> ((32U - (unsigned)(n)) & 31U)))

// tests fail if we don't have this cast...
#define ROTL64(v, n) \
  (U64V((uint64_t)(v) << ((unsigned)(n) & 63U)) | \
   ((uint64_t)(v) >> ((64U - (unsigned)(n)) & 63U)))

#define ROTR32(v, n) ROTL32(v, 32 - (n))
#define ROTR64(v, n) ROTL64(v, 64 - (n))

#endif

#define ROTL8(v, n) \
  (U8V((uint8_t)(v) << ((unsigned)(n) & 7U)) | \
   ((uint8_t)(v) >> ((8U - (unsigned)(n)) & 7U)))

#define ROTL16(v, n) \
  (U16V((uint16_t)(v) << ((unsigned)(n) & 15U)) | \
   ((uint16_t)(v) >> ((16U - (unsigned)(n)) & 15U)))

#define ROTR8(v, n) ROTL8(v, 8 - (n))
#define ROTR16(v, n) ROTL16(v, 16 - (n))

#endif
|
|
@ -1,9 +0,0 @@
|
|||
{
|
||||
"name": "sha1",
|
||||
"version": "0.0.1",
|
||||
"repo": "clibs/sha1",
|
||||
"description": "sha1 hash algorithm",
|
||||
"keywords": ["sha1", "hash"],
|
||||
"license": "public domain",
|
||||
"src": ["sha1.c", "sha1.h"]
|
||||
}
|
|
@ -1,295 +0,0 @@
|
|||
/*
|
||||
SHA-1 in C
|
||||
By Steve Reid <steve@edmweb.com>
|
||||
100% Public Domain
|
||||
|
||||
Test Vectors (from FIPS PUB 180-1)
|
||||
"abc"
|
||||
A9993E36 4706816A BA3E2571 7850C26C 9CD0D89D
|
||||
"abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq"
|
||||
84983E44 1C3BD26E BAAE4AA1 F95129E5 E54670F1
|
||||
A million repetitions of "a"
|
||||
34AA973C D4C4DAA4 F61EEB2B DBAD2731 6534016F
|
||||
*/
|
||||
|
||||
/* #define LITTLE_ENDIAN * This should be #define'd already, if true. */
|
||||
/* #define SHA1HANDSOFF * Copies data before messing with it. */
|
||||
|
||||
#define SHA1HANDSOFF
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
/* for uint32_t */
|
||||
#include <stdint.h>
|
||||
|
||||
#include "sha1.h"
|
||||
|
||||
|
||||
/* Rotate a 32-bit value left; only used with bit counts 1, 5 and 30 below. */
#define rol(value, bits) (((value) << (bits)) | ((value) >> (32 - (bits))))

/* blk0() and blk() perform the initial expand. */
/* I got the idea of expanding during the round function from SSLeay */
/*
 * blk0() assembles message word i from the block's bytes in big-endian order.
 * Building the word from individual bytes gives the same result on any host
 * byte order, so no BYTE_ORDER/LITTLE_ENDIAN macros are needed (an undefined
 * pair of such macros would compare equal in #if and silently select the
 * little-endian path).
 */
#define blk0(i) (block->l[i] = ((uint32_t) block->c[(i) * 4    ] << 24) \
    | ((uint32_t) block->c[(i) * 4 + 1] << 16) \
    | ((uint32_t) block->c[(i) * 4 + 2] <<  8) \
    | ((uint32_t) block->c[(i) * 4 + 3]))
/* blk() extends the message schedule in place, using l[] as a 16-word ring. */
#define blk(i) (block->l[i&15] = rol(block->l[(i+13)&15]^block->l[(i+8)&15] \
    ^block->l[(i+2)&15]^block->l[i&15],1))

/* (R0+R1), R2, R3, R4 are the different operations used in SHA1 */
#define R0(v,w,x,y,z,i) z+=((w&(x^y))^y)+blk0(i)+0x5A827999+rol(v,5);w=rol(w,30);
#define R1(v,w,x,y,z,i) z+=((w&(x^y))^y)+blk(i)+0x5A827999+rol(v,5);w=rol(w,30);
#define R2(v,w,x,y,z,i) z+=(w^x^y)+blk(i)+0x6ED9EBA1+rol(v,5);w=rol(w,30);
#define R3(v,w,x,y,z,i) z+=(((w|x)&y)|(w&x))+blk(i)+0x8F1BBCDC+rol(v,5);w=rol(w,30);
#define R4(v,w,x,y,z,i) z+=(w^x^y)+blk(i)+0xCA62C1D6+rol(v,5);w=rol(w,30);


/*
 * Hash a single 512-bit block. This is the core of the algorithm.
 *
 * state  - the five 32-bit chaining values; updated in place
 * buffer - the 64-byte block to absorb; never modified (the rounds work on a
 *          private copy, which is what this file's SHA1HANDSOFF setting
 *          selects)
 */
void SHA1Transform(
    uint32_t state[5],
    const unsigned char buffer[64]
)
{
    uint32_t a, b, c, d, e;

    typedef union
    {
        unsigned char c[64];
        uint32_t l[16];
    } CHAR64LONG16;

    CHAR64LONG16 block[1];      /* use array to appear as a pointer */

    memcpy(block, buffer, 64);

    /* Copy context->state[] to working vars */
    a = state[0];
    b = state[1];
    c = state[2];
    d = state[3];
    e = state[4];
    /* 4 rounds of 20 operations each. Loop unrolled. */
    R0(a, b, c, d, e, 0);
    R0(e, a, b, c, d, 1);
    R0(d, e, a, b, c, 2);
    R0(c, d, e, a, b, 3);
    R0(b, c, d, e, a, 4);
    R0(a, b, c, d, e, 5);
    R0(e, a, b, c, d, 6);
    R0(d, e, a, b, c, 7);
    R0(c, d, e, a, b, 8);
    R0(b, c, d, e, a, 9);
    R0(a, b, c, d, e, 10);
    R0(e, a, b, c, d, 11);
    R0(d, e, a, b, c, 12);
    R0(c, d, e, a, b, 13);
    R0(b, c, d, e, a, 14);
    R0(a, b, c, d, e, 15);
    R1(e, a, b, c, d, 16);
    R1(d, e, a, b, c, 17);
    R1(c, d, e, a, b, 18);
    R1(b, c, d, e, a, 19);
    R2(a, b, c, d, e, 20);
    R2(e, a, b, c, d, 21);
    R2(d, e, a, b, c, 22);
    R2(c, d, e, a, b, 23);
    R2(b, c, d, e, a, 24);
    R2(a, b, c, d, e, 25);
    R2(e, a, b, c, d, 26);
    R2(d, e, a, b, c, 27);
    R2(c, d, e, a, b, 28);
    R2(b, c, d, e, a, 29);
    R2(a, b, c, d, e, 30);
    R2(e, a, b, c, d, 31);
    R2(d, e, a, b, c, 32);
    R2(c, d, e, a, b, 33);
    R2(b, c, d, e, a, 34);
    R2(a, b, c, d, e, 35);
    R2(e, a, b, c, d, 36);
    R2(d, e, a, b, c, 37);
    R2(c, d, e, a, b, 38);
    R2(b, c, d, e, a, 39);
    R3(a, b, c, d, e, 40);
    R3(e, a, b, c, d, 41);
    R3(d, e, a, b, c, 42);
    R3(c, d, e, a, b, 43);
    R3(b, c, d, e, a, 44);
    R3(a, b, c, d, e, 45);
    R3(e, a, b, c, d, 46);
    R3(d, e, a, b, c, 47);
    R3(c, d, e, a, b, 48);
    R3(b, c, d, e, a, 49);
    R3(a, b, c, d, e, 50);
    R3(e, a, b, c, d, 51);
    R3(d, e, a, b, c, 52);
    R3(c, d, e, a, b, 53);
    R3(b, c, d, e, a, 54);
    R3(a, b, c, d, e, 55);
    R3(e, a, b, c, d, 56);
    R3(d, e, a, b, c, 57);
    R3(c, d, e, a, b, 58);
    R3(b, c, d, e, a, 59);
    R4(a, b, c, d, e, 60);
    R4(e, a, b, c, d, 61);
    R4(d, e, a, b, c, 62);
    R4(c, d, e, a, b, 63);
    R4(b, c, d, e, a, 64);
    R4(a, b, c, d, e, 65);
    R4(e, a, b, c, d, 66);
    R4(d, e, a, b, c, 67);
    R4(c, d, e, a, b, 68);
    R4(b, c, d, e, a, 69);
    R4(a, b, c, d, e, 70);
    R4(e, a, b, c, d, 71);
    R4(d, e, a, b, c, 72);
    R4(c, d, e, a, b, 73);
    R4(b, c, d, e, a, 74);
    R4(a, b, c, d, e, 75);
    R4(e, a, b, c, d, 76);
    R4(d, e, a, b, c, 77);
    R4(c, d, e, a, b, 78);
    R4(b, c, d, e, a, 79);
    /* Add the working vars back into context.state[] */
    state[0] += a;
    state[1] += b;
    state[2] += c;
    state[3] += d;
    state[4] += e;
    /* Wipe variables */
    a = b = c = d = e = 0;
    memset(block, '\0', sizeof(block));
}
|
||||
|
||||
|
||||
/* SHA1Init - Initialize new context */
|
||||
|
||||
void SHA1Init(
|
||||
SHA1_CTX * context
|
||||
)
|
||||
{
|
||||
/* SHA1 initialization constants */
|
||||
context->state[0] = 0x67452301;
|
||||
context->state[1] = 0xEFCDAB89;
|
||||
context->state[2] = 0x98BADCFE;
|
||||
context->state[3] = 0x10325476;
|
||||
context->state[4] = 0xC3D2E1F0;
|
||||
context->count[0] = context->count[1] = 0;
|
||||
}
|
||||
|
||||
|
||||
/* Run your data through this. */
|
||||
|
||||
void SHA1Update(
|
||||
SHA1_CTX * context,
|
||||
const unsigned char *data,
|
||||
uint32_t len
|
||||
)
|
||||
{
|
||||
uint32_t i;
|
||||
|
||||
uint32_t j;
|
||||
|
||||
j = context->count[0];
|
||||
if ((context->count[0] += len << 3) < j)
|
||||
context->count[1]++;
|
||||
context->count[1] += (len >> 29);
|
||||
j = (j >> 3) & 63;
|
||||
if ((j + len) > 63)
|
||||
{
|
||||
memcpy(&context->buffer[j], data, (i = 64 - j));
|
||||
SHA1Transform(context->state, context->buffer);
|
||||
for (; i + 63 < len; i += 64)
|
||||
{
|
||||
SHA1Transform(context->state, &data[i]);
|
||||
}
|
||||
j = 0;
|
||||
}
|
||||
else
|
||||
i = 0;
|
||||
memcpy(&context->buffer[j], &data[i], len - i);
|
||||
}
|
||||
|
||||
|
||||
/* Add padding and return the message digest. */
|
||||
|
||||
void SHA1Final(
|
||||
unsigned char digest[20],
|
||||
SHA1_CTX * context
|
||||
)
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
unsigned char finalcount[8];
|
||||
|
||||
unsigned char c;
|
||||
|
||||
#if 0 /* untested "improvement" by DHR */
|
||||
/* Convert context->count to a sequence of bytes
|
||||
* in finalcount. Second element first, but
|
||||
* big-endian order within element.
|
||||
* But we do it all backwards.
|
||||
*/
|
||||
unsigned char *fcp = &finalcount[8];
|
||||
|
||||
for (i = 0; i < 2; i++)
|
||||
{
|
||||
uint32_t t = context->count[i];
|
||||
|
||||
int j;
|
||||
|
||||
for (j = 0; j < 4; t >>= 8, j++)
|
||||
*--fcp = (unsigned char) t}
|
||||
#else
|
||||
for (i = 0; i < 8; i++)
|
||||
{
|
||||
finalcount[i] = (unsigned char) ((context->count[(i >= 4 ? 0 : 1)] >> ((3 - (i & 3)) * 8)) & 255); /* Endian independent */
|
||||
}
|
||||
#endif
|
||||
c = 0200;
|
||||
SHA1Update(context, &c, 1);
|
||||
while ((context->count[0] & 504) != 448)
|
||||
{
|
||||
c = 0000;
|
||||
SHA1Update(context, &c, 1);
|
||||
}
|
||||
SHA1Update(context, finalcount, 8); /* Should cause a SHA1Transform() */
|
||||
for (i = 0; i < 20; i++)
|
||||
{
|
||||
digest[i] = (unsigned char)
|
||||
((context->state[i >> 2] >> ((3 - (i & 3)) * 8)) & 255);
|
||||
}
|
||||
/* Wipe variables */
|
||||
memset(context, '\0', sizeof(*context));
|
||||
memset(&finalcount, '\0', sizeof(finalcount));
|
||||
}
|
||||
|
||||
void SHA1(
|
||||
char *hash_out,
|
||||
const char *str,
|
||||
uint32_t len)
|
||||
{
|
||||
SHA1_CTX ctx;
|
||||
unsigned int ii;
|
||||
|
||||
SHA1Init(&ctx);
|
||||
for (ii=0; ii<len; ii+=1)
|
||||
SHA1Update(&ctx, (const unsigned char*)str + ii, 1);
|
||||
SHA1Final((unsigned char *)hash_out, &ctx);
|
||||
}
|
||||
|
|
@ -1,52 +0,0 @@
|
|||
#ifndef SHA1_H
#define SHA1_H

/*
   SHA-1 in C
   By Steve Reid <steve@edmweb.com>
   100% Public Domain
 */

#include <stdint.h>

#if defined(__cplusplus)
extern "C" {
#endif

/* Streaming SHA-1 state. */
typedef struct
{
    uint32_t state[5];          /* chaining values */
    uint32_t count[2];          /* message length in bits: [0] low word, [1] high word */
    unsigned char buffer[64];   /* bytes of the current, not yet complete block */
} SHA1_CTX;

/* Hashes one 64-byte block into state. */
void SHA1Transform(
    uint32_t state[5],
    const unsigned char buffer[64]
    );

/* Prepares context for a new message. */
void SHA1Init(
    SHA1_CTX * context
    );

/* Absorbs len bytes of data; may be called repeatedly. */
void SHA1Update(
    SHA1_CTX * context,
    const unsigned char *data,
    uint32_t len
    );

/* Pads the message, writes the 20-byte digest and wipes context. */
void SHA1Final(
    unsigned char digest[20],
    SHA1_CTX * context
    );

/* One-shot hash of len bytes at str; hash_out must have room for 20 bytes. */
void SHA1(
    char *hash_out,
    const char *str,
    uint32_t len);

#if defined(__cplusplus)
}
#endif

#endif /* SHA1_H */
|
|
@ -1,15 +0,0 @@
|
|||
{
|
||||
"name": "sha256",
|
||||
"version": "0.0.2",
|
||||
"repo": "jb55/sha256.c",
|
||||
"description": "sha256 in c",
|
||||
"keywords": ["sha256", "sha2"],
|
||||
"src": ["sha256.c", "sha256.h"],
|
||||
"dependencies": {
|
||||
"jb55/rotate-bits.h": "0.1.1"
|
||||
},
|
||||
"development": {
|
||||
"thlorenz/tap.c": "*"
|
||||
}
|
||||
}
|
||||
|
|
@ -1,221 +0,0 @@
|
|||
/* Crypto/Sha256.c -- SHA-256 Hash
|
||||
2010-06-11 : Igor Pavlov : Public domain
|
||||
This code is based on public domain code from Wei Dai's Crypto++ library. */
|
||||
|
||||
#include "rotate-bits/rotate-bits.h"
|
||||
#include "sha256.h"
|
||||
|
||||
/* define it for speed optimization */
|
||||
#define _SHA256_UNROLL
|
||||
#define _SHA256_UNROLL2
|
||||
|
||||
void
|
||||
sha256_init(sha256_t *p)
|
||||
{
|
||||
p->state[0] = 0x6a09e667;
|
||||
p->state[1] = 0xbb67ae85;
|
||||
p->state[2] = 0x3c6ef372;
|
||||
p->state[3] = 0xa54ff53a;
|
||||
p->state[4] = 0x510e527f;
|
||||
p->state[5] = 0x9b05688c;
|
||||
p->state[6] = 0x1f83d9ab;
|
||||
p->state[7] = 0x5be0cd19;
|
||||
p->count = 0;
|
||||
}
|
||||
|
||||
#define S0(x) (ROTR32(x, 2) ^ ROTR32(x,13) ^ ROTR32(x, 22))
|
||||
#define S1(x) (ROTR32(x, 6) ^ ROTR32(x,11) ^ ROTR32(x, 25))
|
||||
#define s0(x) (ROTR32(x, 7) ^ ROTR32(x,18) ^ (x >> 3))
|
||||
#define s1(x) (ROTR32(x,17) ^ ROTR32(x,19) ^ (x >> 10))
|
||||
|
||||
#define blk0(i) (W[i] = data[i])
|
||||
#define blk2(i) (W[i&15] += s1(W[(i-2)&15]) + W[(i-7)&15] + s0(W[(i-15)&15]))
|
||||
|
||||
#define Ch(x,y,z) (z^(x&(y^z)))
|
||||
#define Maj(x,y,z) ((x&y)|(z&(x|y)))
|
||||
|
||||
#define a(i) T[(0-(i))&7]
|
||||
#define b(i) T[(1-(i))&7]
|
||||
#define c(i) T[(2-(i))&7]
|
||||
#define d(i) T[(3-(i))&7]
|
||||
#define e(i) T[(4-(i))&7]
|
||||
#define f(i) T[(5-(i))&7]
|
||||
#define g(i) T[(6-(i))&7]
|
||||
#define h(i) T[(7-(i))&7]
|
||||
|
||||
|
||||
#ifdef _SHA256_UNROLL2
|
||||
|
||||
#define R(a,b,c,d,e,f,g,h, i) h += S1(e) + Ch(e,f,g) + K[i+j] + (j?blk2(i):blk0(i));\
|
||||
d += h; h += S0(a) + Maj(a, b, c)
|
||||
|
||||
#define RX_8(i) \
|
||||
R(a,b,c,d,e,f,g,h, i); \
|
||||
R(h,a,b,c,d,e,f,g, (i+1)); \
|
||||
R(g,h,a,b,c,d,e,f, (i+2)); \
|
||||
R(f,g,h,a,b,c,d,e, (i+3)); \
|
||||
R(e,f,g,h,a,b,c,d, (i+4)); \
|
||||
R(d,e,f,g,h,a,b,c, (i+5)); \
|
||||
R(c,d,e,f,g,h,a,b, (i+6)); \
|
||||
R(b,c,d,e,f,g,h,a, (i+7))
|
||||
|
||||
#else
|
||||
|
||||
#define R(i) h(i) += S1(e(i)) + Ch(e(i),f(i),g(i)) + K[i+j] + (j?blk2(i):blk0(i));\
|
||||
d(i) += h(i); h(i) += S0(a(i)) + Maj(a(i), b(i), c(i))
|
||||
|
||||
#ifdef _SHA256_UNROLL
|
||||
|
||||
#define RX_8(i) R(i+0); R(i+1); R(i+2); R(i+3); R(i+4); R(i+5); R(i+6); R(i+7);
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
static const uint32_t K[64] = {
|
||||
0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
|
||||
0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
|
||||
0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
|
||||
0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
|
||||
0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
|
||||
0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
|
||||
0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
|
||||
0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
|
||||
0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
|
||||
0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
|
||||
0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
|
||||
0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
|
||||
0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
|
||||
0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
|
||||
0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
|
||||
0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
|
||||
};
|
||||
|
||||
/*
 * Compresses one block into state.
 *
 * state - the eight chaining values, updated in place
 * data  - the 16 message words of the block
 *
 * The round macros (R, RX_8, blk0, blk2) refer to the locals W, j, data and
 * either a..h or T by name, so those names must stay as they are.
 */
static void
sha256_transform(uint32_t *state, const uint32_t *data)
{
  uint32_t W[16] = {0};
  unsigned j;
#ifdef _SHA256_UNROLL2
  uint32_t a = state[0], b = state[1], c = state[2], d = state[3];
  uint32_t e = state[4], f = state[5], g = state[6], h = state[7];
#else
  uint32_t T[8];
  for (j = 0; j < 8; j++)
    T[j] = state[j];
#endif

  /* 64 rounds, 16 per pass; j selects the K[] slice and, when non-zero,
     switches the macros from loading data to extending the schedule */
  j = 0;
  while (j < 64)
  {
#if defined(_SHA256_UNROLL) || defined(_SHA256_UNROLL2)
    RX_8(0); RX_8(8);
#else
    unsigned i;
    for (i = 0; i < 16; i++) { R(i); }
#endif
    j += 16;
  }

#ifdef _SHA256_UNROLL2
  state[0] += a;
  state[1] += b;
  state[2] += c;
  state[3] += d;
  state[4] += e;
  state[5] += f;
  state[6] += g;
  state[7] += h;
#else
  for (j = 0; j < 8; j++)
    state[j] += T[j];
#endif

  /* Wipe variables */
  /* memset(W, 0, sizeof(W)); */
  /* memset(T, 0, sizeof(T)); */
}
|
||||
|
||||
#undef S0
|
||||
#undef S1
|
||||
#undef s0
|
||||
#undef s1
|
||||
|
||||
static void
|
||||
sha256_write_byte_block(sha256_t *p)
|
||||
{
|
||||
uint32_t data32[16];
|
||||
unsigned i;
|
||||
for (i = 0; i < 16; i++)
|
||||
data32[i] =
|
||||
((uint32_t)(p->buffer[i * 4 ]) << 24) +
|
||||
((uint32_t)(p->buffer[i * 4 + 1]) << 16) +
|
||||
((uint32_t)(p->buffer[i * 4 + 2]) << 8) +
|
||||
((uint32_t)(p->buffer[i * 4 + 3]));
|
||||
sha256_transform(p->state, data32);
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
sha256_hash(unsigned char *buf, const unsigned char *data, size_t size)
|
||||
{
|
||||
sha256_t hash;
|
||||
sha256_init(&hash);
|
||||
sha256_update(&hash, data, size);
|
||||
sha256_final(&hash, buf);
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
sha256_update(sha256_t *p, const unsigned char *data, size_t size)
|
||||
{
|
||||
uint32_t curBufferPos = (uint32_t)p->count & 0x3F;
|
||||
while (size > 0)
|
||||
{
|
||||
p->buffer[curBufferPos++] = *data++;
|
||||
p->count++;
|
||||
size--;
|
||||
if (curBufferPos == 64)
|
||||
{
|
||||
curBufferPos = 0;
|
||||
sha256_write_byte_block(p);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
sha256_final(sha256_t *p, unsigned char *digest)
|
||||
{
|
||||
uint64_t lenInBits = (p->count << 3);
|
||||
uint32_t curBufferPos = (uint32_t)p->count & 0x3F;
|
||||
unsigned i;
|
||||
p->buffer[curBufferPos++] = 0x80;
|
||||
while (curBufferPos != (64 - 8))
|
||||
{
|
||||
curBufferPos &= 0x3F;
|
||||
if (curBufferPos == 0)
|
||||
sha256_write_byte_block(p);
|
||||
p->buffer[curBufferPos++] = 0;
|
||||
}
|
||||
for (i = 0; i < 8; i++)
|
||||
{
|
||||
p->buffer[curBufferPos++] = (unsigned char)(lenInBits >> 56);
|
||||
lenInBits <<= 8;
|
||||
}
|
||||
sha256_write_byte_block(p);
|
||||
|
||||
for (i = 0; i < 8; i++)
|
||||
{
|
||||
*digest++ = (unsigned char)(p->state[i] >> 24);
|
||||
*digest++ = (unsigned char)(p->state[i] >> 16);
|
||||
*digest++ = (unsigned char)(p->state[i] >> 8);
|
||||
*digest++ = (unsigned char)(p->state[i]);
|
||||
}
|
||||
sha256_init(p);
|
||||
}
|
|
@ -1,24 +0,0 @@
|
|||
/* Sha256.h -- SHA-256 Hash
|
||||
2010-06-11 : Igor Pavlov : Public domain */
|
||||
|
||||
#ifndef __CRYPTO_SHA256_H
#define __CRYPTO_SHA256_H

#include <stdlib.h>
#include <stdint.h>

/* C linkage so the header can be included directly from C++ sources. */
#if defined(__cplusplus)
extern "C" {
#endif

/* Size of a SHA-256 digest in bytes. */
#define SHA256_DIGEST_SIZE 32

/* Streaming SHA-256 state. */
typedef struct sha256_t
{
  uint32_t state[8];          /* chaining values */
  uint64_t count;             /* number of message bytes absorbed so far */
  unsigned char buffer[64];   /* bytes of the current, not yet complete block */
} sha256_t;

/* Prepares p for a new message. */
void sha256_init(sha256_t *p);
/* Absorbs size bytes of data; may be called repeatedly. */
void sha256_update(sha256_t *p, const unsigned char *data, size_t size);
/* Pads the message, writes SHA256_DIGEST_SIZE bytes to digest and
   re-initialises p. */
void sha256_final(sha256_t *p, unsigned char *digest);
/* One-shot hash of size bytes at data; buf receives SHA256_DIGEST_SIZE bytes. */
void sha256_hash(unsigned char *buf, const unsigned char *data, size_t size);

#if defined(__cplusplus)
}
#endif

#endif
|
|
@ -1,12 +0,0 @@
|
|||
{
|
||||
"name": "xxhash",
|
||||
"version": "0.8.2",
|
||||
"repo": "Cyan4973/xxhash",
|
||||
"description": "Extremely fast non-cryptographic hash algorithm",
|
||||
"keywords": ["xxhash", "hashing"],
|
||||
"license": "BSD-2-Clause",
|
||||
"src": [
|
||||
"xxhash.c",
|
||||
"xxhash.h"
|
||||
]
|
||||
}
|
|
@ -1,42 +0,0 @@
|
|||
/*
|
||||
* xxHash - Extremely Fast Hash algorithm
|
||||
* Copyright (C) 2012-2023 Yann Collet
|
||||
*
|
||||
* BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above
|
||||
* copyright notice, this list of conditions and the following disclaimer
|
||||
* in the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* You can contact the author at:
|
||||
* - xxHash homepage: https://www.xxhash.com
|
||||
* - xxHash source repository: https://github.com/Cyan4973/xxHash
|
||||
*/
|
||||
|
||||
/*
|
||||
* xxhash.c instantiates functions defined in xxhash.h
|
||||
*/
|
||||
|
||||
#define XXH_STATIC_LINKING_ONLY /* access advanced declarations */
|
||||
#define XXH_IMPLEMENTATION /* access definitions */
|
||||
|
||||
#include "xxhash.h"
|
File diff suppressed because it is too large
Load diff
|
@ -1,693 +0,0 @@
|
|||
#include "ggml.h"
|
||||
|
||||
#include <cstdlib> /* abort() */
|
||||
#include <cstddef>
|
||||
#include <cstdio>
|
||||
#include <string>
|
||||
#include <stdexcept>
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
|
||||
#include <sstream>
|
||||
#include <fstream>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include "xxhash/xxhash.h"
|
||||
#include "sha1/sha1.h"
|
||||
#include "sha256/sha256.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
// uuid.uuid5(uuid.NAMESPACE_URL, 'en.wikipedia.org/wiki/Llama.cpp')
|
||||
#define UUID_NAMESPACE_LLAMA_CPP "ef001206-dadc-5f6d-a15f-3359e577d4e5"
|
||||
#define UUID_NAMESPACE_LLAMA_CPP_HEX 0xef, 0x00, 0x12, 0x06, 0xda, 0xdc, 0x5f, 0x6d, 0xa1, 0x5f, 0x33, 0x59, 0xe5, 0x77, 0xd4, 0xe5
|
||||
|
||||
|
||||
#define HASH_TYPE_SHA256_STR "sha256"
|
||||
#define HASH_TYPE_SHA1_STR "sha1"
|
||||
#define HASH_TYPE_XXH64_STR "xxh64"
|
||||
#define HASH_TYPE_UUID_STR "uuid"
|
||||
|
||||
|
||||
// Exit codes of the hashing tool.
typedef enum {
    // All hash has been generated or validated
    HASH_EXIT_SUCCESS                = 0,
    // Generic Failure
    HASH_EXIT_FAILURE                = 1,
    // Hash mismatched during validation
    HASH_EXIT_MISMATCH               = 2,
    // Hash attempted validation but missing entry in manifest
    HASH_EXIT_MANIFEST_MISSING_ENTRY = 3,
    // Manifest is present, but we do not know any hash format within it
    HASH_EXIT_MANIFEST_UNKNOWN_HASH  = 4,
    // Manifest is either missing or not a known format
    HASH_EXIT_MANIFEST_FILE_ERROR    = 5
} hash_exit_code_t;
|
||||
|
||||
|
||||
// Outcome of checking one hash against the manifest.
typedef enum {
    HASH_MANIFEST_NOT_FOUND = 0,
    HASH_MANIFEST_MISMATCH  = 1,
    HASH_MANIFEST_OK        = 2,
} hash_manifest_result_t;
|
||||
|
||||
|
||||
// Command-line settings of the hashing tool; every switch defaults to off.
struct hash_params {
    // path of the GGUF file to hash
    std::string input;

    // which hashes to produce
    bool xxh64{false};
    bool sha1{false};
    bool sha256{false};
    bool uuid{false};

    // set by --no-layer
    bool no_layer{false};

    // manifest used by --check
    bool manifest_is_usable{false};
    std::string manifest_file;
};
|
||||
|
||||
// One flag per hash type, all initially false.
struct manifest_check_params {
    bool xxh64{false};
    bool sha1{false};
    bool sha256{false};
    bool uuid{false};
};
|
||||
|
||||
// Human readable name of a manifest check result; "?" for unknown values.
static char const * hash_manifest_result_to_str(hash_manifest_result_t value) {
    if (value == HASH_MANIFEST_NOT_FOUND) {
        return "Not Found";
    }
    if (value == HASH_MANIFEST_MISMATCH) {
        return "Mismatch";
    }
    if (value == HASH_MANIFEST_OK) {
        return "Ok";
    }
    return "?";
}
|
||||
|
||||
// Human readable name of an exit code; "?" for unknown values.
static char const * hash_exit_code_to_str(hash_exit_code_t value) {
    if (value == HASH_EXIT_SUCCESS) {
        return "Success";
    }
    if (value == HASH_EXIT_FAILURE) {
        return "Failure";
    }
    if (value == HASH_EXIT_MISMATCH) {
        return "Mismatch";
    }
    if (value == HASH_EXIT_MANIFEST_MISSING_ENTRY) {
        return "Manifest Missing Entry";
    }
    if (value == HASH_EXIT_MANIFEST_UNKNOWN_HASH) {
        return "Manifest Unknown Hash";
    }
    if (value == HASH_EXIT_MANIFEST_FILE_ERROR) {
        return "Manifest File Error";
    }
    return "?";
}
|
||||
|
||||
// Prints the command-line help to stdout.
//   executable - program name shown in the usage line (normally argv[0])
static void hash_print_usage(const char * executable) {
    printf("\n");
    printf("usage: %s [options] GGUF_IN\n", executable);
    printf("\n");
    printf("Hash a GGUF file");
    printf("\n");
    printf("options:\n");
    printf("  -h, --help              show this help message and exit\n");
    printf("      --xxh64             use xxh64 hash\n");
    printf("      --sha1              use sha1 hash\n");
    printf("      --sha256            use sha256 hash\n");
    printf("      --all               use all hash\n");
    printf("      --no-layer          exclude per layer hash\n");
    printf("      --uuid              generate UUIDv5 ID\n");
    printf("  -c, --check <manifest>  verify against a manifest\n");
    printf("\n");
}
|
||||
|
||||
static void hash_params_parse_ex(int argc, const char ** argv, hash_params & params) {
|
||||
std::string arg;
|
||||
bool invalid_param = false;
|
||||
const std::string arg_prefix = "--";
|
||||
|
||||
int arg_idx = 1;
|
||||
for (; arg_idx < argc && strncmp(argv[arg_idx], "--", 2) == 0; arg_idx++) {
|
||||
arg = argv[arg_idx];
|
||||
if (arg.compare(0, arg_prefix.size(), arg_prefix) == 0) {
|
||||
std::replace(arg.begin(), arg.end(), '_', '-');
|
||||
}
|
||||
|
||||
bool arg_found = false;
|
||||
if (arg == "-h" || arg == "--help") {
|
||||
hash_print_usage(argv[0]);
|
||||
exit(0);
|
||||
}
|
||||
|
||||
if (arg == "--xxh64") {
|
||||
arg_found = true;
|
||||
params.xxh64 = true;
|
||||
}
|
||||
|
||||
if (arg == "--sha1") {
|
||||
arg_found = true;
|
||||
params.sha1 = true;
|
||||
}
|
||||
|
||||
if (arg == "--uuid") {
|
||||
arg_found = true;
|
||||
params.uuid = true;
|
||||
}
|
||||
|
||||
if (arg == "--sha256") {
|
||||
arg_found = true;
|
||||
params.sha256 = true;
|
||||
}
|
||||
|
||||
if (arg == "--all") {
|
||||
arg_found = true;
|
||||
params.sha256 = true;
|
||||
params.sha1 = true;
|
||||
params.xxh64 = true;
|
||||
}
|
||||
|
||||
if (arg == "--no-layer") {
|
||||
arg_found = true;
|
||||
params.no_layer = true;
|
||||
}
|
||||
|
||||
if (arg == "-c" || arg == "--check") {
|
||||
if (++arg_idx >= argc) {
|
||||
invalid_param = true;
|
||||
break;
|
||||
}
|
||||
arg_found = true;
|
||||
params.manifest_file = argv[arg_idx];
|
||||
}
|
||||
|
||||
if (!arg_found) {
|
||||
throw std::invalid_argument("error: unknown argument: " + arg);
|
||||
}
|
||||
}
|
||||
|
||||
if (invalid_param) {
|
||||
throw std::invalid_argument("error: invalid parameter for argument:" + arg);
|
||||
}
|
||||
|
||||
if (argc - arg_idx < 1) {
|
||||
throw std::invalid_argument("error: bad arguments");
|
||||
}
|
||||
|
||||
params.input = argv[arg_idx++];
|
||||
}
|
||||
|
||||
static bool hash_params_parse(int argc, const char ** argv, hash_params & params) {
|
||||
bool result = true;
|
||||
try {
|
||||
hash_params_parse_ex(argc, argv, params);
|
||||
}
|
||||
catch (const std::invalid_argument & ex) {
|
||||
fprintf(stderr, "%s\n", ex.what());
|
||||
hash_print_usage(argv[0]);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
static bool manifest_type(const std::string & manifest_file, manifest_check_params & manifest_check) {
|
||||
if (manifest_file.empty()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
std::ifstream file(manifest_file);
|
||||
if (!file.is_open()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
std::string manifest_entry_line;
|
||||
while (getline(file, manifest_entry_line)) {
|
||||
// hash_type_str hash_str tensor_name
|
||||
// e.g. 'xxh64 f66e9cd66a4396a0 test.gguf:tensor_0'
|
||||
std::istringstream line_stream(manifest_entry_line);
|
||||
std::string file_hash_type;
|
||||
if (line_stream >> file_hash_type) {
|
||||
if (file_hash_type == HASH_TYPE_SHA256_STR) {
|
||||
manifest_check.sha256 = true;
|
||||
} else if (file_hash_type == HASH_TYPE_SHA1_STR) {
|
||||
manifest_check.sha1 = true;
|
||||
} else if (file_hash_type == HASH_TYPE_XXH64_STR) {
|
||||
manifest_check.xxh64 = true;
|
||||
} else if (file_hash_type == HASH_TYPE_UUID_STR) {
|
||||
manifest_check.uuid = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Looks up one (hash type, entry name) pair in the manifest file and compares
// its recorded hash with `hash_str`.
//
// The first line whose type and name both match decides the result:
// HASH_MANIFEST_OK when the hashes are equal, HASH_MANIFEST_MISMATCH otherwise.
// HASH_MANIFEST_NOT_FOUND is returned when the path is empty, the file cannot
// be opened, or no line matches. Lines with fewer than three fields are ignored.
static hash_manifest_result_t manifest_verify(const std::string& manifest_file, const std::string& hash_type_str, const std::string& hash_str, const std::string& tensor_name) {
    if (manifest_file.empty()) {
        return HASH_MANIFEST_NOT_FOUND;
    }

    std::ifstream manifest_stream(manifest_file);
    if (!manifest_stream.is_open()) {
        return HASH_MANIFEST_NOT_FOUND;
    }

    for (std::string entry; std::getline(manifest_stream, entry); ) {
        std::istringstream fields(entry);
        std::string entry_type;
        std::string entry_hash;
        std::string entry_name;
        if (!(fields >> entry_type >> entry_hash >> entry_name)) {
            continue;
        }

        if (entry_type == hash_type_str && entry_name == tensor_name) {
            if (entry_hash == hash_str) {
                return HASH_MANIFEST_OK;
            }
            return HASH_MANIFEST_MISMATCH;
        }
    }

    return HASH_MANIFEST_NOT_FOUND;
}
|
||||
|
||||
// Builds a version-5 UUID from a SHA-1 digest.
// Ref: https://www.rfc-editor.org/rfc/rfc9562.html#section-5.5
//
// sha1_digest: 20-byte digest; the caller is expected to have hashed the
//              namespace together with the data already.
// uuid:        receives the first 16 digest bytes with the version and
//              variant bits overwritten.
static void generate_uuidv5(const unsigned char sha1_digest[20], unsigned char uuid[16]) {
    int pos = 0;
    while (pos < 16) {
        uuid[pos] = sha1_digest[pos];
        pos++;
    }

    // High nibble of byte 6 carries the version: force it to 5.
    uuid[6] = (unsigned char) ((uuid[6] & 0x0F) | 0x50);

    // Top two bits of byte 8 carry the variant: force them to 0b10.
    uuid[8] = (unsigned char) ((uuid[8] & 0x3F) | 0x80);
}
|
||||
|
||||
static hash_exit_code_t gguf_hash(const hash_params & hash_params) {
|
||||
const std::string & fname = hash_params.input;
|
||||
struct ggml_context * ctx_data = NULL;
|
||||
|
||||
struct gguf_init_params params = {
|
||||
/*.no_alloc = */ false,
|
||||
/*.ctx = */ &ctx_data,
|
||||
};
|
||||
|
||||
// xxh64 init
|
||||
XXH64_state_t* xxh64_model_hash_state = NULL;
|
||||
if (hash_params.xxh64) {
|
||||
xxh64_model_hash_state = XXH64_createState();
|
||||
if (xxh64_model_hash_state==NULL) {
|
||||
abort();
|
||||
}
|
||||
|
||||
XXH64_hash_t const seed = 0;
|
||||
if (XXH64_reset(xxh64_model_hash_state, seed) == XXH_ERROR) {
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
// sha1 init
|
||||
SHA1_CTX sha1_model_hash_ctx;
|
||||
if (hash_params.sha1) {
|
||||
SHA1Init(&sha1_model_hash_ctx);
|
||||
}
|
||||
|
||||
// sha256 init
|
||||
sha256_t sha256_model_hash_ctx;
|
||||
if (hash_params.sha256) {
|
||||
sha256_init(&sha256_model_hash_ctx);
|
||||
}
|
||||
|
||||
// sha1 for uuid init
|
||||
SHA1_CTX sha1_for_uuid_ctx;
|
||||
if (hash_params.uuid) {
|
||||
unsigned char const uuidv5_namespace[] = {UUID_NAMESPACE_LLAMA_CPP_HEX};
|
||||
SHA1Init(&sha1_for_uuid_ctx);
|
||||
SHA1Update( &sha1_for_uuid_ctx, (unsigned char const *)uuidv5_namespace, sizeof(uuidv5_namespace));
|
||||
}
|
||||
|
||||
struct gguf_context * ctx = gguf_init_from_file(fname.c_str(), params);
|
||||
const int n_tensors = gguf_get_n_tensors(ctx);
|
||||
bool tensor_layer_in_manifest = false;
|
||||
bool model_in_manifest = false;
|
||||
bool tensor_layer_has_mismatch = false;
|
||||
bool model_has_mismatch = false;
|
||||
for (int i = 0; i < n_tensors; ++i) {
|
||||
const char * name = gguf_get_tensor_name(ctx, i);
|
||||
struct ggml_tensor * cur = ggml_get_tensor(ctx_data, name);
|
||||
auto n_bytes = ggml_nbytes(cur);
|
||||
auto *raw_data = cur->data;
|
||||
const std::string tensor_layer_name = fname + ":" + name;
|
||||
|
||||
if (hash_params.xxh64) {
|
||||
|
||||
if (!hash_params.no_layer) {
|
||||
// Per Layer Hash
|
||||
XXH64_hash_t hash = XXH64(raw_data, n_bytes, 0);
|
||||
|
||||
char hex_result[17];
|
||||
for (int offset = 0; offset < 8; offset++) {
|
||||
unsigned int shift_bits_by = (8 * (8 - offset - 1));
|
||||
snprintf( ( hex_result + (2*offset)), sizeof(hex_result) - (2*offset), "%02x", (unsigned char) (hash >> shift_bits_by)&0xff);
|
||||
}
|
||||
|
||||
if (hash_params.manifest_is_usable) {
|
||||
hash_manifest_result_t verify_result = manifest_verify(hash_params.manifest_file, HASH_TYPE_XXH64_STR, hex_result, tensor_layer_name);
|
||||
|
||||
switch (verify_result) {
|
||||
case HASH_MANIFEST_NOT_FOUND:
|
||||
break;
|
||||
case HASH_MANIFEST_MISMATCH:
|
||||
tensor_layer_in_manifest = true;
|
||||
tensor_layer_has_mismatch = true;
|
||||
break;
|
||||
case HASH_MANIFEST_OK:
|
||||
tensor_layer_in_manifest = true;
|
||||
break;
|
||||
}
|
||||
|
||||
printf("%-8s %-s %s - %s\n", HASH_TYPE_XXH64_STR, hex_result, tensor_layer_name.c_str(), hash_manifest_result_to_str(verify_result));
|
||||
} else {
|
||||
printf("%-8s %-s %s\n", HASH_TYPE_XXH64_STR, hex_result, tensor_layer_name.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
// Overall Model Hash
|
||||
if (XXH64_update(xxh64_model_hash_state, raw_data, n_bytes) == XXH_ERROR) abort();
|
||||
}
|
||||
|
||||
if (hash_params.sha1) {
|
||||
|
||||
if (!hash_params.no_layer) {
|
||||
// Per Layer Hash
|
||||
char result[21]; // sha1 outputs 20 bytes
|
||||
SHA1( result, (const char *)raw_data, n_bytes);
|
||||
|
||||
char hex_result[41] = {0};
|
||||
for (int offset = 0; offset < 20; offset++) {
|
||||
snprintf( ( hex_result + (2*offset)), sizeof(hex_result) - (2*offset), "%02x", result[offset]&0xff);
|
||||
}
|
||||
|
||||
if (hash_params.manifest_is_usable) {
|
||||
hash_manifest_result_t verify_result = manifest_verify(hash_params.manifest_file, HASH_TYPE_SHA1_STR, hex_result, tensor_layer_name);
|
||||
|
||||
switch (verify_result) {
|
||||
case HASH_MANIFEST_NOT_FOUND:
|
||||
break;
|
||||
case HASH_MANIFEST_MISMATCH:
|
||||
tensor_layer_in_manifest = true;
|
||||
tensor_layer_has_mismatch = true;
|
||||
break;
|
||||
case HASH_MANIFEST_OK:
|
||||
tensor_layer_in_manifest = true;
|
||||
break;
|
||||
}
|
||||
|
||||
printf("%-8s %-s %s - %s\n", HASH_TYPE_SHA1_STR, hex_result, tensor_layer_name.c_str(), hash_manifest_result_to_str(verify_result));
|
||||
} else {
|
||||
printf("%-8s %-s %s\n", HASH_TYPE_SHA1_STR, hex_result, tensor_layer_name.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
// Overall Model Hash
|
||||
SHA1Update( &sha1_model_hash_ctx, (unsigned char const *)raw_data, n_bytes);
|
||||
}
|
||||
|
||||
if (hash_params.sha256) {
|
||||
|
||||
if (!hash_params.no_layer) {
|
||||
// Per Layer Hash
|
||||
unsigned char result[SHA256_DIGEST_SIZE]; // sha256 outputs 32 bytes
|
||||
sha256_hash((unsigned char*) result, (const unsigned char *)raw_data, n_bytes);
|
||||
|
||||
char hex_result[SHA256_DIGEST_SIZE * 2 + 1] = {0};
|
||||
for (int offset = 0; offset < SHA256_DIGEST_SIZE; offset++) {
|
||||
snprintf( ( hex_result + (2*offset)), sizeof(hex_result) - (2*offset), "%02x", result[offset]&0xff);
|
||||
}
|
||||
|
||||
if (hash_params.manifest_is_usable) {
|
||||
hash_manifest_result_t verify_result = manifest_verify(hash_params.manifest_file, HASH_TYPE_SHA256_STR, hex_result, tensor_layer_name);
|
||||
|
||||
switch (verify_result) {
|
||||
case HASH_MANIFEST_NOT_FOUND:
|
||||
break;
|
||||
case HASH_MANIFEST_MISMATCH:
|
||||
tensor_layer_in_manifest = true;
|
||||
tensor_layer_has_mismatch = true;
|
||||
break;
|
||||
case HASH_MANIFEST_OK:
|
||||
tensor_layer_in_manifest = true;
|
||||
break;
|
||||
}
|
||||
|
||||
printf("%-8s %-s %s - %s\n", HASH_TYPE_SHA256_STR, hex_result, tensor_layer_name.c_str(), hash_manifest_result_to_str(verify_result));
|
||||
} else {
|
||||
printf("%-8s %-s %s\n", HASH_TYPE_SHA256_STR, hex_result, tensor_layer_name.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
// Overall Model Hash
|
||||
sha256_update( &sha256_model_hash_ctx, (unsigned char const *)raw_data, n_bytes);
|
||||
}
|
||||
|
||||
if (hash_params.uuid) {
|
||||
SHA1Update( &sha1_for_uuid_ctx, (unsigned char const *)raw_data, n_bytes);
|
||||
}
|
||||
}
|
||||
|
||||
if (hash_params.xxh64) {
|
||||
XXH64_hash_t const hash = XXH64_digest(xxh64_model_hash_state);
|
||||
|
||||
char hex_result[17];
|
||||
for (int offset = 0; offset < 8; offset++) {
|
||||
unsigned int shift_bits_by = (8 * (8 - offset - 1));
|
||||
snprintf( ( hex_result + (2*offset)), sizeof(hex_result) - (2*offset), "%02x", (unsigned char) (hash >> shift_bits_by)&0xff);
|
||||
}
|
||||
|
||||
if (hash_params.manifest_is_usable) {
|
||||
hash_manifest_result_t verify_result = manifest_verify(hash_params.manifest_file, HASH_TYPE_XXH64_STR, hex_result, fname);
|
||||
|
||||
switch (verify_result) {
|
||||
case HASH_MANIFEST_NOT_FOUND:
|
||||
break;
|
||||
case HASH_MANIFEST_MISMATCH:
|
||||
model_in_manifest = true;
|
||||
model_has_mismatch = true;
|
||||
break;
|
||||
case HASH_MANIFEST_OK:
|
||||
model_in_manifest = true;
|
||||
break;
|
||||
}
|
||||
|
||||
printf("%-8s %-s %s - %s\n", HASH_TYPE_XXH64_STR, hex_result, fname.c_str(), hash_manifest_result_to_str(verify_result));
|
||||
} else {
|
||||
printf("%-8s %-s %s\n", HASH_TYPE_XXH64_STR, hex_result, fname.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
if (hash_params.sha1) {
|
||||
unsigned char result[21];
|
||||
SHA1Final(result, &sha1_model_hash_ctx);
|
||||
|
||||
char hex_result[41];
|
||||
for (int offset = 0; offset < 20; offset++) {
|
||||
snprintf( ( hex_result + (2*offset)), sizeof(hex_result) - (2*offset), "%02x", result[offset]&0xff);
|
||||
}
|
||||
|
||||
if (hash_params.manifest_is_usable) {
|
||||
hash_manifest_result_t verify_result = manifest_verify(hash_params.manifest_file, HASH_TYPE_SHA1_STR, hex_result, fname);
|
||||
|
||||
switch (verify_result) {
|
||||
case HASH_MANIFEST_NOT_FOUND:
|
||||
break;
|
||||
case HASH_MANIFEST_MISMATCH:
|
||||
model_in_manifest = true;
|
||||
model_has_mismatch = true;
|
||||
break;
|
||||
case HASH_MANIFEST_OK:
|
||||
model_in_manifest = true;
|
||||
break;
|
||||
}
|
||||
|
||||
printf("%-8s %-s %s - %s\n", HASH_TYPE_SHA1_STR, hex_result, fname.c_str(), hash_manifest_result_to_str(verify_result));
|
||||
} else {
|
||||
printf("%-8s %-s %s\n", HASH_TYPE_SHA1_STR, hex_result, fname.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
if (hash_params.sha256) {
|
||||
unsigned char result[SHA256_DIGEST_SIZE]; // sha256 outputs 32 bytes
|
||||
sha256_final( &sha256_model_hash_ctx, result);
|
||||
|
||||
char hex_result[SHA256_DIGEST_SIZE * 2 + 1] = {0};
|
||||
for (int offset = 0; offset < SHA256_DIGEST_SIZE; offset++) {
|
||||
snprintf( ( hex_result + (2*offset)), sizeof(hex_result) - (2*offset), "%02x", result[offset]&0xff);
|
||||
}
|
||||
|
||||
if (hash_params.manifest_is_usable) {
|
||||
hash_manifest_result_t verify_result = manifest_verify(hash_params.manifest_file, HASH_TYPE_SHA256_STR, hex_result, fname);
|
||||
|
||||
switch (verify_result) {
|
||||
case HASH_MANIFEST_NOT_FOUND:
|
||||
break;
|
||||
case HASH_MANIFEST_MISMATCH:
|
||||
model_in_manifest = true;
|
||||
model_has_mismatch = true;
|
||||
break;
|
||||
case HASH_MANIFEST_OK:
|
||||
model_in_manifest = true;
|
||||
break;
|
||||
}
|
||||
|
||||
printf("%-8s %-s %s - %s\n", HASH_TYPE_SHA256_STR, hex_result, fname.c_str(), hash_manifest_result_to_str(verify_result));
|
||||
} else {
|
||||
printf("%-8s %-s %s\n", HASH_TYPE_SHA256_STR, hex_result, fname.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
if (hash_params.uuid) {
|
||||
unsigned char result[21];
|
||||
SHA1Final(result, &sha1_for_uuid_ctx);
|
||||
|
||||
unsigned char uuid[16];
|
||||
generate_uuidv5(result, uuid);
|
||||
|
||||
char string_buffer[37] = {0};
|
||||
snprintf(string_buffer, sizeof(string_buffer), "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
|
||||
uuid[0], uuid[1], uuid[2], uuid[3],
|
||||
uuid[4], uuid[5], uuid[6], uuid[7],
|
||||
uuid[8], uuid[9], uuid[10], uuid[11],
|
||||
uuid[12], uuid[13], uuid[14], uuid[15]);
|
||||
|
||||
if (hash_params.manifest_is_usable) {
|
||||
hash_manifest_result_t verify_result = manifest_verify(hash_params.manifest_file, HASH_TYPE_SHA256_STR, string_buffer, fname);
|
||||
|
||||
switch (verify_result) {
|
||||
case HASH_MANIFEST_NOT_FOUND:
|
||||
break;
|
||||
case HASH_MANIFEST_MISMATCH:
|
||||
model_in_manifest = true;
|
||||
model_has_mismatch = true;
|
||||
break;
|
||||
case HASH_MANIFEST_OK:
|
||||
model_in_manifest = true;
|
||||
break;
|
||||
}
|
||||
|
||||
printf("%-8s %-s %s - %s\n", HASH_TYPE_UUID_STR, string_buffer, fname.c_str(), hash_manifest_result_to_str(verify_result));
|
||||
} else {
|
||||
printf("%-8s %-s %s\n", HASH_TYPE_UUID_STR, string_buffer, fname.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
ggml_free(ctx_data);
|
||||
gguf_free(ctx);
|
||||
|
||||
|
||||
if (hash_params.manifest_is_usable) {
|
||||
// In hash verification mode
|
||||
|
||||
if (!model_in_manifest) {
|
||||
// model missing in manifest?
|
||||
|
||||
// Check tensor layer...
|
||||
if (!tensor_layer_in_manifest) {
|
||||
// Still missing? Maybe we are reading the wrong manifest.
|
||||
return HASH_EXIT_MANIFEST_MISSING_ENTRY;
|
||||
}
|
||||
|
||||
if (tensor_layer_has_mismatch) {
|
||||
// Per tensor check found error
|
||||
return HASH_EXIT_FAILURE;
|
||||
}
|
||||
|
||||
// All per tensor layer checks passed? Sounds good enough.
|
||||
return HASH_EXIT_SUCCESS;
|
||||
}
|
||||
|
||||
// Overall model check passed, but let's check per layer just in case
|
||||
// If missing, we don't care too much as the overall model checked
|
||||
if (tensor_layer_in_manifest && tensor_layer_has_mismatch) {
|
||||
return HASH_EXIT_FAILURE;
|
||||
}
|
||||
|
||||
if (model_has_mismatch) {
|
||||
// model has failed hash somewhere in the model
|
||||
return HASH_EXIT_FAILURE;
|
||||
}
|
||||
|
||||
// All checks appears to be fine
|
||||
return HASH_EXIT_SUCCESS;
|
||||
}
|
||||
|
||||
// In hash generation mode
|
||||
return HASH_EXIT_SUCCESS;
|
||||
}
|
||||
|
||||
int main(int argc, const char ** argv) {
|
||||
hash_params params;
|
||||
manifest_check_params manifest_check;
|
||||
hash_params_parse(argc, argv, params);
|
||||
|
||||
if (!params.manifest_file.empty()) {
|
||||
if (!manifest_type(params.manifest_file, manifest_check)) {
|
||||
printf("ERROR cannot open manifest %s", params.manifest_file.c_str());
|
||||
return HASH_EXIT_MANIFEST_FILE_ERROR;
|
||||
}
|
||||
|
||||
if (!manifest_check.sha256 && !manifest_check.sha1 && !manifest_check.xxh64 && !manifest_check.uuid) {
|
||||
printf("ERROR manifest does not have any known hash format in %s", params.manifest_file.c_str());
|
||||
return HASH_EXIT_MANIFEST_UNKNOWN_HASH;
|
||||
}
|
||||
|
||||
printf("manifest %s", params.manifest_file.c_str());
|
||||
|
||||
if (manifest_check.sha256) {
|
||||
printf(" sha256");
|
||||
}
|
||||
|
||||
if (manifest_check.sha1) {
|
||||
printf(" sha1");
|
||||
}
|
||||
|
||||
if (manifest_check.xxh64) {
|
||||
printf(" xxh64");
|
||||
}
|
||||
|
||||
if (manifest_check.uuid) {
|
||||
printf(" uuid");
|
||||
}
|
||||
|
||||
printf("\n");
|
||||
|
||||
// Autoselect the highest security hash if manifest is provided but
|
||||
// the user has not specifically defined the hash they care about
|
||||
if (!params.xxh64 && !params.sha1 && !params.uuid && !params.sha256) {
|
||||
// User has not selected a specific value, pick most secure hash
|
||||
if (manifest_check.sha256) {
|
||||
params.sha256 = true;
|
||||
} else if (manifest_check.sha1) {
|
||||
params.sha1 = true;
|
||||
} else if (manifest_check.xxh64) {
|
||||
params.xxh64 = true;
|
||||
} else if (manifest_check.uuid) {
|
||||
params.uuid = true;
|
||||
}
|
||||
}
|
||||
|
||||
params.manifest_is_usable = true;
|
||||
}
|
||||
|
||||
// By default if no swich argument provided, assume xxh64
|
||||
if (!params.xxh64 && !params.sha1 && !params.uuid && !params.sha256) {
|
||||
params.xxh64 = true;
|
||||
}
|
||||
|
||||
hash_exit_code_t exit_code = gguf_hash(params);
|
||||
|
||||
if (params.manifest_is_usable) {
|
||||
printf("\nVerification results for %s - %s\n", params.manifest_file.c_str(), hash_exit_code_to_str(exit_code));
|
||||
}
|
||||
|
||||
return exit_code;
|
||||
}
|
|
@ -741,7 +741,7 @@ void ggml_gemv_q4_0_8x8_q8_0(int n, float * restrict s, size_t bs, const void *
|
|||
UNUSED(blocklen);
|
||||
|
||||
#if ! ((defined(_MSC_VER)) && ! defined(__clang__)) && defined(__aarch64__)
|
||||
#if defined(__ARM_FEATURE_SVE)
|
||||
#if defined(__ARM_FEATURE_SVE) && ! defined(LLAMA_NOSVE)
|
||||
if (ggml_cpu_has_sve() && ggml_cpu_get_sve_cnt() == QK8_0) {
|
||||
const void * b_ptr = vx;
|
||||
const void * a_ptr = vy;
|
||||
|
@ -2081,7 +2081,7 @@ void ggml_gemm_q4_0_8x8_q8_0(int n, float * restrict s, size_t bs, const void *
|
|||
UNUSED(blocklen);
|
||||
|
||||
#if ! ((defined(_MSC_VER)) && ! defined(__clang__)) && defined(__aarch64__)
|
||||
#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FEATURE_MATMUL_INT8)
|
||||
#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FEATURE_MATMUL_INT8) && ! defined(LLAMA_NOSVE)
|
||||
if (ggml_cpu_has_sve() && ggml_cpu_has_matmul_int8() && ggml_cpu_get_sve_cnt() == QK8_0) {
|
||||
const void * b_ptr = vx;
|
||||
const void * a_ptr = vy;
|
||||
|
|
|
@ -59,7 +59,7 @@ struct ggml_compute_params {
|
|||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__ARM_FEATURE_SVE)
|
||||
#if defined(__ARM_FEATURE_SVE) && ! defined(LLAMA_NOSVE)
|
||||
#include <arm_sve.h>
|
||||
#include <sys/prctl.h>
|
||||
#endif
|
||||
|
|
|
@ -1829,7 +1829,7 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * restrict s, size_t bs, const void * r
|
|||
int ib = 0;
|
||||
float sumf = 0;
|
||||
|
||||
#if defined(__ARM_FEATURE_SVE)
|
||||
#if defined(__ARM_FEATURE_SVE) && ! defined(LLAMA_NOSVE)
|
||||
svfloat32_t sumv0 = svdup_n_f32(0.0f);
|
||||
svfloat32_t sumv1 = svdup_n_f32(0.0f);
|
||||
|
||||
|
@ -3419,7 +3419,7 @@ void ggml_vec_dot_q8_0_q8_0(int n, float * restrict s, size_t bs, const void * r
|
|||
int ib = 0;
|
||||
float sumf = 0;
|
||||
|
||||
#if defined(__ARM_FEATURE_SVE)
|
||||
#if defined(__ARM_FEATURE_SVE) && ! defined(LLAMA_NOSVE)
|
||||
svfloat32_t sumv0 = svdup_n_f32(0.0f);
|
||||
svfloat32_t sumv1 = svdup_n_f32(0.0f);
|
||||
|
||||
|
|
|
@ -40,7 +40,7 @@
|
|||
#include <omp.h>
|
||||
#endif
|
||||
|
||||
#if defined(__ARM_FEATURE_SVE) || defined(__ARM_FEATURE_MATMUL_INT8)
|
||||
#if (defined(__ARM_FEATURE_SVE) && ! defined(LLAMA_NOSVE)) || defined(__ARM_FEATURE_MATMUL_INT8)
|
||||
#undef GGML_USE_LLAMAFILE
|
||||
#endif
|
||||
|
||||
|
@ -2442,7 +2442,7 @@ static void ggml_init_arm_arch_features(void) {
|
|||
ggml_arm_arch_features.has_i8mm = !!(hwcap2 & HWCAP2_I8MM);
|
||||
ggml_arm_arch_features.has_sve = !!(hwcap & HWCAP_SVE);
|
||||
|
||||
#if defined(__ARM_FEATURE_SVE)
|
||||
#if defined(__ARM_FEATURE_SVE) && ! defined(LLAMA_NOSVE)
|
||||
ggml_arm_arch_features.sve_cnt = PR_SVE_VL_LEN_MASK & prctl(PR_SVE_GET_VL);
|
||||
#endif
|
||||
#elif defined(__APPLE__)
|
||||
|
@ -2479,7 +2479,7 @@ static void ggml_init_arm_arch_features(void) {
|
|||
ggml_arm_arch_features.has_i8mm = 0;
|
||||
#endif
|
||||
|
||||
#if defined(__ARM_FEATURE_SVE)
|
||||
#if defined(__ARM_FEATURE_SVE) && ! defined(LLAMA_NOSVE)
|
||||
ggml_arm_arch_features.has_sve = 1;
|
||||
ggml_arm_arch_features.sve_cnt = 16;
|
||||
#else
|
||||
|
@ -13946,7 +13946,7 @@ int ggml_cpu_has_dotprod(void) {
|
|||
}
|
||||
|
||||
int ggml_cpu_has_sve(void) {
|
||||
#if defined(__ARM_ARCH) && defined(__ARM_FEATURE_SVE)
|
||||
#if defined(__ARM_ARCH) && defined(__ARM_FEATURE_SVE) && ! defined(LLAMA_NOSVE)
|
||||
return ggml_arm_arch_features.has_sve;
|
||||
#else
|
||||
return 0;
|
||||
|
@ -13962,7 +13962,7 @@ int ggml_cpu_has_matmul_int8(void) {
|
|||
}
|
||||
|
||||
int ggml_cpu_get_sve_cnt(void) {
|
||||
#if defined(__ARM_ARCH) && defined(__ARM_FEATURE_SVE)
|
||||
#if defined(__ARM_ARCH) && defined(__ARM_FEATURE_SVE) && ! defined(LLAMA_NOSVE)
|
||||
return ggml_arm_arch_features.sve_cnt;
|
||||
#else
|
||||
return 0;
|
||||
|
|
|
@ -10,7 +10,7 @@
|
|||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
#ifdef __ARM_FEATURE_SVE
|
||||
#if defined(__ARM_FEATURE_SVE) && ! defined(LLAMA_NOSVE)
|
||||
#include <arm_sve.h>
|
||||
#endif // __ARM_FEATURE_SVE
|
||||
|
||||
|
|
|
@ -4344,7 +4344,7 @@ def main(launch_args,start_server=True):
|
|||
show_gui()
|
||||
except Exception as ex:
|
||||
exitcounter = 999
|
||||
ermsg = "Reason: " + str(ex) + "\nFile selection GUI unsupported.\ncustomtkinter python module required!\n\nPlease check command line options with --help"
|
||||
ermsg = "Reason: " + str(ex) + "\nFile selection GUI unsupported.\ncustomtkinter python module required!\n\nYou must use the command line instead, e.g. python ./koboldcpp.py --help"
|
||||
show_gui_msgbox("Warning, GUI failed to start",ermsg)
|
||||
if args.skiplauncher:
|
||||
print("Note: In order to use --skiplauncher, you need to specify a model with --model")
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue