mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-11 09:34:37 +00:00
Merge branch 'upstream' into concedo_experimental
# Conflicts: # .devops/main-intel.Dockerfile # .devops/main-vulkan.Dockerfile # .devops/server-intel.Dockerfile # .devops/server-vulkan.Dockerfile # .github/workflows/bench.yml # .github/workflows/build.yml # .github/workflows/python-lint.yml # .github/workflows/server.yml # .gitignore # Makefile # README-sycl.md # README.md # ci/run.sh # flake.lock # llama.cpp # models/ggml-vocab-falcon.gguf # models/ggml-vocab-llama-spm.gguf # models/ggml-vocab-mpt.gguf # models/ggml-vocab-stablelm.gguf # models/ggml-vocab-starcoder.gguf # requirements.txt # scripts/check-requirements.sh # tests/CMakeLists.txt # tests/test-backend-ops.cpp # tests/test-grammar-integration.cpp # tests/test-tokenizer-0-bpe.py # tests/test-tokenizer-0-spm.py # tests/test-tokenizer-1-spm.cpp
This commit is contained in:
commit
17a24d753c
52 changed files with 4978 additions and 1249 deletions
|
@ -24,6 +24,7 @@ struct Stats {
|
|||
};
|
||||
|
||||
struct StatParams {
|
||||
std::string dataset;
|
||||
std::string ofile = "imatrix.dat";
|
||||
int n_output_frequency = 10;
|
||||
int verbosity = 1;
|
||||
|
@ -47,7 +48,7 @@ private:
|
|||
std::vector<float> m_src1_data;
|
||||
std::vector<char> m_ids; // the expert ids from ggml_mul_mat_id
|
||||
//
|
||||
void save_imatrix(const char * file_name) const;
|
||||
void save_imatrix(const char * file_name, const char * dataset) const;
|
||||
void keep_imatrix(int ncall) const;
|
||||
};
|
||||
|
||||
|
@ -200,7 +201,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
|
|||
}
|
||||
|
||||
void IMatrixCollector::save_imatrix() const {
|
||||
save_imatrix(m_params.ofile.empty() ? "imatrix.dat" : m_params.ofile.c_str());
|
||||
save_imatrix(m_params.ofile.empty() ? "imatrix.dat" : m_params.ofile.c_str(), m_params.dataset.c_str());
|
||||
}
|
||||
|
||||
void IMatrixCollector::keep_imatrix(int ncall) const {
|
||||
|
@ -208,24 +209,33 @@ void IMatrixCollector::keep_imatrix(int ncall) const {
|
|||
if (file_name.empty()) file_name = "imatrix.dat";
|
||||
file_name += ".at_";
|
||||
file_name += std::to_string(ncall);
|
||||
save_imatrix(file_name.c_str());
|
||||
save_imatrix(file_name.c_str(), m_params.dataset.c_str());
|
||||
}
|
||||
|
||||
void IMatrixCollector::save_imatrix(const char * fname) const {
|
||||
void IMatrixCollector::save_imatrix(const char * fname, const char * dataset) const {
|
||||
std::ofstream out(fname, std::ios::binary);
|
||||
int n_entries = m_stats.size();
|
||||
out.write((const char*)&n_entries, sizeof(n_entries));
|
||||
for (auto& p : m_stats) {
|
||||
out.write((const char *) &n_entries, sizeof(n_entries));
|
||||
for (const auto & p : m_stats) {
|
||||
int len = p.first.size();
|
||||
out.write((const char*)&len, sizeof(len));
|
||||
out.write((const char *) &len, sizeof(len));
|
||||
out.write(p.first.c_str(), len);
|
||||
out.write((const char*)&p.second.ncall, sizeof(p.second.ncall));
|
||||
out.write((const char *) &p.second.ncall, sizeof(p.second.ncall));
|
||||
int nval = p.second.values.size();
|
||||
out.write((const char*)&nval, sizeof(nval));
|
||||
if (nval > 0) out.write((const char*)p.second.values.data(), nval*sizeof(float));
|
||||
out.write((const char *) &nval, sizeof(nval));
|
||||
if (nval > 0) out.write((const char *) p.second.values.data(), nval * sizeof(float));
|
||||
}
|
||||
|
||||
// Write the number of call the matrix was computed with
|
||||
out.write((const char *) &m_last_call, sizeof(m_last_call));
|
||||
|
||||
// Write the dataset name at the end of the file to later on specify it in quantize
|
||||
int n_dataset = strlen(dataset);
|
||||
out.write((const char *) &n_dataset, sizeof(n_dataset));
|
||||
out.write(dataset, n_dataset);
|
||||
|
||||
if (m_params.verbosity > 0) {
|
||||
fprintf(stderr, "\n%s: stored collected data after %d chunks in %s\n",__func__,m_last_call,fname);
|
||||
fprintf(stderr, "\n%s: stored collected data after %d chunks in %s\n", __func__, m_last_call, fname);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -548,6 +558,29 @@ int main(int argc, char ** argv) {
|
|||
}
|
||||
}
|
||||
|
||||
gpt_params params;
|
||||
params.n_batch = 512;
|
||||
if (!gpt_params_parse(args.size(), args.data(), params)) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
params.logits_all = true;
|
||||
params.n_batch = std::min(params.n_batch, params.n_ctx);
|
||||
|
||||
print_build_info();
|
||||
|
||||
if (params.seed == LLAMA_DEFAULT_SEED) {
|
||||
params.seed = time(NULL);
|
||||
}
|
||||
|
||||
fprintf(stderr, "%s: seed = %u\n", __func__, params.seed);
|
||||
|
||||
std::mt19937 rng(params.seed);
|
||||
if (params.random_prompt) {
|
||||
params.prompt = gpt_random_prompt(rng);
|
||||
}
|
||||
|
||||
sparams.dataset = params.prompt_file;
|
||||
g_collector.set_parameters(std::move(sparams));
|
||||
|
||||
if (!combine_files.empty()) {
|
||||
|
@ -586,28 +619,6 @@ int main(int argc, char ** argv) {
|
|||
}
|
||||
}
|
||||
|
||||
gpt_params params;
|
||||
params.n_batch = 512;
|
||||
if (!gpt_params_parse(args.size(), args.data(), params)) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
params.logits_all = true;
|
||||
params.n_batch = std::min(params.n_batch, params.n_ctx);
|
||||
|
||||
print_build_info();
|
||||
|
||||
if (params.seed == LLAMA_DEFAULT_SEED) {
|
||||
params.seed = time(NULL);
|
||||
}
|
||||
|
||||
fprintf(stderr, "%s: seed = %u\n", __func__, params.seed);
|
||||
|
||||
std::mt19937 rng(params.seed);
|
||||
if (params.random_prompt) {
|
||||
params.prompt = gpt_random_prompt(rng);
|
||||
}
|
||||
|
||||
llama_backend_init();
|
||||
llama_numa_init(params.numa);
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue