Merge branch 'upstream' into concedo_experimental

# Conflicts:
#	.devops/main-intel.Dockerfile
#	.devops/main-vulkan.Dockerfile
#	.devops/server-intel.Dockerfile
#	.devops/server-vulkan.Dockerfile
#	.github/workflows/bench.yml
#	.github/workflows/build.yml
#	.github/workflows/python-lint.yml
#	.github/workflows/server.yml
#	.gitignore
#	Makefile
#	README-sycl.md
#	README.md
#	ci/run.sh
#	flake.lock
#	llama.cpp
#	models/ggml-vocab-falcon.gguf
#	models/ggml-vocab-llama-spm.gguf
#	models/ggml-vocab-mpt.gguf
#	models/ggml-vocab-stablelm.gguf
#	models/ggml-vocab-starcoder.gguf
#	requirements.txt
#	scripts/check-requirements.sh
#	tests/CMakeLists.txt
#	tests/test-backend-ops.cpp
#	tests/test-grammar-integration.cpp
#	tests/test-tokenizer-0-bpe.py
#	tests/test-tokenizer-0-spm.py
#	tests/test-tokenizer-1-spm.cpp
This commit is contained in:
Concedo 2024-04-30 21:04:17 +08:00
commit 17a24d753c
52 changed files with 4978 additions and 1249 deletions

View file

@ -24,6 +24,7 @@ struct Stats {
};
struct StatParams {
std::string dataset;
std::string ofile = "imatrix.dat";
int n_output_frequency = 10;
int verbosity = 1;
@ -47,7 +48,7 @@ private:
std::vector<float> m_src1_data;
std::vector<char> m_ids; // the expert ids from ggml_mul_mat_id
//
void save_imatrix(const char * file_name) const;
void save_imatrix(const char * file_name, const char * dataset) const;
void keep_imatrix(int ncall) const;
};
@ -200,7 +201,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
}
void IMatrixCollector::save_imatrix() const {
save_imatrix(m_params.ofile.empty() ? "imatrix.dat" : m_params.ofile.c_str());
save_imatrix(m_params.ofile.empty() ? "imatrix.dat" : m_params.ofile.c_str(), m_params.dataset.c_str());
}
void IMatrixCollector::keep_imatrix(int ncall) const {
@ -208,24 +209,33 @@ void IMatrixCollector::keep_imatrix(int ncall) const {
if (file_name.empty()) file_name = "imatrix.dat";
file_name += ".at_";
file_name += std::to_string(ncall);
save_imatrix(file_name.c_str());
save_imatrix(file_name.c_str(), m_params.dataset.c_str());
}
void IMatrixCollector::save_imatrix(const char * fname) const {
void IMatrixCollector::save_imatrix(const char * fname, const char * dataset) const {
std::ofstream out(fname, std::ios::binary);
int n_entries = m_stats.size();
out.write((const char*)&n_entries, sizeof(n_entries));
for (auto& p : m_stats) {
out.write((const char *) &n_entries, sizeof(n_entries));
for (const auto & p : m_stats) {
int len = p.first.size();
out.write((const char*)&len, sizeof(len));
out.write((const char *) &len, sizeof(len));
out.write(p.first.c_str(), len);
out.write((const char*)&p.second.ncall, sizeof(p.second.ncall));
out.write((const char *) &p.second.ncall, sizeof(p.second.ncall));
int nval = p.second.values.size();
out.write((const char*)&nval, sizeof(nval));
if (nval > 0) out.write((const char*)p.second.values.data(), nval*sizeof(float));
out.write((const char *) &nval, sizeof(nval));
if (nval > 0) out.write((const char *) p.second.values.data(), nval * sizeof(float));
}
// Write the number of call the matrix was computed with
out.write((const char *) &m_last_call, sizeof(m_last_call));
// Write the dataset name at the end of the file to later on specify it in quantize
int n_dataset = strlen(dataset);
out.write((const char *) &n_dataset, sizeof(n_dataset));
out.write(dataset, n_dataset);
if (m_params.verbosity > 0) {
fprintf(stderr, "\n%s: stored collected data after %d chunks in %s\n",__func__,m_last_call,fname);
fprintf(stderr, "\n%s: stored collected data after %d chunks in %s\n", __func__, m_last_call, fname);
}
}
@ -548,6 +558,29 @@ int main(int argc, char ** argv) {
}
}
gpt_params params;
params.n_batch = 512;
if (!gpt_params_parse(args.size(), args.data(), params)) {
return 1;
}
params.logits_all = true;
params.n_batch = std::min(params.n_batch, params.n_ctx);
print_build_info();
if (params.seed == LLAMA_DEFAULT_SEED) {
params.seed = time(NULL);
}
fprintf(stderr, "%s: seed = %u\n", __func__, params.seed);
std::mt19937 rng(params.seed);
if (params.random_prompt) {
params.prompt = gpt_random_prompt(rng);
}
sparams.dataset = params.prompt_file;
g_collector.set_parameters(std::move(sparams));
if (!combine_files.empty()) {
@ -586,28 +619,6 @@ int main(int argc, char ** argv) {
}
}
gpt_params params;
params.n_batch = 512;
if (!gpt_params_parse(args.size(), args.data(), params)) {
return 1;
}
params.logits_all = true;
params.n_batch = std::min(params.n_batch, params.n_ctx);
print_build_info();
if (params.seed == LLAMA_DEFAULT_SEED) {
params.seed = time(NULL);
}
fprintf(stderr, "%s: seed = %u\n", __func__, params.seed);
std::mt19937 rng(params.seed);
if (params.random_prompt) {
params.prompt = gpt_random_prompt(rng);
}
llama_backend_init();
llama_numa_init(params.numa);