mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-11 01:24:36 +00:00
Merge commit 'ad3a0505e3
' into concedo_experimental
# Conflicts: # .github/workflows/build.yml # .github/workflows/close-issue.yml # .github/workflows/code-coverage.yml # .github/workflows/docker.yml # .github/workflows/editorconfig.yml # .github/workflows/nix-ci-aarch64.yml # .github/workflows/nix-ci.yml # .github/workflows/python-check-requirements.yml # .github/workflows/python-lint.yml # .github/workflows/server.yml # .github/workflows/zig-build.yml # .gitignore # CMakeLists.txt # Makefile # README-sycl.md # README.md # build.zig # common/CMakeLists.txt # llama.cpp # tests/CMakeLists.txt # tests/test-backend-ops.cpp
This commit is contained in:
commit
9c0fbf9f73
67 changed files with 10861 additions and 4661 deletions
|
@ -51,29 +51,31 @@ private:
|
|||
void keep_imatrix(int ncall) const;
|
||||
};
|
||||
|
||||
// remove any prefix and suffixes from the name
|
||||
// CUDA0#blk.0.attn_k.weight#0 => blk.0.attn_k.weight
|
||||
static std::string filter_tensor_name(const char * name) {
|
||||
std::string wname;
|
||||
const char * p = strchr(name, '#');
|
||||
if (p != NULL) {
|
||||
p = p + 1;
|
||||
const char * q = strchr(p, '#');
|
||||
if (q != NULL) {
|
||||
wname = std::string(p, q - p);
|
||||
} else {
|
||||
wname = p;
|
||||
}
|
||||
} else {
|
||||
wname = name;
|
||||
}
|
||||
return wname;
|
||||
}
|
||||
|
||||
bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void * user_data) {
|
||||
GGML_UNUSED(user_data);
|
||||
|
||||
const struct ggml_tensor * src0 = t->src[0];
|
||||
const struct ggml_tensor * src1 = t->src[1];
|
||||
|
||||
std::string wname;
|
||||
{
|
||||
// remove any prefix and suffixes from the name
|
||||
// CUDA0#blk.0.attn_k.weight#0 => blk.0.attn_k.weight
|
||||
const char * p = strchr(src0->name, '#');
|
||||
if (p != NULL) {
|
||||
p = p + 1;
|
||||
const char * q = strchr(p, '#');
|
||||
if (q != NULL) {
|
||||
wname = std::string(p, q - p);
|
||||
} else {
|
||||
wname = p;
|
||||
}
|
||||
} else {
|
||||
wname = src0->name;
|
||||
}
|
||||
}
|
||||
std::string wname = filter_tensor_name(src0->name);
|
||||
|
||||
// when ask is true, the scheduler wants to know if we are interested in data from this tensor
|
||||
// if we return true, a follow-up call will be made with ask=false in which we can do the actual collection
|
||||
|
@ -113,6 +115,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
|
|||
// this is necessary to guarantee equal number of "ncall" for each tensor
|
||||
for (int ex = 0; ex < n_as; ++ex) {
|
||||
src0 = t->src[2 + ex];
|
||||
wname = filter_tensor_name(src0->name);
|
||||
auto& e = m_stats[wname];
|
||||
if (e.values.empty()) {
|
||||
e.values.resize(src1->ne[0], 0);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue