Mirror of https://github.com/LostRuins/koboldcpp.git (synced 2025-09-10 17:14:36 +00:00)
cuda : add softcap fusion (#14907)
parent bbd0f91779
commit 138b288b59
4 changed files with 118 additions and 6 deletions
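Context (not part of the diff): "softcapping" squashes a tensor elementwise as out = softcap * tanh(x / softcap), so large magnitudes saturate toward ±softcap while small values pass through almost unchanged. In ggml that is the chain GGML_OP_SCALE (by 1/softcap) -> GGML_UNARY_OP_TANH -> GGML_OP_SCALE (by softcap); this commit lets the CUDA backend run that chain as a single fused kernel instead of three elementwise passes, and adds the backend-ops test case shown in the hunks below (the remaining changed files, presumably the CUDA backend sources, are not expanded here). A minimal reference sketch of the math only, with the function name and setup being illustrative rather than taken from the commit:

#include <cmath>
#include <vector>

// Unfused reference: y[i] = cap * tanh(x[i] / cap).
// A fused kernel has to reproduce these values in a single pass over the data.
static std::vector<float> softcap_reference(const std::vector<float> & x, float cap) {
    std::vector<float> y(x.size());
    for (size_t i = 0; i < x.size(); ++i) {
        y[i] = cap * std::tanh(x[i] / cap);
    }
    return y;
}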
@@ -2545,6 +2545,41 @@ struct test_scale : public test_case {
     }
 };
 
+// GGML_OP_SCALE + GGML_UNARY_OP_TANH + GGML_OP_SCALE
+struct test_softcap : public test_case {
+    const ggml_type type;
+    const std::array<int64_t, 4> ne;
+    float softcap;
+
+    std::string op_desc(ggml_tensor * t) override {
+        GGML_UNUSED(t);
+        return "SOFTCAP";
+    }
+
+    bool run_whole_graph() override { return true; }
+
+    std::string vars() override {
+        return VARS_TO_STR3(type, ne, softcap);
+    }
+
+    test_softcap(ggml_type type = GGML_TYPE_F32,
+            std::array<int64_t, 4> ne = {10, 10, 10, 10},
+            float softcap = 30.0f)
+        : type(type), ne(ne), softcap(softcap) {}
+
+    ggml_tensor * build_graph(ggml_context * ctx) override {
+        ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
+
+        ggml_set_param(a);
+        ggml_set_name(a, "a");
+
+        ggml_tensor * out = ggml_scale(ctx, ggml_tanh(ctx, ggml_scale(ctx, a, 1.0f / softcap)), softcap);
+        ggml_set_name(out, "out");
+
+        return out;
+    }
+};
+
 // GGML_OP_SILU_BACK
 struct test_silu_back : public test_case {
     const ggml_type type;
@@ -5421,6 +5456,7 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
     test_cases.emplace_back(new test_add1());
     test_cases.emplace_back(new test_scale());
     test_cases.emplace_back(new test_scale(GGML_TYPE_F32, {10, 10, 10, 10}, 2.0f, 1.0f));
+    test_cases.emplace_back(new test_softcap(GGML_TYPE_F32, {10, 10, 10, 10}, 50.0f));
    test_cases.emplace_back(new test_silu_back());
 
     for (float eps : {0.0f, 1e-6f, 1e-4f, 1e-1f}) {