mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-17 04:19:40 +00:00
Merge branch 'upstream' into concedo_experimental
# Conflicts: # .github/workflows/docker.yml # README.md # build-xcframework.sh # examples/llava/CMakeLists.txt # examples/llava/clip.cpp # examples/rpc/rpc-server.cpp # examples/run/run.cpp # ggml/src/ggml-cann/ggml-cann.cpp # scripts/sync-ggml-am.sh # scripts/sync-ggml.last # tests/test-backend-ops.cpp # tests/test-chat.cpp
This commit is contained in:
commit
a0ae187563
33 changed files with 2247 additions and 1202 deletions
278
ggml/src/ggml.c
278
ggml/src/ggml.c
|
@ -995,23 +995,18 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
|
|||
|
||||
"UNARY",
|
||||
|
||||
"MAP_UNARY",
|
||||
"MAP_BINARY",
|
||||
|
||||
"MAP_CUSTOM1_F32",
|
||||
"MAP_CUSTOM2_F32",
|
||||
"MAP_CUSTOM3_F32",
|
||||
|
||||
"MAP_CUSTOM1",
|
||||
"MAP_CUSTOM2",
|
||||
"MAP_CUSTOM3",
|
||||
|
||||
"CUSTOM",
|
||||
|
||||
"CROSS_ENTROPY_LOSS",
|
||||
"CROSS_ENTROPY_LOSS_BACK",
|
||||
"OPT_STEP_ADAMW",
|
||||
};
|
||||
|
||||
static_assert(GGML_OP_COUNT == 85, "GGML_OP_COUNT != 85");
|
||||
static_assert(GGML_OP_COUNT == 81, "GGML_OP_COUNT != 81");
|
||||
|
||||
static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
|
||||
"none",
|
||||
|
@ -1094,23 +1089,18 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
|
|||
|
||||
"unary(x)",
|
||||
|
||||
"f(x)",
|
||||
"f(x,y)",
|
||||
|
||||
"custom_f32(x)",
|
||||
"custom_f32(x,y)",
|
||||
"custom_f32(x,y,z)",
|
||||
"map_custom(x)",
|
||||
"map_custom(x,y)",
|
||||
"map_custom(x,y,z)",
|
||||
|
||||
"custom(x)",
|
||||
"custom(x,y)",
|
||||
"custom(x,y,z)",
|
||||
|
||||
"cross_entropy_loss(x,y)",
|
||||
"cross_entropy_loss_back(x,y)",
|
||||
"adamw(x)",
|
||||
};
|
||||
|
||||
static_assert(GGML_OP_COUNT == 85, "GGML_OP_COUNT != 85");
|
||||
static_assert(GGML_OP_COUNT == 81, "GGML_OP_COUNT != 81");
|
||||
|
||||
static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");
|
||||
|
||||
|
@ -4197,7 +4187,8 @@ static struct ggml_tensor * ggml_upscale_impl(
|
|||
int ne0,
|
||||
int ne1,
|
||||
int ne2,
|
||||
int ne3) {
|
||||
int ne3,
|
||||
enum ggml_scale_mode mode) {
|
||||
GGML_ASSERT(a->ne[0] <= ne0);
|
||||
GGML_ASSERT(a->ne[1] <= ne1);
|
||||
GGML_ASSERT(a->ne[2] <= ne2);
|
||||
|
@ -4205,6 +4196,8 @@ static struct ggml_tensor * ggml_upscale_impl(
|
|||
|
||||
struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3);
|
||||
|
||||
ggml_set_op_params_i32(result, 0, mode);
|
||||
|
||||
result->op = GGML_OP_UPSCALE;
|
||||
result->src[0] = a;
|
||||
|
||||
|
@ -4214,8 +4207,9 @@ static struct ggml_tensor * ggml_upscale_impl(
|
|||
struct ggml_tensor * ggml_upscale(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
int scale_factor) {
|
||||
return ggml_upscale_impl(ctx, a, a->ne[0] * scale_factor, a->ne[1] * scale_factor, a->ne[2], a->ne[3]);
|
||||
int scale_factor,
|
||||
enum ggml_scale_mode mode) {
|
||||
return ggml_upscale_impl(ctx, a, a->ne[0] * scale_factor, a->ne[1] * scale_factor, a->ne[2], a->ne[3], mode);
|
||||
}
|
||||
|
||||
struct ggml_tensor * ggml_upscale_ext(
|
||||
|
@ -4224,8 +4218,9 @@ struct ggml_tensor * ggml_upscale_ext(
|
|||
int ne0,
|
||||
int ne1,
|
||||
int ne2,
|
||||
int ne3) {
|
||||
return ggml_upscale_impl(ctx, a, ne0, ne1, ne2, ne3);
|
||||
int ne3,
|
||||
enum ggml_scale_mode mode) {
|
||||
return ggml_upscale_impl(ctx, a, ne0, ne1, ne2, ne3, mode);
|
||||
}
|
||||
|
||||
// ggml_pad
|
||||
|
@ -4855,179 +4850,6 @@ struct ggml_tensor * ggml_unary_inplace(
|
|||
return ggml_unary_impl(ctx, a, op, true);
|
||||
}
|
||||
|
||||
// ggml_map_unary
|
||||
|
||||
static struct ggml_tensor * ggml_map_unary_impl_f32(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
const ggml_unary_op_f32_t fun,
|
||||
bool inplace) {
|
||||
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
||||
|
||||
ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
|
||||
|
||||
result->op = GGML_OP_MAP_UNARY;
|
||||
result->src[0] = a;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
struct ggml_tensor * ggml_map_unary_f32(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
const ggml_unary_op_f32_t fun) {
|
||||
return ggml_map_unary_impl_f32(ctx, a, fun, false);
|
||||
}
|
||||
|
||||
struct ggml_tensor * ggml_map_unary_inplace_f32(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
const ggml_unary_op_f32_t fun) {
|
||||
return ggml_map_unary_impl_f32(ctx, a, fun, true);
|
||||
}
|
||||
|
||||
// ggml_map_binary
|
||||
|
||||
static struct ggml_tensor * ggml_map_binary_impl_f32(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b,
|
||||
const ggml_binary_op_f32_t fun,
|
||||
bool inplace) {
|
||||
GGML_ASSERT(ggml_are_same_shape(a, b));
|
||||
|
||||
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
||||
|
||||
ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
|
||||
|
||||
result->op = GGML_OP_MAP_BINARY;
|
||||
result->src[0] = a;
|
||||
result->src[1] = b;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
struct ggml_tensor * ggml_map_binary_f32(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b,
|
||||
const ggml_binary_op_f32_t fun) {
|
||||
return ggml_map_binary_impl_f32(ctx, a, b, fun, false);
|
||||
}
|
||||
|
||||
struct ggml_tensor * ggml_map_binary_inplace_f32(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b,
|
||||
const ggml_binary_op_f32_t fun) {
|
||||
return ggml_map_binary_impl_f32(ctx, a, b, fun, true);
|
||||
}
|
||||
|
||||
// ggml_map_custom1_f32
|
||||
|
||||
static struct ggml_tensor * ggml_map_custom1_impl_f32(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
const ggml_custom1_op_f32_t fun,
|
||||
bool inplace) {
|
||||
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
||||
|
||||
ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
|
||||
|
||||
result->op = GGML_OP_MAP_CUSTOM1_F32;
|
||||
result->src[0] = a;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
struct ggml_tensor * ggml_map_custom1_f32(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
const ggml_custom1_op_f32_t fun) {
|
||||
return ggml_map_custom1_impl_f32(ctx, a, fun, false);
|
||||
}
|
||||
|
||||
struct ggml_tensor * ggml_map_custom1_inplace_f32(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
const ggml_custom1_op_f32_t fun) {
|
||||
return ggml_map_custom1_impl_f32(ctx, a, fun, true);
|
||||
}
|
||||
|
||||
// ggml_map_custom2_f32
|
||||
|
||||
static struct ggml_tensor * ggml_map_custom2_impl_f32(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b,
|
||||
const ggml_custom2_op_f32_t fun,
|
||||
bool inplace) {
|
||||
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
||||
|
||||
ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
|
||||
|
||||
result->op = GGML_OP_MAP_CUSTOM2_F32;
|
||||
result->src[0] = a;
|
||||
result->src[1] = b;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
struct ggml_tensor * ggml_map_custom2_f32(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b,
|
||||
const ggml_custom2_op_f32_t fun) {
|
||||
return ggml_map_custom2_impl_f32(ctx, a, b, fun, false);
|
||||
}
|
||||
|
||||
struct ggml_tensor * ggml_map_custom2_inplace_f32(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b,
|
||||
const ggml_custom2_op_f32_t fun) {
|
||||
return ggml_map_custom2_impl_f32(ctx, a, b, fun, true);
|
||||
}
|
||||
|
||||
// ggml_map_custom3_f32
|
||||
|
||||
static struct ggml_tensor * ggml_map_custom3_impl_f32(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b,
|
||||
struct ggml_tensor * c,
|
||||
const ggml_custom3_op_f32_t fun,
|
||||
bool inplace) {
|
||||
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
||||
|
||||
ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
|
||||
|
||||
result->op = GGML_OP_MAP_CUSTOM3_F32;
|
||||
result->src[0] = a;
|
||||
result->src[1] = b;
|
||||
result->src[2] = c;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
struct ggml_tensor * ggml_map_custom3_f32(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b,
|
||||
struct ggml_tensor * c,
|
||||
const ggml_custom3_op_f32_t fun) {
|
||||
return ggml_map_custom3_impl_f32(ctx, a, b, c, fun, false);
|
||||
}
|
||||
|
||||
struct ggml_tensor * ggml_map_custom3_inplace_f32(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b,
|
||||
struct ggml_tensor * c,
|
||||
const ggml_custom3_op_f32_t fun) {
|
||||
return ggml_map_custom3_impl_f32(ctx, a, b, c, fun, true);
|
||||
}
|
||||
|
||||
// ggml_map_custom1
|
||||
|
||||
static struct ggml_tensor * ggml_map_custom1_impl(
|
||||
|
@ -5046,7 +4868,7 @@ static struct ggml_tensor * ggml_map_custom1_impl(
|
|||
/*.n_tasks =*/ n_tasks,
|
||||
/*.userdata =*/ userdata
|
||||
};
|
||||
ggml_set_op_params(result, (const void *) ¶ms, sizeof(params));
|
||||
ggml_set_op_params(result, ¶ms, sizeof(params));
|
||||
|
||||
result->op = GGML_OP_MAP_CUSTOM1;
|
||||
result->src[0] = a;
|
||||
|
@ -5091,7 +4913,7 @@ static struct ggml_tensor * ggml_map_custom2_impl(
|
|||
/*.n_tasks =*/ n_tasks,
|
||||
/*.userdata =*/ userdata
|
||||
};
|
||||
ggml_set_op_params(result, (const void *) ¶ms, sizeof(params));
|
||||
ggml_set_op_params(result, ¶ms, sizeof(params));
|
||||
|
||||
result->op = GGML_OP_MAP_CUSTOM2;
|
||||
result->src[0] = a;
|
||||
|
@ -5140,7 +4962,7 @@ static struct ggml_tensor * ggml_map_custom3_impl(
|
|||
/*.n_tasks =*/ n_tasks,
|
||||
/*.userdata =*/ userdata
|
||||
};
|
||||
ggml_set_op_params(result, (const void *) ¶ms, sizeof(params));
|
||||
ggml_set_op_params(result, ¶ms, sizeof(params));
|
||||
|
||||
result->op = GGML_OP_MAP_CUSTOM3;
|
||||
result->src[0] = a;
|
||||
|
@ -5172,6 +4994,66 @@ struct ggml_tensor * ggml_map_custom3_inplace(
|
|||
return ggml_map_custom3_impl(ctx, a, b, c, fun, n_tasks, userdata, true);
|
||||
}
|
||||
|
||||
struct ggml_tensor * ggml_custom_4d(
|
||||
struct ggml_context * ctx,
|
||||
enum ggml_type type,
|
||||
int64_t ne0,
|
||||
int64_t ne1,
|
||||
int64_t ne2,
|
||||
int64_t ne3,
|
||||
struct ggml_tensor ** args,
|
||||
int n_args,
|
||||
ggml_custom_op_t fun,
|
||||
int n_tasks,
|
||||
void * userdata) {
|
||||
|
||||
GGML_ASSERT(n_args < GGML_MAX_SRC);
|
||||
|
||||
struct ggml_tensor * result = ggml_new_tensor_4d(ctx, type, ne0, ne1, ne2, ne3);
|
||||
|
||||
struct ggml_custom_op_params params = {
|
||||
/*.fun =*/ fun,
|
||||
/*.n_tasks =*/ n_tasks,
|
||||
/*.userdata =*/ userdata
|
||||
};
|
||||
ggml_set_op_params(result, ¶ms, sizeof(params));
|
||||
|
||||
result->op = GGML_OP_CUSTOM;
|
||||
for (int i = 0; i < n_args; i++) {
|
||||
result->src[i] = args[i];
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
struct ggml_tensor * ggml_custom_inplace(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor ** args,
|
||||
int n_args,
|
||||
ggml_custom_op_t fun,
|
||||
int n_tasks,
|
||||
void * userdata) {
|
||||
|
||||
GGML_ASSERT(n_args < GGML_MAX_SRC - 1);
|
||||
|
||||
struct ggml_tensor * result = ggml_view_tensor(ctx, a);
|
||||
|
||||
struct ggml_custom_op_params params = {
|
||||
/*.fun =*/ fun,
|
||||
/*.n_tasks =*/ n_tasks,
|
||||
/*.userdata =*/ userdata
|
||||
};
|
||||
ggml_set_op_params(result, ¶ms, sizeof(params));
|
||||
|
||||
result->op = GGML_OP_CUSTOM;
|
||||
result->src[0] = a;
|
||||
for (int i = 0; i < n_args; i++) {
|
||||
result->src[i + 1] = args[i];
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
// ggml_cross_entropy_loss
|
||||
|
||||
struct ggml_tensor * ggml_cross_entropy_loss(
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue