Mirror of https://github.com/LostRuins/koboldcpp.git (synced 2025-09-11 09:34:37 +00:00)
ggml : add ggml_set_rows (#14274)
* ggml : add ggml_set_rows

  Add ggml_set_rows(a, b, c), which copies rows from 'b' into 'a' using indices from 'c'.

  ref: #8366

* use I64 for indices
* ggml : add repeat impl for i64
* ggml : add ggml_is_contiguous_rows
* ggml : ggml_set_rows support broadcast
* ggml : ggml_set_rows support quantized dst
* ggml : support GGML_TYPE_F32 ".from_float" trait
* ggml : ggml_set_rows update comment + better index name
* tests : add ggml_set_rows
* metal : add ggml_set_rows implementation
* ggml : simplify forward_dup_f32
* ggml : fix supports_op
* tests : add comment to set_rows
* ggml : leave the repeat_i64 for a separate PR
* ggml : set_rows use std::min instead of MIN
* ggml : better error message for set_rows unsupported type
* metal : perform op->type check only once
* tests : more consistent implementation + more tests

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Parent: f667f1e624
Commit: 8d94219a4a

12 changed files with 653 additions and 204 deletions
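Before the per-file hunks, a minimal sketch of how the new op can be exercised on the CPU backend. This example is not part of the commit: the graph helpers follow the usual ggml API, and the shapes, values, and ctx-first calling convention are assumptions based on the surrounding code. 'a' is the destination, 'b' holds the F32 source rows, and 'c' holds the I64 row indices.

#include "ggml.h"
#include "ggml-cpu.h"
#include <string.h>

int main(void) {
    struct ggml_init_params ip = {
        /*.mem_size   =*/ 16*1024*1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(ip);

    // destination 'a': 8 rows of 4 floats; source 'b': 3 rows to scatter into it
    struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 4, 8);
    struct ggml_tensor * b = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 4, 3);
    // 'c': one I64 index per source row, i.e. where each row of 'b' lands in 'a'
    struct ggml_tensor * c = ggml_new_tensor_1d(ctx, GGML_TYPE_I64, 3);

    memset(a->data, 0, ggml_nbytes(a));
    for (int i = 0; i < 4*3; ++i) ((float *) b->data)[i] = (float) i;
    ((int64_t *) c->data)[0] = 5;
    ((int64_t *) c->data)[1] = 0;
    ((int64_t *) c->data)[2] = 7;

    // rows 0..2 of 'b' are copied into rows 5, 0 and 7 of 'a'
    struct ggml_tensor * out = ggml_set_rows(ctx, a, b, c);

    struct ggml_cgraph * gf = ggml_new_graph(ctx);
    ggml_build_forward_expand(gf, out);
    ggml_graph_compute_with_ctx(ctx, gf, /*n_threads =*/ 1);

    ggml_free(ctx);
    return 0;
}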
@@ -696,24 +696,8 @@ static void ggml_compute_forward_dup_f32(
     if (ggml_is_contiguous(dst)) {
         // TODO: simplify
         if (nb00 == sizeof(float)) {
-            if (dst->type == GGML_TYPE_F32) {
-                size_t id = 0;
-                const size_t rs = ne00 * nb00;
-                char * dst_ptr = (char *) dst->data;
-
-                for (int i03 = 0; i03 < ne03; i03++) {
-                    for (int i02 = 0; i02 < ne02; i02++) {
-                        id += rs * ir0;
-                        for (int i01 = ir0; i01 < ir1; i01++) {
-                            const char * src0_ptr = (char *) src0->data + i01*nb01 + i02*nb02 + i03*nb03;
-                            memcpy(dst_ptr + id, src0_ptr, rs);
-                            id += rs;
-                        }
-                        id += rs * (ne01 - ir1);
-                    }
-                }
-            } else if (ggml_get_type_traits_cpu(dst->type)->from_float) {
-                ggml_from_float_t const quantize_row_q = ggml_get_type_traits_cpu(dst->type)->from_float;
+            if (ggml_get_type_traits_cpu(dst->type)->from_float) {
+                ggml_from_float_t const from_float = ggml_get_type_traits_cpu(dst->type)->from_float;

                 size_t id = 0;
                 size_t rs = nb0 * (ne00 / ggml_blck_size(dst->type));
@@ -724,7 +708,7 @@ static void ggml_compute_forward_dup_f32(
                         id += rs * ir0;
                         for (int i01 = ir0; i01 < ir1; i01++) {
                             const float * src0_ptr = (float *) ((char *) src0->data + i01*nb01 + i02*nb02 + i03*nb03);
-                            quantize_row_q(src0_ptr, dst_ptr + id, ne00);
+                            from_float(src0_ptr, dst_ptr + id, ne00);
                             id += rs;
                         }
                         id += rs * (ne01 - ir1);
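The dedicated F32 memcpy branch removed above became redundant because this same commit gives GGML_TYPE_F32 its own ".from_float" trait, so the generic from_float path now covers the float-to-float case as well. A hedged sketch of what that trait plausibly reduces to for an F32 destination (hypothetical helper name, not the commit's code):

#include <stdint.h>
#include <string.h>

// illustrative only: for an F32 destination there is nothing to quantize,
// so the "conversion" is a plain row copy
static void from_float_f32(const float * x, void * y, int64_t n) {
    memcpy(y, x, n * sizeof(float));
}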
@@ -2300,6 +2284,12 @@ void ggml_compute_forward_repeat(
             {
                 ggml_compute_forward_repeat_f32(params, dst);
             } break;
+        // TODO: templateify the implementation and support for I64
+        // ref https://github.com/ggml-org/llama.cpp/pull/14274#discussion_r2169492225
+        //case GGML_TYPE_I64:
+        //    {
+        //        ggml_compute_forward_repeat_i64(params, dst);
+        //    } break;
         default:
             {
                 GGML_ABORT("fatal error");
@@ -4470,6 +4460,74 @@ void ggml_compute_forward_get_rows(
     //}
 }

+static void ggml_compute_forward_set_rows_f32(
+        const ggml_compute_params * params,
+              ggml_tensor * dst) {
+
+    const ggml_tensor * src0 = dst->src[0];
+    const ggml_tensor * src1 = dst->src[1];
+
+    GGML_TENSOR_BINARY_OP_LOCALS
+
+    const int64_t nc = ne00;
+    const int64_t nr = ne01;
+
+    assert(ne0 == nc);
+    assert(ne2 == ne02);
+    assert(ne3 == ne03);
+    assert(src0->type == GGML_TYPE_F32);
+    assert(ne02 % ne11 == 0);
+    assert(ne03 % ne12 == 0);
+
+    const int ith = params->ith;
+    const int nth = params->nth;
+
+    // rows per thread
+    const int64_t dr = (nr + nth - 1)/nth;
+
+    // row range for this thread
+    const int64_t ir0 = dr*ith;
+    const int64_t ir1 = std::min(ir0 + dr, nr);
+
+    ggml_from_float_t const from_float = ggml_get_type_traits_cpu(dst->type)->from_float;
+
+    for (int64_t i03 = 0; i03 < ne03; ++i03) {
+        for (int64_t i02 = 0; i02 < ne02; ++i02) {
+            for (int64_t i = ir0; i < ir1; ++i) {
+                const int64_t i12 = i03%ne12;
+                const int64_t i11 = i02%ne11;
+                const int64_t i10 = i;
+
+                const int64_t i1 = *(int64_t *) ((char *) src1->data + i10*nb10 + i11*nb11 + i12*nb12);
+
+                GGML_ASSERT(i1 >= 0 && i1 < ne1);
+
+                from_float(
+                        (const float *) ((char *) src0->data + i*nb01 + i02*nb02 + i03*nb03),
+                        ((char *) dst->data + i1*nb1 + i02*nb2 + i03*nb3), nc);
+            }
+        }
+    }
+}
+
+void ggml_compute_forward_set_rows(
+        const ggml_compute_params * params,
+        ggml_tensor * dst) {
+
+    const ggml_tensor * src0 = dst->src[0];
+
+    switch (src0->type) {
+        case GGML_TYPE_F32:
+            {
+                ggml_compute_forward_set_rows_f32(params, dst);
+            } break;
+        default:
+            {
+                GGML_ABORT("src0->type = %d (%s) not supported", src0->type, ggml_type_name(src0->type));
+            }
+    }
+}
+
 // ggml_compute_forward_get_rows_back

 static void ggml_compute_forward_get_rows_back_f32_f16(
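The broadcast handling in ggml_compute_forward_set_rows_f32 is worth a closer look: the index tensor may have smaller outer dims than the source, and the kernel wraps the batch coordinates with a modulo (i12 = i03 % ne12, i11 = i02 % ne11), guarded by the ne02 % ne11 == 0 and ne03 % ne12 == 0 asserts. A standalone sketch (not part of the commit) that prints this mapping:

#include <stdio.h>

int main(void) {
    const int ne02 = 4, ne03 = 2;  // batch dims of the source/destination
    const int ne11 = 2, ne12 = 1;  // smaller batch dims of the index tensor

    for (int i03 = 0; i03 < ne03; ++i03) {
        for (int i02 = 0; i02 < ne02; ++i02) {
            // the same wrap-around the kernel performs before reading a row index
            printf("src batch (i02=%d, i03=%d) -> index batch (i11=%d, i12=%d)\n",
                   i02, i03, i02 % ne11, i03 % ne12);
        }
    }
    return 0;
}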