mirror of https://github.com/LostRuins/koboldcpp.git (synced 2025-09-12 09:59:41 +00:00)

Revert "merge missing functions from sdcpp"

This reverts commit 19e1c518f1.

parent b67a906244
commit c952b4f192

4 changed files with 7 additions and 325 deletions
ggml-cuda.cu: 121 changes
@@ -616,8 +616,6 @@ static_assert(sizeof(block_iq4_xs) == sizeof(ggml_fp16_t) + sizeof(uint16_t) + Q
 #define CUDA_UPSCALE_BLOCK_SIZE 256
 #define CUDA_CONCAT_BLOCK_SIZE 256
 #define CUDA_PAD_BLOCK_SIZE 256
-#define CUDA_ARANGE_BLOCK_SIZE 256
-#define CUDA_TIMESTEP_EMBEDDING_BLOCK_SIZE 256
 #define CUDA_ACC_BLOCK_SIZE 256
 #define CUDA_IM2COL_BLOCK_SIZE 256
 #define CUDA_POOL2D_BLOCK_SIZE 256
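For context, each of these constants fixes the 1-D thread-block size for one custom kernel; launchers derive the grid size by dividing the element count by the block size, rounding up. A minimal sketch of that launch pattern for the arange kernel removed below (the wrapper name arange_f32_cuda appears later in this diff, but this body is a reconstruction, not the exact removed code):

    static void arange_f32_cuda(float * dst, const int ne0, const float start, const float step, cudaStream_t stream) {
        // one thread per output element; round up so all ne0 elements are covered
        const int num_blocks = (ne0 + CUDA_ARANGE_BLOCK_SIZE - 1) / CUDA_ARANGE_BLOCK_SIZE;
        arange_f32<<<num_blocks, CUDA_ARANGE_BLOCK_SIZE, 0, stream>>>(dst, ne0, start, step);
    }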
@@ -1045,38 +1043,6 @@ static __global__ void pad_f32(const float * x, float * dst, const int ne0, cons
     }
 }
 
-static __global__ void arange_f32(float * dst, const int ne0, const float start, const float step) {
-    // blockIDx.x: idx of ne0 / BLOCK_SIZE
-    int nidx = threadIdx.x + blockIdx.x * blockDim.x;
-    if (nidx >= ne0) {
-        return;
-    }
-    dst[nidx] = start + step * nidx;
-}
-
-static __global__ void timestep_embedding_f32(const float * timesteps, float * dst, const int nb1, const int dim, const int max_period) {
-    // blockIDx.y: idx of timesteps->ne[0]
-    // blockIDx.x: idx of ((dim + 1) / 2) / BLOCK_SIZE
-    int i = blockIdx.y;
-    int j = threadIdx.x + blockIdx.x * blockDim.x;
-    float * embed_data = (float *)((char *)dst + i*nb1);
-
-    if (dim % 2 != 0 && j == ((dim + 1) / 2)) {
-        embed_data[dim] = 0.f;
-    }
-
-    int half = dim / 2;
-    if (j >= half) {
-        return;
-    }
-
-    float timestep = timesteps[i];
-    float freq = (float)exp(-logf(max_period) * j / half);
-    float arg = timestep * freq;
-    embed_data[j] = cos(arg);
-    embed_data[j + half] = sin(arg);
-}
-
 template <int block_size>
 static __global__ void group_norm_f32(const float * x, float * dst, const int group_size, const int ne_elements, const float eps) {
     int start = blockIdx.x * group_size;
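For reference, the removed timestep_embedding_f32 kernel computes the standard sinusoidal timestep embedding used by diffusion models. With half = dim / 2 and t = timesteps[i], each thread j < half writes

    freq_j = exp(-log(max_period) * j / half)
    embed_data[j]        = cos(t * freq_j)
    embed_data[j + half] = sin(t * freq_j)

and when dim is odd a single thread zero-fills the trailing element embed_data[dim].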
@@ -9219,44 +9185,6 @@ static void ggml_cuda_op_pad(
     (void) src1_dd;
 }
 
-static void ggml_cuda_op_arange(
-    const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst,
-    const float * src0_dd, const float * src1_dd, float * dst_dd, cudaStream_t main_stream) {
-    GGML_ASSERT(dst->type == GGML_TYPE_F32);
-
-    const float start = ((float*)dst->op_params)[0];
-    const float stop = ((float*)dst->op_params)[1];
-    const float step = ((float*)dst->op_params)[2];
-
-    int64_t steps = (int64_t)ceil((stop - start) / step);
-    GGML_ASSERT(ggml_nelements(dst) == steps);
-
-    arange_f32_cuda(dst_dd, dst->ne[0], start, step, main_stream);
-
-    (void) src0;
-    (void) src1;
-    (void) src0_dd;
-    (void) src1_dd;
-}
-
-
-static void ggml_cuda_op_timestep_embedding(
-    const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst,
-    const float * src0_dd, const float * src1_dd, float * dst_dd, cudaStream_t main_stream) {
-
-    GGML_ASSERT(src0->type == GGML_TYPE_F32);
-    GGML_ASSERT(dst->type == GGML_TYPE_F32);
-
-    const int dim = dst->op_params[0];
-    const int max_period = dst->op_params[1];
-
-    timestep_embedding_f32_cuda(src0_dd, dst_dd, src0->ne[0], dst->nb[1], dim, max_period, main_stream);
-
-    (void) src1;
-    (void) dst;
-    (void) src1_dd;
-}
-
 static void ggml_cuda_op_rms_norm(
     const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst,
     const float * src0_dd, const float * src1_dd, float * dst_dd, cudaStream_t main_stream) {
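The removed ggml_cuda_op_arange reads start/stop/step back out of dst->op_params, where the graph-building side packed them as three floats. A hedged sketch of that producer side (modeled on the ggml_arange constructor from the reverted merge; the exact helper names here are assumptions, not part of this diff):

    struct ggml_tensor * ggml_arange(struct ggml_context * ctx, float start, float stop, float step) {
        const int64_t steps = (int64_t) ceilf((stop - start) / step);  // must agree with the CUDA-side assert
        struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, steps);
        ggml_set_op_params_f32(result, 0, start);  // read back as ((float *) dst->op_params)[0]
        ggml_set_op_params_f32(result, 1, stop);   // read back as ((float *) dst->op_params)[1]
        ggml_set_op_params_f32(result, 2, step);   // read back as ((float *) dst->op_params)[2]
        result->op = GGML_OP_ARANGE;
        return result;
    }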
@@ -10535,47 +10463,6 @@ static void ggml_cuda_pad(const ggml_tensor * src0, const ggml_tensor * src1, gg
     ggml_cuda_op_flatten(src0, src1, dst, ggml_cuda_op_pad);
 }
 
-static void ggml_cuda_arange(const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    GGML_ASSERT( dst->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
-
-    ggml_tensor_extra_gpu * dst_extra = (ggml_tensor_extra_gpu *) dst->extra;
-
-    const bool dst_on_device = dst->backend == GGML_BACKEND_TYPE_GPU;
-
-    // dd = data device
-    float * src0_ddf = nullptr;
-    float * src1_ddf = nullptr;
-    float * dst_ddf = nullptr;
-
-    cuda_pool_alloc<float> dst_f;
-
-    ggml_cuda_set_device(g_main_device);
-    cudaStream_t main_stream = g_cudaStreams[g_main_device][0];
-
-    if (dst_on_device) {
-        dst_ddf = (float *) dst_extra->data_device[g_main_device];
-    } else {
-        dst_ddf = dst_f.alloc(ggml_nelements(dst));
-    }
-
-    // do the computation
-    ggml_cuda_op_arange(src0, src1, dst, src0_ddf, src1_ddf, dst_ddf, main_stream);
-    CUDA_CHECK(cudaGetLastError());
-
-    // copy dst to host if necessary
-    if (!dst_on_device) {
-        CUDA_CHECK(cudaMemcpyAsync(dst->data, dst_ddf, ggml_nbytes(dst), cudaMemcpyDeviceToHost, main_stream));
-    }
-
-    if (dst->backend == GGML_BACKEND_TYPE_CPU) {
-        CUDA_CHECK(cudaDeviceSynchronize());
-    }
-}
-
-static void ggml_cuda_timestep_embedding(const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    ggml_cuda_op_flatten(src0, src1, dst, ggml_cuda_op_timestep_embedding);
-}
-
 static void ggml_cuda_rms_norm(const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
     ggml_cuda_op_flatten(src0, src1, dst, ggml_cuda_op_rms_norm);
 }
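Note the asymmetry in this hunk: ggml_cuda_timestep_embedding goes through the generic ggml_cuda_op_flatten helper, while arange gets a hand-rolled wrapper, presumably because GGML_OP_ARANGE has no input tensors for that helper to inspect. The wrapper's temporary buffer uses the file's cuda_pool_alloc<T> RAII helper; a minimal usage sketch, assuming only the alloc/destructor behavior visible above:

    {
        cuda_pool_alloc<float> dst_f;                        // no device memory allocated yet
        float * dst_ddf = dst_f.alloc(ggml_nelements(dst));  // borrow a pool-backed device buffer
        // ... launch kernels that write dst_ddf ...
    }   // destructor returns the buffer to the pool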
@@ -11482,12 +11369,6 @@ GGML_CALL bool ggml_cuda_compute_forward(struct ggml_compute_params * params, st
         case GGML_OP_PAD:
             func = ggml_cuda_pad;
             break;
-        case GGML_OP_ARANGE:
-            func = ggml_cuda_arange;
-            break;
-        case GGML_OP_TIMESTEP_EMBEDDING:
-            func = ggml_cuda_timestep_embedding;
-            break;
         case GGML_OP_LEAKY_RELU:
             func = ggml_cuda_leaky_relu;
             break;
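These case labels feed the function-pointer dispatch in ggml_cuda_compute_forward; once removed, the two ops never reach a CUDA handler. All handlers share one shape, roughly as follows (condensed and illustrative, with an assumed typedef name):

    typedef void (*ggml_cuda_func_t)(const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);

    ggml_cuda_func_t func = nullptr;
    switch (tensor->op) {
        case GGML_OP_PAD: func = ggml_cuda_pad; break;
        // GGML_OP_ARANGE and GGML_OP_TIMESTEP_EMBEDDING no longer appear here
        default: return false;  // op not handled by this backend
    }
    func(tensor->src[0], tensor->src[1], tensor);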
@@ -12383,8 +12264,6 @@ GGML_CALL static bool ggml_backend_cuda_supports_op(ggml_backend_t backend, cons
         case GGML_OP_GROUP_NORM:
         case GGML_OP_UPSCALE:
         case GGML_OP_PAD:
-        case GGML_OP_ARANGE:
-        case GGML_OP_TIMESTEP_EMBEDDING:
         case GGML_OP_LEAKY_RELU:
             return true;
         default:
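With the two cases gone from ggml_backend_cuda_supports_op, the CUDA backend now reports GGML_OP_ARANGE and GGML_OP_TIMESTEP_EMBEDDING as unsupported, so the scheduler must place those nodes on a fallback backend. A sketch of the caller side through the public ggml_backend_supports_op entry point (illustrative):

    if (!ggml_backend_supports_op(cuda_backend, node)) {
        // after this revert, arange and timestep-embedding nodes land here
        // and have to be executed by another backend, e.g. the CPU one
    }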