mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-16 03:49:42 +00:00
Merge branch 'master' into concedo_experimental
# Conflicts: # Makefile # ggml.c # llama.cpp
This commit is contained in:
commit
e01e373e63
16 changed files with 6698 additions and 350 deletions
208
ggml.h
208
ggml.h
|
@ -192,7 +192,7 @@
|
|||
|
||||
#define GGML_MAX_DIMS 4
|
||||
#define GGML_MAX_NODES 4096
|
||||
#define GGML_MAX_PARAMS 16
|
||||
#define GGML_MAX_PARAMS 256
|
||||
#define GGML_MAX_CONTEXTS 64
|
||||
#define GGML_MAX_OPT 4
|
||||
#define GGML_DEFAULT_N_THREADS 4
|
||||
|
@ -244,6 +244,11 @@ extern "C" {
|
|||
GGML_TYPE_COUNT,
|
||||
};
|
||||
|
||||
enum ggml_backend {
|
||||
GGML_BACKEND_CPU = 0,
|
||||
GGML_BACKEND_CUDA = 1,
|
||||
};
|
||||
|
||||
// model file types
|
||||
enum ggml_ftype {
|
||||
GGML_FTYPE_UNKNOWN = -1,
|
||||
|
@ -265,12 +270,16 @@ extern "C" {
|
|||
|
||||
GGML_OP_DUP,
|
||||
GGML_OP_ADD,
|
||||
GGML_OP_ADD1,
|
||||
GGML_OP_ACC,
|
||||
GGML_OP_SUB,
|
||||
GGML_OP_MUL,
|
||||
GGML_OP_DIV,
|
||||
GGML_OP_SQR,
|
||||
GGML_OP_SQRT,
|
||||
GGML_OP_LOG,
|
||||
GGML_OP_SUM,
|
||||
GGML_OP_SUM_ROWS,
|
||||
GGML_OP_MEAN,
|
||||
GGML_OP_REPEAT,
|
||||
GGML_OP_ABS,
|
||||
|
@ -280,12 +289,15 @@ extern "C" {
|
|||
GGML_OP_RELU,
|
||||
GGML_OP_GELU,
|
||||
GGML_OP_SILU,
|
||||
GGML_OP_SILU_BACK,
|
||||
GGML_OP_NORM, // normalize
|
||||
GGML_OP_RMS_NORM,
|
||||
GGML_OP_RMS_NORM_BACK,
|
||||
|
||||
GGML_OP_MUL_MAT,
|
||||
|
||||
GGML_OP_SCALE,
|
||||
GGML_OP_SET,
|
||||
GGML_OP_CPY,
|
||||
GGML_OP_CONT,
|
||||
GGML_OP_RESHAPE,
|
||||
|
@ -293,9 +305,13 @@ extern "C" {
|
|||
GGML_OP_PERMUTE,
|
||||
GGML_OP_TRANSPOSE,
|
||||
GGML_OP_GET_ROWS,
|
||||
GGML_OP_GET_ROWS_BACK,
|
||||
GGML_OP_DIAG,
|
||||
GGML_OP_DIAG_MASK_INF,
|
||||
GGML_OP_DIAG_MASK_ZERO,
|
||||
GGML_OP_SOFT_MAX,
|
||||
GGML_OP_ROPE,
|
||||
GGML_OP_ROPE_BACK,
|
||||
GGML_OP_ALIBI,
|
||||
GGML_OP_CONV_1D_1S,
|
||||
GGML_OP_CONV_1D_2S,
|
||||
|
@ -325,6 +341,7 @@ extern "C" {
|
|||
// n-dimensional tensor
|
||||
struct ggml_tensor {
|
||||
enum ggml_type type;
|
||||
enum ggml_backend backend;
|
||||
|
||||
int n_dims;
|
||||
int64_t ne[GGML_MAX_DIMS]; // number of elements
|
||||
|
@ -355,7 +372,7 @@ extern "C" {
|
|||
|
||||
char name[32];
|
||||
|
||||
char padding[8]; // TODO: remove and add padding to name?
|
||||
char padding[9]; // TODO: remove and add padding to name?
|
||||
};
|
||||
|
||||
// computation graph
|
||||
|
@ -499,6 +516,29 @@ extern "C" {
|
|||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b);
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_add1(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b);
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_acc(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b,
|
||||
size_t nb1,
|
||||
size_t nb2,
|
||||
size_t nb3,
|
||||
size_t offset);
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_acc_inplace(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b,
|
||||
size_t nb1,
|
||||
size_t nb2,
|
||||
size_t nb3,
|
||||
size_t offset);
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_sub(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
|
@ -522,12 +562,24 @@ extern "C" {
|
|||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a);
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_log(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a);
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_log_inplace(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a);
|
||||
|
||||
// return scalar
|
||||
// TODO: compute sum along rows
|
||||
GGML_API struct ggml_tensor * ggml_sum(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a);
|
||||
|
||||
// sums along rows, with input shape [a,b,c,d] return shape [1,b,c,d]
|
||||
GGML_API struct ggml_tensor * ggml_sum_rows(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a);
|
||||
|
||||
// mean along rows
|
||||
GGML_API struct ggml_tensor * ggml_mean(
|
||||
struct ggml_context * ctx,
|
||||
|
@ -569,6 +621,13 @@ extern "C" {
|
|||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a);
|
||||
|
||||
// a - x
|
||||
// b - dy
|
||||
GGML_API struct ggml_tensor * ggml_silu_back(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b);
|
||||
|
||||
// normalize along rows
|
||||
// TODO: eps is hardcoded to 1e-5 for now
|
||||
GGML_API struct ggml_tensor * ggml_norm(
|
||||
|
@ -579,6 +638,13 @@ extern "C" {
|
|||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a);
|
||||
|
||||
// a - x
|
||||
// b - dy
|
||||
GGML_API struct ggml_tensor * ggml_rms_norm_back(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b);
|
||||
|
||||
// A: m rows, n columns
|
||||
// B: p rows, n columns (i.e. we transpose it internally)
|
||||
// result is m columns, p rows
|
||||
|
@ -591,12 +657,66 @@ extern "C" {
|
|||
// operations on tensors without backpropagation
|
||||
//
|
||||
|
||||
// in-place, returns view(a)
|
||||
GGML_API struct ggml_tensor * ggml_scale(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b);
|
||||
|
||||
// in-place, returns view(a)
|
||||
GGML_API struct ggml_tensor * ggml_scale_inplace(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b);
|
||||
|
||||
// b -> view(a,offset,nb1,nb2,3), return modified a
|
||||
GGML_API struct ggml_tensor * ggml_set(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b,
|
||||
size_t nb1,
|
||||
size_t nb2,
|
||||
size_t nb3,
|
||||
size_t offset);
|
||||
|
||||
// b -> view(a,offset,nb1,nb2,3), return view(a)
|
||||
GGML_API struct ggml_tensor * ggml_set_inplace(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b,
|
||||
size_t nb1,
|
||||
size_t nb2,
|
||||
size_t nb3,
|
||||
size_t offset);
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_set_1d(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b,
|
||||
size_t offset);
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_set_1d_inplace(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b,
|
||||
size_t offset);
|
||||
|
||||
// b -> view(a,offset,nb1,nb2,3), return modified a
|
||||
GGML_API struct ggml_tensor * ggml_set_2d(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b,
|
||||
size_t nb1,
|
||||
size_t offset);
|
||||
|
||||
// b -> view(a,offset,nb1,nb2,3), return view(a)
|
||||
GGML_API struct ggml_tensor * ggml_set_2d_inplace(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b,
|
||||
size_t nb1,
|
||||
size_t offset);
|
||||
|
||||
|
||||
// a -> b, return view(b)
|
||||
GGML_API struct ggml_tensor * ggml_cpy(
|
||||
struct ggml_context * ctx,
|
||||
|
@ -617,6 +737,11 @@ extern "C" {
|
|||
|
||||
// return view(a)
|
||||
// TODO: when we start computing gradient, make a copy instead of view
|
||||
GGML_API struct ggml_tensor * ggml_reshape_1d(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
int64_t ne0);
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_reshape_2d(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
|
@ -632,6 +757,14 @@ extern "C" {
|
|||
int64_t ne1,
|
||||
int64_t ne2);
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_reshape_4d(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
int64_t ne0,
|
||||
int64_t ne1,
|
||||
int64_t ne2,
|
||||
int64_t ne3);
|
||||
|
||||
// offset in bytes
|
||||
GGML_API struct ggml_tensor * ggml_view_1d(
|
||||
struct ggml_context * ctx,
|
||||
|
@ -657,6 +790,18 @@ extern "C" {
|
|||
size_t nb2, // slice stride in bytes
|
||||
size_t offset);
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_view_4d(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
int64_t ne0,
|
||||
int64_t ne1,
|
||||
int64_t ne2,
|
||||
int64_t ne3,
|
||||
size_t nb1, // row stride in bytes
|
||||
size_t nb2, // slice stride in bytes
|
||||
size_t nb3,
|
||||
size_t offset);
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_permute(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
|
@ -675,20 +820,50 @@ extern "C" {
|
|||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b);
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_get_rows_back(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b,
|
||||
struct ggml_tensor * c);
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_diag(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a);
|
||||
|
||||
// set elements above the diagonal to -INF
|
||||
// in-place, returns view(a)
|
||||
GGML_API struct ggml_tensor * ggml_diag_mask_inf(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
int n_past);
|
||||
|
||||
// in-place, returns view(a)
|
||||
GGML_API struct ggml_tensor * ggml_diag_mask_inf_inplace(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
int n_past);
|
||||
|
||||
// set elements above the diagonal to 0
|
||||
GGML_API struct ggml_tensor * ggml_diag_mask_zero(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
int n_past);
|
||||
|
||||
// in-place, returns view(a)
|
||||
GGML_API struct ggml_tensor * gml_diag_mask_zero_inplace(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
int n_past);
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_soft_max(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a);
|
||||
|
||||
// rotary position embedding
|
||||
// in-place, returns view(a)
|
||||
GGML_API struct ggml_tensor * ggml_soft_max_inplace(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a);
|
||||
|
||||
// rotary position embedding
|
||||
// if mode & 1 == 1, skip n_past elements
|
||||
// if mode & 2 == 1, GPT-NeoX style
|
||||
// TODO: avoid creating a new tensor every time
|
||||
|
@ -699,6 +874,23 @@ extern "C" {
|
|||
int n_dims,
|
||||
int mode);
|
||||
|
||||
// in-place, returns view(a)
|
||||
GGML_API struct ggml_tensor * ggml_rope_inplace(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
int n_past,
|
||||
int n_dims,
|
||||
int mode);
|
||||
|
||||
// rotary position embedding backward, i.e compute dx from dy
|
||||
// a - dy
|
||||
GGML_API struct ggml_tensor * ggml_rope_back(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
int n_past,
|
||||
int n_dims,
|
||||
int mode);
|
||||
|
||||
// alibi position embedding
|
||||
// in-place, returns view(a)
|
||||
struct ggml_tensor * ggml_alibi(
|
||||
|
@ -743,13 +935,13 @@ extern "C" {
|
|||
GGML_API struct ggml_tensor * ggml_map_unary_f32(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
const ggml_unary_op_f32_t fun);
|
||||
ggml_unary_op_f32_t fun);
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_map_binary_f32(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b,
|
||||
const ggml_binary_op_f32_t fun);
|
||||
ggml_binary_op_f32_t fun);
|
||||
|
||||
//
|
||||
// automatic differentiation
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue