mirror of
https://github.com/kvcache-ai/ktransformers.git
synced 2025-09-14 09:09:42 +00:00
add balance-serve, support concurrence
This commit is contained in:
parent
8d0292aa44
commit
25cee5810e
196 changed files with 22077 additions and 565 deletions
|
@ -9,6 +9,7 @@
|
|||
**/
|
||||
// Python bindings
|
||||
#include "cpu_backend/cpuinfer.h"
|
||||
#include "device_launch_parameters.h"
|
||||
#include "llamafile/flags.h"
|
||||
#include "operators/kvcache/kvcache.h"
|
||||
#include "operators/llamafile/linear.h"
|
||||
|
@ -535,16 +536,17 @@ class MOEBindings {
|
|||
const float *weights;
|
||||
const void *input;
|
||||
void *output;
|
||||
int *batch_size_tensor;
|
||||
};
|
||||
static void inner(void *args) {
|
||||
Args *args_ = (Args *)args;
|
||||
args_->cpuinfer->enqueue(
|
||||
&MOE::forward, args_->moe, args_->qlen, args_->k,
|
||||
args_->expert_ids, args_->weights, args_->input, args_->output);
|
||||
args_->expert_ids, args_->weights, args_->input, args_->output, args_->batch_size_tensor);
|
||||
}
|
||||
static std::pair<intptr_t, intptr_t>
|
||||
cpuinfer_interface(MOE &moe, int qlen, int k, intptr_t expert_ids,
|
||||
intptr_t weights, intptr_t input, intptr_t output) {
|
||||
intptr_t weights, intptr_t input, intptr_t output, intptr_t batch_size_tensor) {
|
||||
Args *args = new Args{nullptr,
|
||||
&moe,
|
||||
qlen,
|
||||
|
@ -552,7 +554,8 @@ class MOEBindings {
|
|||
(const uint64_t *)expert_ids,
|
||||
(const float *)weights,
|
||||
(const void *)input,
|
||||
(void *)output};
|
||||
(void *)output,
|
||||
(int *)batch_size_tensor};
|
||||
return std::make_pair((intptr_t)&inner, (intptr_t)args);
|
||||
}
|
||||
};
|
||||
|
@ -679,4 +682,4 @@ PYBIND11_MODULE(cpuinfer_ext, m) {
|
|||
cpuinfer_interface)
|
||||
.def("calc_anchor_all_layers",
|
||||
&KVCacheBindings::CalcAnchorAllLayersBindinds::cpuinfer_interface);
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue