mirror of
https://github.com/Lizonghang/prima.cpp.git
synced 2025-09-13 07:29:46 +00:00
improve the computing buffer estimate
This commit is contained in:
parent
0b4ffdfde5
commit
dd589561b4
8 changed files with 87 additions and 34 deletions
|
@ -293,10 +293,20 @@ struct model_bytes {
|
|||
int64_t nb_layer;
|
||||
int64_t nb_output;
|
||||
|
||||
// used to estimate the compute buffer size
|
||||
int64_t nb_output_w;
|
||||
int64_t nb_attn_norm_w;
|
||||
int64_t nb_ffn_gate_w;
|
||||
int64_t nb_ffn_down_w;
|
||||
|
||||
model_bytes() :
|
||||
nb_input (0),
|
||||
nb_layer (0),
|
||||
nb_output(0) {}
|
||||
nb_input (0),
|
||||
nb_layer (0),
|
||||
nb_output (0),
|
||||
nb_output_w (0),
|
||||
nb_attn_norm_w(0),
|
||||
nb_ffn_gate_w (0),
|
||||
nb_ffn_down_w (0) {}
|
||||
};
|
||||
|
||||
struct disk_props {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue