mirror of https://github.com/Lizonghang/prima.cpp.git

update README

commit e8d3e5a631 (parent 11ce0d58f7)
2 changed files with 6 additions and 8 deletions
@@ -232,10 +232,10 @@ Assume we have a host machine with at least 32 CPU cores, 32 GiB RAM, and 32 GiB
 1. Pull our prebuilt Docker image (e.g., [`prima.cpp:1.0.2-cuda`](https://hub.docker.com/repository/docker/lizonghango00o1/prima.cpp/general)) and run 4 containers:
 
    ```shell
-   sudo docker run -dit --name prima-v1 --memory=8gb --memory-swap=8gb --cpus 8 --cpuset-cpus="0-7" --network host --gpus all prima.cpp:1.0.1-cuda
-   sudo docker run -dit --name prima-v2 --memory=8gb --memory-swap=8gb --cpus 8 --cpuset-cpus="8-15" --network host --gpus all prima.cpp:1.0.1-cuda
-   sudo docker run -dit --name prima-v3 --memory=8gb --memory-swap=8gb --cpus 8 --cpuset-cpus="16-23" --network host --gpus all prima.cpp:1.0.1-cuda
-   sudo docker run -dit --name prima-v4 --memory=8gb --memory-swap=8gb --cpus 8 --cpuset-cpus="24-31" --network host --gpus all prima.cpp:1.0.1-cuda
+   sudo docker run -dit --name prima-v1 --memory=8gb --memory-swap=8gb --cpus 8 --cpuset-cpus="0-7" --network host --gpus all prima.cpp:1.0.2-cuda
+   sudo docker run -dit --name prima-v2 --memory=8gb --memory-swap=8gb --cpus 8 --cpuset-cpus="8-15" --network host --gpus all prima.cpp:1.0.2-cuda
+   sudo docker run -dit --name prima-v3 --memory=8gb --memory-swap=8gb --cpus 8 --cpuset-cpus="16-23" --network host --gpus all prima.cpp:1.0.2-cuda
+   sudo docker run -dit --name prima-v4 --memory=8gb --memory-swap=8gb --cpus 8 --cpuset-cpus="24-31" --network host --gpus all prima.cpp:1.0.2-cuda
    ```
 
 2. Download the model file [`qwq-32b-q4_k_m.gguf`](https://huggingface.co/Qwen/QwQ-32B-GGUF) and copy it into each container:
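For reference, step 2 of the README excerpt above can be done with `docker cp`. This is a minimal sketch, assuming the GGUF file has already been downloaded to the host's current directory, the containers `prima-v1`..`prima-v4` from step 1 are running, and `/root` is an acceptable target path (the destination path is an assumption, not taken from the README):

```shell
# Hedged sketch: copy the downloaded model into each of the four containers.
# Assumes qwq-32b-q4_k_m.gguf is in the host's current directory; adjust the
# /root target path to wherever the container expects the model file.
for i in 1 2 3 4; do
  sudo docker cp qwq-32b-q4_k_m.gguf prima-v${i}:/root/qwq-32b-q4_k_m.gguf
done
```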
@@ -21365,8 +21365,6 @@ void * llama_context_setup_backend(
             || model->n_gpu_layers == 0) {
             continue;
         }
 #elif defined(GGML_USE_METAL)
 
 #endif
 
         ok = ok & ggml_backend_sched_reserve(ctx->sched[i], gf[i]);
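The last context line above folds the result of `ggml_backend_sched_reserve` for each per-stage scheduler into a single `ok` flag. Below is a minimal sketch of that pattern only; the helper name `reserve_all_graphs` and the `scheds`/`graphs` arrays are illustrative, not prima.cpp's actual code.

```c
// Hedged sketch of the reservation pattern above, not prima.cpp's actual code.
// ggml_backend_sched_reserve() pre-allocates the backend buffers needed to run
// a measure graph and returns false if allocation fails.
#include <stdbool.h>
#include "ggml-backend.h"

static bool reserve_all_graphs(ggml_backend_sched_t * scheds,
                               struct ggml_cgraph ** graphs,
                               int n) {
    bool ok = true;
    for (int i = 0; i < n; ++i) {
        // Fold each per-scheduler reservation into one success flag, as in
        // `ok = ok & ggml_backend_sched_reserve(ctx->sched[i], gf[i]);` above.
        ok = ok & ggml_backend_sched_reserve(scheds[i], graphs[i]);
    }
    return ok;
}
```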
@@ -21933,10 +21931,10 @@ void llama_model_compute_buf_size(
     const int64_t n_result = hparams.n_vocab * cparams.n_ubatch;
 
     // weights
-    const int64_t nb_output_w = n_bytes.nb_output_w;
     const int64_t nb_attn_norm_w = n_bytes.nb_attn_norm_w;
     const int64_t nb_attn_q_w = n_bytes.nb_attn_q_w;
 
+    // const int64_t nb_output_w = n_bytes.nb_output_w;
 
     // format bytes
     const int64_t type_size_f32 = ggml_type_size(GGML_TYPE_F32);
     const int64_t type_size_f16 = ggml_type_size(GGML_TYPE_F16);
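In `llama_model_compute_buf_size`, `n_result` is the number of output logits (`n_vocab * n_ubatch`), and the `ggml_type_size` values convert element counts into bytes. The sketch below illustrates that kind of arithmetic only; it is not prima.cpp's actual buffer-size formula, and the values of `n_vocab` and `n_ubatch` are made up for illustration.

```c
// Hedged sketch: estimate the output-logits buffer size from the element
// count and element type, mirroring the n_result / ggml_type_size arithmetic
// above. Not prima.cpp's actual compute-buffer formula.
#include <stdint.h>
#include <stdio.h>
#include "ggml.h"   // ggml_type_size(), GGML_TYPE_F32

int main(void) {
    const int64_t n_vocab  = 152064;  // illustrative vocabulary size
    const int64_t n_ubatch = 512;     // illustrative micro-batch size

    // n_result logit elements, each stored as an f32 value.
    const int64_t n_result     = n_vocab * n_ubatch;
    const int64_t logits_bytes = n_result * (int64_t) ggml_type_size(GGML_TYPE_F32);

    printf("logits buffer: %lld bytes\n", (long long) logits_bytes);
    return 0;
}
```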