mirror of
https://github.com/kvcache-ai/ktransformers.git
synced 2026-05-02 21:51:30 +00:00
122 lines
No EOL
3.6 KiB
JSON
122 lines
No EOL
3.6 KiB
JSON
{
|
|
"DeepSeek-Coder-V2-Instruct": {
|
|
"hidden_size": 5120,
|
|
"intermediate_size": 12288,
|
|
"max_position_embeddings": 163840,
|
|
"model_type": "deepseek_v2",
|
|
"num_attention_heads": 128,
|
|
"num_hidden_layers": 60,
|
|
"num_key_value_heads": 128,
|
|
"vocab_size": 102400
|
|
},
|
|
"DeepSeek-R1": {
|
|
"hidden_size": 7168,
|
|
"intermediate_size": 18432,
|
|
"max_position_embeddings": 163840,
|
|
"model_type": "deepseek_v3",
|
|
"num_attention_heads": 128,
|
|
"num_hidden_layers": 61,
|
|
"num_key_value_heads": 128,
|
|
"vocab_size": 129280
|
|
},
|
|
"DeepSeek-V2-Lite-Chat": {
|
|
"hidden_size": 2048,
|
|
"intermediate_size": 10944,
|
|
"max_position_embeddings": 163840,
|
|
"model_type": "deepseek_v2",
|
|
"num_attention_heads": 16,
|
|
"num_hidden_layers": 27,
|
|
"num_key_value_heads": 16,
|
|
"vocab_size": 102400
|
|
},
|
|
"DeepSeek-V3": {
|
|
"hidden_size": 7168,
|
|
"intermediate_size": 18432,
|
|
"max_position_embeddings": 163840,
|
|
"model_type": "deepseek_v3",
|
|
"num_attention_heads": 128,
|
|
"num_hidden_layers": 3,
|
|
"num_key_value_heads": 128,
|
|
"vocab_size": 129280
|
|
},
|
|
"DeepSeek-V3-bf16": {
|
|
"hidden_size": 7168,
|
|
"intermediate_size": 18432,
|
|
"max_position_embeddings": 163840,
|
|
"model_type": "deepseek_v3",
|
|
"num_attention_heads": 128,
|
|
"num_hidden_layers": 61,
|
|
"num_key_value_heads": 128,
|
|
"vocab_size": 129280
|
|
},
|
|
"LLaMA-2-7B-32K": {
|
|
"hidden_size": 4096,
|
|
"intermediate_size": 11008,
|
|
"max_position_embeddings": 32768,
|
|
"model_type": "llama",
|
|
"num_attention_heads": 32,
|
|
"num_hidden_layers": 32,
|
|
"num_key_value_heads": 32,
|
|
"vocab_size": 32000
|
|
},
|
|
"Moonlight-16B-A3B-Instruct": {
|
|
"hidden_size": 2048,
|
|
"intermediate_size": 11264,
|
|
"max_position_embeddings": 8192,
|
|
"model_type": "deepseek_v3",
|
|
"num_attention_heads": 16,
|
|
"num_hidden_layers": 27,
|
|
"num_key_value_heads": 16,
|
|
"vocab_size": 163840
|
|
},
|
|
"Qwen2.5-32B-Instruct": {
|
|
"hidden_size": 5120,
|
|
"intermediate_size": 27648,
|
|
"max_position_embeddings": 32768,
|
|
"model_type": "qwen2",
|
|
"num_attention_heads": 40,
|
|
"num_hidden_layers": 64,
|
|
"num_key_value_heads": 8,
|
|
"vocab_size": 152064
|
|
},
|
|
"Qwen2.5-32B-Instruct-GPTQ-Int4": {
|
|
"hidden_size": 5120,
|
|
"intermediate_size": 27648,
|
|
"max_position_embeddings": 32768,
|
|
"model_type": "qwen2",
|
|
"num_attention_heads": 40,
|
|
"num_hidden_layers": 64,
|
|
"num_key_value_heads": 8,
|
|
"vocab_size": 152064
|
|
},
|
|
"Qwen2.5-7B-Instruct": {
|
|
"hidden_size": 3584,
|
|
"intermediate_size": 18944,
|
|
"max_position_embeddings": 32768,
|
|
"model_type": "qwen2",
|
|
"num_attention_heads": 28,
|
|
"num_hidden_layers": 28,
|
|
"num_key_value_heads": 4,
|
|
"vocab_size": 152064
|
|
},
|
|
"Qwen2.5-7B-Instruct-GPTQ-Int4": {
|
|
"hidden_size": 3584,
|
|
"intermediate_size": 18944,
|
|
"max_position_embeddings": 32768,
|
|
"model_type": "qwen2",
|
|
"num_attention_heads": 28,
|
|
"num_hidden_layers": 28,
|
|
"num_key_value_heads": 4,
|
|
"vocab_size": 152064
|
|
},
|
|
"qwen2-72b-instruct": {
|
|
"hidden_size": 8192,
|
|
"intermediate_size": 29568,
|
|
"max_position_embeddings": 32768,
|
|
"model_type": "qwen2",
|
|
"num_attention_heads": 64,
|
|
"num_hidden_layers": 80,
|
|
"num_key_value_heads": 8,
|
|
"vocab_size": 152064
|
|
}
|
|
} |