kvcache-ai-ktransformers/ktransformers/configs/model_configs.json
2025-03-31 22:55:32 +08:00

122 lines
No EOL
3.6 KiB
JSON

{
"DeepSeek-Coder-V2-Instruct": {
"hidden_size": 5120,
"intermediate_size": 12288,
"max_position_embeddings": 163840,
"model_type": "deepseek_v2",
"num_attention_heads": 128,
"num_hidden_layers": 60,
"num_key_value_heads": 128,
"vocab_size": 102400
},
"DeepSeek-R1": {
"hidden_size": 7168,
"intermediate_size": 18432,
"max_position_embeddings": 163840,
"model_type": "deepseek_v3",
"num_attention_heads": 128,
"num_hidden_layers": 61,
"num_key_value_heads": 128,
"vocab_size": 129280
},
"DeepSeek-V2-Lite-Chat": {
"hidden_size": 2048,
"intermediate_size": 10944,
"max_position_embeddings": 163840,
"model_type": "deepseek_v2",
"num_attention_heads": 16,
"num_hidden_layers": 27,
"num_key_value_heads": 16,
"vocab_size": 102400
},
"DeepSeek-V3": {
"hidden_size": 7168,
"intermediate_size": 18432,
"max_position_embeddings": 163840,
"model_type": "deepseek_v3",
"num_attention_heads": 128,
"num_hidden_layers": 61,
"num_key_value_heads": 128,
"vocab_size": 129280
},
"DeepSeek-V3-bf16": {
"hidden_size": 7168,
"intermediate_size": 18432,
"max_position_embeddings": 163840,
"model_type": "deepseek_v3",
"num_attention_heads": 128,
"num_hidden_layers": 61,
"num_key_value_heads": 128,
"vocab_size": 129280
},
"LLaMA-2-7B-32K": {
"hidden_size": 4096,
"intermediate_size": 11008,
"max_position_embeddings": 32768,
"model_type": "llama",
"num_attention_heads": 32,
"num_hidden_layers": 32,
"num_key_value_heads": 32,
"vocab_size": 32000
},
"Moonlight-16B-A3B-Instruct": {
"hidden_size": 2048,
"intermediate_size": 11264,
"max_position_embeddings": 8192,
"model_type": "deepseek_v3",
"num_attention_heads": 16,
"num_hidden_layers": 27,
"num_key_value_heads": 16,
"vocab_size": 163840
},
"Qwen2.5-32B-Instruct": {
"hidden_size": 5120,
"intermediate_size": 27648,
"max_position_embeddings": 32768,
"model_type": "qwen2",
"num_attention_heads": 40,
"num_hidden_layers": 64,
"num_key_value_heads": 8,
"vocab_size": 152064
},
"Qwen2.5-32B-Instruct-GPTQ-Int4": {
"hidden_size": 5120,
"intermediate_size": 27648,
"max_position_embeddings": 32768,
"model_type": "qwen2",
"num_attention_heads": 40,
"num_hidden_layers": 64,
"num_key_value_heads": 8,
"vocab_size": 152064
},
"Qwen2.5-7B-Instruct": {
"hidden_size": 3584,
"intermediate_size": 18944,
"max_position_embeddings": 32768,
"model_type": "qwen2",
"num_attention_heads": 28,
"num_hidden_layers": 28,
"num_key_value_heads": 4,
"vocab_size": 152064
},
"Qwen2.5-7B-Instruct-GPTQ-Int4": {
"hidden_size": 3584,
"intermediate_size": 18944,
"max_position_embeddings": 32768,
"model_type": "qwen2",
"num_attention_heads": 28,
"num_hidden_layers": 28,
"num_key_value_heads": 4,
"vocab_size": 152064
},
"qwen2-72b-instruct": {
"hidden_size": 8192,
"intermediate_size": 29568,
"max_position_embeddings": 32768,
"model_type": "qwen2",
"num_attention_heads": 64,
"num_hidden_layers": 80,
"num_key_value_heads": 8,
"vocab_size": 152064
}
}