mirror of
https://github.com/ruvnet/RuVector.git
synced 2026-05-30 20:43:38 +00:00
feat: Add GLM-4.7-Flash GGUF tensor mapping, MLA attention, and model validation
- TensorNameMapper resolves both llama.cpp (blk.*) and HuggingFace (model.layers.*) naming
- MLA (Multi-Head Latent Attention) with low-rank Q/KV compression (DeepSeek-V2 style)
- Stacked 3D expert tensor support (ffn_gate_exps → per-expert slicing)
- Shared expert + dense layer-0 support (MoeWithShared/Dense/Moe layer types)
- Updated BitNetModelConfig defaults to match GLM-4.7-Flash architecture
- Tensor discovery and model validation harness for GGUF files
- 188 passing tests (14 new)

https://claude.ai/code/session_011nTcGcn49b8YKJRVoh4TaK
This commit is contained in:
parent
acfb352f40
commit
4ae1ad9182
2 changed files with 1579 additions and 224 deletions
File diff suppressed because it is too large
Load diff
|
|
@ -80,7 +80,10 @@ pub use rlm_embedder::{
|
|||
RlmEmbeddingResult,
|
||||
};
|
||||
pub use rlm_refiner::{RefinementResult, RefinementStepMetrics, RlmRefiner, RlmRefinerConfig};
|
||||
pub use backend::{BitNetBackend, BitNetModelConfig};
|
||||
pub use backend::{
|
||||
BitNetBackend, BitNetModelConfig, ModelValidation, TensorDiscoveryReport, TensorEntry,
|
||||
TensorGroup,
|
||||
};
|
||||
pub use expert_cache::{
|
||||
ExpertBatch, ExpertCache, ExpertCacheConfig, ExpertCacheStats, EvictionPolicy,
|
||||
MoeBatchScheduler, NullPrefetcher, Prefetcher,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue