.. |
__init__.py
|
Initial commit
|
2024-07-27 16:06:58 +08:00 |
attention.py
|
Merge pull request #1276 from kvcache-ai/support_load_safetensor
|
2025-05-12 11:10:26 +08:00 |
balance_serve_attention.py
|
support safetensor load, delete architectures argument
|
2025-05-09 10:38:29 +00:00 |
base_operator.py
|
support safetensor load, delete architectures argument
|
2025-05-09 10:38:29 +00:00 |
cpuinfer.py
|
cpuinfer: filter repeated backend instantiation
|
2025-03-10 22:03:04 +08:00 |
dynamic_attention.py
|
merge main; Add torch q8 linear
|
2025-03-14 05:52:07 -04:00 |
experts.py
|
support safetensor load, delete architectures argument
|
2025-05-09 10:38:29 +00:00 |
flashinfer_batch_prefill_wrapper.py
|
support safetensor load, delete architectures argument
|
2025-05-09 10:38:29 +00:00 |
flashinfer_wrapper.py
|
fix-hopper-flashinfer
|
2025-04-29 11:06:34 +08:00 |
gate.py
|
support safetensor load, delete architectures argument
|
2025-05-09 10:38:29 +00:00 |
layernorm.py
|
support safetensor load, delete architectures argument
|
2025-05-09 10:38:29 +00:00 |
linear.py
|
support safetensor load, delete architectures argument
|
2025-05-09 10:38:29 +00:00 |
mlp.py
|
support safetensor load, delete architectures argument
|
2025-05-09 10:38:29 +00:00 |
models.py
|
Merge pull request #1276 from kvcache-ai/support_load_safetensor
|
2025-05-12 11:10:26 +08:00 |
RoPE.py
|
support safetensor load, delete architectures argument
|
2025-05-09 10:38:29 +00:00 |
triton_attention.py
|
merge main; Add torch q8 linear
|
2025-03-14 05:52:07 -04:00 |
triton_attention_prefill.py
|
merge main; Add torch q8 linear
|
2025-03-14 05:52:07 -04:00 |