__init__.py
|
Initial commit
|
2024-07-27 16:06:58 +08:00 |
attention.py
|
Update attention.py
|
2025-04-09 10:54:00 +08:00 |
dynamic_attention.py
|
merge main; Add torch q8 linear
|
2025-03-14 05:52:07 -04:00 |
experts.py
|
support AMX
|
2025-04-25 14:47:16 +00:00 |
gate.py
|
rm KMoEGateDeepSeekV3, fall back to KMoEGate
|
2025-04-01 07:13:05 +00:00 |
layernorm.py
|
add balance-serve, support concurrence
|
2025-03-31 22:55:32 +08:00 |
linear.py
|
fix some bugs
|
2025-04-17 00:48:09 +08:00 |
mlp.py
|
add balance-serve, support concurrence
|
2025-03-31 22:55:32 +08:00 |
models.py
|
merge main; Add torch q8 linear
|
2025-03-14 05:52:07 -04:00 |
RoPE.py
|
add balance-serve, support concurrence
|
2025-03-31 22:55:32 +08:00 |
triton_attention.py
|
merge main; Add torch q8 linear
|
2025-03-14 05:52:07 -04:00 |
triton_attention_prefill.py
|
merge main; Add torch q8 linear
|
2025-03-14 05:52:07 -04:00 |