|
ascend
|
处理检视意见
|
2025-10-23 11:28:42 +08:00 |
|
__init__.py
|
Initial commit
|
2024-07-27 16:06:58 +08:00 |
|
attention.py
|
add XPU support for qwen3moe local chat
|
2025-05-22 21:01:41 +08:00 |
|
balance_serve_attention.py
|
fix bug
|
2025-09-16 13:21:58 +00:00 |
|
dynamic_attention.py
|
merge main; Add torch q8 linear
|
2025-03-14 05:52:07 -04:00 |
|
experts.py
|
fix: 合并最新main, 解决冲突
|
2025-09-29 17:30:40 +08:00 |
|
flashinfer_wrapper.py
|
fix:修复balance_server tp=1 不开图下沉报错
|
2025-09-22 20:52:07 +08:00 |
|
gate.py
|
fix:修复balance_server tp=1 不开图下沉报错
|
2025-09-22 20:52:07 +08:00 |
|
layernorm.py
|
support qwen3 next
|
2025-09-11 09:56:21 +00:00 |
|
linear.py
|
fix:修复balance_server tp=1 不开图下沉报错
|
2025-09-22 20:52:07 +08:00 |
|
mlp.py
|
support smt and qlm4
|
2025-07-25 12:48:51 +00:00 |
|
models.py
|
fix transformers local_chat
|
2025-10-23 17:51:19 +08:00 |
|
RoPE.py
|
support smt and qlm4
|
2025-07-25 12:48:51 +00:00 |
|
triton_attention.py
|
merge main; Add torch q8 linear
|
2025-03-14 05:52:07 -04:00 |
|
triton_attention_prefill.py
|
merge main; Add torch q8 linear
|
2025-03-14 05:52:07 -04:00 |