mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-06-01 06:00:36 +00:00
hexagon: add support for Q4_1 in MUL_MAT and MUL_MAT_ID (#23647)
* hex-mm: add support for Q4_1 matmul/matvec, hvx-only for now * hmx-mm: add support for Q4_1 * hex-mm: use Q8_1 dynamic quantization to avoid having to compute sums in the vec_dot * hexagon: fix repack scratch buffer overflow * hex-mm: fix Q4_1 repack buffer sizing * hexagon: flip the build order for mm and fa (seems to help LTO) * hex-mm: add vec_dot 4x1s and minor HMX cleanup after adding Q4_1 * hex-mm: fix fp16 vec_dot fallback to 2x1 and another issue that could cause incorrect output * hexagon: resurrect early-wake and add support for polling for op-batch completions With Q4_1 ggml-hexagon now claims pretty much the entire graphs which gives the CPU more time to chilax. This is a good thing! But it does add extra latency for the pure benchmark runs. Early wakeup helps recover the latency a bit in the normals runs and op-batch polling is just for benchmarking. --------- Co-authored-by: Todor Boinovski <todorb@qti.qualcomm.com>
This commit is contained in:
parent
c40006a62e
commit
aa50b2c2ae
8 changed files with 1890 additions and 74 deletions
|
|
@ -51,6 +51,9 @@ opbatch=
|
|||
opqueue=
|
||||
[ "$OQ" != "" ] && opqueue="GGML_HEXAGON_OPQUEUE=$OQ"
|
||||
|
||||
oppoll=
|
||||
[ "$OP" != "" ] && oppoll="GGML_HEXAGON_OPPOLL=$OP"
|
||||
|
||||
opflt=
|
||||
[ "$OF" != "" ] && opflt="GGML_HEXAGON_OPFILTER=$OF"
|
||||
|
||||
|
|
@ -66,7 +69,7 @@ adb $adbserial $adbhost shell " \
|
|||
cd $basedir; ulimit -c unlimited; \
|
||||
LD_LIBRARY_PATH=$basedir/$branch/lib \
|
||||
ADSP_LIBRARY_PATH=$basedir/$branch/lib \
|
||||
$verbose $sched $opmask $profile $nhvx $hmx $ndev $hb $opbatch $opqueue $opflt $vmem $mbuf \
|
||||
$verbose $sched $opmask $profile $nhvx $hmx $ndev $hb $opbatch $opqueue $oppoll $opflt $vmem $mbuf \
|
||||
./$branch/bin/llama-completion --no-mmap -m $basedir/../gguf/$model \
|
||||
--poll 1000 -t 6 --cpu-mask 0xfc --cpu-strict 1 \
|
||||
--ctx-size 8192 --ubatch-size 1024 -fa on \
|
||||
|
|
|
|||
|
|
@ -42,6 +42,15 @@ ndev=
|
|||
hb=
|
||||
[ "$HB" != "" ] && hb="GGML_HEXAGON_HOSTBUF=$HB"
|
||||
|
||||
opbatch=
|
||||
[ "$OB" != "" ] && opbatch="GGML_HEXAGON_OPBATCH=$OB"
|
||||
|
||||
opqueue=
|
||||
[ "$OQ" != "" ] && opqueue="GGML_HEXAGON_OPQUEUE=$OQ"
|
||||
|
||||
oppoll=
|
||||
[ "$OP" != "" ] && oppoll="GGML_HEXAGON_OPPOLL=$OP"
|
||||
|
||||
set -x
|
||||
|
||||
tool=$1; shift
|
||||
|
|
@ -50,5 +59,5 @@ adb $adbserial $adbhost shell " \
|
|||
cd $basedir; ulimit -c unlimited; \
|
||||
LD_LIBRARY_PATH=$basedir/$branch/lib \
|
||||
ADSP_LIBRARY_PATH=$basedir/$branch/lib \
|
||||
$verbose $sched $opmask $profile $nhvx $hmx $ndev $hb ./$branch/bin/$tool $@ \
|
||||
$verbose $sched $opmask $profile $nhvx $hmx $ndev $hb $opbatch $opqueue $oppoll ./$branch/bin/$tool $@ \
|
||||
"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue