mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-04-28 03:30:20 +00:00
* hexagon: restore HTP_OPMASK_QUEUE * hexagon: honor OPMASK_SKIP_COMPUTE in hmx-matmul * hex-prof: restore op profiling * hex-prof: enable PMU * hexagon: simplify and improve op-queuing with full profiling support Add separate profile descriptors. * hexagon: remove opsync and rename opmask into opstage opsync is no longer needed since the profiler is fully async now. opmask name was confusing and opstage is more accurate. * hexagon: refactor opbatch queue handling * hexagon: add iface hooks for enabling profiler from the host Also move all the PMU setup stuff out of the hex-utils since it's not inteded for normal use. * hexagon: make profiler mode configurable On older devices getting PMU counters is expensive so it's now optional. * hexagon: add support for setting profiler pmu events from env * hexagon: simplify profiler output (no need to print buffs, etc) * hexagon: simplify pmu counter formating * hexagon: add a simple profile post-proc tool * hex-prof: add support for reading logs from stdin * hexagon: document GGML_HEXAGON_PROFILE * hex-prof: update default width for dims field * hex-prof: fix linter warnings and errors * Update ggml/src/ggml-hexagon/htp/htp-ops.h Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com> * Update scripts/snapdragon/ggml-hexagon-profile.py Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com> --------- Co-authored-by: Trivikram Reddy <tamarnat@qti.qualcomm.com> Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
68 lines
1.6 KiB
Bash
Executable file
68 lines
1.6 KiB
Bash
Executable file
#!/bin/sh
|
|
#
|
|
|
|
# Basedir on device
|
|
basedir=/data/local/tmp/llama.cpp
|
|
|
|
cli_opts=
|
|
|
|
branch=.
|
|
[ "$B" != "" ] && branch=$B
|
|
|
|
adbserial=
|
|
[ "$S" != "" ] && adbserial="-s $S"
|
|
|
|
adbhost=
|
|
[ "$H" != "" ] && adbhost="-H $H"
|
|
|
|
model="Llama-3.2-3B-Instruct-Q4_0.gguf"
|
|
[ "$M" != "" ] && model="$M"
|
|
|
|
device="HTP0"
|
|
[ "$D" != "" ] && device="$D"
|
|
|
|
verbose=
|
|
[ "$V" != "" ] && verbose="GGML_HEXAGON_VERBOSE=$V" cli_opts="$cli_opts -v"
|
|
|
|
sched=
|
|
[ "$SCHED" != "" ] && sched="GGML_SCHED_DEBUG=2" cli_opts="$cli_opts -v"
|
|
|
|
profile=
|
|
[ "$PROF" != "" ] && profile="GGML_HEXAGON_PROFILE=$PROF" cli_opts="$cli_opts -v"
|
|
|
|
opmask=
|
|
[ "$OPSTAGE" != "" ] && opmask="GGML_HEXAGON_OPSTAGE=$OPSTAGE"
|
|
|
|
nhvx=
|
|
[ "$NHVX" != "" ] && nhvx="GGML_HEXAGON_NHVX=$NHVX"
|
|
|
|
hmx=
|
|
[ "$HMX" != "" ] && hmx="GGML_HEXAGON_USE_HMX=$HMX"
|
|
|
|
ndev=
|
|
[ "$NDEV" != "" ] && ndev="GGML_HEXAGON_NDEV=$NDEV"
|
|
|
|
hb=
|
|
[ "$HB" != "" ] && hb="GGML_HEXAGON_HOSTBUF=$HB"
|
|
|
|
opbatch=
|
|
[ "$OB" != "" ] && opbatch="GGML_HEXAGON_OPBATCH=$OB"
|
|
|
|
opqueue=
|
|
[ "$OQ" != "" ] && opqueue="GGML_HEXAGON_OPQUEUE=$OQ"
|
|
|
|
opflt=
|
|
[ "$OF" != "" ] && opflt="GGML_HEXAGON_OPFILTER=$OF"
|
|
|
|
set -x
|
|
|
|
adb $adbserial $adbhost shell " \
|
|
cd $basedir; ulimit -c unlimited; \
|
|
LD_LIBRARY_PATH=$basedir/$branch/lib \
|
|
ADSP_LIBRARY_PATH=$basedir/$branch/lib \
|
|
$verbose $sched $opmask $profile $nhvx $hmx $ndev $hb $opbatch $opqueue $opflt \
|
|
./$branch/bin/llama-cli --no-mmap -m $basedir/../gguf/$model \
|
|
--poll 1000 -t 6 --cpu-mask 0xfc --cpu-strict 1 \
|
|
--ctx-size 8192 --ubatch-size 256 -fa on \
|
|
-ngl 99 --device $device $cli_opts $@ \
|
|
"
|