Enable testing on Snapdragon devices (#21051)

* Add the tests that we want to run on external CI * remove extra files * Fixes python issues, remove the deadlock on CI * remove unnecessary changes * use override to ty.toml * fix pre-commit and try tests with secret in external repo not upstream * skip if key is unavailable * Fix feedback * switch hexagon to snapdragon * cleanup * fix secrets * remove the copyrights at the top of the files
2026-05-19 08:00:25 +00:00 · 2026-04-23 13:08:10 -07:00 · 2026-04-23 13:08:10 -07:00 · 187a456370
commit 187a456370
parent 185cbff6f1
11 changed files with 764 additions and 99 deletions
--- a/.github/workflows/build-and-test-snapdragon.yml
+++ b/.github/workflows/build-and-test-snapdragon.yml
@ -0,0 +1,113 @@
+name: CI (snapdragon)
+
+# Runs on manual dispatch, and on pushes to master / pull requests that touch
+# the Hexagon backend, the Snapdragon docs or scripts, the CMake presets, or
+# this workflow file itself.
+on:
+  workflow_dispatch:
+  push:
+    branches:
+      - master
+    paths:
+      - '.github/workflows/build-and-test-snapdragon.yml'
+      - 'ggml/include/ggml-hexagon.h'
+      - 'ggml/src/ggml-hexagon/**'
+      - 'docs/backend/snapdragon/**'
+      - 'scripts/snapdragon/**'
+      - 'CMakePresets.json'
+
+  pull_request:
+    types: [opened, synchronize, reopened]
+    paths:
+      - '.github/workflows/build-and-test-snapdragon.yml'
+      - 'ggml/include/ggml-hexagon.h'
+      - 'ggml/src/ggml-hexagon/**'
+      - 'docs/backend/snapdragon/**'
+      - 'scripts/snapdragon/**'
+      - 'CMakePresets.json'
+
+# When github.head_ref is set the group is keyed by the ref, so a newer run
+# for the same ref cancels the older one; otherwise the unique run id is used
+# and runs never cancel each other.
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
+  cancel-in-progress: true
+
+jobs:
+  # Builds llama.cpp with the arm64-android-snapdragon-release preset inside
+  # the toolchain container and publishes the installed tree as an artifact.
+  android-ndk-snapdragon:
+    runs-on: ubuntu-latest
+    container:
+      image: 'ghcr.io/snapdragon-toolchain/arm64-android:v0.3'
+    defaults:
+      run:
+        shell: bash
+
+    steps:
+      - name: Clone
+        uses: actions/checkout@v6
+        with:
+          fetch-depth: 0
+          lfs: false
+
+      - name: Build Llama.CPP for Snapdragon Android
+        id: build_llama_cpp_snapdragon_android
+        # The user presets file is copied to the repository root before
+        # configuring; presumably it is what defines the preset used below.
+        run: |
+          cp docs/backend/snapdragon/CMakeUserPresets.json .
+          cmake --preset arm64-android-snapdragon-release -B build
+          cmake --build build
+          cmake --install build --prefix pkg-adb/llama.cpp
+
+      - name: Upload Llama.CPP Snapdragon Android Build Artifact
+        # Only upload when the build step itself succeeded.
+        if: ${{ always() && steps.build_llama_cpp_snapdragon_android.outcome == 'success' }}
+        uses: actions/upload-artifact@v6
+        with:
+          name: llama-cpp-android-arm64-snapdragon
+          path: pkg-adb/llama.cpp
+
+  # Publishes has-key=true/false so the device-test job can be skipped when
+  # the QDC_API_KEY secret is empty or unavailable to this run. Only the
+  # boolean result of the comparison is written, never the secret itself.
+  check-secret:
+    runs-on: ubuntu-latest
+    outputs:
+      has-key: ${{ steps.check.outputs.has-key }}
+    steps:
+      - id: check
+        run: echo "has-key=${{ secrets.QDC_API_KEY != '' }}" >> "$GITHUB_OUTPUT"
+
+  # Runs the on-device test suites once per chipset on Qualcomm Device Cloud.
+  # Skipped entirely when check-secret reports that QDC_API_KEY is missing.
+  test-snapdragon-qdc:
+    name: Test on QDC Android Device (${{ matrix.device }})
+    needs: [android-ndk-snapdragon, check-secret]
+    if: needs.check-secret.outputs.has-key == 'true'
+    runs-on: ubuntu-latest
+    strategy:
+      # Let every device finish even when another one fails.
+      fail-fast: false
+      matrix:
+        device: [SM8750, SM8650, SM8850]
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v6
+
+      - name: Download build artifact
+        uses: actions/download-artifact@v4
+        with:
+          name: llama-cpp-android-arm64-snapdragon
+          # The artifact was uploaded from the directory pkg-adb/llama.cpp, so
+          # its root holds that directory's contents (bin/, lib/, ...) rather
+          # than a llama.cpp/ folder. Extract straight into the directory that
+          # is handed to --pkg-dir below.
+          path: pkg-snapdragon/llama.cpp
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.x'
+          cache: pip
+
+      - name: Install QDC SDK wheel
+        run: |
+          curl -fSL -o qdc_sdk.zip https://softwarecenter.qualcomm.com/api/download/software/tools/Qualcomm_Device_Cloud_SDK/All/0.2.3/qualcomm_device_cloud_sdk-0.2.3.zip
+          unzip qdc_sdk.zip -d qdc_sdk
+          pip install qdc_sdk/qualcomm_device_cloud_sdk-0.2.3-py3-none-any.whl
+
+      - name: Run QDC tests (${{ matrix.device }})
+        run: |
+          python scripts/snapdragon/qdc/run_qdc_jobs.py \
+              --test       all \
+              --pkg-dir    pkg-snapdragon/llama.cpp \
+              --model-url  "https://huggingface.co/bartowski/Llama-3.2-1B-Instruct-GGUF/resolve/main/Llama-3.2-1B-Instruct-Q4_0.gguf" \
+              --device     ${{ matrix.device }}
+        env:
+          QDC_API_KEY: ${{ secrets.QDC_API_KEY }}
+
+      - name: Cleanup
+        if: always()
+        run: rm -rf pkg-snapdragon qdc_sdk qdc_sdk.zip
--- a/.github/workflows/build-android.yml
+++ b/.github/workflows/build-android.yml
@ -1,26 +1,24 @@
 name: CI (android)

 on:
-  workflow_dispatch: # allows manual triggering
+  workflow_dispatch:
  push:
    branches:
      - master
-    paths: [
-      '.github/workflows/build-android.yml',
-      '**/CMakeLists.txt',
-      '**/.cmake',
-      '**/*.h',
-      '**/*.hpp',
-      '**/*.c',
-      '**/*.cpp'
-    ]
+    paths:
+      - '.github/workflows/build-android.yml'
+      - '**/CMakeLists.txt'
+      - '**/.cmake'
+      - '**/*.h'
+      - '**/*.hpp'
+      - '**/*.c'
+      - '**/*.cpp'

  pull_request:
    types: [opened, synchronize, reopened]
-    paths: [
-      '.github/workflows/build-android.yml',
-      'examples/llama.android/**'
-    ]
+    paths:
+      - '.github/workflows/build-android.yml'
+      - 'examples/llama.android/**'

 concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
@ -67,35 +65,24 @@ jobs:
    defaults:
      run:
        shell: bash
-    strategy:
-      matrix:
-        include:
-          - build: 'arm64-cpu'
-            defines: '-D ANDROID_ABI=arm64-v8a -D ANDROID_PLATFORM=android-31 -D CMAKE_TOOLCHAIN_FILE=${ANDROID_NDK_ROOT}/build/cmake/android.toolchain.cmake -D GGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=armv8.5-a+fp16+i8mm -G Ninja -D LLAMA_OPENSSL=OFF -D GGML_OPENMP=OFF'
-          - build: 'arm64-snapdragon'
-            defines: '--preset arm64-android-snapdragon-release'

    steps:
      - name: Clone
-        id: checkout
        uses: actions/checkout@v6
        with:
          fetch-depth: 0
          lfs: false

-      - name: Build Llama.CPP for Hexagon Android
-        id: build_llama_cpp_hexagon_android
+      - name: Build
+        id: ndk_build
        run: |
-          if [[ "${{ matrix.build }}" == "arm64-snapdragon" ]]; then
-            cp docs/backend/snapdragon/CMakeUserPresets.json .
-          fi
-          cmake ${{ matrix.defines }} -B build
+          cmake -D ANDROID_ABI=arm64-v8a -D ANDROID_PLATFORM=android-31 -D CMAKE_TOOLCHAIN_FILE=${ANDROID_NDK_ROOT}/build/cmake/android.toolchain.cmake -D GGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=armv8.5-a+fp16+i8mm -G Ninja -D LLAMA_OPENSSL=OFF -D GGML_OPENMP=OFF -B build
          cmake --build build
          cmake --install build --prefix pkg-adb/llama.cpp

-      - name: Upload Llama.CPP Hexagon Android Build Artifact
-        if: ${{ always() && steps.build_llama_cpp_hexagon_android.outcome == 'success' }}
+      - name: Upload Android Build Artifact
+        if: ${{ always() && steps.ndk_build.outcome == 'success' }}
        uses: actions/upload-artifact@v6
        with:
-          name: llama-cpp-android-${{ matrix.build }}
+          name: llama-cpp-android-arm64-cpu
          path: pkg-adb/llama.cpp
--- a/scripts/snapdragon/qdc/readme.md
+++ b/scripts/snapdragon/qdc/readme.md
@ -1 +0,0 @@
-This directory includes pytest based scripts for running CI jobs on Qualcomm Device Cloud (QDC).
--- a/scripts/snapdragon/qdc/requirements.txt
+++ b/scripts/snapdragon/qdc/requirements.txt
@ -8,12 +8,9 @@ iniconfig==2.1.0
 outcome==1.3.0.post0
 packaging==25.0
 pluggy==1.6.0
-Pygments==2.19.2
 PySocks==1.7.1
 pytest==8.4.2
-pytest-dependency==0.6.0
 selenium==4.36.0
-setuptools==80.9.0
 sniffio==1.3.1
 sortedcontainers==2.4.0
 tomli==2.3.0
--- a/scripts/snapdragon/qdc/run_qdc_jobs.py
+++ b/scripts/snapdragon/qdc/run_qdc_jobs.py
@ -0,0 +1,401 @@
+"""Run llama.cpp Hexagon Android tests in a single QDC Appium job.
+
+Bundles the selected test scripts into one artifact and submits a single QDC
+job. The scripts live in scripts/snapdragon/qdc/tests/ and are chosen by --test:
+
+  bench        run_bench_tests_posix.py — llama-completion and llama-bench
+                                          on CPU / GPU / NPU
+  backend-ops  run_backend_ops_posix.py — test-backend-ops on HTP0
+  all          both of the above
+
+Results are written to $GITHUB_STEP_SUMMARY when set (GitHub Actions).
+
+Prerequisites:
+  pip install /path/to/qualcomm_device_cloud_sdk*.whl
+
+Required environment variables:
+  QDC_API_KEY   API key from QDC UI -> Users -> Settings -> API Keys
+
+Usage:
+  python run_qdc_jobs.py \\
+      --test       all \\
+      --pkg-dir    pkg-snapdragon/llama.cpp \\
+      --model-url  https://.../Llama-3.2-1B-Instruct-Q4_0.gguf \\
+      --device     SM8750
+"""
+
+from __future__ import annotations
+
+import argparse
+import logging
+import os
+import re
+import shutil
+import sys
+import tempfile
+import time
+import xml.etree.ElementTree as ET
+from dataclasses import dataclass, field
+from pathlib import Path
+
+from qualcomm_device_cloud_sdk.api import qdc_api  # ty: ignore[unresolved-import]
+from qualcomm_device_cloud_sdk.logging import configure_logging  # ty: ignore[unresolved-import]
+from qualcomm_device_cloud_sdk.models import ArtifactType, JobMode, JobState, JobSubmissionParameter, JobType, TestFramework  # ty: ignore[unresolved-import]
+
+configure_logging(level=logging.INFO, handlers=[logging.StreamHandler()])
+log = logging.getLogger(__name__)
+
+# Polling cadence and time limits; all of the values below are in seconds.
+POLL_INTERVAL        = 30
+JOB_TIMEOUT          = 3600
+LOG_UPLOAD_TIMEOUT   = 600
+CAPACITY_TIMEOUT     = 1800
+CAPACITY_POLL        = 60
+# wait_for_capacity() blocks while at least this many jobs are still active.
+MAX_CONCURRENT_JOBS  = 5
+# Job states that end polling in wait_for_job(), and the states counted as
+# "active" by wait_for_capacity().
+TERMINAL_STATES     = {JobState.COMPLETED, JobState.CANCELED}
+NON_TERMINAL_STATES = {JobState.DISPATCHED, JobState.RUNNING, JobState.SETUP, JobState.SUBMITTED}
+
+# Files that build_artifact_zip() copies into the uploaded artifact.
+_SCRIPTS_DIR      = Path(__file__).parent
+_TESTS_DIR        = _SCRIPTS_DIR / "tests"
+_RUN_BENCH        = _TESTS_DIR / "run_bench_tests_posix.py"
+_RUN_BACKEND_OPS  = _TESTS_DIR / "run_backend_ops_posix.py"
+_UTILS            = _TESTS_DIR / "utils.py"
+_CONFTEST         = _TESTS_DIR / "conftest.py"
+_REQUIREMENTS     = _SCRIPTS_DIR / "requirements.txt"
+
+# Matches "<optional file::><optional class::>test_name OUTCOME" and captures
+# the test name and the outcome word; used when no JUnit XML is available.
+_PYTEST_LINE_RE = re.compile(
+    r"(?:[\w/]+\.py::)?(?:\w+::)?([\w\[\].-]+)\s+(PASSED|FAILED|ERROR|SKIPPED)"
+)
+# Log files that are neither echoed nor attached to the summary.
+_EXCLUDED_LOGS = {"qdc_android_whole_host-000.log", "qdc_kernel_host-000.log"}
+# Raw enum values, for comparison against the `state` attribute of listed jobs.
+_NON_TERMINAL_STATE_VALUES = {s.value for s in NON_TERMINAL_STATES}
+
+
+@dataclass
+class JobResult:
+    """Outcome of one QDC job, as consumed by write_summary()."""
+
+    # Overall verdict for the job.
+    passed: bool
+    # Test name -> True when the test did not fail.
+    tests: dict[str, bool] = field(default_factory=dict)
+    # Log file name -> full text of that log.
+    raw_logs: dict[str, str] = field(default_factory=dict)
+    # Test name -> failure message/text, for tests that have one.
+    failure_details: dict[str, str] = field(default_factory=dict)
+
+
+def build_artifact_zip(
+    pkg_dir: Path,
+    stage_dir: Path,
+    *,
+    test_mode: str = "bench",
+    model_url: str | None = None,
+) -> Path:
+    """Bundle everything into a single QDC artifact zip.
+
+    Zip structure (extracted by QDC to /qdc/appium/ on the runner):
+      llama_cpp_bundle/            installed package (adb pushed to /data/local/tmp/)
+      tests/
+        utils.py                   shared helpers (paths, run_adb_command, …)
+        conftest.py                shared pytest fixtures (driver)
+        test_bench_posix.py        bench + cli tests (<<MODEL_URL>> substituted)
+          AND/OR
+        test_backend_ops_posix.py  test-backend-ops -b HTP0
+      requirements.txt
+    """
+    shutil.copytree(pkg_dir, stage_dir / "llama_cpp_bundle")
+
+    tests_dir = stage_dir / "tests"
+    tests_dir.mkdir()
+
+    shutil.copy(_UTILS,    tests_dir / "utils.py")
+    shutil.copy(_CONFTEST, tests_dir / "conftest.py")
+
+    if test_mode in ("bench", "all"):
+        assert model_url is not None, "--model-url is required for bench/all test modes"
+        (tests_dir / "test_bench_posix.py").write_text(
+            _RUN_BENCH.read_text().replace("<<MODEL_URL>>", model_url)
+        )
+    if test_mode in ("backend-ops", "all"):
+        shutil.copy(_RUN_BACKEND_OPS, tests_dir / "test_backend_ops_posix.py")
+
+    shutil.copy(_REQUIREMENTS, stage_dir / "requirements.txt")
+    (stage_dir / "pytest.ini").write_text("[pytest]\naddopts = --junitxml=results.xml\n")
+
+    zip_base = str(stage_dir / "artifact")
+    shutil.make_archive(zip_base, "zip", stage_dir)
+    return Path(f"{zip_base}.zip")
+
+
+def wait_for_job(client, job_id: str, timeout: int) -> str:
+    elapsed = 0
+    while elapsed < timeout:
+        raw = qdc_api.get_job_status(client, job_id)
+        try:
+            status = JobState(raw)
+        except ValueError:
+            status = raw
+        if status in TERMINAL_STATES:
+            return raw.lower()
+        log.info("Job %s: %s", job_id, raw)
+        time.sleep(POLL_INTERVAL)
+        elapsed += POLL_INTERVAL
+    raise TimeoutError(f"Job {job_id} did not finish within {timeout}s")
+
+
+def wait_for_log_upload(client, job_id: str) -> None:
+    elapsed = 0
+    while elapsed <= LOG_UPLOAD_TIMEOUT:
+        status = (qdc_api.get_job_log_upload_status(client, job_id) or "").lower()
+        if status in {"completed", "failed"}:
+            return
+        log.info("Waiting for log upload (status=%s) ...", status)
+        time.sleep(POLL_INTERVAL)
+        elapsed += POLL_INTERVAL
+    log.warning("Timed out waiting for log upload after %ds", LOG_UPLOAD_TIMEOUT)
+
+
+def wait_for_capacity(client, max_jobs: int = MAX_CONCURRENT_JOBS) -> None:
+    """Block until the user's active (non-terminal) QDC job count is below max_jobs."""
+    elapsed = 0
+    while elapsed < CAPACITY_TIMEOUT:
+        jobs_page = qdc_api.get_jobs_list(client, page_number=0, page_size=50)
+        if jobs_page is None:
+            log.warning("Could not retrieve job list; proceeding without capacity check")
+            return
+        items = getattr(jobs_page, "data", []) or []
+        active = sum(1 for j in items if getattr(j, "state", None) in _NON_TERMINAL_STATE_VALUES)
+        if active < max_jobs:
+            log.info("Active QDC jobs: %d / %d — proceeding", active, max_jobs)
+            return
+        log.info("Active QDC jobs: %d / %d — waiting %ds ...", active, max_jobs, CAPACITY_POLL)
+        time.sleep(CAPACITY_POLL)
+        elapsed += CAPACITY_POLL
+    log.warning("Capacity wait timed out after %ds; proceeding anyway", CAPACITY_TIMEOUT)
+
+
+def _parse_junit_xml(content: str) -> tuple[dict[str, bool], dict[str, str]]:
+    try:
+        root = ET.fromstring(content)
+    except ET.ParseError:
+        return {}, {}
+    results: dict[str, bool] = {}
+    failures: dict[str, str] = {}
+    for tc in root.iter("testcase"):
+        name = tc.get("name", "")
+        if classname := tc.get("classname", ""):
+            name = f"{classname}.{name}"
+        failure_el = tc.find("failure")
+        if failure_el is None:
+            failure_el = tc.find("error")
+        results[name] = failure_el is None
+        if failure_el is not None:
+            parts = [failure_el.get("message", ""), failure_el.text or ""]
+            failures[name] = "\n".join(p for p in parts if p).strip()
+    return results, failures
+
+
+def _parse_pytest_output(content: str) -> dict[str, bool]:
+    results: dict[str, bool] = {}
+    for m in _PYTEST_LINE_RE.finditer(content):
+        results[m.group(1)] = m.group(2) == "PASSED"
+    return results
+
+
+def fetch_logs_and_parse_tests(
+    client, job_id: str
+) -> tuple[dict[str, bool], dict[str, str], dict[str, str]]:
+    """Returns (test_results, raw_logs, failure_details)."""
+    log_files = qdc_api.get_job_log_files(client, job_id)
+    if not log_files:
+        log.warning("No log files returned for job %s", job_id)
+        return {}, {}, {}
+
+    test_results: dict[str, bool] = {}
+    pytest_fallback: dict[str, bool] = {}
+    raw_logs: dict[str, str] = {}
+    failure_details: dict[str, str] = {}
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        for lf in log_files:
+            log.info("Downloading log file: %s", lf.filename)
+            zip_path = os.path.join(tmpdir, "log.zip")
+            qdc_api.download_job_log_files(client, lf.filename, zip_path)
+            try:
+                shutil.unpack_archive(zip_path, tmpdir, "zip")
+            except Exception as e:
+                log.warning("Could not unpack %s as zip: %s", lf.filename, e)
+
+        for root_dir, _, files in os.walk(tmpdir):
+            for fname in sorted(files):
+                fpath = os.path.join(root_dir, fname)
+                content = Path(fpath).read_text(errors="replace")
+                if fname.endswith(".xml"):
+                    results, failures = _parse_junit_xml(content)
+                    test_results.update(results)
+                    failure_details.update(failures)
+                elif fname.endswith(".log"):
+                    if fname in _EXCLUDED_LOGS:
+                        continue
+                    log.info("--- %s ---", fname)
+                    log.info("%s", content)
+                    raw_logs[fname] = content
+                    pytest_fallback.update(_parse_pytest_output(content))
+
+    return (test_results if test_results else pytest_fallback), raw_logs, failure_details
+
+
+def write_summary(result: JobResult, title: str = "QDC Test Results") -> None:
+    summary_path = os.environ.get("GITHUB_STEP_SUMMARY")
+    if not summary_path:
+        return
+
+    icon = "✅" if result.passed else "❌"
+
+    lines = [
+        f"## {title}\n",
+        f"Overall: {icon} {'PASSED' if result.passed else 'FAILED'}\n",
+    ]
+    reportable = {n: ok for n, ok in result.tests.items() if "test_install" not in n}
+    if reportable:
+        lines += ["| Test | Result |", "| ---- | ------ |"]
+        for name, ok in reportable.items():
+            lines.append(f"| `{name}` | {'✅' if ok else '❌'} |")
+        passed_n = sum(1 for v in reportable.values() if v)
+        failed_n = sum(1 for v in reportable.values() if not v)
+        lines += ["", f"**{passed_n} passed, {failed_n} failed**"]
+    else:
+        lines.append("_No per-test data available._")
+
+    failed_names = [n for n, ok in reportable.items() if not ok]
+    if failed_names:
+        lines += ["", "### Failures"]
+        for name in failed_names:
+            detail = result.failure_details.get(name)
+            if detail:
+                lines += [
+                    f"<details><summary><code>{name}</code></summary>",
+                    "",
+                    "```",
+                    detail,
+                    "```",
+                    "",
+                    "</details>",
+                ]
+
+    if result.raw_logs:
+        lines += ["", "### Raw Logs"]
+        for fname, content in sorted(result.raw_logs.items()):
+            lines += [
+                f"<details><summary>{fname}</summary>",
+                "",
+                "```",
+                content.rstrip(),
+                "```",
+                "",
+                "</details>",
+            ]
+
+    with open(summary_path, "a") as f:
+        f.write("\n".join(lines) + "\n")
+
+
+def parse_args() -> argparse.Namespace:
+    p = argparse.ArgumentParser(
+        description=__doc__,
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+    )
+    p.add_argument("--pkg-dir",   required=True, type=Path,
+                   help="Installed llama.cpp package directory (contains bin/ and lib/)")
+    p.add_argument("--model-url",
+                   help="Direct URL to the GGUF model file (required for --test bench)")
+    p.add_argument("--device",    required=True,
+                   help="QDC chipset name, e.g. SM8750")
+    p.add_argument("--test", choices=["bench", "backend-ops", "all"], default="bench",
+                   help="Test suite to run (default: bench)")
+    p.add_argument("--job-timeout", type=int, default=JOB_TIMEOUT, metavar="SECONDS",
+                   help=f"Max seconds to wait for job completion (default: {JOB_TIMEOUT})")
+    args = p.parse_args()
+    if args.test in ("bench", "all") and not args.model_url:
+        p.error("--model-url is required when --test bench or --test all")
+    return args
+
+
+def main() -> int:
+    """Submit one QDC job for the requested device and report its outcome.
+
+    Validates the inputs, uploads the test artifact, submits the job, waits
+    for it and for its logs, then writes the step summary.
+
+    Returns:
+        0 when the job completed and no parsed test failed; 1 otherwise,
+        including configuration, upload, submission and timeout errors.
+    """
+    args = parse_args()
+
+    api_key = os.environ.get("QDC_API_KEY")
+    if not api_key:
+        log.error("QDC_API_KEY environment variable must be set")
+        return 1
+    if not args.pkg_dir.is_dir():
+        log.error("--pkg-dir %s does not exist", args.pkg_dir)
+        return 1
+
+    client = qdc_api.get_public_api_client_using_api_key(
+        api_key_header=api_key,
+        app_name_header="llama-cpp-ci",
+        on_behalf_of_header="llama-cpp-ci",
+        client_type_header="Python",
+    )
+
+    target_id = qdc_api.get_target_id(client, args.device)
+    if target_id is None:
+        log.error("Could not find QDC target for device %r", args.device)
+        return 1
+
+    # The staging directory is only needed until the upload call returns.
+    with tempfile.TemporaryDirectory() as tmpdir:
+        log.info("Building artifact ...")
+        zip_path = build_artifact_zip(
+            args.pkg_dir, Path(tmpdir),
+            test_mode=args.test, model_url=args.model_url,
+        )
+        log.info("Uploading artifact (%d MB) ...", zip_path.stat().st_size // 1_000_000)
+        artifact_id = qdc_api.upload_file(client, str(zip_path), ArtifactType.TESTSCRIPT)
+
+    if artifact_id is None:
+        log.error("Artifact upload failed")
+        return 1
+
+    # Hold off submitting while too many jobs are still active.
+    wait_for_capacity(client)
+
+    job_id = qdc_api.submit_job(
+        public_api_client=client,
+        target_id=target_id,
+        job_name="llama.cpp Hexagon tests",
+        external_job_id=None,
+        job_type=JobType.AUTOMATED,
+        job_mode=JobMode.APPLICATION,
+        # args.job_timeout is in seconds; it is integer-divided by 60 here,
+        # presumably because the SDK expects minutes (TODO confirm).
+        timeout=max(1, args.job_timeout // 60),
+        test_framework=TestFramework.APPIUM,
+        entry_script=None,
+        job_artifacts=[artifact_id],
+        monkey_events=None,
+        monkey_session_timeout=None,
+        job_parameters=[JobSubmissionParameter.WIFIENABLED],
+    )
+    if job_id is None:
+        log.error("Job submission failed")
+        return 1
+    log.info("Job submitted: %s  (device=%s)", job_id, args.device)
+
+    try:
+        job_status = wait_for_job(client, job_id, timeout=args.job_timeout)
+    except TimeoutError as e:
+        log.error("%s", e)
+        write_summary(JobResult(passed=False, tests={}), title=f"QDC Job Timed Out ({args.device})")
+        return 1
+    log.info("Job %s finished: %s", job_id, job_status)
+
+    wait_for_log_upload(client, job_id)
+    tests, raw_logs, failure_details = fetch_logs_and_parse_tests(client, job_id)
+
+    # With no parsed test results, the verdict rests on the job state alone.
+    passed = job_status == JobState.COMPLETED.value.lower()
+    if tests:
+        passed = passed and all(tests.values())
+    if not passed:
+        log.error("Job did not complete successfully or tests failed (status=%s)", job_status)
+
+    result = JobResult(passed=passed, tests=tests, raw_logs=raw_logs, failure_details=failure_details)
+    if args.test == "backend-ops":
+        title = f"Backend Ops — HTP0 ({args.device})"
+    elif args.test == "all":
+        title = f"QDC Tests ({args.device})"
+    else:
+        title = f"QDC Test Results ({args.device})"
+    write_summary(result, title=title)
+
+    return 0 if passed else 1
+
+
+if __name__ == "__main__":
+    sys.exit(main())
--- a/scripts/snapdragon/qdc/tests/conftest.py
+++ b/scripts/snapdragon/qdc/tests/conftest.py
@ -0,0 +1,20 @@
+"""Shared pytest fixtures for QDC on-device test runners."""
+
+import os
+
+import pytest
+from appium import webdriver
+
+from utils import options, write_qdc_log
+
+
+@pytest.fixture(scope="session", autouse=True)
+def driver():
+    return webdriver.Remote(command_executor="http://127.0.0.1:4723/wd/hub", options=options)
+
+
+def pytest_sessionfinish(session, exitstatus):
+    xml_path = getattr(session.config.option, "xmlpath", None) or "results.xml"
+    if os.path.exists(xml_path):
+        with open(xml_path) as f:
+            write_qdc_log("results.xml", f.read())
--- a/scripts/snapdragon/qdc/tests/run_backend_ops_posix.py
+++ b/scripts/snapdragon/qdc/tests/run_backend_ops_posix.py
@ -0,0 +1,41 @@
+"""
+On-device test-backend-ops runner for llama.cpp (HTP0 backend).
+
+Executed by QDC's Appium test framework on the QDC runner.
+The runner has ADB access to the allocated device.
+"""
+
+import os
+import sys
+
+import pytest
+
+from utils import BIN_PATH, CMD_PREFIX, push_bundle_if_needed, run_adb_command, write_qdc_log
+
+
+@pytest.fixture(scope="session", autouse=True)
+def install(driver):
+    push_bundle_if_needed(f"{BIN_PATH}/test-backend-ops")
+
+
+@pytest.mark.parametrize("type_a", ["mxfp4", "fp16", "q4_0"])
+def test_backend_ops_htp0(type_a):
+    cmd = f"{CMD_PREFIX} GGML_HEXAGON_HOSTBUF=0 GGML_HEXAGON_EXPERIMENTAL=1 {BIN_PATH}/test-backend-ops -b HTP0 -o MUL_MAT"
+    if type_a == "q4_0":
+        cmd += r' -p "^(?=.*type_a=q4_0)(?!.*type_b=f32,m=576,n=512,k=576).*$"'
+    else:
+        cmd += f" -p type_a={type_a}"
+    result = run_adb_command(
+        cmd,
+        check=False,
+    )
+    write_qdc_log(f"backend_ops_{type_a}.log", result.stdout or "")
+    assert result.returncode == 0, f"test-backend-ops type_a={type_a} failed (exit {result.returncode})"
+
+
+# Direct invocation: run this file under pytest, push the JUnit report to the
+# device log directory when one was written, and exit with pytest's status.
+if __name__ == "__main__":
+    ret = pytest.main(["-s", "--junitxml=results.xml", os.path.realpath(__file__)])
+    if os.path.exists("results.xml"):
+        with open("results.xml") as f:
+            write_qdc_log("results.xml", f.read())
+    sys.exit(ret)
--- a/scripts/snapdragon/qdc/tests/run_bench_tests_posix.py
+++ b/scripts/snapdragon/qdc/tests/run_bench_tests_posix.py
@ -0,0 +1,76 @@
+"""
+On-device bench and completion test runner for llama.cpp (CPU, GPU, NPU backends).
+
+Executed by QDC's Appium test framework on the QDC runner.
+The runner has ADB access to the allocated device.
+
+Placeholders replaced at artifact creation time by run_qdc_jobs.py:
+  <<MODEL_URL>>  Direct URL to the GGUF model file (downloaded on-device via curl)
+"""
+
+import os
+import subprocess
+import sys
+
+import pytest
+
+from utils import BIN_PATH, CMD_PREFIX, push_bundle_if_needed, run_adb_command, write_qdc_log
+
+MODEL_PATH = "/data/local/tmp/model.gguf"
+PROMPT     = "What is the capital of France?"
+CLI_OPTS   = "--batch-size 128 -n 128 -no-cnv --seed 42"
+
+
+@pytest.fixture(scope="session", autouse=True)
+def install(driver):
+    push_bundle_if_needed(f"{BIN_PATH}/llama-cli")
+
+    # Skip model download if already present
+    check = subprocess.run(
+        ["adb", "shell", f"ls {MODEL_PATH}"],
+        text=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
+    )
+    if check.returncode != 0:
+        run_adb_command(f'curl -L -J --output {MODEL_PATH} "<<MODEL_URL>>"')
+
+
+@pytest.mark.parametrize("device,extra_flags", [
+    pytest.param("none",      "-ctk q8_0 -ctv q8_0", id="cpu"),
+    pytest.param("GPUOpenCL", "",                     id="gpu"),
+    pytest.param("HTP0",      "-ctk q8_0 -ctv q8_0", id="npu"),
+])
+def test_llama_completion(device, extra_flags):
+    result = run_adb_command(
+        f'{CMD_PREFIX} {BIN_PATH}/llama-completion'
+        f' -m {MODEL_PATH} --device {device} -ngl 99 -t 4 {CLI_OPTS} {extra_flags} -fa on'
+        f' -p "{PROMPT}"',
+        check=False,
+    )
+    write_qdc_log(f"llama_completion_{device}.log", result.stdout or "")
+    assert result.returncode == 0, f"llama-completion {device} failed (exit {result.returncode})"
+
+
+_DEVICE_LOG_NAME = {"none": "cpu", "GPUOpenCL": "gpu", "HTP0": "htp"}
+
+
+@pytest.mark.parametrize("device", [
+    pytest.param("none",      id="cpu"),
+    pytest.param("GPUOpenCL", id="gpu"),
+    pytest.param("HTP0",      id="npu"),
+])
+def test_llama_bench(device):
+    result = run_adb_command(
+        f"{CMD_PREFIX} {BIN_PATH}/llama-bench"
+        f" -m {MODEL_PATH} --device {device} -ngl 99 --batch-size 128 -t 4 -p 128 -n 32",
+        check=False,
+    )
+    write_qdc_log(f"llama_bench_{_DEVICE_LOG_NAME[device]}.log", result.stdout or "")
+    assert result.returncode == 0, f"llama-bench {device} failed (exit {result.returncode})"
+
+
+# Direct invocation: run this file under pytest, push the JUnit report to the
+# device log directory when one was written, and exit with pytest's status.
+if __name__ == "__main__":
+    ret = pytest.main(["-s", "--junitxml=results.xml", os.path.realpath(__file__)])
+    if os.path.exists("results.xml"):
+        with open("results.xml") as f:
+            write_qdc_log("results.xml", f.read())
+    sys.exit(ret)
--- a/scripts/snapdragon/qdc/tests/test_bench.py
+++ b/scripts/snapdragon/qdc/tests/test_bench.py
@ -1,63 +0,0 @@
-import pytest
-import subprocess
-import sys
-
-tmp_path='/data/local/tmp'
-pkg_path=f'{tmp_path}/llama.cpp'
-lib_path=f'{pkg_path}/lib'
-bin_path=f'{pkg_path}/bin'
-
-model='../gguf/Llama-3.2-1B-Instruct-Q4_0.gguf'
-cli_pref=f'cd {pkg_path} && LD_LIBRARY_PATH={lib_path} ADSP_LIBRARY_PATH={lib_path} {bin_path}'
-
-
-def run_cmd(cmd):
-    p = subprocess.run(cmd, text = True, stdout = subprocess.PIPE, stderr = subprocess.STDOUT)
-    sys.stdout.write(p.stdout)
-    assert(p.returncode == 0)
-
-
-@pytest.mark.dependency()
-def test_install():
-    run_cmd(['adb', 'push', 'llama.cpp', f'{tmp_path}'])
-    run_cmd(['adb', 'shell', f'chmod 755 {bin_path}/*'])
-
-
-## Basic cli tests
-def run_llama_cli(dev, opts):
-    prompt='what is the most popular cookie in the world?\nPlease provide a very brief bullet point summary.\nBegin your answer with **BEGIN**.'
-    opts = '--batch-size 128 -n 128 -no-cnv --seed 42 ' + opts
-    run_cmd(['adb', 'shell', f'{cli_pref}/llama-cli -m {model} --device {dev} -ngl 99 -t 4 {opts} -p "{prompt}"'])
-
-
-@pytest.mark.dependency(depends=['test_install'])
-def test_llama_cli_cpu():
-    run_llama_cli('none', '-ctk q8_0 -ctv q8_0 -fa on')
-
-
-@pytest.mark.dependency(depends=['test_install'])
-def test_llama_cli_gpu():
-    run_llama_cli('GPUOpenCL', '-fa on')
-
-
-@pytest.mark.dependency(depends=['test_install'])
-def test_llama_cli_npu():
-    run_llama_cli('HTP0', '-ctk q8_0 -ctv q8_0 -fa on')
-
-
-## Basic bench tests
-def run_llama_bench(dev):
-    run_cmd(['adb', 'shell', f'{cli_pref}/llama-bench -m {model} --device {dev} -ngl 99 --batch-size 128 -t 4 -p 128 -n 32'])
-
-
-@pytest.mark.dependency(depends=['test_install'])
-def test_llama_bench_cpu():
-    run_llama_bench('none')
-
-
-def test_llama_bench_gpu():
-    run_llama_bench('GPUOpenCL')
-
-
-def test_llama_bench_npu():
-    run_llama_bench('HTP0')
--- a/scripts/snapdragon/qdc/tests/utils.py
+++ b/scripts/snapdragon/qdc/tests/utils.py
@ -0,0 +1,93 @@
+"""Shared helpers for QDC on-device test runners."""
+
+import logging
+import os
+import subprocess
+import tempfile
+
+from appium.options.common import AppiumOptions
+
+log = logging.getLogger(__name__)
+
+# ---------------------------------------------------------------------------
+# On-device paths
+# ---------------------------------------------------------------------------
+
+# Where the pushed bundle lives on the device, and where log files are
+# written for QDC to collect.
+BUNDLE_PATH  = "/data/local/tmp/llama_cpp_bundle"
+QDC_LOGS_PATH = "/data/local/tmp/QDC_logs"
+LIB_PATH    = f"{BUNDLE_PATH}/lib"
+BIN_PATH    = f"{BUNDLE_PATH}/bin"
+# Shell prefix that exports the library paths and marks the binaries
+# executable. It ends in "&&", so a command must always follow it.
+ENV_PREFIX  = (
+    f"export LD_LIBRARY_PATH={LIB_PATH} && "
+    f"export ADSP_LIBRARY_PATH={LIB_PATH} && "
+    f"chmod +x {BIN_PATH}/* &&"
+)
+# ENV_PREFIX, executed from inside the bundle directory.
+CMD_PREFIX  = f"cd {BUNDLE_PATH} && {ENV_PREFIX}"
+
+# ---------------------------------------------------------------------------
+# Appium session options
+# ---------------------------------------------------------------------------
+
+options = AppiumOptions()
+options.set_capability("automationName", "UiAutomator2")
+options.set_capability("platformName", "Android")
+# NOTE(review): ANDROID_DEVICE_VERSION is presumably provided by the QDC
+# runner; when it is unset the capability value is None — confirm.
+options.set_capability("deviceName", os.getenv("ANDROID_DEVICE_VERSION"))
+
+# ---------------------------------------------------------------------------
+# ADB helpers
+# ---------------------------------------------------------------------------
+
+
+def run_adb_command(cmd: str, *, check: bool = True) -> subprocess.CompletedProcess:
+    # Append exit-code sentinel because `adb shell` doesn't reliably propagate
+    # the on-device exit code (older ADB versions always return 0).
+    raw = subprocess.run(
+        ["adb", "shell", f"{cmd}; echo __RC__:$?"],
+        text=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
+    )
+    stdout = raw.stdout
+    returncode = raw.returncode
+    if stdout:
+        lines = stdout.rstrip("\n").split("\n")
+        if lines and lines[-1].startswith("__RC__:"):
+            try:
+                returncode = int(lines[-1][7:])
+                stdout = "\n".join(lines[:-1]) + "\n"
+            except ValueError:
+                pass
+    log.info("%s", stdout)
+    result = subprocess.CompletedProcess(raw.args, returncode, stdout=stdout)
+    if check:
+        assert returncode == 0, f"Command failed (exit {returncode})"
+    return result
+
+
+def write_qdc_log(filename: str, content: str) -> None:
+    """Push content as a log file to QDC_LOGS_PATH on the device for QDC log collection."""
+    subprocess.run(
+        ["adb", "shell", f"mkdir -p {QDC_LOGS_PATH}"],
+        stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
+    )
+    with tempfile.NamedTemporaryFile(mode="w", suffix=".log", delete=False) as f:
+        f.write(content)
+        tmp_path = f.name
+    try:
+        subprocess.run(
+            ["adb", "push", tmp_path, f"{QDC_LOGS_PATH}/{filename}"],
+            stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
+        )
+    finally:
+        os.unlink(tmp_path)
+
+
+def push_bundle_if_needed(check_binary: str) -> None:
+    """Push llama_cpp_bundle to the device if check_binary is not already present."""
+    result = subprocess.run(
+        ["adb", "shell", f"ls {check_binary}"],
+        text=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
+    )
+    if result.returncode != 0:
+        subprocess.run(
+            ["adb", "push", "/qdc/appium/llama_cpp_bundle/", "/data/local/tmp"],
+            text=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
+        )
--- a/ty.toml
+++ b/ty.toml
@ -1,5 +1,5 @@
 [environment]
-extra-paths = ["./gguf-py", "./examples/model-conversion/scripts", "./tools/server/tests"]
+extra-paths = ["./gguf-py", "./examples/model-conversion/scripts", "./tools/server/tests", "./scripts/snapdragon/qdc/tests"]
 python-version = "3.10"

 [rules]
@ -13,6 +13,7 @@ exclude = [
 [[overrides]]
 include = [
    "./tools/server/tests/**",
+    "./scripts/snapdragon/qdc/tests/**",
 ]

 [overrides.rules]
				`@ -1 +0,0 @@`
				`This directory includes pytest based scripts for running CI jobs on Qualcomm Device Cloud (QDC).`