mirror of
https://github.com/ruvnet/RuView.git
synced 2026-04-26 13:10:40 +00:00
feat: QEMU ESP32-S3 testing platform + swarm configurator (ADR-061/062) (#260)
9-layer QEMU testing platform (ADR-061) and YAML-driven swarm configurator (ADR-062) for ESP32-S3 firmware testing without hardware. 12 commits, 56 files, +9,500 lines. Tested on Windows with Espressif QEMU 9.0.0 — firmware boots, mock CSI generates frames, 14/16 validation checks pass. 39 bugs found and fixed across 2 deep code reviews. Closes #259 Co-Authored-By: claude-flow <ruv@ruv.net>
This commit is contained in:
parent
a467dfed9f
commit
523be943b0
57 changed files with 9532 additions and 8 deletions
355
.github/workflows/firmware-qemu.yml
vendored
Normal file
355
.github/workflows/firmware-qemu.yml
vendored
Normal file
|
|
@ -0,0 +1,355 @@
|
|||
name: Firmware QEMU Tests (ADR-061)
|
||||
|
||||
on:
|
||||
push:
|
||||
paths:
|
||||
- 'firmware/**'
|
||||
- 'scripts/qemu-esp32s3-test.sh'
|
||||
- 'scripts/validate_qemu_output.py'
|
||||
- 'scripts/generate_nvs_matrix.py'
|
||||
- 'scripts/qemu_swarm.py'
|
||||
- 'scripts/swarm_health.py'
|
||||
- 'scripts/swarm_presets/**'
|
||||
- '.github/workflows/firmware-qemu.yml'
|
||||
pull_request:
|
||||
paths:
|
||||
- 'firmware/**'
|
||||
- 'scripts/qemu-esp32s3-test.sh'
|
||||
- 'scripts/validate_qemu_output.py'
|
||||
- 'scripts/generate_nvs_matrix.py'
|
||||
- 'scripts/qemu_swarm.py'
|
||||
- 'scripts/swarm_health.py'
|
||||
- 'scripts/swarm_presets/**'
|
||||
- '.github/workflows/firmware-qemu.yml'
|
||||
|
||||
env:
|
||||
IDF_VERSION: "v5.4"
|
||||
QEMU_REPO: "https://github.com/espressif/qemu.git"
|
||||
QEMU_BRANCH: "esp-develop"
|
||||
|
||||
jobs:
|
||||
build-qemu:
|
||||
name: Build Espressif QEMU
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Cache QEMU build
|
||||
id: cache-qemu
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: /opt/qemu-esp32
|
||||
# Include date component so cache refreshes monthly when branch updates
|
||||
key: qemu-esp32s3-${{ env.QEMU_BRANCH }}-v4
|
||||
restore-keys: |
|
||||
qemu-esp32s3-${{ env.QEMU_BRANCH }}-
|
||||
|
||||
- name: Install QEMU build dependencies
|
||||
if: steps.cache-qemu.outputs.cache-hit != 'true'
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y \
|
||||
git build-essential ninja-build pkg-config \
|
||||
libglib2.0-dev libpixman-1-dev libslirp-dev \
|
||||
python3 python3-venv
|
||||
|
||||
- name: Clone and build Espressif QEMU
|
||||
if: steps.cache-qemu.outputs.cache-hit != 'true'
|
||||
run: |
|
||||
git clone --depth 1 -b "$QEMU_BRANCH" "$QEMU_REPO" /tmp/qemu-esp
|
||||
cd /tmp/qemu-esp
|
||||
mkdir build && cd build
|
||||
../configure \
|
||||
--target-list=xtensa-softmmu \
|
||||
--prefix=/opt/qemu-esp32 \
|
||||
--enable-slirp \
|
||||
--disable-werror
|
||||
ninja -j$(nproc)
|
||||
ninja install
|
||||
|
||||
- name: Verify QEMU binary
|
||||
run: |
|
||||
file_size() { stat -c%s "$1" 2>/dev/null || stat -f%z "$1" 2>/dev/null || wc -c < "$1"; }
|
||||
/opt/qemu-esp32/bin/qemu-system-xtensa --version
|
||||
echo "QEMU binary size: $(file_size /opt/qemu-esp32/bin/qemu-system-xtensa) bytes"
|
||||
|
||||
- name: Upload QEMU artifact
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: qemu-esp32
|
||||
path: /opt/qemu-esp32/
|
||||
retention-days: 7
|
||||
|
||||
qemu-test:
|
||||
name: QEMU Test (${{ matrix.nvs_config }})
|
||||
needs: build-qemu
|
||||
runs-on: ubuntu-latest
|
||||
container:
|
||||
image: espressif/idf:v5.4
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
nvs_config:
|
||||
- default
|
||||
- full-adr060
|
||||
- edge-tier0
|
||||
- edge-tier1
|
||||
- tdm-3node
|
||||
- boundary-max
|
||||
- boundary-min
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Download QEMU artifact
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: qemu-esp32
|
||||
path: /opt/qemu-esp32
|
||||
|
||||
- name: Make QEMU executable
|
||||
run: chmod +x /opt/qemu-esp32/bin/qemu-system-xtensa
|
||||
|
||||
- name: Verify QEMU works
|
||||
run: /opt/qemu-esp32/bin/qemu-system-xtensa --version
|
||||
|
||||
- name: Install Python dependencies
|
||||
run: pip install esptool esp-idf-nvs-partition-gen
|
||||
|
||||
- name: Set target ESP32-S3
|
||||
working-directory: firmware/esp32-csi-node
|
||||
run: |
|
||||
. $IDF_PATH/export.sh
|
||||
idf.py set-target esp32s3
|
||||
|
||||
- name: Build firmware (mock CSI mode)
|
||||
working-directory: firmware/esp32-csi-node
|
||||
run: |
|
||||
. $IDF_PATH/export.sh
|
||||
idf.py \
|
||||
-D SDKCONFIG_DEFAULTS="sdkconfig.defaults;sdkconfig.qemu" \
|
||||
build
|
||||
|
||||
- name: Generate NVS matrix
|
||||
run: |
|
||||
python3 scripts/generate_nvs_matrix.py \
|
||||
--output-dir firmware/esp32-csi-node/build/nvs_matrix \
|
||||
--only ${{ matrix.nvs_config }}
|
||||
|
||||
- name: Create merged flash image
|
||||
working-directory: firmware/esp32-csi-node
|
||||
run: |
|
||||
. $IDF_PATH/export.sh
|
||||
|
||||
# Determine merge_bin arguments
|
||||
OTA_ARGS=""
|
||||
if [ -f build/ota_data_initial.bin ]; then
|
||||
OTA_ARGS="0xf000 build/ota_data_initial.bin"
|
||||
fi
|
||||
|
||||
python3 -m esptool --chip esp32s3 merge_bin \
|
||||
-o build/qemu_flash.bin \
|
||||
--flash_mode dio --flash_freq 80m --flash_size 8MB \
|
||||
0x0 build/bootloader/bootloader.bin \
|
||||
0x8000 build/partition_table/partition-table.bin \
|
||||
$OTA_ARGS \
|
||||
0x20000 build/esp32-csi-node.bin
|
||||
|
||||
file_size() { stat -c%s "$1" 2>/dev/null || stat -f%z "$1" 2>/dev/null || wc -c < "$1"; }
|
||||
echo "Flash image size: $(file_size build/qemu_flash.bin) bytes"
|
||||
|
||||
- name: Inject NVS partition
|
||||
if: matrix.nvs_config != 'default'
|
||||
working-directory: firmware/esp32-csi-node
|
||||
run: |
|
||||
NVS_BIN="build/nvs_matrix/nvs_${{ matrix.nvs_config }}.bin"
|
||||
if [ -f "$NVS_BIN" ]; then
|
||||
file_size() { stat -c%s "$1" 2>/dev/null || stat -f%z "$1" 2>/dev/null || wc -c < "$1"; }
|
||||
echo "Injecting NVS: $NVS_BIN ($(file_size "$NVS_BIN") bytes)"
|
||||
dd if="$NVS_BIN" of=build/qemu_flash.bin \
|
||||
bs=1 seek=$((0x9000)) conv=notrunc 2>/dev/null
|
||||
else
|
||||
echo "WARNING: NVS binary not found: $NVS_BIN"
|
||||
fi
|
||||
|
||||
- name: Run QEMU smoke test
|
||||
env:
|
||||
QEMU_PATH: /opt/qemu-esp32/bin/qemu-system-xtensa
|
||||
QEMU_TIMEOUT: "90"
|
||||
run: |
|
||||
echo "Starting QEMU (timeout: ${QEMU_TIMEOUT}s)..."
|
||||
|
||||
timeout "$QEMU_TIMEOUT" "$QEMU_PATH" \
|
||||
-machine esp32s3 \
|
||||
-nographic \
|
||||
-drive file=firmware/esp32-csi-node/build/qemu_flash.bin,if=mtd,format=raw \
|
||||
-serial mon:stdio \
|
||||
-nic user,model=open_eth,net=10.0.2.0/24 \
|
||||
-no-reboot \
|
||||
2>&1 | tee firmware/esp32-csi-node/build/qemu_output.log || true
|
||||
|
||||
echo "QEMU finished. Log size: $(wc -l < firmware/esp32-csi-node/build/qemu_output.log) lines"
|
||||
|
||||
- name: Validate QEMU output
|
||||
run: |
|
||||
python3 scripts/validate_qemu_output.py \
|
||||
firmware/esp32-csi-node/build/qemu_output.log
|
||||
|
||||
- name: Upload test logs
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: qemu-logs-${{ matrix.nvs_config }}
|
||||
path: |
|
||||
firmware/esp32-csi-node/build/qemu_output.log
|
||||
firmware/esp32-csi-node/build/nvs_matrix/
|
||||
retention-days: 14
|
||||
|
||||
fuzz-test:
|
||||
name: Fuzz Testing (ADR-061 Layer 6)
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Install clang
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y clang
|
||||
|
||||
- name: Build fuzz targets
|
||||
working-directory: firmware/esp32-csi-node/test
|
||||
run: make all CC=clang
|
||||
|
||||
- name: Run serialize fuzzer (60s)
|
||||
working-directory: firmware/esp32-csi-node/test
|
||||
run: make run_serialize FUZZ_DURATION=60 || echo "FUZZER_CRASH=serialize" >> "$GITHUB_ENV"
|
||||
|
||||
- name: Run edge enqueue fuzzer (60s)
|
||||
working-directory: firmware/esp32-csi-node/test
|
||||
run: make run_edge FUZZ_DURATION=60 || echo "FUZZER_CRASH=edge" >> "$GITHUB_ENV"
|
||||
|
||||
- name: Run NVS config fuzzer (60s)
|
||||
working-directory: firmware/esp32-csi-node/test
|
||||
run: make run_nvs FUZZ_DURATION=60 || echo "FUZZER_CRASH=nvs" >> "$GITHUB_ENV"
|
||||
|
||||
- name: Check for crashes
|
||||
working-directory: firmware/esp32-csi-node/test
|
||||
run: |
|
||||
CRASHES=$(find . -type f \( -name "crash-*" -o -name "oom-*" -o -name "timeout-*" \) 2>/dev/null | wc -l)
|
||||
echo "Crash artifacts found: $CRASHES"
|
||||
if [ "$CRASHES" -gt 0 ] || [ -n "${FUZZER_CRASH:-}" ]; then
|
||||
echo "::error::Fuzzer found $CRASHES crash/oom/timeout artifacts. FUZZER_CRASH=${FUZZER_CRASH:-none}"
|
||||
ls -la crash-* oom-* timeout-* 2>/dev/null
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Upload fuzz artifacts
|
||||
if: failure()
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: fuzz-crashes
|
||||
path: |
|
||||
firmware/esp32-csi-node/test/crash-*
|
||||
firmware/esp32-csi-node/test/oom-*
|
||||
firmware/esp32-csi-node/test/timeout-*
|
||||
retention-days: 30
|
||||
|
||||
nvs-matrix-validate:
|
||||
name: NVS Matrix Generation
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Install NVS generator
|
||||
run: pip install esp-idf-nvs-partition-gen
|
||||
|
||||
- name: Generate all 14 NVS configs
|
||||
run: |
|
||||
python3 scripts/generate_nvs_matrix.py \
|
||||
--output-dir build/nvs_matrix
|
||||
|
||||
- name: Verify all binaries generated
|
||||
run: |
|
||||
EXPECTED=14
|
||||
ACTUAL=$(find build/nvs_matrix -type f -name "nvs_*.bin" 2>/dev/null | wc -l)
|
||||
echo "Generated $ACTUAL / $EXPECTED NVS binaries"
|
||||
ls -la build/nvs_matrix/
|
||||
|
||||
if [ "$ACTUAL" -lt "$EXPECTED" ]; then
|
||||
echo "::error::Only $ACTUAL of $EXPECTED NVS binaries generated"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Verify binary sizes
|
||||
run: |
|
||||
file_size() { stat -c%s "$1" 2>/dev/null || stat -f%z "$1" 2>/dev/null || wc -c < "$1"; }
|
||||
for f in build/nvs_matrix/nvs_*.bin; do
|
||||
SIZE=$(file_size "$f")
|
||||
if [ "$SIZE" -ne 24576 ]; then
|
||||
echo "::error::$f has unexpected size $SIZE (expected 24576)"
|
||||
exit 1
|
||||
fi
|
||||
echo " OK: $(basename $f) ($SIZE bytes)"
|
||||
done
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# ADR-062: QEMU Swarm Configurator Test
|
||||
#
|
||||
# Runs a lightweight 3-node swarm (ci_matrix preset) under QEMU to validate
|
||||
# multi-node orchestration, TDM slot coordination, and swarm-level health
|
||||
# assertions. Uses the pre-built QEMU binary from the build-qemu job and the
|
||||
# firmware built by qemu-test.
|
||||
#
|
||||
# The CI runner is non-root, so TAP bridge networking is unavailable.
|
||||
# The orchestrator (qemu_swarm.py) detects this and falls back to SLIRP
|
||||
# user-mode networking, which is sufficient for the ci_matrix preset.
|
||||
# ---------------------------------------------------------------------------
|
||||
swarm-test:
|
||||
name: Swarm Test (ADR-062)
|
||||
needs: [build-qemu]
|
||||
runs-on: ubuntu-latest
|
||||
container:
|
||||
image: espressif/idf:v5.4
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Download QEMU artifact
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: qemu-esp32
|
||||
path: ${{ github.workspace }}/qemu-build
|
||||
|
||||
- name: Make QEMU executable
|
||||
run: chmod +x ${{ github.workspace }}/qemu-build/bin/qemu-system-xtensa
|
||||
|
||||
- name: Install Python dependencies
|
||||
run: pip install pyyaml esptool esp-idf-nvs-partition-gen
|
||||
|
||||
- name: Build firmware for swarm
|
||||
working-directory: firmware/esp32-csi-node
|
||||
run: |
|
||||
. $IDF_PATH/export.sh
|
||||
idf.py set-target esp32s3
|
||||
idf.py -D SDKCONFIG_DEFAULTS="sdkconfig.defaults;sdkconfig.qemu" build
|
||||
python3 -m esptool --chip esp32s3 merge_bin \
|
||||
-o build/qemu_flash.bin \
|
||||
--flash_mode dio --flash_freq 80m --flash_size 8MB \
|
||||
0x0 build/bootloader/bootloader.bin \
|
||||
0x8000 build/partition_table/partition-table.bin \
|
||||
0x20000 build/esp32-csi-node.bin
|
||||
|
||||
- name: Run swarm smoke test
|
||||
run: |
|
||||
python3 scripts/qemu_swarm.py --preset ci_matrix \
|
||||
--qemu-path ${{ github.workspace }}/qemu-build/bin/qemu-system-xtensa \
|
||||
--output-dir build/swarm-results
|
||||
timeout-minutes: 10
|
||||
|
||||
- name: Upload swarm results
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: swarm-results
|
||||
path: |
|
||||
build/swarm-results/
|
||||
retention-days: 14
|
||||
49
.vscode/launch.json
vendored
Normal file
49
.vscode/launch.json
vendored
Normal file
|
|
@ -0,0 +1,49 @@
|
|||
{
|
||||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
{
|
||||
"name": "QEMU ESP32-S3 Debug",
|
||||
"type": "cppdbg",
|
||||
"request": "launch",
|
||||
"program": "${workspaceFolder}/firmware/esp32-csi-node/build/esp32-csi-node.elf",
|
||||
"cwd": "${workspaceFolder}/firmware/esp32-csi-node",
|
||||
"MIMode": "gdb",
|
||||
"miDebuggerPath": "xtensa-esp-elf-gdb",
|
||||
"miDebuggerServerAddress": "localhost:1234",
|
||||
"setupCommands": [
|
||||
{
|
||||
"description": "Set remote hardware breakpoint limit (ESP32-S3 has 2)",
|
||||
"text": "set remote hardware-breakpoint-limit 2",
|
||||
"ignoreFailures": false
|
||||
},
|
||||
{
|
||||
"description": "Set remote hardware watchpoint limit (ESP32-S3 has 2)",
|
||||
"text": "set remote hardware-watchpoint-limit 2",
|
||||
"ignoreFailures": false
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "QEMU ESP32-S3 Debug (attach)",
|
||||
"type": "cppdbg",
|
||||
"request": "attach",
|
||||
"program": "${workspaceFolder}/firmware/esp32-csi-node/build/esp32-csi-node.elf",
|
||||
"cwd": "${workspaceFolder}/firmware/esp32-csi-node",
|
||||
"MIMode": "gdb",
|
||||
"miDebuggerPath": "xtensa-esp-elf-gdb",
|
||||
"miDebuggerServerAddress": "localhost:1234",
|
||||
"setupCommands": [
|
||||
{
|
||||
"description": "Set remote hardware breakpoint limit (ESP32-S3 has 2)",
|
||||
"text": "set remote hardware-breakpoint-limit 2",
|
||||
"ignoreFailures": false
|
||||
},
|
||||
{
|
||||
"description": "Set remote hardware watchpoint limit (ESP32-S3 has 2)",
|
||||
"text": "set remote hardware-watchpoint-limit 2",
|
||||
"ignoreFailures": false
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
28
CHANGELOG.md
28
CHANGELOG.md
|
|
@ -8,6 +8,34 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|||
## [Unreleased]
|
||||
|
||||
### Added
|
||||
- **QEMU ESP32-S3 testing platform (ADR-061)** — 9-layer firmware testing without hardware
|
||||
- Mock CSI generator with 10 physics-based scenarios (empty room, walking, fall, multi-person, etc.)
|
||||
- Single-node QEMU runner with 16-check UART validation
|
||||
- Multi-node TDM mesh simulation (TAP networking, 2-6 nodes)
|
||||
- GDB remote debugging with VS Code integration
|
||||
- Code coverage via gcov/lcov + apptrace
|
||||
- Fuzz testing (3 libFuzzer targets + ASAN/UBSAN)
|
||||
- NVS provisioning matrix (14 configs)
|
||||
- Snapshot-based regression testing (sub-second VM restore)
|
||||
- Chaos testing with fault injection + health monitoring
|
||||
- **QEMU Swarm Configurator (ADR-062)** — YAML-driven multi-ESP32 test orchestration
|
||||
- 4 topologies: star, mesh, line, ring
|
||||
- 3 node roles: sensor, coordinator, gateway
|
||||
- 9 swarm-level assertions (boot, crashes, TDM, frame rate, fall detection, etc.)
|
||||
- 7 presets: smoke (2n/15s), standard (3n/60s), ci-matrix, large-mesh, line-relay, ring-fault, heterogeneous
|
||||
- Health oracle with cross-node validation
|
||||
- **QEMU installer** (`install-qemu.sh`) — auto-detects OS, installs deps, builds Espressif QEMU fork
|
||||
- **Unified QEMU CLI** (`qemu-cli.sh`) — single entry point for all 11 QEMU test commands
|
||||
- CI: `firmware-qemu.yml` workflow with QEMU test matrix, fuzz testing, NVS validation, and swarm test jobs
|
||||
- User guide: QEMU testing and swarm configurator section with plain-language walkthrough
|
||||
|
||||
### Fixed
|
||||
- Firmware now boots in QEMU: WiFi/UDP/OTA/display guards for mock CSI mode
|
||||
- 9 bugs in mock_csi.c (LFSR bias, MAC filter init, scenario loop, overflow burst timing)
|
||||
- 23 bugs from ADR-061 deep review (inject_fault.py writes, CI cache, snapshot log corruption, etc.)
|
||||
- 16 bugs from ADR-062 deep review (log filename mismatch, SLIRP port collision, heap false positives, etc.)
|
||||
- All scripts: `--help` flags, prerequisite checks with install hints, standardized exit codes
|
||||
|
||||
- **Sensing server UI API completion (ADR-043)** — 14 fully-functional REST endpoints for model management, CSI recording, and training control
|
||||
- Model CRUD: `GET /api/v1/models`, `GET /api/v1/models/active`, `POST /api/v1/models/load`, `POST /api/v1/models/unload`, `DELETE /api/v1/models/:id`, `GET /api/v1/models/lora/profiles`, `POST /api/v1/models/lora/activate`
|
||||
- CSI recording: `GET /api/v1/recording/list`, `POST /api/v1/recording/start`, `POST /api/v1/recording/stop`, `DELETE /api/v1/recording/:id`
|
||||
|
|
|
|||
80
README.md
80
README.md
|
|
@ -75,7 +75,7 @@ docker run -p 3000:3000 ruvnet/wifi-densepose:latest
|
|||
|----------|-------------|
|
||||
| [User Guide](docs/user-guide.md) | Step-by-step guide: installation, first run, API usage, hardware setup, training |
|
||||
| [Build Guide](docs/build-guide.md) | Building from source (Rust and Python) |
|
||||
| [Architecture Decisions](docs/adr/README.md) | 49 ADRs — why each technical choice was made, organized by domain (hardware, signal processing, ML, platform, infrastructure) |
|
||||
| [Architecture Decisions](docs/adr/README.md) | 62 ADRs — why each technical choice was made, organized by domain (hardware, signal processing, ML, platform, infrastructure) |
|
||||
| [Domain Models](docs/ddd/README.md) | 7 DDD models (RuvSense, Signal Processing, Training Pipeline, Hardware Platform, Sensing Server, WiFi-Mat, CHCI) — bounded contexts, aggregates, domain events, and ubiquitous language |
|
||||
| [Desktop App](rust-port/wifi-densepose-rs/crates/wifi-densepose-desktop/README.md) | **WIP** — Tauri v2 desktop app for node management, OTA updates, WASM deployment, and mesh visualization |
|
||||
|
||||
|
|
@ -1696,6 +1696,82 @@ WebSocket: `ws://localhost:3001/ws/sensing` (real-time sensing + vital signs)
|
|||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><strong>QEMU Firmware Testing (ADR-061) — 9-Layer Platform</strong></summary>
|
||||
|
||||
Test ESP32-S3 firmware without physical hardware using Espressif's QEMU fork. The platform provides 9 layers of testing capability:
|
||||
|
||||
| Layer | Capability | Script / Config |
|
||||
|-------|-----------|-----------------|
|
||||
| 1 | Mock CSI generator (10 physics-based scenarios) | `firmware/esp32-csi-node/main/mock_csi.c` |
|
||||
| 2 | Single-node QEMU runner + UART validation (16 checks) | `scripts/qemu-esp32s3-test.sh`, `scripts/validate_qemu_output.py` |
|
||||
| 3 | Multi-node TDM mesh simulation (TAP networking) | `scripts/qemu-mesh-test.sh`, `scripts/validate_mesh_test.py` |
|
||||
| 4 | GDB remote debugging (VS Code integration) | `.vscode/launch.json` |
|
||||
| 5 | Code coverage (gcov/lcov via apptrace) | `firmware/esp32-csi-node/sdkconfig.coverage` |
|
||||
| 6 | Fuzz testing (libFuzzer + ASAN/UBSAN) | `firmware/esp32-csi-node/test/fuzz_*.c` |
|
||||
| 7 | NVS provisioning matrix (14 configs) | `scripts/generate_nvs_matrix.py` |
|
||||
| 8 | Snapshot regression (sub-second VM restore) | `scripts/qemu-snapshot-test.sh` |
|
||||
| 9 | Chaos testing (fault injection + health monitoring) | `scripts/qemu-chaos-test.sh`, `scripts/inject_fault.py`, `scripts/check_health.py` |
|
||||
|
||||
```bash
|
||||
# Quick start: build + run + validate
|
||||
cd firmware/esp32-csi-node
|
||||
idf.py -D SDKCONFIG_DEFAULTS="sdkconfig.defaults;sdkconfig.qemu" build
|
||||
|
||||
# Single-node test (builds, merges flash, runs QEMU, validates output)
|
||||
bash scripts/qemu-esp32s3-test.sh
|
||||
|
||||
# Multi-node mesh test (3 QEMU instances with TDM)
|
||||
sudo bash scripts/qemu-mesh-test.sh 3
|
||||
|
||||
# Fuzz testing (60 seconds per target)
|
||||
cd firmware/esp32-csi-node/test && make all CC=clang && make run_serialize FUZZ_DURATION=60
|
||||
|
||||
# Chaos testing (fault injection resilience)
|
||||
bash scripts/qemu-chaos-test.sh --faults all --duration 120
|
||||
```
|
||||
|
||||
**10 test scenarios**: empty room, static person, walking, fall, multi-person, channel sweep, MAC filter, ring overflow, boundary RSSI, zero-length frames.
|
||||
|
||||
**14 NVS configs**: default, WiFi-only, full ADR-060, edge tiers 0/1/2, TDM mesh, WASM signed/unsigned, 5GHz, boundary max/min, power-save, empty-strings.
|
||||
|
||||
**CI**: GitHub Actions workflow runs 7 NVS matrix configs, 3 fuzz targets, and NVS binary validation on every push to `firmware/`.
|
||||
|
||||
See [ADR-061](docs/adr/ADR-061-qemu-esp32s3-firmware-testing.md) for the full architecture.
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><strong>QEMU Swarm Configurator (ADR-062)</strong></summary>
|
||||
|
||||
Test multiple ESP32-S3 nodes simultaneously using a YAML-driven orchestrator. Define node roles, network topologies, and validation assertions in a config file.
|
||||
|
||||
```bash
|
||||
# Quick smoke test (2 nodes, 15 seconds)
|
||||
python3 scripts/qemu_swarm.py --preset smoke
|
||||
|
||||
# Standard 3-node test (coordinator + 2 sensors)
|
||||
python3 scripts/qemu_swarm.py --preset standard
|
||||
|
||||
# See all presets
|
||||
python3 scripts/qemu_swarm.py --list-presets
|
||||
|
||||
# Preview without running
|
||||
python3 scripts/qemu_swarm.py --preset standard --dry-run
|
||||
```
|
||||
|
||||
**Topologies**: star (sensors → coordinator), mesh (fully connected), line (relay chain), ring (circular).
|
||||
|
||||
**Node roles**: sensor (generates CSI), coordinator (aggregates), gateway (bridges to host).
|
||||
|
||||
**7 presets**: smoke, standard, ci-matrix, large-mesh, line-relay, ring-fault, heterogeneous.
|
||||
|
||||
**9 swarm assertions**: boot check, crash detection, TDM collision, frame production, coordinator reception, fall detection, frame rate, boot time, heap health.
|
||||
|
||||
See [ADR-062](docs/adr/ADR-062-qemu-swarm-configurator.md) and the [User Guide](docs/user-guide.md#testing-firmware-without-hardware-qemu) for step-by-step instructions.
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><strong>Python Legacy CLI</strong> — v1 API server commands</summary>
|
||||
|
||||
|
|
@ -1715,7 +1791,9 @@ wifi-densepose tasks list # List background tasks
|
|||
<details>
|
||||
<summary><strong>Documentation Links</strong></summary>
|
||||
|
||||
- [User Guide](docs/user-guide.md) — installation, first run, API, hardware setup, QEMU testing
|
||||
- [WiFi-Mat User Guide](docs/wifi-mat-user-guide.md) | [Domain Model](docs/ddd/wifi-mat-domain-model.md)
|
||||
- [ADR-061](docs/adr/ADR-061-qemu-esp32s3-firmware-testing.md) QEMU platform | [ADR-062](docs/adr/ADR-062-qemu-swarm-configurator.md) Swarm configurator
|
||||
- [ADR-021](docs/adr/ADR-021-vital-sign-detection-rvdna-pipeline.md) | [ADR-022](docs/adr/ADR-022-windows-wifi-enhanced-fidelity-ruvector.md) | [ADR-023](docs/adr/ADR-023-trained-densepose-model-ruvector-pipeline.md)
|
||||
|
||||
</details>
|
||||
|
|
|
|||
|
|
@ -2,8 +2,8 @@
|
|||
|
||||
| Field | Value |
|
||||
|-------------|------------------------------------------------|
|
||||
| **Status** | Proposed |
|
||||
| **Date** | 2026-03-13 |
|
||||
| **Status** | Accepted |
|
||||
| **Date** | 2026-03-13 (updated 2026-03-14) |
|
||||
| **Authors** | RuView Team |
|
||||
| **Relates** | ADR-018 (binary frame), ADR-039 (edge intel), ADR-040 (WASM), ADR-057 (build guard), ADR-060 (channel/MAC filter) |
|
||||
|
||||
|
|
@ -32,6 +32,98 @@ Currently, **every code change requires flashing to physical hardware** on COM7.
|
|||
|
||||
Espressif maintains an official QEMU fork (`github.com/espressif/qemu`) with ESP32-S3 machine support, including dual-core Xtensa LX7, flash mapping, UART, GPIO, timers, and FreeRTOS.
|
||||
|
||||
## Glossary
|
||||
|
||||
| Term | Definition |
|
||||
|------|-----------|
|
||||
| CSI | Channel State Information — per-subcarrier amplitude/phase from WiFi |
|
||||
| NVS | Non-Volatile Storage — ESP-IDF key-value flash partition |
|
||||
| TDM | Time-Division Multiplexing — nodes transmit in assigned time slots |
|
||||
| UART | Universal Asynchronous Receiver-Transmitter — serial console output |
|
||||
| SLIRP | User-mode TCP/IP stack — enables networking without root/TAP |
|
||||
| QEMU | Quick Emulator — runs ESP32-S3 firmware without physical hardware |
|
||||
| QMP | QEMU Machine Protocol — JSON-based control interface |
|
||||
| LFSR | Linear Feedback Shift Register — deterministic pseudo-random generator |
|
||||
| SPSC | Single Producer Single Consumer — lock-free ring buffer pattern |
|
||||
| FreeRTOS | Real-time OS used by ESP-IDF for task scheduling |
|
||||
| gcov/lcov | GCC code coverage tools for line/branch analysis |
|
||||
| libFuzzer | LLVM coverage-guided fuzzer for finding crashes |
|
||||
| ASAN | AddressSanitizer — detects buffer overflows and use-after-free |
|
||||
| UBSAN | UndefinedBehaviorSanitizer — detects undefined C behavior |
|
||||
|
||||
## Quick Start
|
||||
|
||||
### Prerequisites
|
||||
|
||||
Install required tools:
|
||||
|
||||
```bash
|
||||
# QEMU (Espressif fork with ESP32-S3 support)
|
||||
git clone https://github.com/espressif/qemu.git
|
||||
cd qemu && ./configure --target-list=xtensa-softmmu && make -j$(nproc)
|
||||
export QEMU_PATH=/path/to/qemu/build/qemu-system-xtensa
|
||||
|
||||
# ESP-IDF (for building firmware)
|
||||
# See https://docs.espressif.com/projects/esp-idf/en/latest/esp32s3/get-started/
|
||||
|
||||
# Python tools
|
||||
pip install esptool esp-idf-nvs-partition-gen
|
||||
|
||||
# Coverage tools (optional, Layer 5)
|
||||
sudo apt install lcov # Debian/Ubuntu
|
||||
brew install lcov # macOS
|
||||
|
||||
# Fuzz testing (optional, Layer 6)
|
||||
sudo apt install clang # Debian/Ubuntu
|
||||
|
||||
# Mesh testing (optional, Layer 3 — requires root)
|
||||
sudo apt install socat bridge-utils iproute2
|
||||
```
|
||||
|
||||
### Run the Full Test Suite
|
||||
|
||||
```bash
|
||||
# Layer 2: Single-node test (build + run + validate)
|
||||
bash scripts/qemu-esp32s3-test.sh
|
||||
|
||||
# Layer 3: Multi-node mesh (3 nodes, requires root)
|
||||
sudo bash scripts/qemu-mesh-test.sh 3
|
||||
|
||||
# Layer 6: Fuzz testing (60 seconds per target)
|
||||
cd firmware/esp32-csi-node/test && make all CC=clang
|
||||
make run_serialize FUZZ_DURATION=60
|
||||
|
||||
# Layer 7: Generate NVS test matrix
|
||||
python3 scripts/generate_nvs_matrix.py --output-dir build/nvs_matrix
|
||||
|
||||
# Layer 8: Snapshot regression tests
|
||||
bash scripts/qemu-snapshot-test.sh --create
|
||||
bash scripts/qemu-snapshot-test.sh --restore csi-streaming
|
||||
|
||||
# Layer 9: Chaos/fault injection
|
||||
bash scripts/qemu-chaos-test.sh --faults all --duration 120
|
||||
```
|
||||
|
||||
### Environment Variables
|
||||
|
||||
| Variable | Default | Description |
|
||||
|----------|---------|-------------|
|
||||
| `QEMU_PATH` | `qemu-system-xtensa` | Path to Espressif QEMU binary |
|
||||
| `QEMU_TIMEOUT` | `60` (single) / `45` (mesh) / `120` (chaos) | Test timeout in seconds |
|
||||
| `SKIP_BUILD` | unset | Set to `1` to skip firmware build step |
|
||||
| `NVS_BIN` | unset | Path to pre-built NVS partition binary |
|
||||
| `QEMU_NET` | `1` | Set to `0` to disable SLIRP networking |
|
||||
| `CHAOS_SEED` | current time | Seed for reproducible chaos testing |
|
||||
|
||||
### Exit Codes (all scripts)
|
||||
|
||||
| Code | Meaning | Action |
|
||||
|------|---------|--------|
|
||||
| 0 | PASS | All checks passed |
|
||||
| 1 | WARN | Non-critical issues; review output |
|
||||
| 2 | FAIL | Critical checks failed; fix and re-run |
|
||||
| 3 | FATAL | Build error, crash, or missing tool; check prerequisites |
|
||||
|
||||
## Decision
|
||||
|
||||
Introduce a **comprehensive QEMU testing platform** for the ESP32-S3 CSI node firmware with nine capability layers:
|
||||
|
|
@ -145,7 +237,7 @@ This model exercises:
|
|||
| 5 | Channel sweep | 5s | Frames on channels 1, 6, 11 in sequence |
|
||||
| 6 | MAC filter test | 5s | Frames with wrong MAC are dropped (counter check) |
|
||||
| 7 | Ring buffer overflow | 3s | 1000 frames in 100ms burst, graceful drop |
|
||||
| 8 | Boundary RSSI | 5s | RSSI sweeps -127 to 0, no crash |
|
||||
| 8 | Boundary RSSI | 5s | RSSI sweeps -90 to -10 dBm, no crash |
|
||||
| 9 | Zero-length frame | 2s | `iq_len=0` frames, serialize returns 0 |
|
||||
|
||||
---
|
||||
|
|
@ -456,6 +548,53 @@ xtensa-esp-elf-gdb build/esp32-csi-node.elf \
|
|||
-ex "continue"
|
||||
```
|
||||
|
||||
### Debugging Walkthrough
|
||||
|
||||
**1. Start QEMU with GDB stub (paused at reset vector):**
|
||||
|
||||
```bash
|
||||
qemu-system-xtensa \
|
||||
-machine esp32s3 \
|
||||
-nographic \
|
||||
-drive file=build/qemu_flash.bin,if=mtd,format=raw \
|
||||
-serial mon:stdio \
|
||||
-s -S
|
||||
# -s opens GDB server on localhost:1234
|
||||
# -S pauses CPU until GDB sends "continue"
|
||||
```
|
||||
|
||||
**2. Connect from a second terminal:**
|
||||
|
||||
```bash
|
||||
xtensa-esp-elf-gdb build/esp32-csi-node.elf \
|
||||
-ex "target remote :1234" \
|
||||
-ex "b app_main" \
|
||||
-ex "continue"
|
||||
```
|
||||
|
||||
**3. Set a breakpoint on DSP processing and inspect state:**
|
||||
|
||||
```
|
||||
(gdb) b edge_processing.c:dsp_task
|
||||
(gdb) continue
|
||||
# ...breakpoint hit...
|
||||
(gdb) print g_nvs_config
|
||||
(gdb) print ring->head - ring->tail
|
||||
(gdb) continue
|
||||
```
|
||||
|
||||
**4. Connect from VS Code** using the `launch.json` config below (set breakpoints in the editor gutter, then press F5).
|
||||
|
||||
**5. Dump gcov coverage data (requires `sdkconfig.coverage` overlay):**
|
||||
|
||||
```
|
||||
(gdb) monitor gcov dump
|
||||
# Writes .gcda files to the build directory.
|
||||
# Then generate the HTML report on the host:
|
||||
# lcov --capture --directory build --output-file coverage.info
|
||||
# genhtml coverage.info --output-directory build/coverage_report
|
||||
```
|
||||
|
||||
### Key Breakpoint Locations
|
||||
|
||||
| Breakpoint | Purpose |
|
||||
|
|
@ -862,3 +1001,32 @@ Alternative to QEMU with better peripheral modeling for some platforms.
|
|||
- ADR-040: WASM programmable sensing runtime
|
||||
- ADR-057: Build-time CSI guard (`CONFIG_ESP_WIFI_CSI_ENABLED`)
|
||||
- ADR-060: Channel override and MAC address filter
|
||||
|
||||
---
|
||||
|
||||
## Optimization Log (2026-03-14)
|
||||
|
||||
### Bugs Fixed
|
||||
|
||||
1. **LFSR float bias** — `lfsr_float()` used divisor 32767.5 producing range [-1.0, 1.00002]; fixed to 32768.0 for exact [-1.0, +1.0)
|
||||
2. **MAC filter initialization** — `gen_mac_filter()` compared `frame_count == scenario_start_ms` (count vs timestamp); replaced with boolean flag
|
||||
3. **Scenario infinite loop** — `advance_scenario()` looped to scenario 0 when all completed; now sets `s_all_done=true` and timer callback exits early
|
||||
4. **Boot check severity** — `validate_qemu_output.py` reported no-boot as ERROR; upgraded to FATAL (nothing works without boot)
|
||||
5. **NVS boundary configs** — `boundary-max` used `vital_win=65535` which firmware silently rejects (valid: 32-256); fixed to 256
|
||||
6. **NVS boundary-min** — `vital_win=1` also invalid; fixed to 32 (firmware min)
|
||||
7. **edge-tier2-custom** — `vital_win=512` exceeded firmware max of 256; fixed to 256
|
||||
8. **power-save config** — Described as "10% duty cycle" but didn't set `power_duty=10`; fixed
|
||||
9. **wasm-signed/unsigned** — Both configs were identical; signed now includes pubkey blob, unsigned sets `wasm_verify=0`
|
||||
|
||||
### Optimizations Applied
|
||||
|
||||
1. **SLIRP networking** — QEMU runner now passes `-nic user,model=open_eth` for UDP testing
|
||||
2. **Scenario completion tracking** — Validator now checks `All N scenarios complete` log marker (check 15)
|
||||
3. **Frame rate monitoring** — Validator extracts `scenario=N frames=M` counters for rate analysis (check 16)
|
||||
4. **Watchdog tuning** — `sdkconfig.qemu` relaxes WDT to 30s / INT_WDT to 800ms for QEMU timing variance
|
||||
5. **Timer stack depth** — Increased `FREERTOS_TIMER_TASK_STACK_DEPTH=4096` to prevent overflow from math-heavy mock callback
|
||||
6. **Display disabled** — `CONFIG_DISPLAY_ENABLE=n` in QEMU overlay (no I2C hardware)
|
||||
7. **CI fuzz job** — Added `fuzz-test` job running all 3 fuzz targets for 60s each with crash artifact upload
|
||||
8. **CI NVS validation** — Added `nvs-matrix-validate` job that generates all 14 binaries and verifies sizes
|
||||
9. **CI matrix expanded** — Added `edge-tier1`, `boundary-max`, `boundary-min` to QEMU test matrix (4 → 7 configs)
|
||||
10. **QEMU cache key** — Uses `github.run_id` with restore-keys fallback to prevent stale QEMU builds
|
||||
|
|
|
|||
199
docs/adr/ADR-062-qemu-swarm-configurator.md
Normal file
199
docs/adr/ADR-062-qemu-swarm-configurator.md
Normal file
|
|
@ -0,0 +1,199 @@
|
|||
# ADR-062: QEMU ESP32-S3 Swarm Configurator
|
||||
|
||||
| Field | Value |
|
||||
|-------------|------------------------------------------------|
|
||||
| **Status** | Accepted |
|
||||
| **Date** | 2026-03-14 |
|
||||
| **Authors** | RuView Team |
|
||||
| **Relates** | ADR-061 (QEMU testing platform), ADR-060 (channel/MAC filter), ADR-018 (binary frame), ADR-039 (edge intel) |
|
||||
|
||||
## Glossary
|
||||
|
||||
| Term | Definition |
|
||||
|------|-----------|
|
||||
| Swarm | A group of N QEMU ESP32-S3 instances running simultaneously |
|
||||
| Topology | How nodes are connected: star, mesh, line, ring |
|
||||
| Role | Node function: `sensor` (collects CSI), `coordinator` (aggregates + forwards), `gateway` (bridges to host) |
|
||||
| Scenario matrix | Cross-product of topology × node count × NVS config × mock scenario |
|
||||
| Health oracle | Python process that monitors all node UART logs and declares swarm health |
|
||||
|
||||
## Context
|
||||
|
||||
ADR-061 Layer 3 provides a basic multi-node mesh test: N identical nodes with sequential TDM slots connected via a Linux bridge. This is useful but limited:
|
||||
|
||||
1. **All nodes are identical** — real deployments have heterogeneous roles (sensor, coordinator, gateway)
|
||||
2. **Single topology** — only fully-connected bridge; no star, line, or ring topologies
|
||||
3. **No scenario variation per node** — all nodes run the same mock CSI scenario
|
||||
4. **Manual configuration** — each test requires hand-editing env vars and arguments
|
||||
5. **No swarm-level health monitoring** — validation checks individual nodes, not collective behavior
|
||||
6. **No cross-node timing validation** — TDM slot ordering and inter-frame gaps aren't verified
|
||||
|
||||
Real WiFi-DensePose deployments use 3-8 ESP32-S3 nodes in various topologies. A single coordinator aggregates CSI from multiple sensors. The firmware must handle TDM conflicts, missing nodes, role-based behavior differences, and network partitions — none of which ADR-061 Layer 3 tests.
|
||||
|
||||
## Decision
|
||||
|
||||
Build a **QEMU Swarm Configurator** — a YAML-driven tool that defines multi-node test scenarios declaratively and orchestrates them under QEMU with swarm-level validation.
|
||||
|
||||
### Architecture
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────┐
|
||||
│ swarm_config.yaml │
|
||||
│ nodes: [{role: sensor, scenario: 2, channel: 6}] │
|
||||
│ topology: star │
|
||||
│ duration: 60s │
|
||||
│ assertions: [all_nodes_boot, tdm_no_collision, ...] │
|
||||
└──────────────────────┬──────────────────────────────┘
|
||||
│
|
||||
┌────────────▼────────────┐
|
||||
│ qemu_swarm.py │
|
||||
│ (orchestrator) │
|
||||
└───┬────┬────┬───┬──────┘
|
||||
│ │ │ │
|
||||
┌────▼┐ ┌▼──┐ ▼ ┌▼────┐
|
||||
│Node0│ │N1 │... │N(n-1)│ QEMU instances
|
||||
│sens │ │sen│ │coord │
|
||||
└──┬──┘ └─┬─┘ └──┬───┘
|
||||
│ │ │
|
||||
┌──▼──────▼─────────▼──┐
|
||||
│ Virtual Network │ TAP bridge / SLIRP
|
||||
│ (topology-shaped) │
|
||||
└──────────┬───────────┘
|
||||
│
|
||||
┌──────────▼───────────┐
|
||||
│ Aggregator (Rust) │ Collects frames
|
||||
└──────────┬───────────┘
|
||||
│
|
||||
┌──────────▼───────────┐
|
||||
│ Health Oracle │ Swarm-level assertions
|
||||
│ (swarm_health.py) │
|
||||
└──────────────────────┘
|
||||
```
|
||||
|
||||
### YAML Configuration Schema
|
||||
|
||||
```yaml
|
||||
# swarm_config.yaml
|
||||
swarm:
|
||||
name: "3-sensor-star"
|
||||
duration_s: 60
|
||||
topology: star # star | mesh | line | ring
|
||||
aggregator_port: 5005
|
||||
|
||||
nodes:
|
||||
- role: coordinator
|
||||
node_id: 0
|
||||
scenario: 0 # empty room (baseline)
|
||||
channel: 6
|
||||
edge_tier: 2
|
||||
is_gateway: true # receives aggregated frames
|
||||
|
||||
- role: sensor
|
||||
node_id: 1
|
||||
scenario: 2 # walking person
|
||||
channel: 6
|
||||
tdm_slot: 1 # TDM slot index (auto-assigned from node position if omitted)
|
||||
|
||||
- role: sensor
|
||||
node_id: 2
|
||||
scenario: 3 # fall event
|
||||
channel: 6
|
||||
tdm_slot: 2
|
||||
|
||||
assertions:
|
||||
- all_nodes_boot
|
||||
- no_crashes
|
||||
- tdm_no_collision
|
||||
- all_nodes_produce_frames
|
||||
- coordinator_receives_from_all
|
||||
- fall_detected_by_node_2
|
||||
- frame_rate_above: 15 # Hz minimum per node
|
||||
- max_boot_time_s: 10
|
||||
```
|
||||
|
||||
### Topologies
|
||||
|
||||
| Topology | Network | Description |
|
||||
|----------|---------|-------------|
|
||||
| `star` | All sensors connect to coordinator; coordinator has TAP to each sensor | Hub-and-spoke, most common |
|
||||
| `mesh` | All nodes on same bridge (existing Layer 3 behavior) | Every node sees every other |
|
||||
| `line` | Node 0 ↔ Node 1 ↔ Node 2 ↔ ... | Linear chain, tests multi-hop |
|
||||
| `ring` | Like line but last connects to first | Circular, tests routing |
|
||||
|
||||
### Node Roles
|
||||
|
||||
| Role | Behavior | NVS Keys |
|
||||
|------|----------|----------|
|
||||
| `sensor` | Runs mock CSI, sends frames to coordinator | `node_id`, `tdm_slot`, `target_ip` |
|
||||
| `coordinator` | Receives frames from sensors, runs edge aggregation | `node_id`, `tdm_slot=0`, `edge_tier=2` |
|
||||
| `gateway` | Like coordinator but also bridges to host UDP | `node_id`, `target_ip=host`, `is_gateway=1` |
|
||||
|
||||
### Assertions (Swarm-Level)
|
||||
|
||||
| Assertion | What It Checks |
|
||||
|-----------|---------------|
|
||||
| `all_nodes_boot` | Every node's UART log shows boot indicators within timeout |
|
||||
| `no_crashes` | No Guru Meditation, assert, panic in any log |
|
||||
| `tdm_no_collision` | No two nodes transmit in the same TDM slot |
|
||||
| `all_nodes_produce_frames` | Every sensor node's log contains CSI frame output |
|
||||
| `coordinator_receives_from_all` | Coordinator log shows frames from each sensor's node_id |
|
||||
| `fall_detected_by_node_N` | Node N's log reports a fall detection event |
|
||||
| `frame_rate_above` | Each node produces at least N frames/second |
|
||||
| `max_boot_time_s` | All nodes boot within N seconds |
|
||||
| `no_heap_errors` | No OOM or heap corruption in any log |
|
||||
| `network_partitioned_recovery` | After deliberate partition, nodes resume communication (future) |
|
||||
|
||||
### Preset Configurations
|
||||
|
||||
| Preset | Nodes | Topology | Purpose |
|
||||
|--------|-------|----------|---------|
|
||||
| `smoke` | 2 | star | Quick CI smoke test (15s) |
|
||||
| `standard` | 3 | star | Default 3-node (sensor + sensor + coordinator) |
|
||||
| `large_mesh` | 6 | mesh | Scale test with 6 fully-connected nodes |
|
||||
| `line_relay` | 4 | line | Multi-hop relay chain |
|
||||
| `ring_fault` | 4 | ring | Ring with fault injection mid-test |
|
||||
| `heterogeneous` | 5 | star | Mixed scenarios: walk, fall, static, channel-sweep, empty |
|
||||
| `ci_matrix` | 3 | star | CI-optimized preset (30s, minimal assertions) |
|
||||
|
||||
## File Layout
|
||||
|
||||
```
|
||||
scripts/
|
||||
├── qemu_swarm.py # Main orchestrator (CLI entry point)
|
||||
├── swarm_health.py # Swarm-level health oracle
|
||||
└── swarm_presets/
|
||||
├── smoke.yaml
|
||||
├── standard.yaml
|
||||
├── large_mesh.yaml
|
||||
├── line_relay.yaml
|
||||
├── ring_fault.yaml
|
||||
├── heterogeneous.yaml
|
||||
└── ci_matrix.yaml
|
||||
|
||||
.github/workflows/
|
||||
└── firmware-qemu.yml # MODIFIED: add swarm test job
|
||||
```
|
||||
|
||||
## Consequences
|
||||
|
||||
### Benefits
|
||||
|
||||
1. **Declarative testing** — define swarm topology in YAML, not shell scripts
|
||||
2. **Role-based nodes** — test coordinator/sensor/gateway interactions
|
||||
3. **Topology variety** — star/mesh/line/ring match real deployment patterns
|
||||
4. **Swarm-level assertions** — validate collective behavior, not just individual nodes
|
||||
5. **Preset library** — quick CI smoke tests and thorough manual validation
|
||||
6. **Reproducible** — YAML configs are version-controlled and shareable
|
||||
|
||||
### Limitations
|
||||
|
||||
1. **Still requires root** for TAP bridge topologies (star, line, ring); mesh can use SLIRP
|
||||
2. **QEMU resource usage** — 6+ QEMU instances use ~2GB RAM, may slow CI runners
|
||||
3. **No real RF** — inter-node communication is IP-based, not WiFi CSI multipath
|
||||
|
||||
## References
|
||||
|
||||
- ADR-061: QEMU ESP32-S3 firmware testing platform (Layers 1-9)
|
||||
- ADR-060: Channel override and MAC address filter provisioning
|
||||
- ADR-018: Binary CSI frame format (magic `0xC5110001`)
|
||||
- ADR-039: Edge intelligence pipeline (biquad, vitals, fall detection)
|
||||
|
|
@ -38,8 +38,17 @@ WiFi DensePose turns commodity WiFi signals into real-time human pose estimation
|
|||
- [ESP32-S3 Mesh](#esp32-s3-mesh)
|
||||
- [Intel 5300 / Atheros NIC](#intel-5300--atheros-nic)
|
||||
15. [Docker Compose (Multi-Service)](#docker-compose-multi-service)
|
||||
16. [Troubleshooting](#troubleshooting)
|
||||
17. [FAQ](#faq)
|
||||
16. [Testing Firmware Without Hardware (QEMU)](#testing-firmware-without-hardware-qemu)
|
||||
- [What You Need](#what-you-need)
|
||||
- [Your First Test Run](#your-first-test-run)
|
||||
- [Understanding the Test Output](#understanding-the-test-output)
|
||||
- [Testing Multiple Nodes at Once (Swarm)](#testing-multiple-nodes-at-once-swarm)
|
||||
- [Swarm Presets](#swarm-presets)
|
||||
- [Writing Your Own Swarm Config](#writing-your-own-swarm-config)
|
||||
- [Debugging Firmware in QEMU](#debugging-firmware-in-qemu)
|
||||
- [Running the Full Test Suite](#running-the-full-test-suite)
|
||||
17. [Troubleshooting](#troubleshooting)
|
||||
18. [FAQ](#faq)
|
||||
|
||||
---
|
||||
|
||||
|
|
@ -936,6 +945,288 @@ This starts:
|
|||
|
||||
---
|
||||
|
||||
## Testing Firmware Without Hardware (QEMU)
|
||||
|
||||
You can test the ESP32-S3 firmware on your computer without any physical hardware. The project uses **QEMU** — an emulator that pretends to be an ESP32-S3 chip, running the real firmware code inside a virtual machine on your PC.
|
||||
|
||||
This is useful when:
|
||||
- You don't have an ESP32-S3 board yet
|
||||
- You want to test firmware changes before flashing to real hardware
|
||||
- You're running automated tests in CI/CD
|
||||
- You want to simulate multiple ESP32 nodes talking to each other
|
||||
|
||||
### What You Need
|
||||
|
||||
**Required:**
|
||||
- Python 3.8+ (you probably already have this)
|
||||
- QEMU with ESP32-S3 support (Espressif's fork)
|
||||
|
||||
**Install QEMU (one-time setup):**
|
||||
|
||||
```bash
|
||||
# Easiest: use the automated installer (installs QEMU + Python tools)
|
||||
bash scripts/install-qemu.sh
|
||||
|
||||
# Or check what's already installed:
|
||||
bash scripts/install-qemu.sh --check
|
||||
```
|
||||
|
||||
The installer detects your OS (Ubuntu, Fedora, macOS, etc.), installs build dependencies, clones Espressif's QEMU fork, builds it, and adds it to your PATH. It also installs the Python tools (`esptool`, `pyyaml`, `esp-idf-nvs-partition-gen`).
|
||||
|
||||
<details>
|
||||
<summary>Manual installation (if you prefer)</summary>
|
||||
|
||||
```bash
|
||||
# Build from source
|
||||
git clone https://github.com/espressif/qemu.git
|
||||
cd qemu
|
||||
./configure --target-list=xtensa-softmmu --enable-slirp
|
||||
make -j$(nproc)
|
||||
export QEMU_PATH=$(pwd)/build/qemu-system-xtensa
|
||||
|
||||
# Install Python tools
|
||||
pip install esptool pyyaml esp-idf-nvs-partition-gen
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
**For multi-node testing (optional):**
|
||||
|
||||
```bash
|
||||
# Linux only — needed for virtual network bridges
|
||||
sudo apt install socat bridge-utils iproute2
|
||||
```
|
||||
|
||||
### The `qemu-cli.sh` Command
|
||||
|
||||
All QEMU testing is available through a single command:
|
||||
|
||||
```bash
|
||||
bash scripts/qemu-cli.sh <command>
|
||||
```
|
||||
|
||||
| Command | What it does |
|
||||
|---------|-------------|
|
||||
| `install` | Install QEMU (runs the installer above) |
|
||||
| `test` | Run single-node firmware test |
|
||||
| `swarm --preset smoke` | Quick 2-node swarm test |
|
||||
| `swarm --preset standard` | Standard 3-node test |
|
||||
| `mesh 3` | Multi-node mesh test |
|
||||
| `chaos` | Fault injection resilience test |
|
||||
| `fuzz --duration 60` | Run fuzz testing |
|
||||
| `status` | Show what's installed and ready |
|
||||
| `help` | Show all commands |
|
||||
|
||||
### Your First Test Run
|
||||
|
||||
The simplest way to test the firmware:
|
||||
|
||||
```bash
|
||||
# Using the CLI:
|
||||
bash scripts/qemu-cli.sh test
|
||||
|
||||
# Or directly:
|
||||
bash scripts/qemu-esp32s3-test.sh
|
||||
```
|
||||
|
||||
**What happens behind the scenes:**
|
||||
1. The firmware is compiled with a "mock CSI" mode — instead of reading real WiFi signals, it generates synthetic test data that mimics real people walking, falling, or breathing
|
||||
2. The compiled firmware is loaded into QEMU, which boots it like a real ESP32-S3
|
||||
3. The emulator's serial output (what you'd see on a USB cable) is captured
|
||||
4. A validation script checks the output for expected behavior and errors
|
||||
|
||||
If you already built the firmware and want to skip rebuilding:
|
||||
|
||||
```bash
|
||||
SKIP_BUILD=1 bash scripts/qemu-esp32s3-test.sh
|
||||
```
|
||||
|
||||
To give it more time (useful on slower machines):
|
||||
|
||||
```bash
|
||||
QEMU_TIMEOUT=120 bash scripts/qemu-esp32s3-test.sh
|
||||
```
|
||||
|
||||
### Understanding the Test Output
|
||||
|
||||
The test runs 16 checks on the firmware's output. Here's what a successful run looks like:
|
||||
|
||||
```
|
||||
=== QEMU ESP32-S3 Firmware Test (ADR-061) ===
|
||||
|
||||
[PASS] Boot: Firmware booted successfully
|
||||
[PASS] NVS config: Configuration loaded from flash
|
||||
[PASS] Mock CSI: Synthetic WiFi data generator started
|
||||
[PASS] Edge processing: Signal analysis pipeline running
|
||||
[PASS] Frame serialization: Data packets formatted correctly
|
||||
[PASS] No crashes: No error conditions detected
|
||||
...
|
||||
|
||||
16/16 checks passed
|
||||
=== Test Complete (exit code: 0) ===
|
||||
```
|
||||
|
||||
**Exit codes explained:**
|
||||
|
||||
| Code | Meaning | What to do |
|
||||
|------|---------|-----------|
|
||||
| 0 | **PASS** — everything works | Nothing, you're good! |
|
||||
| 1 | **WARN** — minor issues | Review the output; usually safe to continue |
|
||||
| 2 | **FAIL** — something broke | Check the `[FAIL]` lines for what went wrong |
|
||||
| 3 | **FATAL** — can't even start | Usually a missing tool or build failure; check error messages |
|
||||
|
||||
### Testing Multiple Nodes at Once (Swarm)
|
||||
|
||||
Real deployments use 3-8 ESP32 nodes. The **swarm configurator** lets you simulate multiple nodes on your computer, each with a different role:
|
||||
|
||||
- **Sensor nodes** — generate WiFi signal data (like ESP32s placed around a room)
|
||||
- **Coordinator node** — collects data from all sensors and runs analysis
|
||||
- **Gateway node** — bridges data to your computer
|
||||
|
||||
```bash
|
||||
# Quick 2-node smoke test (15 seconds)
|
||||
python3 scripts/qemu_swarm.py --preset smoke
|
||||
|
||||
# Standard 3-node test: 2 sensors + 1 coordinator (60 seconds)
|
||||
python3 scripts/qemu_swarm.py --preset standard
|
||||
|
||||
# See what's available
|
||||
python3 scripts/qemu_swarm.py --list-presets
|
||||
|
||||
# Preview what would run (without actually running)
|
||||
python3 scripts/qemu_swarm.py --preset standard --dry-run
|
||||
```
|
||||
|
||||
**Note:** Multi-node testing with virtual bridges requires Linux and `sudo`. On other systems, nodes fall back to a simpler networking mode where each node can reach the host aggregator but not other nodes (including the coordinator).
|
||||
|
||||
### Swarm Presets
|
||||
|
||||
| Preset | Nodes | Duration | Best for |
|
||||
|--------|-------|----------|----------|
|
||||
| `smoke` | 2 | 15s | Quick check that things work |
|
||||
| `standard` | 3 | 60s | Normal development testing |
|
||||
| `ci_matrix` | 3 | 30s | CI/CD pipelines |
|
||||
| `large_mesh` | 6 | 90s | Testing at scale |
|
||||
| `line_relay` | 4 | 60s | Multi-hop relay testing |
|
||||
| `ring_fault` | 4 | 75s | Fault tolerance testing |
|
||||
| `heterogeneous` | 5 | 90s | Mixed scenario testing |
|
||||
|
||||
### Writing Your Own Swarm Config
|
||||
|
||||
Create a YAML file describing your test scenario:
|
||||
|
||||
```yaml
|
||||
# my_test.yaml
|
||||
swarm:
|
||||
name: my-custom-test
|
||||
duration_s: 45
|
||||
topology: star # star, mesh, line, or ring
|
||||
aggregator_port: 5005
|
||||
|
||||
nodes:
|
||||
- role: coordinator
|
||||
node_id: 0
|
||||
scenario: 0 # 0=empty room (baseline)
|
||||
channel: 6
|
||||
edge_tier: 2
|
||||
|
||||
- role: sensor
|
||||
node_id: 1
|
||||
scenario: 2 # 2=walking person
|
||||
channel: 6
|
||||
tdm_slot: 1
|
||||
|
||||
- role: sensor
|
||||
node_id: 2
|
||||
scenario: 3 # 3=fall event
|
||||
channel: 6
|
||||
tdm_slot: 2
|
||||
|
||||
assertions:
|
||||
- all_nodes_boot # Did every node start up?
|
||||
- no_crashes # Any error/panic?
|
||||
- all_nodes_produce_frames # Is each sensor generating data?
|
||||
- fall_detected_by_node_2 # Did node 2 detect the fall?
|
||||
```
|
||||
|
||||
**Available scenarios** (what kind of fake WiFi data to generate):
|
||||
|
||||
| # | Scenario | Description |
|
||||
|---|----------|-------------|
|
||||
| 0 | Empty room | Baseline with just noise |
|
||||
| 1 | Static person | Someone standing still |
|
||||
| 2 | Walking | Someone walking across the room |
|
||||
| 3 | Fall | Someone falling down |
|
||||
| 4 | Multiple people | Two people in the room |
|
||||
| 5 | Channel sweep | Cycling through WiFi channels |
|
||||
| 6 | MAC filter | Testing device filtering |
|
||||
| 7 | Ring overflow | Stress test with burst of data |
|
||||
| 8 | RSSI sweep | Signal strength from weak to strong |
|
||||
| 9 | Zero-length | Edge case: empty data packet |
|
||||
|
||||
**Topology options:**
|
||||
|
||||
| Topology | Shape | When to use |
|
||||
|----------|-------|-------------|
|
||||
| `star` | All sensors connect to one coordinator | Most common setup |
|
||||
| `mesh` | Every node can talk to every other | Testing fully connected networks |
|
||||
| `line` | Nodes in a chain (A → B → C → D) | Testing relay/forwarding |
|
||||
| `ring` | Chain with ends connected | Testing circular routing |
|
||||
|
||||
Run your custom config:
|
||||
|
||||
```bash
|
||||
python3 scripts/qemu_swarm.py --config my_test.yaml
|
||||
```
|
||||
|
||||
### Debugging Firmware in QEMU
|
||||
|
||||
If something goes wrong, you can attach a debugger to the emulated ESP32:
|
||||
|
||||
```bash
|
||||
# Terminal 1: Start QEMU with debug support (paused at boot)
|
||||
qemu-system-xtensa -machine esp32s3 -nographic \
|
||||
-drive file=firmware/esp32-csi-node/build/qemu_flash.bin,if=mtd,format=raw \
|
||||
-s -S
|
||||
|
||||
# Terminal 2: Connect the debugger
|
||||
xtensa-esp-elf-gdb firmware/esp32-csi-node/build/esp32-csi-node.elf \
|
||||
-ex "target remote :1234" \
|
||||
-ex "break app_main" \
|
||||
-ex "continue"
|
||||
```
|
||||
|
||||
Or use VS Code: open the project, press **F5**, and select **"QEMU ESP32-S3 Debug"**.
|
||||
|
||||
### Running the Full Test Suite
|
||||
|
||||
For thorough validation before submitting a pull request:
|
||||
|
||||
```bash
|
||||
# 1. Single-node test (2 minutes)
|
||||
bash scripts/qemu-esp32s3-test.sh
|
||||
|
||||
# 2. Multi-node swarm test (1 minute)
|
||||
python3 scripts/qemu_swarm.py --preset standard
|
||||
|
||||
# 3. Fuzz testing — finds edge-case crashes (1-5 minutes)
|
||||
cd firmware/esp32-csi-node/test
|
||||
make all CC=clang
|
||||
make run_serialize FUZZ_DURATION=60
|
||||
make run_edge FUZZ_DURATION=60
|
||||
make run_nvs FUZZ_DURATION=60
|
||||
|
||||
# 4. NVS configuration matrix — tests 14 config combinations
|
||||
python3 scripts/generate_nvs_matrix.py --output-dir build/nvs_matrix
|
||||
|
||||
# 5. Chaos testing — injects faults to test resilience (2 minutes)
|
||||
bash scripts/qemu-chaos-test.sh
|
||||
```
|
||||
|
||||
All of these also run automatically in CI when you push changes to `firmware/`.
|
||||
|
||||
---
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Docker: "no matching manifest for linux/arm64" on macOS
|
||||
|
|
@ -1015,6 +1306,47 @@ The server applies a 3-stage smoothing pipeline (ADR-048). If readings are still
|
|||
- Hard refresh with Ctrl+Shift+R to clear cached settings
|
||||
- The auto-detect probes `/health` on the same origin — cross-origin won't work
|
||||
|
||||
### QEMU: "qemu-system-xtensa: command not found"
|
||||
|
||||
QEMU for ESP32-S3 must be built from Espressif's fork — it is not in standard package managers:
|
||||
|
||||
```bash
|
||||
git clone https://github.com/espressif/qemu.git
|
||||
cd qemu && ./configure --target-list=xtensa-softmmu && make -j$(nproc)
|
||||
export QEMU_PATH=$(pwd)/build/qemu-system-xtensa
|
||||
```
|
||||
|
||||
Or point to an existing build: `QEMU_PATH=/path/to/qemu-system-xtensa bash scripts/qemu-esp32s3-test.sh`
|
||||
|
||||
### QEMU: Test times out with no output
|
||||
|
||||
The emulator is slower than real hardware. Increase the timeout:
|
||||
|
||||
```bash
|
||||
QEMU_TIMEOUT=120 bash scripts/qemu-esp32s3-test.sh
|
||||
```
|
||||
|
||||
If there's truly no output at all, the firmware build may have failed. Rebuild without `SKIP_BUILD`:
|
||||
|
||||
```bash
|
||||
bash scripts/qemu-esp32s3-test.sh # without SKIP_BUILD
|
||||
```
|
||||
|
||||
### QEMU: "esptool not found"
|
||||
|
||||
Install it with pip: `pip install esptool`
|
||||
|
||||
### QEMU Swarm: "Must be run as root"
|
||||
|
||||
Multi-node swarm tests with virtual network bridges require root on Linux. Two options:
|
||||
|
||||
1. Run with sudo: `sudo python3 scripts/qemu_swarm.py --preset standard`
|
||||
2. Skip bridges (nodes use simpler networking): the tool automatically falls back on non-root systems, but nodes can't communicate with each other (only with the aggregator)
|
||||
|
||||
### QEMU Swarm: "yaml module not found"
|
||||
|
||||
Install PyYAML: `pip install pyyaml`
|
||||
|
||||
---
|
||||
|
||||
## FAQ
|
||||
|
|
|
|||
|
|
@ -523,6 +523,231 @@ The firmware is continuously verified by [`.github/workflows/firmware-ci.yml`](.
|
|||
|
||||
---
|
||||
|
||||
## QEMU Testing (ADR-061)
|
||||
|
||||
Test the firmware without physical hardware using Espressif's QEMU fork. A compile-time mock CSI generator (`CONFIG_CSI_MOCK_ENABLED=y`) replaces the real WiFi CSI callback with a timer-driven synthetic frame injector that exercises the full edge processing pipeline -- biquad filtering, Welford stats, top-K selection, presence/fall detection, and vitals extraction.
|
||||
|
||||
### Prerequisites
|
||||
|
||||
- **ESP-IDF v5.4** -- [installation guide](https://docs.espressif.com/projects/esp-idf/en/v5.4/esp32s3/get-started/)
|
||||
- **Espressif QEMU fork** -- must be built from source (not in Ubuntu packages):
|
||||
|
||||
```bash
|
||||
git clone --depth 1 https://github.com/espressif/qemu.git /tmp/qemu
|
||||
cd /tmp/qemu
|
||||
./configure --target-list=xtensa-softmmu --enable-slirp
|
||||
make -j$(nproc)
|
||||
sudo cp build/qemu-system-xtensa /usr/local/bin/
|
||||
```
|
||||
|
||||
### Quick Start
|
||||
|
||||
Three commands to go from source to running firmware in QEMU:
|
||||
|
||||
```bash
|
||||
cd firmware/esp32-csi-node
|
||||
|
||||
# 1. Build with mock CSI enabled (replaces real WiFi CSI with synthetic frames)
|
||||
idf.py -D SDKCONFIG_DEFAULTS="sdkconfig.defaults;sdkconfig.qemu" build
|
||||
|
||||
# 2. Create merged flash image
|
||||
esptool.py --chip esp32s3 merge_bin -o build/qemu_flash.bin \
|
||||
--flash_mode dio --flash_freq 80m --flash_size 8MB \
|
||||
0x0 build/bootloader/bootloader.bin \
|
||||
0x8000 build/partition_table/partition-table.bin \
|
||||
0x20000 build/esp32-csi-node.bin
|
||||
|
||||
# 3. Run in QEMU
|
||||
qemu-system-xtensa -machine esp32s3 -nographic \
|
||||
-drive file=build/qemu_flash.bin,if=mtd,format=raw \
|
||||
-serial mon:stdio -no-reboot
|
||||
```
|
||||
|
||||
The firmware boots FreeRTOS, loads NVS config, starts the mock CSI generator at 20 Hz, and runs all edge processing. UART output shows log lines that can be validated automatically.
|
||||
|
||||
### Mock CSI Scenarios
|
||||
|
||||
The mock generator cycles through 10 scenarios that exercise every edge processing path:
|
||||
|
||||
| ID | Scenario | Duration | Expected Output |
|
||||
|----|----------|----------|-----------------|
|
||||
| 0 | Empty room | 10 s | `presence=0`, `motion_energy < thresh` |
|
||||
| 1 | Static person | 10 s | `presence=1`, `breathing_rate` in [10, 25], `fall=0` |
|
||||
| 2 | Walking person | 10 s | `presence=1`, `motion_energy > 0.5`, `fall=0` |
|
||||
| 3 | Fall event | 5 s | `fall=1` flag set, `motion_energy` spike |
|
||||
| 4 | Multi-person | 15 s | `n_persons=2`, independent breathing rates |
|
||||
| 5 | Channel sweep | 5 s | Frames on channels 1, 6, 11 in sequence |
|
||||
| 6 | MAC filter test | 5 s | Frames with wrong MAC dropped (counter check) |
|
||||
| 7 | Ring buffer overflow | 3 s | 1000 frames in 100 ms burst, graceful drop |
|
||||
| 8 | Boundary RSSI | 5 s | RSSI sweeps -127 to 0, no crash |
|
||||
| 9 | Zero-length frame | 2 s | `iq_len=0` frames, serialize returns 0 |
|
||||
|
||||
### NVS Provisioning Matrix
|
||||
|
||||
14 NVS configurations are tested in CI to ensure all config paths work correctly:
|
||||
|
||||
| Config | NVS Values | Validates |
|
||||
|--------|-----------|-----------|
|
||||
| `default` | (empty NVS) | Kconfig fallback paths |
|
||||
| `wifi-only` | ssid, password | Basic provisioning |
|
||||
| `full-adr060` | channel=6, filter_mac=AA:BB:CC:DD:EE:FF | Channel override + MAC filter |
|
||||
| `edge-tier0` | edge_tier=0 | Raw CSI passthrough (no DSP) |
|
||||
| `edge-tier1` | edge_tier=1, pres_thresh=100, fall_thresh=2000 | Stats-only mode |
|
||||
| `edge-tier2-custom` | edge_tier=2, vital_win=128, vital_int=500, subk_count=16 | Full vitals with custom params |
|
||||
| `tdm-3node` | tdm_slot=1, tdm_nodes=3, node_id=1 | TDM mesh timing |
|
||||
| `wasm-signed` | wasm_max=4, wasm_verify=1, wasm_pubkey=<32B> | WASM with Ed25519 verification |
|
||||
| `wasm-unsigned` | wasm_max=2, wasm_verify=0 | WASM without signature check |
|
||||
| `5ghz-channel` | channel=36, filter_mac=... | 5 GHz CSI collection |
|
||||
| `boundary-max` | target_port=65535, node_id=255, top_k=32, vital_win=256 | Max-range values |
|
||||
| `boundary-min` | target_port=1, node_id=0, top_k=1, vital_win=32 | Min-range values |
|
||||
| `power-save` | power_duty=10, edge_tier=0 | Low-power mode |
|
||||
| `corrupt-nvs` | (partial/corrupt partition) | Graceful fallback to defaults |
|
||||
|
||||
Generate all configs for CI testing:
|
||||
|
||||
```bash
|
||||
python scripts/generate_nvs_matrix.py
|
||||
```
|
||||
|
||||
### Validation Checks
|
||||
|
||||
The output validation script (`scripts/validate_qemu_output.py`) parses UART logs and runs 16 checks; the core checks are listed below (checks 15-16 additionally verify scenario completion and per-scenario frame-rate counters):
|
||||
|
||||
| Check | Pass Criteria | Severity |
|
||||
|-------|---------------|----------|
|
||||
| Boot | `app_main()` called, no panic/assert | FATAL |
|
||||
| NVS load | `nvs_config:` log line present | FATAL |
|
||||
| Mock CSI init | `mock_csi: Starting mock CSI generator` | FATAL |
|
||||
| Frame generation | `mock_csi: Generated N frames` where N > 0 | ERROR |
|
||||
| Edge pipeline | `edge_processing: DSP task started on Core 1` | ERROR |
|
||||
| Vitals output | At least one `vitals:` log line with valid BPM | ERROR |
|
||||
| Presence detection | `presence=1` during person scenarios | WARN |
|
||||
| Fall detection | `fall=1` during fall scenario | WARN |
|
||||
| MAC filter | `csi_collector: MAC filter dropped N frames` where N > 0 | WARN |
|
||||
| ADR-018 serialize | `csi_collector: Serialized N frames` where N > 0 | ERROR |
|
||||
| No crash | No `Guru Meditation Error`, no `assert failed`, no `abort()` | FATAL |
|
||||
| Clean exit | Firmware reaches end of scenario sequence | ERROR |
|
||||
| Heap OK | No `HEAP_ERROR` or `out of memory` | FATAL |
|
||||
| Stack OK | No `Stack overflow` detected | FATAL |
|
||||
|
||||
Exit codes: `0` = all pass, `1` = WARN only, `2` = ERROR, `3` = FATAL.
|
||||
|
||||
### GDB Debugging
|
||||
|
||||
QEMU provides a built-in GDB stub for zero-cost breakpoint debugging without JTAG hardware:
|
||||
|
||||
```bash
|
||||
# Launch QEMU paused, with GDB stub on port 1234
|
||||
qemu-system-xtensa \
|
||||
-machine esp32s3 -nographic \
|
||||
-drive file=build/qemu_flash.bin,if=mtd,format=raw \
|
||||
-serial mon:stdio \
|
||||
-s -S
|
||||
|
||||
# In another terminal, attach GDB
|
||||
xtensa-esp-elf-gdb build/esp32-csi-node.elf \
|
||||
-ex "target remote :1234" \
|
||||
-ex "b edge_processing.c:dsp_task" \
|
||||
-ex "b csi_collector.c:csi_serialize_frame" \
|
||||
-ex "b mock_csi.c:mock_generate_csi_frame" \
|
||||
-ex "watch g_nvs_config.csi_channel" \
|
||||
-ex "continue"
|
||||
```
|
||||
|
||||
Key breakpoints:
|
||||
|
||||
| Location | Purpose |
|
||||
|----------|---------|
|
||||
| `edge_processing.c:dsp_task` | DSP consumer loop entry |
|
||||
| `edge_processing.c:presence_detect` | Threshold comparison |
|
||||
| `edge_processing.c:fall_detect` | Phase acceleration check |
|
||||
| `csi_collector.c:csi_serialize_frame` | ADR-018 serialization |
|
||||
| `nvs_config.c:nvs_config_load` | NVS parse logic |
|
||||
| `wasm_runtime.c:wasm_on_csi` | WASM module dispatch |
|
||||
| `mock_csi.c:mock_generate_csi_frame` | Synthetic frame generation |
|
||||
|
||||
VS Code integration -- add to `.vscode/launch.json`:
|
||||
|
||||
```json
|
||||
{
|
||||
"name": "QEMU ESP32-S3 Debug",
|
||||
"type": "cppdbg",
|
||||
"request": "launch",
|
||||
"program": "${workspaceFolder}/firmware/esp32-csi-node/build/esp32-csi-node.elf",
|
||||
"miDebuggerPath": "xtensa-esp-elf-gdb",
|
||||
"miDebuggerServerAddress": "localhost:1234",
|
||||
"setupCommands": [
|
||||
{ "text": "set remote hardware-breakpoint-limit 2" },
|
||||
{ "text": "set remote hardware-watchpoint-limit 2" }
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
### Code Coverage
|
||||
|
||||
Build with gcov enabled and collect coverage after a QEMU run:
|
||||
|
||||
```bash
|
||||
# Build with coverage overlay
|
||||
idf.py -D SDKCONFIG_DEFAULTS="sdkconfig.defaults;sdkconfig.qemu;sdkconfig.coverage" build
|
||||
|
||||
# After QEMU run, generate HTML report
|
||||
lcov --capture --directory build --output-file coverage.info
|
||||
lcov --remove coverage.info '*/esp-idf/*' '*/test/*' --output-file coverage_filtered.info
|
||||
genhtml coverage_filtered.info --output-directory build/coverage_report
|
||||
```
|
||||
|
||||
Coverage targets:
|
||||
|
||||
| Module | Target |
|
||||
|--------|--------|
|
||||
| `edge_processing.c` | >= 80% |
|
||||
| `csi_collector.c` | >= 90% |
|
||||
| `nvs_config.c` | >= 95% |
|
||||
| `mock_csi.c` | >= 95% |
|
||||
| `stream_sender.c` | >= 80% |
|
||||
| `wasm_runtime.c` | >= 70% |
|
||||
|
||||
### Fuzz Testing
|
||||
|
||||
Host-native fuzz targets compiled with libFuzzer + AddressSanitizer (no QEMU needed):
|
||||
|
||||
```bash
|
||||
cd firmware/esp32-csi-node/test
|
||||
|
||||
# Build fuzz target
|
||||
clang -fsanitize=fuzzer,address -I../main \
|
||||
fuzz_csi_serialize.c ../main/csi_collector.c \
|
||||
-o fuzz_serialize
|
||||
|
||||
# Run for 5 minutes
|
||||
timeout 300 ./fuzz_serialize corpus/ || true
|
||||
```
|
||||
|
||||
Fuzz targets:
|
||||
|
||||
| Target | Input | Looking For |
|
||||
|--------|-------|-------------|
|
||||
| `csi_serialize_frame()` | Random `wifi_csi_info_t` | Buffer overflow, NULL deref |
|
||||
| `nvs_config_load()` | Crafted NVS partition binary | No crash, fallback to defaults |
|
||||
| `edge_enqueue_csi()` | Rapid-fire 10,000 frames | Ring overflow, no data corruption |
|
||||
| `rvf_parser.c` | Malformed RVF packets | Parse rejection, no crash |
|
||||
| `wasm_upload.c` | Corrupt WASM blobs | Rejection without crash |
|
||||
|
||||
### QEMU CI Workflow
|
||||
|
||||
The GitHub Actions workflow (`.github/workflows/firmware-qemu.yml`) runs on every push or PR touching `firmware/**`:
|
||||
|
||||
1. Uses the `espressif/idf:v5.4` container image
|
||||
2. Builds Espressif's QEMU fork from source
|
||||
3. Runs a CI matrix across NVS configurations: `default`, `nvs-full`, `nvs-edge-tier0`, `nvs-tdm-3node`
|
||||
4. For each config: provisions NVS, builds with mock CSI, runs in QEMU with timeout, validates UART output
|
||||
5. Uploads QEMU logs as build artifacts for debugging failures
|
||||
|
||||
No physical ESP32 hardware is needed in CI.
|
||||
|
||||
---
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
| Symptom | Cause | Fix |
|
||||
|
|
@ -556,6 +781,9 @@ This firmware implements or references the following ADRs:
|
|||
| [ADR-029](../../docs/adr/ADR-029-ruvsense-multistatic-sensing-mode.md) | Channel hopping and TDM protocol | Accepted |
|
||||
| [ADR-039](../../docs/adr/ADR-039-esp32-edge-intelligence.md) | Edge intelligence tiers 0-2 | Accepted |
|
||||
| [ADR-040](../../docs/adr/) | WASM programmable sensing (Tier 3) with RVF container format | Alpha |
|
||||
| [ADR-057](../../docs/adr/ADR-057-build-time-csi-guard.md) | Build-time CSI guard (`CONFIG_ESP_WIFI_CSI_ENABLED`) | Accepted |
|
||||
| [ADR-060](../../docs/adr/ADR-060-channel-mac-filter.md) | Channel override and MAC address filter | Accepted |
|
||||
| [ADR-061](../../docs/adr/ADR-061-qemu-esp32s3-firmware-testing.md) | QEMU ESP32-S3 emulation for firmware testing | Proposed |
|
||||
|
||||
---
|
||||
|
||||
|
|
|
|||
|
|
@ -6,6 +6,11 @@ set(SRCS
|
|||
|
||||
set(REQUIRES "")
|
||||
|
||||
# ADR-061: Mock CSI generator for QEMU testing
|
||||
if(CONFIG_CSI_MOCK_ENABLED)
|
||||
list(APPEND SRCS "mock_csi.c")
|
||||
endif()
|
||||
|
||||
# ADR-045: AMOLED display support (compile-time optional)
|
||||
if(CONFIG_DISPLAY_ENABLE)
|
||||
list(APPEND SRCS "display_hal.c" "display_ui.c" "display_task.c")
|
||||
|
|
|
|||
|
|
@ -201,3 +201,40 @@ menu "WASM Programmable Sensing (ADR-040)"
|
|||
Default 1000 ms = 1 Hz.
|
||||
|
||||
endmenu
|
||||
|
||||
menu "Mock CSI (QEMU Testing)"
|
||||
config CSI_MOCK_ENABLED
|
||||
bool "Enable mock CSI generator (for QEMU testing)"
|
||||
default n
|
||||
help
|
||||
Replace real WiFi CSI with synthetic frame generator.
|
||||
Use with QEMU emulation for automated testing.
|
||||
|
||||
config CSI_MOCK_SKIP_WIFI_CONNECT
|
||||
bool "Skip WiFi STA connection"
|
||||
depends on CSI_MOCK_ENABLED
|
||||
default y
|
||||
help
|
||||
Skip WiFi initialization when using mock CSI.
|
||||
|
||||
config CSI_MOCK_SCENARIO
|
||||
int "Mock scenario (0-9, 255=all)"
|
||||
depends on CSI_MOCK_ENABLED
|
||||
default 255
|
||||
range 0 255
|
||||
help
|
||||
0=empty, 1=static, 2=walking, 3=fall, 4=multi-person,
|
||||
5=channel-sweep, 6=mac-filter, 7=ring-overflow,
|
||||
8=boundary-rssi, 9=zero-length, 255=run all.
|
||||
|
||||
config CSI_MOCK_SCENARIO_DURATION_MS
|
||||
int "Scenario duration (ms)"
|
||||
depends on CSI_MOCK_ENABLED
|
||||
default 5000
|
||||
range 1000 60000
|
||||
|
||||
config CSI_MOCK_LOG_FRAMES
|
||||
bool "Log every mock frame (verbose)"
|
||||
depends on CSI_MOCK_ENABLED
|
||||
default n
|
||||
endmenu
|
||||
|
|
|
|||
|
|
@ -27,6 +27,9 @@
|
|||
#include "wasm_runtime.h"
|
||||
#include "wasm_upload.h"
|
||||
#include "display_task.h"
|
||||
#ifdef CONFIG_CSI_MOCK_ENABLED
|
||||
#include "mock_csi.h"
|
||||
#endif
|
||||
|
||||
#include "esp_timer.h"
|
||||
|
||||
|
|
@ -134,17 +137,35 @@ void app_main(void)
|
|||
|
||||
ESP_LOGI(TAG, "ESP32-S3 CSI Node (ADR-018) — Node ID: %d", g_nvs_config.node_id);
|
||||
|
||||
/* Initialize WiFi STA */
|
||||
/* Initialize WiFi STA (skip entirely under QEMU mock — no RF hardware) */
|
||||
#ifndef CONFIG_CSI_MOCK_SKIP_WIFI_CONNECT
|
||||
wifi_init_sta();
|
||||
#else
|
||||
ESP_LOGI(TAG, "Mock CSI mode: skipping WiFi init (CONFIG_CSI_MOCK_SKIP_WIFI_CONNECT)");
|
||||
#endif
|
||||
|
||||
/* Initialize UDP sender with runtime target */
|
||||
#ifdef CONFIG_CSI_MOCK_SKIP_WIFI_CONNECT
|
||||
ESP_LOGI(TAG, "Mock CSI mode: skipping UDP sender init (no network)");
|
||||
#else
|
||||
if (stream_sender_init_with(g_nvs_config.target_ip, g_nvs_config.target_port) != 0) {
|
||||
ESP_LOGE(TAG, "Failed to initialize UDP sender");
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Initialize CSI collection */
|
||||
#ifdef CONFIG_CSI_MOCK_ENABLED
|
||||
/* ADR-061: Start mock CSI generator (replaces real WiFi CSI in QEMU) */
|
||||
esp_err_t mock_ret = mock_csi_init(CONFIG_CSI_MOCK_SCENARIO);
|
||||
if (mock_ret != ESP_OK) {
|
||||
ESP_LOGE(TAG, "Mock CSI init failed: %s", esp_err_to_name(mock_ret));
|
||||
} else {
|
||||
ESP_LOGI(TAG, "Mock CSI active (scenario=%d)", CONFIG_CSI_MOCK_SCENARIO);
|
||||
}
|
||||
#else
|
||||
csi_collector_init();
|
||||
#endif
|
||||
|
||||
/* ADR-039: Initialize edge processing pipeline. */
|
||||
edge_config_t edge_cfg = {
|
||||
|
|
@ -162,12 +183,17 @@ void app_main(void)
|
|||
esp_err_to_name(edge_ret));
|
||||
}
|
||||
|
||||
/* Initialize OTA update HTTP server. */
|
||||
/* Initialize OTA update HTTP server (requires network). */
|
||||
httpd_handle_t ota_server = NULL;
|
||||
#ifndef CONFIG_CSI_MOCK_SKIP_WIFI_CONNECT
|
||||
esp_err_t ota_ret = ota_update_init_ex(&ota_server);
|
||||
if (ota_ret != ESP_OK) {
|
||||
ESP_LOGW(TAG, "OTA server init failed: %s", esp_err_to_name(ota_ret));
|
||||
}
|
||||
#else
|
||||
esp_err_t ota_ret = ESP_ERR_NOT_SUPPORTED;
|
||||
ESP_LOGI(TAG, "Mock CSI mode: skipping OTA server (no network)");
|
||||
#endif
|
||||
|
||||
/* ADR-040: Initialize WASM programmable sensing runtime. */
|
||||
esp_err_t wasm_ret = wasm_runtime_init();
|
||||
|
|
@ -205,10 +231,12 @@ void app_main(void)
|
|||
power_mgmt_init(g_nvs_config.power_duty);
|
||||
|
||||
/* ADR-045: Start AMOLED display task (gracefully skips if no display). */
|
||||
#ifdef CONFIG_DISPLAY_ENABLE
|
||||
esp_err_t disp_ret = display_task_start();
|
||||
if (disp_ret != ESP_OK) {
|
||||
ESP_LOGW(TAG, "Display init returned: %s", esp_err_to_name(disp_ret));
|
||||
}
|
||||
#endif
|
||||
|
||||
ESP_LOGI(TAG, "CSI streaming active → %s:%d (edge_tier=%u, OTA=%s, WASM=%s)",
|
||||
g_nvs_config.target_ip, g_nvs_config.target_port,
|
||||
|
|
|
|||
696
firmware/esp32-csi-node/main/mock_csi.c
Normal file
696
firmware/esp32-csi-node/main/mock_csi.c
Normal file
|
|
@ -0,0 +1,696 @@
|
|||
/**
|
||||
* @file mock_csi.c
|
||||
* @brief ADR-061 Mock CSI generator for ESP32-S3 QEMU testing.
|
||||
*
|
||||
* Generates synthetic CSI frames at 20 Hz using an esp_timer callback,
|
||||
* injecting them directly into the edge processing pipeline. This allows
|
||||
* full-stack testing of the CSI signal processing, vitals extraction,
|
||||
* and presence detection pipeline under QEMU without WiFi hardware.
|
||||
*
|
||||
* Signal model per subcarrier k at time t:
|
||||
* A_k(t) = A_base + A_person * exp(-d_k^2 / sigma^2) + noise
|
||||
* phi_k(t) = phi_base + (2*pi*d / lambda) + breathing_mod(t) + noise
|
||||
*
|
||||
* The entire file is guarded by CONFIG_CSI_MOCK_ENABLED so it compiles
|
||||
* to nothing on production builds.
|
||||
*/
|
||||
|
||||
#include "sdkconfig.h"
|
||||
|
||||
#ifdef CONFIG_CSI_MOCK_ENABLED
|
||||
|
||||
#include "mock_csi.h"
|
||||
#include "edge_processing.h"
|
||||
#include "nvs_config.h"
|
||||
|
||||
#include <string.h>
|
||||
#include <math.h>
|
||||
#include "esp_log.h"
|
||||
#include "esp_timer.h"
|
||||
#include "sdkconfig.h"
|
||||
|
||||
static const char *TAG = "mock_csi";
|
||||
|
||||
/* ---- Configuration defaults ---- */
|
||||
|
||||
/** Scenario duration in ms. Kconfig-overridable. */
|
||||
#ifndef CONFIG_CSI_MOCK_SCENARIO_DURATION_MS
|
||||
#define CONFIG_CSI_MOCK_SCENARIO_DURATION_MS 5000
|
||||
#endif
|
||||
|
||||
/* ---- Physical constants ---- */
|
||||
|
||||
#define SPEED_OF_LIGHT_MHZ 300.0f /**< c in m * MHz (simplified). */
|
||||
#define FREQ_CH6_MHZ 2437.0f /**< Center frequency of WiFi channel 6. */
|
||||
#define LAMBDA_CH6 (SPEED_OF_LIGHT_MHZ / FREQ_CH6_MHZ) /**< ~0.123 m */
|
||||
|
||||
/** Breathing rate: ~15 breaths/min = 0.25 Hz. */
|
||||
#define BREATHING_FREQ_HZ 0.25f
|
||||
|
||||
/** Breathing modulation amplitude in radians. */
|
||||
#define BREATHING_AMP_RAD 0.3f
|
||||
|
||||
/** Walking speed in m/s. */
|
||||
#define WALK_SPEED_MS 1.0f
|
||||
|
||||
/** Room width for position wrapping (meters). */
|
||||
#define ROOM_WIDTH_M 6.0f
|
||||
|
||||
/** Gaussian sigma for person influence on subcarriers. */
|
||||
#define PERSON_SIGMA 8.0f
|
||||
|
||||
/** Base amplitude for all subcarriers. */
|
||||
#define A_BASE 80.0f
|
||||
|
||||
/** Person-induced amplitude perturbation. */
|
||||
#define A_PERSON 40.0f
|
||||
|
||||
/** Noise amplitude (peak). */
|
||||
#define NOISE_AMP 3.0f
|
||||
|
||||
/** Phase noise amplitude (radians). */
|
||||
#define PHASE_NOISE_AMP 0.05f
|
||||
|
||||
/** Number of frames in the ring overflow burst (scenario 7). */
|
||||
#define OVERFLOW_BURST_COUNT 1000
|
||||
|
||||
/** Fall detection: number of frames with abrupt phase jump. */
|
||||
#define FALL_FRAME_COUNT 5
|
||||
|
||||
/** Fall phase acceleration magnitude (radians). */
|
||||
#define FALL_PHASE_JUMP 3.14f
|
||||
|
||||
/** Pi constant. */
|
||||
#ifndef M_PI
|
||||
#define M_PI 3.14159265358979323846
|
||||
#endif
|
||||
|
||||
/* ---- Channel sweep table ---- */
|
||||
|
||||
static const uint8_t s_sweep_channels[] = {1, 6, 11, 36};
|
||||
#define SWEEP_CHANNEL_COUNT (sizeof(s_sweep_channels) / sizeof(s_sweep_channels[0]))
|
||||
|
||||
/* ---- MAC addresses for filter test ---- */
|
||||
|
||||
/** "Correct" MAC that matches a typical filter_mac. */
|
||||
static const uint8_t s_good_mac[6] = {0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF};
|
||||
|
||||
/** "Wrong" MAC that should be rejected by the filter. */
|
||||
static const uint8_t s_bad_mac[6] __attribute__((unused)) = {0x11, 0x22, 0x33, 0x44, 0x55, 0x66};
|
||||
|
||||
/* ---- LFSR pseudo-random number generator ---- */
|
||||
|
||||
/**
|
||||
* 32-bit Galois LFSR for deterministic pseudo-random noise.
|
||||
* Avoids stdlib rand() which may not be available on ESP32 bare-metal.
|
||||
* Taps: bits 32, 31, 29, 1 (Galois LFSR polynomial 0xD0000001).
|
||||
*/
|
||||
static uint32_t s_lfsr = 0xDEADBEEF;
|
||||
|
||||
static uint32_t lfsr_next(void)
|
||||
{
|
||||
uint32_t lsb = s_lfsr & 1u;
|
||||
s_lfsr >>= 1;
|
||||
if (lsb) {
|
||||
s_lfsr ^= 0xD0000001u; /* x^32 + x^31 + x^29 + x^1 */
|
||||
}
|
||||
return s_lfsr;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a pseudo-random float in [-1.0, +1.0].
|
||||
*/
|
||||
static float lfsr_float(void)
|
||||
{
|
||||
uint32_t r = lfsr_next();
|
||||
/* Map [0, 65535] to [-1.0, +1.0] using 65535/2 = 32767.5 */
|
||||
return ((float)(r & 0xFFFF) / 32768.0f) - 1.0f;
|
||||
}
|
||||
|
||||
/* ---- Module state ---- */
|
||||
|
||||
static mock_state_t s_state;
|
||||
static esp_timer_handle_t s_timer = NULL;
|
||||
|
||||
/** Tracks whether the MAC filter has been set up in gen_mac_filter. */
|
||||
static bool s_mac_filter_initialized = false;
|
||||
|
||||
/** Tracks whether the overflow burst has fired in gen_ring_overflow. */
|
||||
static bool s_overflow_burst_done = false;
|
||||
|
||||
/* External NVS config (for MAC filter scenario). */
|
||||
extern nvs_config_t g_nvs_config;
|
||||
|
||||
/* ---- Helper: compute channel frequency ---- */
|
||||
|
||||
static uint32_t channel_to_freq_mhz(uint8_t channel)
|
||||
{
|
||||
if (channel >= 1 && channel <= 13) {
|
||||
return 2412 + (channel - 1) * 5;
|
||||
} else if (channel == 14) {
|
||||
return 2484;
|
||||
} else if (channel >= 36 && channel <= 177) {
|
||||
return 5000 + channel * 5;
|
||||
}
|
||||
return 2437; /* Default to ch 6. */
|
||||
}
|
||||
|
||||
/* ---- Helper: compute wavelength for a channel ---- */
|
||||
|
||||
static float channel_to_lambda(uint8_t channel)
|
||||
{
|
||||
float freq = (float)channel_to_freq_mhz(channel);
|
||||
return SPEED_OF_LIGHT_MHZ / freq;
|
||||
}
|
||||
|
||||
/* ---- Helper: elapsed ms since scenario start ---- */
|
||||
|
||||
static int64_t scenario_elapsed_ms(void)
|
||||
{
|
||||
int64_t now = esp_timer_get_time() / 1000;
|
||||
return now - s_state.scenario_start_ms;
|
||||
}
|
||||
|
||||
/* ---- Helper: clamp int8 ---- */
|
||||
|
||||
static int8_t clamp_i8(int32_t val)
|
||||
{
|
||||
if (val < -128) return -128;
|
||||
if (val > 127) return 127;
|
||||
return (int8_t)val;
|
||||
}
|
||||
|
||||
/* ---- Core signal generation ---- */
|
||||
|
||||
/**
|
||||
* Generate one I/Q frame for a single person at position person_x.
|
||||
*
|
||||
* @param iq_buf Output buffer (MOCK_IQ_LEN bytes).
|
||||
* @param person_x Person X position in meters.
|
||||
* @param breathing Breathing phase in radians.
|
||||
* @param has_person Whether a person is present.
|
||||
* @param lambda Wavelength in meters.
|
||||
*/
|
||||
static void generate_person_iq(uint8_t *iq_buf, float person_x,
|
||||
float breathing, bool has_person,
|
||||
float lambda)
|
||||
{
|
||||
for (int k = 0; k < MOCK_N_SUBCARRIERS; k++) {
|
||||
/* Distance of subcarrier k's spatial sample from person. */
|
||||
float d_k = (float)k - person_x * (MOCK_N_SUBCARRIERS / ROOM_WIDTH_M);
|
||||
|
||||
/* Amplitude model. */
|
||||
float amp = A_BASE;
|
||||
if (has_person) {
|
||||
float gauss = expf(-(d_k * d_k) / (2.0f * PERSON_SIGMA * PERSON_SIGMA));
|
||||
amp += A_PERSON * gauss;
|
||||
}
|
||||
amp += NOISE_AMP * lfsr_float();
|
||||
|
||||
/* Phase model. */
|
||||
float phase = (float)k * 0.1f; /* Base phase gradient. */
|
||||
if (has_person) {
|
||||
float d_meters = fabsf(d_k) * (ROOM_WIDTH_M / MOCK_N_SUBCARRIERS);
|
||||
phase += (2.0f * M_PI * d_meters) / lambda;
|
||||
phase += BREATHING_AMP_RAD * sinf(breathing);
|
||||
}
|
||||
phase += PHASE_NOISE_AMP * lfsr_float();
|
||||
|
||||
/* Convert to I/Q (int8). */
|
||||
float i_f = amp * cosf(phase);
|
||||
float q_f = amp * sinf(phase);
|
||||
|
||||
iq_buf[k * 2] = (uint8_t)clamp_i8((int32_t)i_f);
|
||||
iq_buf[k * 2 + 1] = (uint8_t)clamp_i8((int32_t)q_f);
|
||||
}
|
||||
}
|
||||
|
||||
/* ---- Scenario generators ---- */
|
||||
|
||||
/**
|
||||
* Scenario 0: Empty room.
|
||||
* Low-amplitude noise on all subcarriers, no person present.
|
||||
*/
|
||||
static void gen_empty(uint8_t *iq_buf, uint8_t *channel, int8_t *rssi)
|
||||
{
|
||||
generate_person_iq(iq_buf, 0.0f, 0.0f, false, LAMBDA_CH6);
|
||||
*channel = 6;
|
||||
*rssi = -60;
|
||||
}
|
||||
|
||||
/**
|
||||
* Scenario 1: Static person.
|
||||
* Person at fixed position with breathing modulation.
|
||||
*/
|
||||
static void gen_static_person(uint8_t *iq_buf, uint8_t *channel, int8_t *rssi)
|
||||
{
|
||||
s_state.breathing_phase += 2.0f * M_PI * BREATHING_FREQ_HZ
|
||||
* (MOCK_CSI_INTERVAL_MS / 1000.0f);
|
||||
if (s_state.breathing_phase > 2.0f * M_PI) {
|
||||
s_state.breathing_phase -= 2.0f * M_PI;
|
||||
}
|
||||
|
||||
generate_person_iq(iq_buf, 3.0f, s_state.breathing_phase, true, LAMBDA_CH6);
|
||||
*channel = 6;
|
||||
*rssi = -45;
|
||||
}
|
||||
|
||||
/**
|
||||
* Scenario 2: Walking person.
|
||||
* Person moves across the room and wraps around.
|
||||
*/
|
||||
static void gen_walking(uint8_t *iq_buf, uint8_t *channel, int8_t *rssi)
|
||||
{
|
||||
s_state.breathing_phase += 2.0f * M_PI * BREATHING_FREQ_HZ
|
||||
* (MOCK_CSI_INTERVAL_MS / 1000.0f);
|
||||
if (s_state.breathing_phase > 2.0f * M_PI) {
|
||||
s_state.breathing_phase -= 2.0f * M_PI;
|
||||
}
|
||||
|
||||
s_state.person_x += s_state.person_speed * (MOCK_CSI_INTERVAL_MS / 1000.0f);
|
||||
if (s_state.person_x > ROOM_WIDTH_M) {
|
||||
s_state.person_x -= ROOM_WIDTH_M;
|
||||
}
|
||||
|
||||
generate_person_iq(iq_buf, s_state.person_x, s_state.breathing_phase,
|
||||
true, LAMBDA_CH6);
|
||||
*channel = 6;
|
||||
*rssi = -40;
|
||||
}
|
||||
|
||||
/**
|
||||
* Scenario 3: Fall event.
|
||||
* Normal walking for most frames, then an abrupt phase discontinuity
|
||||
* simulating a fall (rapid vertical displacement).
|
||||
*/
|
||||
static void gen_fall(uint8_t *iq_buf, uint8_t *channel, int8_t *rssi)
|
||||
{
|
||||
int64_t elapsed = scenario_elapsed_ms();
|
||||
uint32_t duration = CONFIG_CSI_MOCK_SCENARIO_DURATION_MS;
|
||||
|
||||
/* Fall occurs at 70% of scenario duration. */
|
||||
uint32_t fall_start = (duration * 70) / 100;
|
||||
uint32_t fall_end = fall_start + (FALL_FRAME_COUNT * MOCK_CSI_INTERVAL_MS);
|
||||
|
||||
s_state.breathing_phase += 2.0f * M_PI * BREATHING_FREQ_HZ
|
||||
* (MOCK_CSI_INTERVAL_MS / 1000.0f);
|
||||
|
||||
s_state.person_x += 0.5f * (MOCK_CSI_INTERVAL_MS / 1000.0f);
|
||||
if (s_state.person_x > ROOM_WIDTH_M) {
|
||||
s_state.person_x = ROOM_WIDTH_M;
|
||||
}
|
||||
|
||||
float extra_phase = 0.0f;
|
||||
if (elapsed >= fall_start && elapsed < fall_end) {
|
||||
/* Abrupt phase jump simulating rapid downward motion. */
|
||||
extra_phase = FALL_PHASE_JUMP;
|
||||
}
|
||||
|
||||
/* Build I/Q with fall perturbation. */
|
||||
float lambda = LAMBDA_CH6;
|
||||
for (int k = 0; k < MOCK_N_SUBCARRIERS; k++) {
|
||||
float d_k = (float)k - s_state.person_x * (MOCK_N_SUBCARRIERS / ROOM_WIDTH_M);
|
||||
float gauss = expf(-(d_k * d_k) / (2.0f * PERSON_SIGMA * PERSON_SIGMA));
|
||||
|
||||
float amp = A_BASE + A_PERSON * gauss + NOISE_AMP * lfsr_float();
|
||||
|
||||
float d_meters = fabsf(d_k) * (ROOM_WIDTH_M / MOCK_N_SUBCARRIERS);
|
||||
float phase = (float)k * 0.1f
|
||||
+ (2.0f * M_PI * d_meters) / lambda
|
||||
+ BREATHING_AMP_RAD * sinf(s_state.breathing_phase)
|
||||
+ extra_phase * gauss /* Fall affects nearby subcarriers. */
|
||||
+ PHASE_NOISE_AMP * lfsr_float();
|
||||
|
||||
iq_buf[k * 2] = (uint8_t)clamp_i8((int32_t)(amp * cosf(phase)));
|
||||
iq_buf[k * 2 + 1] = (uint8_t)clamp_i8((int32_t)(amp * sinf(phase)));
|
||||
}
|
||||
|
||||
*channel = 6;
|
||||
*rssi = -42;
|
||||
}
|
||||
|
||||
/**
|
||||
* Scenario 4: Multiple people.
|
||||
* Two people at different positions with independent breathing.
|
||||
*/
|
||||
static void gen_multi_person(uint8_t *iq_buf, uint8_t *channel, int8_t *rssi)
|
||||
{
|
||||
float dt = MOCK_CSI_INTERVAL_MS / 1000.0f;
|
||||
|
||||
s_state.breathing_phase += 2.0f * M_PI * BREATHING_FREQ_HZ * dt;
|
||||
float breathing2 = s_state.breathing_phase * 1.3f; /* Slightly different rate. */
|
||||
|
||||
s_state.person_x += s_state.person_speed * dt;
|
||||
s_state.person2_x += s_state.person2_speed * dt;
|
||||
|
||||
/* Wrap positions. */
|
||||
if (s_state.person_x > ROOM_WIDTH_M) s_state.person_x -= ROOM_WIDTH_M;
|
||||
if (s_state.person2_x > ROOM_WIDTH_M) s_state.person2_x -= ROOM_WIDTH_M;
|
||||
|
||||
float lambda = LAMBDA_CH6;
|
||||
|
||||
for (int k = 0; k < MOCK_N_SUBCARRIERS; k++) {
|
||||
/* Superpose contributions from both people. */
|
||||
float d1 = (float)k - s_state.person_x * (MOCK_N_SUBCARRIERS / ROOM_WIDTH_M);
|
||||
float d2 = (float)k - s_state.person2_x * (MOCK_N_SUBCARRIERS / ROOM_WIDTH_M);
|
||||
|
||||
float g1 = expf(-(d1 * d1) / (2.0f * PERSON_SIGMA * PERSON_SIGMA));
|
||||
float g2 = expf(-(d2 * d2) / (2.0f * PERSON_SIGMA * PERSON_SIGMA));
|
||||
|
||||
float amp = A_BASE + A_PERSON * g1 + (A_PERSON * 0.7f) * g2
|
||||
+ NOISE_AMP * lfsr_float();
|
||||
|
||||
float dm1 = fabsf(d1) * (ROOM_WIDTH_M / MOCK_N_SUBCARRIERS);
|
||||
float dm2 = fabsf(d2) * (ROOM_WIDTH_M / MOCK_N_SUBCARRIERS);
|
||||
|
||||
float phase = (float)k * 0.1f
|
||||
+ (2.0f * M_PI * dm1) / lambda * g1
|
||||
+ (2.0f * M_PI * dm2) / lambda * g2
|
||||
+ BREATHING_AMP_RAD * sinf(s_state.breathing_phase) * g1
|
||||
+ BREATHING_AMP_RAD * sinf(breathing2) * g2
|
||||
+ PHASE_NOISE_AMP * lfsr_float();
|
||||
|
||||
iq_buf[k * 2] = (uint8_t)clamp_i8((int32_t)(amp * cosf(phase)));
|
||||
iq_buf[k * 2 + 1] = (uint8_t)clamp_i8((int32_t)(amp * sinf(phase)));
|
||||
}
|
||||
|
||||
*channel = 6;
|
||||
*rssi = -38;
|
||||
}
|
||||
|
||||
/**
|
||||
* Scenario 5: Channel sweep.
|
||||
* Cycles through channels 1, 6, 11, 36 every 20 frames.
|
||||
*/
|
||||
static void gen_channel_sweep(uint8_t *iq_buf, uint8_t *channel, int8_t *rssi)
|
||||
{
|
||||
/* Switch channel every 20 frames (1 second at 20 Hz). */
|
||||
if ((s_state.frame_count % 20) == 0 && s_state.frame_count > 0) {
|
||||
s_state.channel_idx = (s_state.channel_idx + 1) % SWEEP_CHANNEL_COUNT;
|
||||
}
|
||||
|
||||
uint8_t ch = s_sweep_channels[s_state.channel_idx];
|
||||
float lambda = channel_to_lambda(ch);
|
||||
|
||||
generate_person_iq(iq_buf, 3.0f, 0.0f, true, lambda);
|
||||
*channel = ch;
|
||||
*rssi = -50;
|
||||
}
|
||||
|
||||
/**
|
||||
* Scenario 6: MAC filter test.
|
||||
* Alternates between a "good" MAC (should pass filter) and a "bad" MAC
|
||||
* (should be rejected). Even frames use good MAC, odd frames use bad MAC.
|
||||
*
|
||||
* Note: Since we inject via edge_enqueue_csi() which bypasses the MAC
|
||||
* filter (that happens in wifi_csi_callback), this scenario instead
|
||||
* sets/clears the NVS filter_mac and logs which frames would pass.
|
||||
* The test harness can verify frame_count vs expected.
|
||||
*/
|
||||
static void gen_mac_filter(uint8_t *iq_buf, uint8_t *channel, int8_t *rssi,
|
||||
bool *skip_inject)
|
||||
{
|
||||
/* Set up the filter MAC to match s_good_mac on first frame of this scenario. */
|
||||
if (!s_mac_filter_initialized) {
|
||||
memcpy(g_nvs_config.filter_mac, s_good_mac, 6);
|
||||
g_nvs_config.filter_mac_set = 1;
|
||||
s_mac_filter_initialized = true;
|
||||
ESP_LOGI(TAG, "MAC filter scenario: filter set to %02X:%02X:%02X:%02X:%02X:%02X",
|
||||
s_good_mac[0], s_good_mac[1], s_good_mac[2],
|
||||
s_good_mac[3], s_good_mac[4], s_good_mac[5]);
|
||||
}
|
||||
|
||||
generate_person_iq(iq_buf, 3.0f, 0.0f, true, LAMBDA_CH6);
|
||||
*channel = 6;
|
||||
*rssi = -50;
|
||||
|
||||
/* Odd frames: simulate "wrong" MAC by skipping injection. */
|
||||
if ((s_state.frame_count & 1) != 0) {
|
||||
*skip_inject = true;
|
||||
ESP_LOGD(TAG, "MAC filter: frame %lu skipped (bad MAC)",
|
||||
(unsigned long)s_state.frame_count);
|
||||
} else {
|
||||
*skip_inject = false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Scenario 7: Ring buffer overflow.
|
||||
* Burst OVERFLOW_BURST_COUNT frames as fast as possible to test
|
||||
* the SPSC ring buffer's overflow handling.
|
||||
*/
|
||||
static void gen_ring_overflow(uint8_t *iq_buf, uint8_t *channel, int8_t *rssi,
|
||||
uint16_t *burst_count)
|
||||
{
|
||||
generate_person_iq(iq_buf, 3.0f, 0.0f, true, LAMBDA_CH6);
|
||||
*channel = 6;
|
||||
*rssi = -50;
|
||||
|
||||
/* Burst once on the first timer tick of this scenario. */
|
||||
if (!s_overflow_burst_done) {
|
||||
*burst_count = OVERFLOW_BURST_COUNT;
|
||||
s_overflow_burst_done = true;
|
||||
} else {
|
||||
*burst_count = 1;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Scenario 8: Boundary RSSI sweep.
|
||||
* Sweeps RSSI from -90 dBm to -10 dBm linearly over the scenario duration.
|
||||
*/
|
||||
static void gen_boundary_rssi(uint8_t *iq_buf, uint8_t *channel, int8_t *rssi)
|
||||
{
|
||||
int64_t elapsed = scenario_elapsed_ms();
|
||||
uint32_t duration = CONFIG_CSI_MOCK_SCENARIO_DURATION_MS;
|
||||
|
||||
/* Linear sweep: -90 to -10 dBm. */
|
||||
float frac = (float)elapsed / (float)duration;
|
||||
if (frac > 1.0f) frac = 1.0f;
|
||||
int8_t sweep_rssi = (int8_t)(-90.0f + 80.0f * frac);
|
||||
|
||||
generate_person_iq(iq_buf, 3.0f, 0.0f, true, LAMBDA_CH6);
|
||||
*channel = 6;
|
||||
*rssi = sweep_rssi;
|
||||
}
|
||||
|
||||
/**
|
||||
* Scenario 9: Zero-length I/Q.
|
||||
* Injects a frame with iq_len = 0 to test error handling.
|
||||
*/
|
||||
/* Handled inline in the timer callback. */
|
||||
|
||||
/* ---- Scenario transition ---- */
|
||||
|
||||
/**
|
||||
* Advance to the next scenario when running SCENARIO_ALL.
|
||||
*/
|
||||
/** Flag: set when all scenarios are done so timer callback exits early. */
|
||||
static bool s_all_done = false;
|
||||
|
||||
static void advance_scenario(void)
|
||||
{
|
||||
s_state.all_idx++;
|
||||
if (s_state.all_idx >= MOCK_SCENARIO_COUNT) {
|
||||
ESP_LOGI(TAG, "All %d scenarios complete (%lu total frames)",
|
||||
MOCK_SCENARIO_COUNT, (unsigned long)s_state.frame_count);
|
||||
s_all_done = true;
|
||||
return; /* Stop generating — timer callback will check s_all_done. */
|
||||
}
|
||||
|
||||
s_state.scenario = s_state.all_idx;
|
||||
s_state.scenario_start_ms = esp_timer_get_time() / 1000;
|
||||
|
||||
/* Reset per-scenario state. */
|
||||
s_state.person_x = 1.0f;
|
||||
s_state.person_speed = WALK_SPEED_MS;
|
||||
s_state.person2_x = 4.0f;
|
||||
s_state.person2_speed = WALK_SPEED_MS * 0.6f;
|
||||
s_state.breathing_phase = 0.0f;
|
||||
s_state.channel_idx = 0;
|
||||
s_state.rssi_sweep = -90;
|
||||
|
||||
ESP_LOGI(TAG, "=== Scenario %u started ===", (unsigned)s_state.scenario);
|
||||
}
|
||||
|
||||
/* ---- Timer callback ---- */
|
||||
|
||||
static void mock_timer_cb(void *arg)
|
||||
{
|
||||
(void)arg;
|
||||
|
||||
/* All scenarios finished — stop generating. */
|
||||
if (s_all_done) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* Check for scenario timeout in SCENARIO_ALL mode. */
|
||||
if (s_state.scenario == MOCK_SCENARIO_ALL ||
|
||||
(s_state.all_idx > 0 && s_state.all_idx < MOCK_SCENARIO_COUNT)) {
|
||||
/* We're running in sequential mode. */
|
||||
int64_t elapsed = scenario_elapsed_ms();
|
||||
if (elapsed >= CONFIG_CSI_MOCK_SCENARIO_DURATION_MS) {
|
||||
advance_scenario();
|
||||
}
|
||||
}
|
||||
|
||||
uint8_t iq_buf[MOCK_IQ_LEN];
|
||||
uint8_t channel = 6;
|
||||
int8_t rssi = -50;
|
||||
uint16_t iq_len = MOCK_IQ_LEN;
|
||||
uint16_t burst = 1;
|
||||
bool skip = false;
|
||||
|
||||
uint8_t active_scenario = s_state.scenario;
|
||||
|
||||
switch (active_scenario) {
|
||||
case MOCK_SCENARIO_EMPTY:
|
||||
gen_empty(iq_buf, &channel, &rssi);
|
||||
break;
|
||||
|
||||
case MOCK_SCENARIO_STATIC_PERSON:
|
||||
gen_static_person(iq_buf, &channel, &rssi);
|
||||
break;
|
||||
|
||||
case MOCK_SCENARIO_WALKING:
|
||||
gen_walking(iq_buf, &channel, &rssi);
|
||||
break;
|
||||
|
||||
case MOCK_SCENARIO_FALL:
|
||||
gen_fall(iq_buf, &channel, &rssi);
|
||||
break;
|
||||
|
||||
case MOCK_SCENARIO_MULTI_PERSON:
|
||||
gen_multi_person(iq_buf, &channel, &rssi);
|
||||
break;
|
||||
|
||||
case MOCK_SCENARIO_CHANNEL_SWEEP:
|
||||
gen_channel_sweep(iq_buf, &channel, &rssi);
|
||||
break;
|
||||
|
||||
case MOCK_SCENARIO_MAC_FILTER:
|
||||
gen_mac_filter(iq_buf, &channel, &rssi, &skip);
|
||||
break;
|
||||
|
||||
case MOCK_SCENARIO_RING_OVERFLOW:
|
||||
gen_ring_overflow(iq_buf, &channel, &rssi, &burst);
|
||||
break;
|
||||
|
||||
case MOCK_SCENARIO_BOUNDARY_RSSI:
|
||||
gen_boundary_rssi(iq_buf, &channel, &rssi);
|
||||
break;
|
||||
|
||||
case MOCK_SCENARIO_ZERO_LENGTH:
|
||||
/* Deliberately inject zero-length data to test error path. */
|
||||
iq_len = 0;
|
||||
memset(iq_buf, 0, sizeof(iq_buf));
|
||||
break;
|
||||
|
||||
default:
|
||||
ESP_LOGW(TAG, "Unknown scenario %u, defaulting to empty", active_scenario);
|
||||
gen_empty(iq_buf, &channel, &rssi);
|
||||
break;
|
||||
}
|
||||
|
||||
/* Inject frame(s) into the edge processing pipeline. */
|
||||
if (!skip) {
|
||||
for (uint16_t i = 0; i < burst; i++) {
|
||||
edge_enqueue_csi(iq_buf, iq_len, rssi, channel);
|
||||
s_state.frame_count++;
|
||||
}
|
||||
} else {
|
||||
/* Count skipped frames for MAC filter validation. */
|
||||
s_state.frame_count++;
|
||||
}
|
||||
|
||||
/* Periodic logging (every 20 frames = 1 second). */
|
||||
if ((s_state.frame_count % 20) == 0) {
|
||||
ESP_LOGI(TAG, "scenario=%u frames=%lu ch=%u rssi=%d",
|
||||
active_scenario, (unsigned long)s_state.frame_count,
|
||||
(unsigned)channel, (int)rssi);
|
||||
}
|
||||
}
|
||||
|
||||
/* ---- Public API ---- */
|
||||
|
||||
esp_err_t mock_csi_init(uint8_t scenario)
|
||||
{
|
||||
if (s_timer != NULL) {
|
||||
ESP_LOGW(TAG, "Mock CSI already running");
|
||||
return ESP_ERR_INVALID_STATE;
|
||||
}
|
||||
|
||||
/* Initialize state. */
|
||||
memset(&s_state, 0, sizeof(s_state));
|
||||
s_state.person_x = 1.0f;
|
||||
s_state.person_speed = WALK_SPEED_MS;
|
||||
s_state.person2_x = 4.0f;
|
||||
s_state.person2_speed = WALK_SPEED_MS * 0.6f;
|
||||
s_state.scenario_start_ms = esp_timer_get_time() / 1000;
|
||||
s_all_done = false;
|
||||
s_mac_filter_initialized = false;
|
||||
s_overflow_burst_done = false;
|
||||
|
||||
/* Reset LFSR to deterministic seed. */
|
||||
s_lfsr = 0xDEADBEEF;
|
||||
|
||||
if (scenario == MOCK_SCENARIO_ALL) {
|
||||
s_state.scenario = 0;
|
||||
s_state.all_idx = 0;
|
||||
ESP_LOGI(TAG, "Mock CSI: running ALL %d scenarios sequentially (%u ms each)",
|
||||
MOCK_SCENARIO_COUNT, CONFIG_CSI_MOCK_SCENARIO_DURATION_MS);
|
||||
} else {
|
||||
s_state.scenario = scenario;
|
||||
s_state.all_idx = 0;
|
||||
ESP_LOGI(TAG, "Mock CSI: scenario=%u, interval=%u ms, duration=%u ms",
|
||||
(unsigned)scenario, MOCK_CSI_INTERVAL_MS,
|
||||
CONFIG_CSI_MOCK_SCENARIO_DURATION_MS);
|
||||
}
|
||||
|
||||
/* Create periodic timer. */
|
||||
esp_timer_create_args_t timer_args = {
|
||||
.callback = mock_timer_cb,
|
||||
.arg = NULL,
|
||||
.name = "mock_csi",
|
||||
};
|
||||
|
||||
esp_err_t err = esp_timer_create(&timer_args, &s_timer);
|
||||
if (err != ESP_OK) {
|
||||
ESP_LOGE(TAG, "Failed to create mock CSI timer: %s", esp_err_to_name(err));
|
||||
return err;
|
||||
}
|
||||
|
||||
uint64_t period_us = (uint64_t)MOCK_CSI_INTERVAL_MS * 1000;
|
||||
err = esp_timer_start_periodic(s_timer, period_us);
|
||||
if (err != ESP_OK) {
|
||||
ESP_LOGE(TAG, "Failed to start mock CSI timer: %s", esp_err_to_name(err));
|
||||
esp_timer_delete(s_timer);
|
||||
s_timer = NULL;
|
||||
return err;
|
||||
}
|
||||
|
||||
ESP_LOGI(TAG, "Mock CSI generator started (20 Hz, %u subcarriers, %u bytes/frame)",
|
||||
MOCK_N_SUBCARRIERS, MOCK_IQ_LEN);
|
||||
return ESP_OK;
|
||||
}
|
||||
|
||||
void mock_csi_stop(void)
|
||||
{
|
||||
if (s_timer == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
esp_timer_stop(s_timer);
|
||||
esp_timer_delete(s_timer);
|
||||
s_timer = NULL;
|
||||
|
||||
ESP_LOGI(TAG, "Mock CSI stopped after %lu frames",
|
||||
(unsigned long)s_state.frame_count);
|
||||
}
|
||||
|
||||
uint32_t mock_csi_get_frame_count(void)
|
||||
{
|
||||
return s_state.frame_count;
|
||||
}
|
||||
|
||||
#endif /* CONFIG_CSI_MOCK_ENABLED */
|
||||
107
firmware/esp32-csi-node/main/mock_csi.h
Normal file
107
firmware/esp32-csi-node/main/mock_csi.h
Normal file
|
|
@ -0,0 +1,107 @@
|
|||
/**
|
||||
* @file mock_csi.h
|
||||
* @brief ADR-061 Mock CSI generator for ESP32-S3 QEMU testing.
|
||||
*
|
||||
* Generates synthetic CSI frames at 20 Hz using an esp_timer, injecting
|
||||
* them directly into the edge processing pipeline via edge_enqueue_csi().
|
||||
* Ten scenarios exercise the full signal processing and edge intelligence
|
||||
* pipeline without requiring real WiFi hardware.
|
||||
*
|
||||
* Signal model per subcarrier k at time t:
|
||||
* A_k(t) = A_base + A_person * exp(-d_k^2 / sigma^2) + noise
|
||||
* phi_k(t) = phi_base + (2*pi*d / lambda) + breathing_mod(t) + noise
|
||||
*
|
||||
* Enable via: idf.py menuconfig -> CSI Mock Generator -> Enable
|
||||
* Or add CONFIG_CSI_MOCK_ENABLED=y to sdkconfig.defaults.
|
||||
*/
|
||||
|
||||
#ifndef MOCK_CSI_H
|
||||
#define MOCK_CSI_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include "esp_err.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* ---- Timing ---- */
|
||||
|
||||
/** Mock CSI frame interval in milliseconds (20 Hz). */
|
||||
#define MOCK_CSI_INTERVAL_MS 50
|
||||
|
||||
/* ---- HT20 subcarrier geometry ---- */
|
||||
|
||||
/** Number of OFDM subcarriers for HT20 (802.11n). */
|
||||
#define MOCK_N_SUBCARRIERS 52
|
||||
|
||||
/** I/Q data length in bytes: 52 subcarriers * 2 bytes (I + Q). */
|
||||
#define MOCK_IQ_LEN (MOCK_N_SUBCARRIERS * 2)
|
||||
|
||||
/* ---- Scenarios ---- */
|
||||
|
||||
/** Scenario identifiers for mock CSI generation. */
|
||||
typedef enum {
|
||||
MOCK_SCENARIO_EMPTY = 0, /**< Empty room: low-noise baseline. */
|
||||
MOCK_SCENARIO_STATIC_PERSON = 1, /**< Static person: amplitude dip, no motion. */
|
||||
MOCK_SCENARIO_WALKING = 2, /**< Walking person: moving reflector. */
|
||||
MOCK_SCENARIO_FALL = 3, /**< Fall event: abrupt phase acceleration. */
|
||||
MOCK_SCENARIO_MULTI_PERSON = 4, /**< Multiple people at different positions. */
|
||||
MOCK_SCENARIO_CHANNEL_SWEEP = 5, /**< Sweep through channels 1, 6, 11, 36. */
|
||||
MOCK_SCENARIO_MAC_FILTER = 6, /**< Alternate correct/wrong MAC for filter test. */
|
||||
MOCK_SCENARIO_RING_OVERFLOW = 7, /**< Burst 1000 frames rapidly to overflow ring. */
|
||||
MOCK_SCENARIO_BOUNDARY_RSSI = 8, /**< Sweep RSSI from -90 to -10 dBm. */
|
||||
MOCK_SCENARIO_ZERO_LENGTH = 9, /**< Zero-length I/Q payload (error case). */
|
||||
|
||||
MOCK_SCENARIO_COUNT = 10, /**< Total number of individual scenarios. */
|
||||
MOCK_SCENARIO_ALL = 255 /**< Meta: run all scenarios sequentially. */
|
||||
} mock_scenario_t;
|
||||
|
||||
/* ---- State ---- */
|
||||
|
||||
/** Internal state for the mock CSI generator. */
|
||||
typedef struct {
|
||||
uint8_t scenario; /**< Current active scenario. */
|
||||
uint32_t frame_count; /**< Total frames emitted since init. */
|
||||
float person_x; /**< Person X position in meters (walking). */
|
||||
float person_speed; /**< Person movement speed in m/s. */
|
||||
float breathing_phase; /**< Breathing oscillator phase in radians. */
|
||||
float person2_x; /**< Second person X position (multi-person). */
|
||||
float person2_speed; /**< Second person movement speed. */
|
||||
uint8_t channel_idx; /**< Index into channel sweep table. */
|
||||
int8_t rssi_sweep; /**< Current RSSI for boundary sweep. */
|
||||
int64_t scenario_start_ms; /**< Timestamp when current scenario started. */
|
||||
uint8_t all_idx; /**< Current scenario index in SCENARIO_ALL mode. */
|
||||
} mock_state_t;
|
||||
|
||||
/**
|
||||
* Initialize and start the mock CSI generator.
|
||||
*
|
||||
* Creates a periodic esp_timer that fires every MOCK_CSI_INTERVAL_MS
|
||||
* and injects synthetic CSI frames into edge_enqueue_csi().
|
||||
*
|
||||
* @param scenario Scenario to run (0-9), or MOCK_SCENARIO_ALL (255)
|
||||
* to run all scenarios sequentially.
|
||||
* @return ESP_OK on success, ESP_ERR_INVALID_STATE if already running.
|
||||
*/
|
||||
esp_err_t mock_csi_init(uint8_t scenario);
|
||||
|
||||
/**
|
||||
* Stop and destroy the mock CSI timer.
|
||||
*
|
||||
* Safe to call even if the timer is not running.
|
||||
*/
|
||||
void mock_csi_stop(void);
|
||||
|
||||
/**
|
||||
* Get the total number of mock frames emitted since init.
|
||||
*
|
||||
* @return Frame count (useful for test validation).
|
||||
*/
|
||||
uint32_t mock_csi_get_frame_count(void);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* MOCK_CSI_H */
|
||||
54
firmware/esp32-csi-node/sdkconfig.coverage
Normal file
54
firmware/esp32-csi-node/sdkconfig.coverage
Normal file
|
|
@ -0,0 +1,54 @@
|
|||
# sdkconfig.coverage -- ESP-IDF sdkconfig overlay for gcov/lcov code coverage
|
||||
#
|
||||
# This overlay enables GCC code coverage instrumentation (gcov) and the
|
||||
# application-level trace (apptrace) channel required to extract .gcda
|
||||
# files from the target via JTAG/QEMU GDB.
|
||||
#
|
||||
# Usage (combine with sdkconfig.defaults as the base):
|
||||
#
|
||||
# idf.py -D SDKCONFIG_DEFAULTS="sdkconfig.defaults;sdkconfig.coverage" build
|
||||
#
|
||||
# After running the firmware under QEMU, dump coverage data through GDB:
|
||||
#
|
||||
# (gdb) mon gcov dump
|
||||
#
|
||||
# Then process the .gcda files on the host with lcov/genhtml:
|
||||
#
|
||||
# lcov --capture --directory build --output-file coverage.info \
|
||||
# --gcov-tool xtensa-esp-elf-gcov
|
||||
# genhtml coverage.info --output-directory coverage_html
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Compiler: disable optimizations so every source line maps 1:1 to object code
|
||||
# ---------------------------------------------------------------------------
|
||||
CONFIG_COMPILER_OPTIMIZATION_NONE=y
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Application-level trace: enables the gcov data channel over JTAG
|
||||
# ---------------------------------------------------------------------------
|
||||
CONFIG_APPTRACE_ENABLE=y
|
||||
CONFIG_APPTRACE_DEST_JTAG=y
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# CSI mock mode: identical to sdkconfig.qemu so coverage runs use the same
|
||||
# deterministic mock data path (no real WiFi hardware needed)
|
||||
# ---------------------------------------------------------------------------
|
||||
CONFIG_CSI_MOCK_ENABLED=y
|
||||
CONFIG_CSI_MOCK_SKIP_WIFI_CONNECT=y
|
||||
CONFIG_CSI_MOCK_SCENARIO=255
|
||||
CONFIG_CSI_TARGET_IP="10.0.2.2"
|
||||
CONFIG_CSI_MOCK_SCENARIO_DURATION_MS=5000
|
||||
CONFIG_CSI_MOCK_LOG_FRAMES=y
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# FreeRTOS and watchdog: match sdkconfig.qemu for QEMU timing tolerance
|
||||
# ---------------------------------------------------------------------------
|
||||
CONFIG_FREERTOS_TIMER_TASK_STACK_DEPTH=4096
|
||||
CONFIG_ESP_TASK_WDT_TIMEOUT_S=30
|
||||
CONFIG_ESP_INT_WDT_TIMEOUT_MS=800
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Logging and display
|
||||
# ---------------------------------------------------------------------------
|
||||
CONFIG_LOG_DEFAULT_LEVEL_INFO=y
|
||||
CONFIG_DISPLAY_ENABLE=n
|
||||
27
firmware/esp32-csi-node/sdkconfig.qemu
Normal file
27
firmware/esp32-csi-node/sdkconfig.qemu
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
# QEMU ESP32-S3 sdkconfig overlay (ADR-061)
|
||||
#
|
||||
# Merge with: idf.py -D SDKCONFIG_DEFAULTS="sdkconfig.defaults;sdkconfig.qemu" build
|
||||
|
||||
# ---- Mock CSI generator (replaces real WiFi CSI) ----
|
||||
CONFIG_CSI_MOCK_ENABLED=y
|
||||
CONFIG_CSI_MOCK_SKIP_WIFI_CONNECT=y
|
||||
CONFIG_CSI_MOCK_SCENARIO=255
|
||||
CONFIG_CSI_MOCK_SCENARIO_DURATION_MS=5000
|
||||
CONFIG_CSI_MOCK_LOG_FRAMES=y
|
||||
|
||||
# ---- Network (QEMU SLIRP provides 10.0.2.x) ----
|
||||
CONFIG_CSI_TARGET_IP="10.0.2.2"
|
||||
|
||||
# ---- Logging (verbose for validation) ----
|
||||
CONFIG_LOG_DEFAULT_LEVEL_INFO=y
|
||||
|
||||
# ---- FreeRTOS tuning for QEMU ----
|
||||
# Increase timer task stack to prevent overflow from mock_csi timer callback
|
||||
CONFIG_FREERTOS_TIMER_TASK_STACK_DEPTH=4096
|
||||
|
||||
# ---- Watchdog (relaxed for emulation — QEMU timing is not cycle-accurate) ----
|
||||
CONFIG_ESP_TASK_WDT_TIMEOUT_S=30
|
||||
CONFIG_ESP_INT_WDT_TIMEOUT_MS=800
|
||||
|
||||
# ---- Disable hardware-dependent features ----
|
||||
CONFIG_DISPLAY_ENABLE=n
|
||||
79
firmware/esp32-csi-node/test/Makefile
Normal file
79
firmware/esp32-csi-node/test/Makefile
Normal file
|
|
@ -0,0 +1,79 @@
|
|||
# Makefile for ESP32 CSI firmware fuzz testing targets (ADR-061 Layer 6).
|
||||
#
|
||||
# Requirements:
|
||||
# - clang with libFuzzer support (clang 6.0+)
|
||||
# - Linux or macOS (host-based fuzzing, no ESP-IDF needed)
|
||||
#
|
||||
# Usage:
|
||||
# make all # Build all fuzz targets
|
||||
# make fuzz_serialize # Build serialize target only
|
||||
# make fuzz_edge # Build edge enqueue target only
|
||||
# make fuzz_nvs # Build NVS config target only
|
||||
# make run_serialize # Build and run serialize fuzzer (30s)
|
||||
# make run_edge # Build and run edge fuzzer (30s)
|
||||
# make run_nvs # Build and run NVS fuzzer (30s)
|
||||
# make run_all # Run all fuzzers (30s each)
|
||||
# make clean # Remove build artifacts
|
||||
#
|
||||
# Environment variables:
|
||||
# FUZZ_DURATION=60 # Override fuzz duration in seconds
|
||||
# FUZZ_JOBS=4 # Parallel fuzzing jobs
|
||||
|
||||
CC = clang
|
||||
CFLAGS = -fsanitize=fuzzer,address,undefined -g -O1 \
|
||||
-Istubs -I../main \
|
||||
-DCONFIG_CSI_NODE_ID=1 \
|
||||
-DCONFIG_CSI_WIFI_CHANNEL=6 \
|
||||
-DCONFIG_CSI_WIFI_SSID=\"test\" \
|
||||
-DCONFIG_CSI_TARGET_IP=\"192.168.1.1\" \
|
||||
-DCONFIG_CSI_TARGET_PORT=5500 \
|
||||
-DCONFIG_ESP_WIFI_CSI_ENABLED=1 \
|
||||
-Wno-unused-function
|
||||
|
||||
STUBS_SRC = stubs/esp_stubs.c
|
||||
MAIN_DIR = ../main
|
||||
|
||||
# Default fuzz duration (seconds) and jobs
|
||||
FUZZ_DURATION ?= 30
|
||||
FUZZ_JOBS ?= 1
|
||||
|
||||
.PHONY: all clean run_serialize run_edge run_nvs run_all
|
||||
|
||||
all: fuzz_serialize fuzz_edge fuzz_nvs
|
||||
|
||||
# --- Serialize fuzzer ---
|
||||
# Tests csi_serialize_frame() with random wifi_csi_info_t inputs.
|
||||
# Links against the real csi_collector.c (with stubs for ESP-IDF).
|
||||
fuzz_serialize: fuzz_csi_serialize.c $(MAIN_DIR)/csi_collector.c $(STUBS_SRC)
|
||||
$(CC) $(CFLAGS) $^ -o $@ -lm
|
||||
|
||||
# --- Edge enqueue fuzzer ---
|
||||
# Tests the SPSC ring buffer push/pop logic with rapid-fire enqueues.
|
||||
# Self-contained: reproduces ring buffer logic from edge_processing.c.
|
||||
fuzz_edge: fuzz_edge_enqueue.c $(STUBS_SRC)
|
||||
$(CC) $(CFLAGS) $^ -o $@ -lm
|
||||
|
||||
# --- NVS config validation fuzzer ---
|
||||
# Tests all NVS config validation ranges with random values.
|
||||
# Self-contained: reproduces validation logic from nvs_config.c.
|
||||
fuzz_nvs: fuzz_nvs_config.c $(STUBS_SRC)
|
||||
$(CC) $(CFLAGS) $^ -o $@ -lm
|
||||
|
||||
# --- Run targets ---
|
||||
run_serialize: fuzz_serialize
|
||||
@mkdir -p corpus_serialize
|
||||
./fuzz_serialize corpus_serialize/ -max_total_time=$(FUZZ_DURATION) -max_len=2048 -jobs=$(FUZZ_JOBS)
|
||||
|
||||
run_edge: fuzz_edge
|
||||
@mkdir -p corpus_edge
|
||||
./fuzz_edge corpus_edge/ -max_total_time=$(FUZZ_DURATION) -max_len=4096 -jobs=$(FUZZ_JOBS)
|
||||
|
||||
run_nvs: fuzz_nvs
|
||||
@mkdir -p corpus_nvs
|
||||
./fuzz_nvs corpus_nvs/ -max_total_time=$(FUZZ_DURATION) -max_len=256 -jobs=$(FUZZ_JOBS)
|
||||
|
||||
run_all: run_serialize run_edge run_nvs
|
||||
|
||||
clean:
|
||||
rm -f fuzz_serialize fuzz_edge fuzz_nvs
|
||||
rm -rf corpus_serialize/ corpus_edge/ corpus_nvs/
|
||||
BIN
firmware/esp32-csi-node/test/corpus/seed_edge_normal.bin
Normal file
BIN
firmware/esp32-csi-node/test/corpus/seed_edge_normal.bin
Normal file
Binary file not shown.
BIN
firmware/esp32-csi-node/test/corpus/seed_edge_overflow.bin
Normal file
BIN
firmware/esp32-csi-node/test/corpus/seed_edge_overflow.bin
Normal file
Binary file not shown.
BIN
firmware/esp32-csi-node/test/corpus/seed_empty.bin
Normal file
BIN
firmware/esp32-csi-node/test/corpus/seed_empty.bin
Normal file
Binary file not shown.
BIN
firmware/esp32-csi-node/test/corpus/seed_large.bin
Normal file
BIN
firmware/esp32-csi-node/test/corpus/seed_large.bin
Normal file
Binary file not shown.
BIN
firmware/esp32-csi-node/test/corpus/seed_normal.bin
Normal file
BIN
firmware/esp32-csi-node/test/corpus/seed_normal.bin
Normal file
Binary file not shown.
BIN
firmware/esp32-csi-node/test/corpus/seed_nvs.bin
Normal file
BIN
firmware/esp32-csi-node/test/corpus/seed_nvs.bin
Normal file
Binary file not shown.
203
firmware/esp32-csi-node/test/fuzz_csi_serialize.c
Normal file
203
firmware/esp32-csi-node/test/fuzz_csi_serialize.c
Normal file
|
|
@ -0,0 +1,203 @@
|
|||
/**
|
||||
* @file fuzz_csi_serialize.c
|
||||
* @brief libFuzzer target for csi_serialize_frame() (ADR-061 Layer 6).
|
||||
*
|
||||
* Takes fuzz input and constructs wifi_csi_info_t structs with random
|
||||
* field values including extreme boundaries. Verifies that
|
||||
* csi_serialize_frame() never crashes, triggers ASAN, or causes UBSAN.
|
||||
*
|
||||
* Build (Linux/macOS with clang):
|
||||
* make fuzz_serialize
|
||||
*
|
||||
* Run:
|
||||
* ./fuzz_serialize corpus/ -max_len=2048
|
||||
*/
|
||||
|
||||
#include "esp_stubs.h"
|
||||
|
||||
/* Provide the globals that csi_collector.c references. */
|
||||
#include "nvs_config.h"
|
||||
nvs_config_t g_nvs_config;
|
||||
|
||||
/* Pull in the serialization function. */
|
||||
#include "csi_collector.h"
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
/**
 * Copy n bytes from the fuzz input into out, advancing the cursor.
 *
 * On success the cursor (*data) moves forward, *size shrinks by n, and
 * n is returned.  If fewer than n bytes remain, out is zero-filled,
 * the cursor is left untouched, and 0 is returned.
 */
static size_t fuzz_read(const uint8_t **data, size_t *size,
                        void *out, size_t n)
{
    if (*size >= n) {
        memcpy(out, *data, n);
        *data += n;
        *size -= n;
        return n;
    }
    memset(out, 0, n);
    return 0;
}
|
||||
|
||||
int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size)
|
||||
{
|
||||
if (size < 8) {
|
||||
return 0; /* Need at least a few control bytes. */
|
||||
}
|
||||
|
||||
const uint8_t *cursor = data;
|
||||
size_t remaining = size;
|
||||
|
||||
/* Parse control bytes from fuzz input. */
|
||||
uint8_t test_case;
|
||||
int16_t iq_len_raw;
|
||||
int8_t rssi;
|
||||
uint8_t channel;
|
||||
int8_t noise_floor;
|
||||
uint8_t out_buf_scale; /* Controls output buffer size: 0-255. */
|
||||
|
||||
fuzz_read(&cursor, &remaining, &test_case, 1);
|
||||
fuzz_read(&cursor, &remaining, &iq_len_raw, 2);
|
||||
fuzz_read(&cursor, &remaining, &rssi, 1);
|
||||
fuzz_read(&cursor, &remaining, &channel, 1);
|
||||
fuzz_read(&cursor, &remaining, &noise_floor, 1);
|
||||
fuzz_read(&cursor, &remaining, &out_buf_scale, 1);
|
||||
|
||||
/* --- Test case 0: Normal operation with fuzz-controlled values --- */
|
||||
|
||||
wifi_csi_info_t info;
|
||||
memset(&info, 0, sizeof(info));
|
||||
info.rx_ctrl.rssi = rssi;
|
||||
info.rx_ctrl.channel = channel & 0x0F; /* 4-bit field */
|
||||
info.rx_ctrl.noise_floor = noise_floor;
|
||||
|
||||
/* Use remaining fuzz data as I/Q buffer content. */
|
||||
uint16_t iq_len;
|
||||
if (iq_len_raw < 0) {
|
||||
iq_len = 0;
|
||||
} else if (iq_len_raw > (int16_t)remaining) {
|
||||
iq_len = (uint16_t)remaining;
|
||||
} else {
|
||||
iq_len = (uint16_t)iq_len_raw;
|
||||
}
|
||||
|
||||
int8_t iq_buf[CSI_MAX_FRAME_SIZE];
|
||||
if (iq_len > 0 && remaining > 0) {
|
||||
uint16_t copy = (iq_len > remaining) ? (uint16_t)remaining : iq_len;
|
||||
memcpy(iq_buf, cursor, copy);
|
||||
/* Zero-fill the rest if iq_len > available data. */
|
||||
if (copy < iq_len) {
|
||||
memset(iq_buf + copy, 0, iq_len - copy);
|
||||
}
|
||||
info.buf = iq_buf;
|
||||
} else {
|
||||
info.buf = iq_buf;
|
||||
memset(iq_buf, 0, sizeof(iq_buf));
|
||||
}
|
||||
info.len = (int16_t)iq_len;
|
||||
|
||||
/* Output buffer: scale from tiny (1 byte) to full size. */
|
||||
uint8_t out_buf[CSI_MAX_FRAME_SIZE + 64];
|
||||
size_t out_len;
|
||||
if (out_buf_scale == 0) {
|
||||
out_len = 0;
|
||||
} else if (out_buf_scale < 20) {
|
||||
/* Small buffer: test buffer-too-small path. */
|
||||
out_len = (size_t)out_buf_scale;
|
||||
} else {
|
||||
/* Normal/large buffer. */
|
||||
out_len = sizeof(out_buf);
|
||||
}
|
||||
|
||||
/* Call the function under test. Must not crash. */
|
||||
size_t result = csi_serialize_frame(&info, out_buf, out_len);
|
||||
|
||||
/* Basic sanity: result must be 0 (error) or <= out_len. */
|
||||
if (result > out_len) {
|
||||
__builtin_trap(); /* Buffer overflow detected. */
|
||||
}
|
||||
|
||||
/* --- Test case 1: NULL info pointer --- */
|
||||
if (test_case & 0x01) {
|
||||
result = csi_serialize_frame(NULL, out_buf, sizeof(out_buf));
|
||||
if (result != 0) {
|
||||
__builtin_trap(); /* NULL info should return 0. */
|
||||
}
|
||||
}
|
||||
|
||||
/* --- Test case 2: NULL output buffer --- */
|
||||
if (test_case & 0x02) {
|
||||
result = csi_serialize_frame(&info, NULL, sizeof(out_buf));
|
||||
if (result != 0) {
|
||||
__builtin_trap(); /* NULL buf should return 0. */
|
||||
}
|
||||
}
|
||||
|
||||
/* --- Test case 3: NULL I/Q buffer in info --- */
|
||||
if (test_case & 0x04) {
|
||||
wifi_csi_info_t null_iq_info = info;
|
||||
null_iq_info.buf = NULL;
|
||||
result = csi_serialize_frame(&null_iq_info, out_buf, sizeof(out_buf));
|
||||
if (result != 0) {
|
||||
__builtin_trap(); /* NULL info->buf should return 0. */
|
||||
}
|
||||
}
|
||||
|
||||
/* --- Test case 4: Extreme channel values --- */
|
||||
if (test_case & 0x08) {
|
||||
wifi_csi_info_t extreme_info = info;
|
||||
extreme_info.buf = iq_buf;
|
||||
|
||||
/* Channel 0 (invalid). */
|
||||
extreme_info.rx_ctrl.channel = 0;
|
||||
csi_serialize_frame(&extreme_info, out_buf, sizeof(out_buf));
|
||||
|
||||
/* Channel 15 (max 4-bit value, invalid for WiFi). */
|
||||
extreme_info.rx_ctrl.channel = 15;
|
||||
csi_serialize_frame(&extreme_info, out_buf, sizeof(out_buf));
|
||||
}
|
||||
|
||||
/* --- Test case 5: Extreme RSSI values --- */
|
||||
if (test_case & 0x10) {
|
||||
wifi_csi_info_t rssi_info = info;
|
||||
rssi_info.buf = iq_buf;
|
||||
|
||||
rssi_info.rx_ctrl.rssi = -128;
|
||||
csi_serialize_frame(&rssi_info, out_buf, sizeof(out_buf));
|
||||
|
||||
rssi_info.rx_ctrl.rssi = 127;
|
||||
csi_serialize_frame(&rssi_info, out_buf, sizeof(out_buf));
|
||||
}
|
||||
|
||||
/* --- Test case 6: Zero-length I/Q --- */
|
||||
if (test_case & 0x20) {
|
||||
wifi_csi_info_t zero_info = info;
|
||||
zero_info.buf = iq_buf;
|
||||
zero_info.len = 0;
|
||||
result = csi_serialize_frame(&zero_info, out_buf, sizeof(out_buf));
|
||||
/* len=0 means frame_size = CSI_HEADER_SIZE + 0 = 20 bytes. */
|
||||
if (result != 0 && result != CSI_HEADER_SIZE) {
|
||||
/* Either 0 (rejected) or exactly the header size is acceptable. */
|
||||
}
|
||||
}
|
||||
|
||||
/* --- Test case 7: Output buffer exactly header size --- */
|
||||
if (test_case & 0x40) {
|
||||
wifi_csi_info_t hdr_info = info;
|
||||
hdr_info.buf = iq_buf;
|
||||
hdr_info.len = 4; /* Small I/Q. */
|
||||
/* Buffer exactly header_size + iq_len = 24 bytes. */
|
||||
uint8_t tight_buf[CSI_HEADER_SIZE + 4];
|
||||
result = csi_serialize_frame(&hdr_info, tight_buf, sizeof(tight_buf));
|
||||
if (result > sizeof(tight_buf)) {
|
||||
__builtin_trap();
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
217
firmware/esp32-csi-node/test/fuzz_edge_enqueue.c
Normal file
217
firmware/esp32-csi-node/test/fuzz_edge_enqueue.c
Normal file
|
|
@ -0,0 +1,217 @@
|
|||
/**
|
||||
* @file fuzz_edge_enqueue.c
|
||||
* @brief libFuzzer target for edge_enqueue_csi() (ADR-061 Layer 6).
|
||||
*
|
||||
* Rapid-fire enqueues with varying iq_len from 0 to beyond
|
||||
* EDGE_MAX_IQ_BYTES, testing the SPSC ring buffer overflow behavior
|
||||
* and verifying no out-of-bounds writes occur.
|
||||
*
|
||||
* Build (Linux/macOS with clang):
|
||||
* make fuzz_edge
|
||||
*
|
||||
* Run:
|
||||
* ./fuzz_edge corpus/ -max_len=4096
|
||||
*/
|
||||
|
||||
#include "esp_stubs.h"
|
||||
|
||||
/*
|
||||
* We cannot include edge_processing.c directly because it references
|
||||
* FreeRTOS task creation and other ESP-IDF APIs in edge_processing_init().
|
||||
* Instead, we re-implement the SPSC ring buffer and edge_enqueue_csi()
|
||||
* logic identically to the production code, testing the same algorithm.
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
/* ---- Reproduce the ring buffer from edge_processing.h ---- */
|
||||
#define EDGE_RING_SLOTS 16
|
||||
#define EDGE_MAX_IQ_BYTES 1024
|
||||
#define EDGE_MAX_SUBCARRIERS 128
|
||||
|
||||
typedef struct {
|
||||
uint8_t iq_data[EDGE_MAX_IQ_BYTES];
|
||||
uint16_t iq_len;
|
||||
int8_t rssi;
|
||||
uint8_t channel;
|
||||
uint32_t timestamp_us;
|
||||
} fuzz_ring_slot_t;
|
||||
|
||||
typedef struct {
|
||||
fuzz_ring_slot_t slots[EDGE_RING_SLOTS];
|
||||
volatile uint32_t head;
|
||||
volatile uint32_t tail;
|
||||
} fuzz_ring_buf_t;
|
||||
|
||||
static fuzz_ring_buf_t s_ring;
|
||||
|
||||
/**
 * ring_push: identical logic to edge_processing.c::ring_push().
 * This is the code path exercised by edge_enqueue_csi().
 *
 * Single-producer push: copies at most EDGE_MAX_IQ_BYTES of the I/Q
 * payload into the slot at `head`, then publishes the slot by advancing
 * `head` after a full memory barrier.
 *
 * @param iq      Source bytes; must be readable for min(len, EDGE_MAX_IQ_BYTES).
 * @param len     Caller-supplied length; clamped to EDGE_MAX_IQ_BYTES.
 * @param rssi    Signal strength recorded in the slot.
 * @param channel Channel recorded in the slot.
 * @return true if the frame was enqueued, false if the ring was full.
 */
static bool ring_push(const uint8_t *iq, uint16_t len,
                      int8_t rssi, uint8_t channel)
{
    /* One slot always stays empty so head == tail means "empty". */
    uint32_t next = (s_ring.head + 1) % EDGE_RING_SLOTS;
    if (next == s_ring.tail) {
        return false; /* Full. */
    }

    fuzz_ring_slot_t *slot = &s_ring.slots[s_ring.head];
    /* Clamp oversized payloads -- the property the fuzzer verifies. */
    uint16_t copy_len = (len > EDGE_MAX_IQ_BYTES) ? EDGE_MAX_IQ_BYTES : len;
    memcpy(slot->iq_data, iq, copy_len);
    slot->iq_len = copy_len;
    slot->rssi = rssi;
    slot->channel = channel;
    slot->timestamp_us = (uint32_t)(esp_timer_get_time() & 0xFFFFFFFF);

    /* Publish the slot contents before moving head (SPSC ordering). */
    __sync_synchronize();
    s_ring.head = next;
    return true;
}
|
||||
|
||||
/**
 * ring_pop: identical logic to edge_processing.c::ring_pop().
 *
 * Single-consumer pop: copies the slot at `tail` into *out, then
 * releases the slot by advancing `tail` after a memory barrier.
 *
 * @param out Destination for the slot copy; untouched when empty.
 * @return true if a slot was popped, false if the ring was empty.
 */
static bool ring_pop(fuzz_ring_slot_t *out)
{
    if (s_ring.tail == s_ring.head) {
        return false;
    }

    memcpy(out, &s_ring.slots[s_ring.tail], sizeof(fuzz_ring_slot_t));

    /* Finish the copy before handing the slot back to the producer. */
    __sync_synchronize();
    s_ring.tail = (s_ring.tail + 1) % EDGE_RING_SLOTS;
    return true;
}
|
||||
|
||||
/**
|
||||
* Canary pattern: write to a buffer zone after ring memory to detect
|
||||
* out-of-bounds writes. If the canary is overwritten, we trap.
|
||||
*/
|
||||
#define CANARY_SIZE 64
|
||||
#define CANARY_BYTE 0xCD
|
||||
static uint8_t s_canary_before[CANARY_SIZE];
|
||||
/* s_ring is between the canaries (static allocation order not guaranteed,
|
||||
* but ASAN will catch OOB writes regardless). */
|
||||
static uint8_t s_canary_after[CANARY_SIZE];
|
||||
|
||||
static void init_canaries(void)
|
||||
{
|
||||
memset(s_canary_before, CANARY_BYTE, CANARY_SIZE);
|
||||
memset(s_canary_after, CANARY_BYTE, CANARY_SIZE);
|
||||
}
|
||||
|
||||
static void check_canaries(void)
|
||||
{
|
||||
for (int i = 0; i < CANARY_SIZE; i++) {
|
||||
if (s_canary_before[i] != CANARY_BYTE) __builtin_trap();
|
||||
if (s_canary_after[i] != CANARY_BYTE) __builtin_trap();
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * libFuzzer entry point for the SPSC ring-buffer enqueue logic.
 *
 * Consumes the input as a stream of enqueue commands (format below),
 * interleaving pushes and pops.  Canary zones plus ASAN verify that
 * ring_push() never writes out of bounds; every popped slot is checked
 * for a correctly clamped iq_len.
 *
 * @return 0 always (libFuzzer convention).
 */
int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size)
{
    if (size < 4) return 0;

    /* Reset ring buffer state for each fuzz iteration. */
    memset(&s_ring, 0, sizeof(s_ring));
    init_canaries();

    const uint8_t *cursor = data;
    size_t remaining = size;

    /*
     * Protocol: each "enqueue command" is:
     *   [0..1] iq_len (LE u16)
     *   [2]    rssi (i8)
     *   [3]    channel (u8)
     *   [4..]  iq_data (up to iq_len bytes, zero-padded if short)
     *
     * We consume commands until data is exhausted.
     */
    uint32_t enqueue_count = 0;
    uint32_t full_count = 0;
    uint32_t pop_count = 0;  /* Tracked for symmetry; not asserted on. */

    while (remaining >= 4) {
        uint16_t iq_len = (uint16_t)cursor[0] | ((uint16_t)cursor[1] << 8);
        int8_t rssi = (int8_t)cursor[2];
        uint8_t channel = cursor[3];
        cursor += 4;
        remaining -= 4;

        /* Prepare I/Q data buffer.
         * Even if iq_len > EDGE_MAX_IQ_BYTES, we pass it to ring_push
         * which must clamp it internally. We need a source buffer that
         * is at least iq_len bytes to avoid reading OOB. */
        uint8_t iq_buf[EDGE_MAX_IQ_BYTES + 128];
        memset(iq_buf, 0, sizeof(iq_buf));

        /* Copy available fuzz data into iq_buf. */
        uint16_t avail = (remaining > sizeof(iq_buf))
                             ? (uint16_t)sizeof(iq_buf)
                             : (uint16_t)remaining;
        if (avail > 0) {
            memcpy(iq_buf, cursor, avail);
        }

        /* Advance cursor past the I/Q data portion.
         * We consume min(iq_len, remaining) bytes. */
        uint16_t consume = (iq_len > remaining) ? (uint16_t)remaining : iq_len;
        cursor += consume;
        remaining -= consume;

        /* The key test: iq_len can be 0, normal, EDGE_MAX_IQ_BYTES,
         * or larger (up to 65535). ring_push must clamp to EDGE_MAX_IQ_BYTES. */
        bool ok = ring_push(iq_buf, iq_len, rssi, channel);
        if (ok) {
            enqueue_count++;
        } else {
            full_count++;

            /* When ring is full, drain one slot to make room.
             * This tests the interleaved push/pop pattern. */
            fuzz_ring_slot_t popped;
            if (ring_pop(&popped)) {
                pop_count++;

                /* Verify popped data is sane. */
                if (popped.iq_len > EDGE_MAX_IQ_BYTES) {
                    __builtin_trap(); /* Clamping failed. */
                }
            }

            /* Retry the enqueue after popping.
             * NOTE(review): a successful retry is deliberately not
             * counted in enqueue_count. */
            ring_push(iq_buf, iq_len, rssi, channel);
        }

        /* Periodically check canaries. */
        if ((enqueue_count + full_count) % 8 == 0) {
            check_canaries();
        }
    }

    /* Drain remaining items and verify each. */
    fuzz_ring_slot_t popped;
    while (ring_pop(&popped)) {
        pop_count++;
        if (popped.iq_len > EDGE_MAX_IQ_BYTES) {
            __builtin_trap();
        }
    }

    /* Final canary check. */
    check_canaries();

    /* Verify ring is now empty. */
    if (s_ring.head != s_ring.tail) {
        __builtin_trap();
    }

    return 0;
}
|
||||
286
firmware/esp32-csi-node/test/fuzz_nvs_config.c
Normal file
286
firmware/esp32-csi-node/test/fuzz_nvs_config.c
Normal file
|
|
@ -0,0 +1,286 @@
|
|||
/**
|
||||
* @file fuzz_nvs_config.c
|
||||
* @brief libFuzzer target for NVS config validation logic (ADR-061 Layer 6).
|
||||
*
|
||||
* Since we cannot easily mock the full ESP-IDF NVS API under libFuzzer,
|
||||
* this target extracts and tests the validation ranges used by
|
||||
* nvs_config_load() when processing NVS values. Each validation check
|
||||
* from nvs_config.c is reproduced here with fuzz-driven inputs.
|
||||
*
|
||||
* Build (Linux/macOS with clang):
|
||||
* clang -fsanitize=fuzzer,address -g -I stubs fuzz_nvs_config.c \
|
||||
* stubs/esp_stubs.c -o fuzz_nvs_config -lm
|
||||
*
|
||||
* Run:
|
||||
* ./fuzz_nvs_config corpus/ -max_len=256
|
||||
*/
|
||||
|
||||
#include "esp_stubs.h"
|
||||
#include "nvs_config.h"
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
|
||||
/**
|
||||
* Validate a hop_count value using the same logic as nvs_config_load().
|
||||
* Returns the validated value (0 = rejected).
|
||||
*/
|
||||
static uint8_t validate_hop_count(uint8_t val)
|
||||
{
|
||||
if (val >= 1 && val <= NVS_CFG_HOP_MAX) return val;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Validate dwell_ms with the same rule as nvs_config_load(): any dwell
 * of at least 10 ms is accepted as-is; shorter dwells are rejected by
 * returning 0.
 */
static uint32_t validate_dwell_ms(uint32_t val)
{
    if (val < 10) {
        return 0;
    }
    return val;
}
|
||||
|
||||
/*
 * Validate the TDM node count (same rule as nvs_config_load()):
 * at least one node is required; 0 is rejected.
 */
static uint8_t validate_tdm_node_count(uint8_t val)
{
    if (val == 0) {
        return 0; /* Rejected: a swarm needs at least one node. */
    }
    return val;
}
|
||||
|
||||
/*
 * Validate edge_tier: legal tiers are 0, 1 and 2 (same rule as
 * nvs_config_load()).  Anything else maps to the 0xFF "invalid"
 * sentinel.
 */
static uint8_t validate_edge_tier(uint8_t val)
{
    return (val > 2) ? 0xFF : val;
}
|
||||
|
||||
/*
 * Validate vital_window (same rule as nvs_config_load()): accepted
 * range is 32..256 samples inclusive, anything else returns 0.
 */
static uint16_t validate_vital_window(uint16_t val)
{
    if (val < 32 || val > 256) {
        return 0;
    }
    return val;
}
|
||||
|
||||
/*
 * Validate vital_interval_ms (same rule as nvs_config_load()):
 * intervals shorter than 100 ms are rejected by returning 0.
 */
static uint16_t validate_vital_interval(uint16_t val)
{
    if (val < 100) {
        return 0;
    }
    return val;
}
|
||||
|
||||
/*
 * Validate top_k_count (same rule as nvs_config_load()): accepted
 * range is 1..32 inclusive, anything else returns 0.
 */
static uint8_t validate_top_k(uint8_t val)
{
    if (val < 1 || val > 32) {
        return 0;
    }
    return val;
}
|
||||
|
||||
/*
 * Validate power_duty (same rule as nvs_config_load()): a duty cycle
 * between 10 and 100 percent inclusive; anything else returns 0.
 */
static uint8_t validate_power_duty(uint8_t val)
{
    if (val < 10 || val > 100) {
        return 0;
    }
    return val;
}
|
||||
|
||||
/*
 * Validate wasm_max_modules (same rule as nvs_config_load()):
 * accepted range is 1..8 inclusive, anything else returns 0.
 */
static uint8_t validate_wasm_max(uint8_t val)
{
    if (val < 1 || val > 8) {
        return 0;
    }
    return val;
}
|
||||
|
||||
/*
 * Validate a WiFi channel for CSI capture: 1-14 (2.4 GHz band) and
 * 36-177 (5 GHz band) are accepted; anything else returns 0.
 */
static uint8_t validate_csi_channel(uint8_t val)
{
    int band_24 = (val >= 1 && val <= 14);
    int band_5 = (val >= 36 && val <= 177);
    return (band_24 || band_5) ? val : 0;
}
|
||||
|
||||
/*
 * Validate tdm_slot_index against the node count: a slot that is out
 * of range (slot >= node_count) collapses to slot 0, matching the
 * clamp performed by nvs_config_load().
 */
static uint8_t validate_tdm_slot(uint8_t slot, uint8_t node_count)
{
    return (slot < node_count) ? slot : 0;
}
|
||||
|
||||
/**
|
||||
* Test string field handling: ensure NVS_CFG_SSID_MAX length is respected.
|
||||
*/
|
||||
static void test_string_bounds(const uint8_t *data, size_t len)
|
||||
{
|
||||
char ssid[NVS_CFG_SSID_MAX];
|
||||
char password[NVS_CFG_PASS_MAX];
|
||||
char ip[NVS_CFG_IP_MAX];
|
||||
|
||||
/* Simulate strncpy with NVS_CFG_*_MAX bounds. */
|
||||
size_t ssid_len = (len > NVS_CFG_SSID_MAX - 1) ? NVS_CFG_SSID_MAX - 1 : len;
|
||||
memcpy(ssid, data, ssid_len);
|
||||
ssid[ssid_len] = '\0';
|
||||
|
||||
size_t pass_len = (len > NVS_CFG_PASS_MAX - 1) ? NVS_CFG_PASS_MAX - 1 : len;
|
||||
memcpy(password, data, pass_len);
|
||||
password[pass_len] = '\0';
|
||||
|
||||
size_t ip_len = (len > NVS_CFG_IP_MAX - 1) ? NVS_CFG_IP_MAX - 1 : len;
|
||||
memcpy(ip, data, ip_len);
|
||||
ip[ip_len] = '\0';
|
||||
|
||||
/* Ensure null termination holds. */
|
||||
if (ssid[NVS_CFG_SSID_MAX - 1] != '\0' && ssid_len == NVS_CFG_SSID_MAX - 1) {
|
||||
/* OK: we set terminator above. */
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Test presence_thresh and fall_thresh fixed-point conversion.
 * nvs_config.c stores each threshold in NVS as a u16 holding
 * value * 1000; dividing by 1000 must therefore always yield a finite,
 * non-negative float no larger than 65.535 (plus a hair of slack).
 */
static void test_thresh_conversion(uint16_t pres_raw, uint16_t fall_raw)
{
    const float scale = 1000.0f;
    float pres = (float)pres_raw / scale;
    float fall = (float)fall_raw / scale;

    /* NaN is the only value that does not compare equal to itself. */
    if (pres != pres || fall != fall) {
        __builtin_trap();
    }

    /* u16/1000 spans 0.0 .. 65.535; 65.536 allows float rounding slack. */
    if (pres < 0.0f || pres > 65.536f) {
        __builtin_trap();
    }
    if (fall < 0.0f || fall > 65.536f) {
        __builtin_trap();
    }
}
|
||||
|
||||
/**
 * libFuzzer entry point for the NVS config validation logic.
 *
 * Input layout: the first 24 bytes are field values (little-endian for
 * multi-byte fields); bytes 24+ feed the channel-list, string-bounds
 * and MAC-filter paths.  Every validator must accept arbitrary input
 * without crashing, and the assembled nvs_config_t must end up
 * self-consistent (enforced with __builtin_trap at the bottom).
 *
 * @return 0 always (libFuzzer convention).
 */
int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size)
{
    if (size < 32) return 0;

    const uint8_t *p = data;

    /* Extract fuzz-driven config field values. */
    uint8_t hop_count = p[0];
    uint32_t dwell_ms = (uint32_t)p[1] | ((uint32_t)p[2] << 8)
                      | ((uint32_t)p[3] << 16) | ((uint32_t)p[4] << 24);
    uint8_t tdm_slot = p[5];
    uint8_t tdm_nodes = p[6];
    uint8_t edge_tier = p[7];
    uint16_t vital_win = (uint16_t)p[8] | ((uint16_t)p[9] << 8);
    uint16_t vital_int = (uint16_t)p[10] | ((uint16_t)p[11] << 8);
    uint8_t top_k = p[12];
    uint8_t power_duty = p[13];
    uint8_t wasm_max = p[14];
    uint8_t csi_channel = p[15];
    uint16_t pres_thresh = (uint16_t)p[16] | ((uint16_t)p[17] << 8);
    uint16_t fall_thresh = (uint16_t)p[18] | ((uint16_t)p[19] << 8);
    uint8_t node_id = p[20];
    uint16_t target_port = (uint16_t)p[21] | ((uint16_t)p[22] << 8);
    uint8_t wasm_verify = p[23];

    /* Run all validators. These must not crash regardless of input. */
    (void)validate_hop_count(hop_count);
    (void)validate_dwell_ms(dwell_ms);
    (void)validate_tdm_node_count(tdm_nodes);
    (void)validate_edge_tier(edge_tier);
    (void)validate_vital_window(vital_win);
    (void)validate_vital_interval(vital_int);
    (void)validate_top_k(top_k);
    (void)validate_power_duty(power_duty);
    (void)validate_wasm_max(wasm_max);
    (void)validate_csi_channel(csi_channel);

    /* Validate TDM slot with validated node count. */
    uint8_t valid_nodes = validate_tdm_node_count(tdm_nodes);
    if (valid_nodes > 0) {
        (void)validate_tdm_slot(tdm_slot, valid_nodes);
    }

    /* Test threshold conversions. */
    test_thresh_conversion(pres_thresh, fall_thresh);

    /* Test string field bounds with remaining data. */
    if (size > 24) {
        test_string_bounds(data + 24, size - 24);
    }

    /* Construct a full nvs_config_t and verify field assignments don't overflow.
     * Each rejected field falls back to its production default. */
    nvs_config_t cfg;
    memset(&cfg, 0, sizeof(cfg));

    cfg.target_port = target_port;
    cfg.node_id = node_id;

    uint8_t valid_hop = validate_hop_count(hop_count);
    cfg.channel_hop_count = valid_hop ? valid_hop : 1;

    /* Fill channel list from fuzz data.
     * NOTE(review): bytes 24+ are deliberately shared with the
     * string-bounds and MAC-filter paths above/below. */
    for (uint8_t i = 0; i < NVS_CFG_HOP_MAX && (24 + i) < size; i++) {
        cfg.channel_list[i] = data[24 + i];
    }

    cfg.dwell_ms = validate_dwell_ms(dwell_ms) ? dwell_ms : 50;
    cfg.tdm_slot_index = 0;
    cfg.tdm_node_count = valid_nodes ? valid_nodes : 1;

    if (cfg.tdm_slot_index >= cfg.tdm_node_count) {
        cfg.tdm_slot_index = 0;
    }

    uint8_t valid_tier = validate_edge_tier(edge_tier);
    cfg.edge_tier = (valid_tier != 0xFF) ? valid_tier : 2;

    /* Thresholds are stored in NVS as u16 fixed-point (value * 1000). */
    cfg.presence_thresh = (float)pres_thresh / 1000.0f;
    cfg.fall_thresh = (float)fall_thresh / 1000.0f;

    uint16_t valid_win = validate_vital_window(vital_win);
    cfg.vital_window = valid_win ? valid_win : 256;

    uint16_t valid_int = validate_vital_interval(vital_int);
    cfg.vital_interval_ms = valid_int ? valid_int : 1000;

    uint8_t valid_topk = validate_top_k(top_k);
    cfg.top_k_count = valid_topk ? valid_topk : 8;

    uint8_t valid_duty = validate_power_duty(power_duty);
    cfg.power_duty = valid_duty ? valid_duty : 100;

    uint8_t valid_wasm = validate_wasm_max(wasm_max);
    cfg.wasm_max_modules = valid_wasm ? valid_wasm : 4;
    cfg.wasm_verify = wasm_verify ? 1 : 0;

    uint8_t valid_ch = validate_csi_channel(csi_channel);
    cfg.csi_channel = valid_ch;

    /* MAC filter: use 6 bytes from fuzz data if available. */
    if (size >= 32) {
        memcpy(cfg.filter_mac, data + 24, 6);
        cfg.filter_mac_set = (data[30] & 0x01) ? 1 : 0;
    }

    /* Verify struct is self-consistent — no field should be in an impossible state. */
    if (cfg.channel_hop_count > NVS_CFG_HOP_MAX) __builtin_trap();
    if (cfg.tdm_slot_index >= cfg.tdm_node_count) __builtin_trap();
    if (cfg.edge_tier > 2) __builtin_trap();
    if (cfg.wasm_max_modules > 8 || cfg.wasm_max_modules < 1) __builtin_trap();
    if (cfg.top_k_count > 32 || cfg.top_k_count < 1) __builtin_trap();
    if (cfg.power_duty > 100 || cfg.power_duty < 10) __builtin_trap();

    return 0;
}
|
||||
5
firmware/esp32-csi-node/test/stubs/esp_err.h
Normal file
5
firmware/esp32-csi-node/test/stubs/esp_err.h
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
/* Stub: redirect to unified stubs header. */
|
||||
#ifndef ESP_ERR_H_STUB
|
||||
#define ESP_ERR_H_STUB
|
||||
#include "esp_stubs.h"
|
||||
#endif
|
||||
5
firmware/esp32-csi-node/test/stubs/esp_log.h
Normal file
5
firmware/esp32-csi-node/test/stubs/esp_log.h
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
/* Stub: redirect to unified stubs header. */
|
||||
#ifndef ESP_LOG_H_STUB
|
||||
#define ESP_LOG_H_STUB
|
||||
#include "esp_stubs.h"
|
||||
#endif
|
||||
65
firmware/esp32-csi-node/test/stubs/esp_stubs.c
Normal file
65
firmware/esp32-csi-node/test/stubs/esp_stubs.c
Normal file
|
|
@ -0,0 +1,65 @@
|
|||
/**
|
||||
* @file esp_stubs.c
|
||||
* @brief Implementation of ESP-IDF stubs for host-based fuzz testing.
|
||||
*
|
||||
* Must be compiled with: -Istubs -I../main
|
||||
* so that ESP-IDF headers resolve to stubs/ and firmware headers
|
||||
* resolve to ../main/.
|
||||
*/
|
||||
|
||||
#include "esp_stubs.h"
|
||||
#include "edge_processing.h"
|
||||
#include "wasm_runtime.h"
|
||||
#include <stdint.h>
|
||||
|
||||
/** Fake microsecond clock backing esp_timer_get_time() on the host. */
static int64_t s_fake_time_us = 0;

int64_t esp_timer_get_time(void)
{
    /* Every call moves time forward by 50 ms, mimicking a ~20 Hz CSI cadence. */
    return s_fake_time_us += 50000;
}
|
||||
|
||||
/* ---- stream_sender stubs ---- */

/** Pretend every byte was delivered: echo the requested length back. */
int stream_sender_send(const uint8_t *data, size_t len)
{
    (void)data;
    return (int)len;
}

/** Report successful initialisation without doing any work. */
int stream_sender_init(void)
{
    return 0;
}

/** Accept any target endpoint and report success. */
int stream_sender_init_with(const char *ip, uint16_t port)
{
    (void)ip;
    (void)port;
    return 0;
}

/** Nothing to tear down in the stub. */
void stream_sender_deinit(void)
{
}
|
||||
|
||||
/* ---- wasm_runtime stubs ---- */

/* No-op frame hook: the host fuzz build has no WASM engine to feed. */
void wasm_runtime_on_frame(const float *phases, const float *amplitudes,
                           const float *variances, uint16_t n_sc,
                           const edge_vitals_pkt_t *vitals)
{
    (void)phases; (void)amplitudes; (void)variances;
    (void)n_sc; (void)vitals;
}

/* Lifecycle entry points: all report ESP_OK with no side effects, so the
 * firmware sources link and run on the host without a real runtime. */
esp_err_t wasm_runtime_init(void) { return ESP_OK; }
esp_err_t wasm_runtime_load(const uint8_t *d, uint32_t l, uint8_t *id) { (void)d; (void)l; (void)id; return ESP_OK; }
esp_err_t wasm_runtime_start(uint8_t id) { (void)id; return ESP_OK; }
esp_err_t wasm_runtime_stop(uint8_t id) { (void)id; return ESP_OK; }
esp_err_t wasm_runtime_unload(uint8_t id) { (void)id; return ESP_OK; }
void wasm_runtime_on_timer(void) {}
/* Reports zero loaded modules; *info is left untouched. */
void wasm_runtime_get_info(wasm_module_info_t *info, uint8_t *count) { (void)info; if(count) *count = 0; }
esp_err_t wasm_runtime_set_manifest(uint8_t id, const char *n, uint32_t c, uint32_t m) { (void)id; (void)n; (void)c; (void)m; return ESP_OK; }
|
||||
169
firmware/esp32-csi-node/test/stubs/esp_stubs.h
Normal file
169
firmware/esp32-csi-node/test/stubs/esp_stubs.h
Normal file
|
|
@ -0,0 +1,169 @@
|
|||
/**
|
||||
* @file esp_stubs.h
|
||||
* @brief Minimal ESP-IDF type stubs for host-based fuzz testing.
|
||||
*
|
||||
* Provides just enough type definitions and macros to compile
|
||||
* csi_collector.c and edge_processing.c on a Linux/macOS host
|
||||
* without the full ESP-IDF SDK.
|
||||
*/
|
||||
|
||||
#ifndef ESP_STUBS_H
|
||||
#define ESP_STUBS_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
/* ---- esp_err.h ---- */
|
||||
typedef int esp_err_t;
|
||||
#define ESP_OK 0
|
||||
#define ESP_FAIL (-1)
|
||||
#define ESP_ERR_NO_MEM 0x101
|
||||
#define ESP_ERR_INVALID_ARG 0x102
|
||||
|
||||
/* ---- esp_log.h ---- */
|
||||
#define ESP_LOGI(tag, fmt, ...) ((void)0)
|
||||
#define ESP_LOGW(tag, fmt, ...) ((void)0)
|
||||
#define ESP_LOGE(tag, fmt, ...) ((void)0)
|
||||
#define ESP_LOGD(tag, fmt, ...) ((void)0)
|
||||
#define ESP_ERROR_CHECK(x) ((void)(x))
|
||||
|
||||
/* ---- esp_timer.h ---- */
|
||||
typedef void *esp_timer_handle_t;
|
||||
|
||||
/**
|
||||
* Stub: returns a monotonically increasing microsecond counter.
|
||||
* Declared here, defined in esp_stubs.c.
|
||||
*/
|
||||
int64_t esp_timer_get_time(void);
|
||||
|
||||
/* ---- esp_wifi_types.h ---- */
|
||||
|
||||
/** Minimal rx_ctrl fields needed by csi_serialize_frame. */
|
||||
typedef struct {
|
||||
signed rssi : 8;
|
||||
unsigned channel : 4;
|
||||
unsigned noise_floor : 8;
|
||||
unsigned rx_ant : 2;
|
||||
/* Padding to fill out the struct so it compiles. */
|
||||
unsigned _pad : 10;
|
||||
} wifi_pkt_rx_ctrl_t;
|
||||
|
||||
/** Minimal wifi_csi_info_t needed by csi_serialize_frame. */
|
||||
typedef struct {
|
||||
wifi_pkt_rx_ctrl_t rx_ctrl;
|
||||
uint8_t mac[6];
|
||||
int16_t len; /**< Length of the I/Q buffer in bytes. */
|
||||
int8_t *buf; /**< Pointer to I/Q data. */
|
||||
} wifi_csi_info_t;
|
||||
|
||||
/* ---- Kconfig defaults ---- */
|
||||
#ifndef CONFIG_CSI_NODE_ID
|
||||
#define CONFIG_CSI_NODE_ID 1
|
||||
#endif
|
||||
|
||||
#ifndef CONFIG_CSI_WIFI_CHANNEL
|
||||
#define CONFIG_CSI_WIFI_CHANNEL 6
|
||||
#endif
|
||||
|
||||
#ifndef CONFIG_CSI_WIFI_SSID
|
||||
#define CONFIG_CSI_WIFI_SSID "test_ssid"
|
||||
#endif
|
||||
|
||||
#ifndef CONFIG_CSI_TARGET_IP
|
||||
#define CONFIG_CSI_TARGET_IP "192.168.1.1"
|
||||
#endif
|
||||
|
||||
#ifndef CONFIG_CSI_TARGET_PORT
|
||||
#define CONFIG_CSI_TARGET_PORT 5500
|
||||
#endif
|
||||
|
||||
/* Suppress the build-time guard in csi_collector.c */
|
||||
#ifndef CONFIG_ESP_WIFI_CSI_ENABLED
|
||||
#define CONFIG_ESP_WIFI_CSI_ENABLED 1
|
||||
#endif
|
||||
|
||||
/* ---- sdkconfig.h stub ---- */
|
||||
/* (empty — all needed CONFIG_ macros are above) */
|
||||
|
||||
/* ---- FreeRTOS stubs ---- */
|
||||
#define pdMS_TO_TICKS(x) ((x))
|
||||
#define pdPASS 1
|
||||
typedef int BaseType_t;
|
||||
|
||||
static inline int xPortGetCoreID(void) { return 0; }
|
||||
static inline void vTaskDelay(uint32_t ticks) { (void)ticks; }
|
||||
static inline BaseType_t xTaskCreatePinnedToCore(
|
||||
void (*fn)(void *), const char *name, uint32_t stack,
|
||||
void *arg, int prio, void *handle, int core)
|
||||
{
|
||||
(void)fn; (void)name; (void)stack; (void)arg;
|
||||
(void)prio; (void)handle; (void)core;
|
||||
return pdPASS;
|
||||
}
|
||||
|
||||
/* ---- WiFi API stubs (no-ops) ---- */
|
||||
typedef int wifi_interface_t;
|
||||
typedef int wifi_second_chan_t;
|
||||
#define WIFI_IF_STA 0
|
||||
#define WIFI_SECOND_CHAN_NONE 0
|
||||
|
||||
typedef struct {
|
||||
unsigned filter_mask;
|
||||
} wifi_promiscuous_filter_t;
|
||||
|
||||
typedef int wifi_promiscuous_pkt_type_t;
|
||||
#define WIFI_PROMIS_FILTER_MASK_MGMT 1
|
||||
#define WIFI_PROMIS_FILTER_MASK_DATA 2
|
||||
|
||||
typedef struct {
|
||||
int lltf_en;
|
||||
int htltf_en;
|
||||
int stbc_htltf2_en;
|
||||
int ltf_merge_en;
|
||||
int channel_filter_en;
|
||||
int manu_scale;
|
||||
int shift;
|
||||
} wifi_csi_config_t;
|
||||
|
||||
typedef struct {
|
||||
uint8_t primary;
|
||||
} wifi_ap_record_t;
|
||||
|
||||
static inline esp_err_t esp_wifi_set_promiscuous(bool en) { (void)en; return ESP_OK; }
|
||||
static inline esp_err_t esp_wifi_set_promiscuous_rx_cb(void *cb) { (void)cb; return ESP_OK; }
|
||||
static inline esp_err_t esp_wifi_set_promiscuous_filter(wifi_promiscuous_filter_t *f) { (void)f; return ESP_OK; }
|
||||
static inline esp_err_t esp_wifi_set_csi_config(wifi_csi_config_t *c) { (void)c; return ESP_OK; }
|
||||
static inline esp_err_t esp_wifi_set_csi_rx_cb(void *cb, void *ctx) { (void)cb; (void)ctx; return ESP_OK; }
|
||||
static inline esp_err_t esp_wifi_set_csi(bool en) { (void)en; return ESP_OK; }
|
||||
static inline esp_err_t esp_wifi_set_channel(uint8_t ch, wifi_second_chan_t sc) { (void)ch; (void)sc; return ESP_OK; }
|
||||
static inline esp_err_t esp_wifi_80211_tx(wifi_interface_t ifx, const void *b, int len, bool en) { (void)ifx; (void)b; (void)len; (void)en; return ESP_OK; }
|
||||
static inline esp_err_t esp_wifi_sta_get_ap_info(wifi_ap_record_t *ap) { (void)ap; return ESP_FAIL; }
|
||||
static inline const char *esp_err_to_name(esp_err_t code) { (void)code; return "STUB"; }
|
||||
|
||||
/* ---- NVS stubs ---- */
|
||||
typedef uint32_t nvs_handle_t;
|
||||
#define NVS_READONLY 0
|
||||
static inline esp_err_t nvs_open(const char *ns, int mode, nvs_handle_t *h) { (void)ns; (void)mode; (void)h; return ESP_FAIL; }
|
||||
static inline void nvs_close(nvs_handle_t h) { (void)h; }
|
||||
static inline esp_err_t nvs_get_str(nvs_handle_t h, const char *k, char *v, size_t *l) { (void)h; (void)k; (void)v; (void)l; return ESP_FAIL; }
|
||||
static inline esp_err_t nvs_get_u8(nvs_handle_t h, const char *k, uint8_t *v) { (void)h; (void)k; (void)v; return ESP_FAIL; }
|
||||
static inline esp_err_t nvs_get_u16(nvs_handle_t h, const char *k, uint16_t *v) { (void)h; (void)k; (void)v; return ESP_FAIL; }
|
||||
static inline esp_err_t nvs_get_u32(nvs_handle_t h, const char *k, uint32_t *v) { (void)h; (void)k; (void)v; return ESP_FAIL; }
|
||||
static inline esp_err_t nvs_get_blob(nvs_handle_t h, const char *k, void *v, size_t *l) { (void)h; (void)k; (void)v; (void)l; return ESP_FAIL; }
|
||||
|
||||
/* ---- stream_sender stubs (defined in esp_stubs.c) ---- */
|
||||
int stream_sender_send(const uint8_t *data, size_t len);
|
||||
int stream_sender_init(void);
|
||||
int stream_sender_init_with(const char *ip, uint16_t port);
|
||||
void stream_sender_deinit(void);
|
||||
|
||||
/*
|
||||
* wasm_runtime stubs: defined in esp_stubs.c.
|
||||
* The actual prototype comes from ../main/wasm_runtime.h (via csi_collector.c).
|
||||
* We just need the definition in esp_stubs.c to link.
|
||||
*/
|
||||
|
||||
#endif /* ESP_STUBS_H */
|
||||
5
firmware/esp32-csi-node/test/stubs/esp_timer.h
Normal file
5
firmware/esp32-csi-node/test/stubs/esp_timer.h
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
/* Stub: redirect to unified stubs header. */
|
||||
#ifndef ESP_TIMER_H_STUB
|
||||
#define ESP_TIMER_H_STUB
|
||||
#include "esp_stubs.h"
|
||||
#endif
|
||||
5
firmware/esp32-csi-node/test/stubs/esp_wifi.h
Normal file
5
firmware/esp32-csi-node/test/stubs/esp_wifi.h
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
/* Stub: redirect to unified stubs header. */
|
||||
#ifndef ESP_WIFI_H_STUB
|
||||
#define ESP_WIFI_H_STUB
|
||||
#include "esp_stubs.h"
|
||||
#endif
|
||||
5
firmware/esp32-csi-node/test/stubs/esp_wifi_types.h
Normal file
5
firmware/esp32-csi-node/test/stubs/esp_wifi_types.h
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
/* Stub: redirect to unified stubs header. */
|
||||
#ifndef ESP_WIFI_TYPES_H_STUB
|
||||
#define ESP_WIFI_TYPES_H_STUB
|
||||
#include "esp_stubs.h"
|
||||
#endif
|
||||
5
firmware/esp32-csi-node/test/stubs/freertos/FreeRTOS.h
Normal file
5
firmware/esp32-csi-node/test/stubs/freertos/FreeRTOS.h
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
/* Stub: redirect to unified stubs header. */
|
||||
#ifndef FREERTOS_H_STUB
|
||||
#define FREERTOS_H_STUB
|
||||
#include "esp_stubs.h"
|
||||
#endif
|
||||
5
firmware/esp32-csi-node/test/stubs/freertos/task.h
Normal file
5
firmware/esp32-csi-node/test/stubs/freertos/task.h
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
/* Stub: redirect to unified stubs header. */
|
||||
#ifndef FREERTOS_TASK_H_STUB
|
||||
#define FREERTOS_TASK_H_STUB
|
||||
#include "esp_stubs.h"
|
||||
#endif
|
||||
5
firmware/esp32-csi-node/test/stubs/nvs.h
Normal file
5
firmware/esp32-csi-node/test/stubs/nvs.h
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
/* Stub: redirect to unified stubs header. */
|
||||
#ifndef NVS_H_STUB
|
||||
#define NVS_H_STUB
|
||||
#include "esp_stubs.h"
|
||||
#endif
|
||||
5
firmware/esp32-csi-node/test/stubs/nvs_flash.h
Normal file
5
firmware/esp32-csi-node/test/stubs/nvs_flash.h
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
/* Stub: redirect to unified stubs header. */
|
||||
#ifndef NVS_FLASH_H_STUB
|
||||
#define NVS_FLASH_H_STUB
|
||||
#include "esp_stubs.h"
|
||||
#endif
|
||||
5
firmware/esp32-csi-node/test/stubs/sdkconfig.h
Normal file
5
firmware/esp32-csi-node/test/stubs/sdkconfig.h
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
/* Stub: sdkconfig.h — all CONFIG_ macros provided by esp_stubs.h. */
|
||||
#ifndef SDKCONFIG_H_STUB
|
||||
#define SDKCONFIG_H_STUB
|
||||
#include "esp_stubs.h"
|
||||
#endif
|
||||
290
scripts/check_health.py
Executable file
290
scripts/check_health.py
Executable file
|
|
@ -0,0 +1,290 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
QEMU Post-Fault Health Checker — ADR-061 Layer 9
|
||||
|
||||
Reads a log segment captured after a fault injection and checks whether
|
||||
the firmware is still healthy. Used by qemu-chaos-test.sh after each
|
||||
fault in the chaos testing loop.
|
||||
|
||||
Health checks:
|
||||
1. No crash patterns (Guru Meditation, assert, panic, abort)
|
||||
2. No heap errors (OOM, heap corruption, alloc failure)
|
||||
3. No stack overflow (FreeRTOS stack overflow hook)
|
||||
4. Firmware still producing frames (CSI frame activity)
|
||||
|
||||
Exit codes:
|
||||
0 HEALTHY — all checks pass
|
||||
1 DEGRADED — no crash, but missing expected activity
|
||||
2 UNHEALTHY — crash, heap error, or stack overflow detected
|
||||
|
||||
Usage:
|
||||
python3 check_health.py --log /path/to/fault_segment.log --after-fault wifi_kill
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import re
|
||||
import sys
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import List
|
||||
|
||||
|
||||
# ANSI colors: only emit escape sequences when stdout is an interactive terminal.
USE_COLOR = sys.stdout.isatty()


def color(text: str, code: str) -> str:
    """Wrap *text* in the ANSI escape *code* when color output is enabled."""
    return f"\033[{code}m{text}\033[0m" if USE_COLOR else text


def green(t: str) -> str:
    """Render *t* in green (check passed)."""
    return color(t, "32")


def yellow(t: str) -> str:
    """Render *t* in yellow (degraded)."""
    return color(t, "33")


def red(t: str) -> str:
    """Render *t* in bold red (unhealthy)."""
    return color(t, "1;31")
|
||||
|
||||
|
||||
@dataclass
class HealthCheck:
    """Result of a single post-fault health check."""
    name: str      # human-readable check name, e.g. "No crash"
    passed: bool   # True when the check found no problem
    message: str   # one-line explanation shown in the report
    severity: int  # 0=pass, 1=degraded, 2=unhealthy
|
||||
|
||||
|
||||
def check_no_crash(lines: List[str]) -> HealthCheck:
    """Scan *lines* for firmware crash signatures.

    Matching is case-sensitive on purpose: the patterns mirror the exact
    spelling used in ESP-IDF panic output ("Guru Meditation", exception
    names from the register dump, etc.).

    Args:
        lines: Log lines (most recent segment) to inspect.

    Returns:
        HealthCheck with severity 2 on the first crash indicator found,
        or a passing (severity 0) check when none match.
    """
    # One pre-compiled alternation instead of 10 re.search calls per line.
    crash_re = re.compile(
        "|".join([
            r"Guru Meditation",
            r"assert failed",
            r"abort\(\)",
            r"panic",
            r"LoadProhibited",
            r"StoreProhibited",
            r"InstrFetchProhibited",
            r"IllegalInstruction",
            r"Unhandled debug exception",
            r"Fatal exception",
        ])
    )

    for line in lines:
        if crash_re.search(line):
            return HealthCheck(
                name="No crash",
                passed=False,
                message=f"Crash detected: {line.strip()[:120]}",
                severity=2,
            )

    return HealthCheck(
        name="No crash",
        passed=True,
        message="No crash indicators found",
        severity=0,
    )
|
||||
|
||||
|
||||
def check_no_heap_errors(lines: List[str]) -> HealthCheck:
    """Scan *lines* for heap/memory failure indicators (case-insensitive).

    Args:
        lines: Log lines (most recent segment) to inspect.

    Returns:
        HealthCheck with severity 2 on the first heap error found,
        or a passing (severity 0) check when none match.
    """
    # One pre-compiled, case-insensitive alternation instead of
    # 8 re.search calls per line.
    heap_re = re.compile(
        "|".join([
            r"HEAP_ERROR",
            r"out of memory",
            r"heap_caps_alloc.*failed",
            r"malloc.*fail",
            r"heap corruption",
            r"CORRUPT HEAP",
            r"multi_heap",
            r"heap_lock",
        ]),
        re.IGNORECASE,
    )

    for line in lines:
        if heap_re.search(line):
            return HealthCheck(
                name="No heap errors",
                passed=False,
                message=f"Heap error: {line.strip()[:120]}",
                severity=2,
            )

    return HealthCheck(
        name="No heap errors",
        passed=True,
        message="No heap errors found",
        severity=0,
    )
|
||||
|
||||
|
||||
def check_no_stack_overflow(lines: List[str]) -> HealthCheck:
    """Scan *lines* for FreeRTOS / compiler stack-overflow reports.

    Case-sensitive except where a pattern itself allows both spellings
    ("[Ss]tack overflow").

    Args:
        lines: Log lines (most recent segment) to inspect.

    Returns:
        HealthCheck with severity 2 on the first match, else a passing check.
    """
    # Single pre-compiled alternation instead of 4 re.search calls per line.
    stack_re = re.compile(
        "|".join([
            r"[Ss]tack overflow",
            r"stack_overflow",
            r"vApplicationStackOverflowHook",
            r"stack smashing",
        ])
    )

    for line in lines:
        if stack_re.search(line):
            return HealthCheck(
                name="No stack overflow",
                passed=False,
                message=f"Stack overflow: {line.strip()[:120]}",
                severity=2,
            )

    return HealthCheck(
        name="No stack overflow",
        passed=True,
        message="No stack overflow detected",
        severity=0,
    )
|
||||
|
||||
|
||||
def check_frame_activity(lines: List[str]) -> HealthCheck:
    """Check that the firmware is still producing CSI frames.

    Counts log lines mentioning any frame/pipeline keyword; a single
    combined case-insensitive regex replaces the per-pattern inner loop,
    and each line counts at most once (matching the original break-per-line
    behavior).

    Args:
        lines: Log lines (most recent segment) to inspect.

    Returns:
        Passing check (severity 0) when at least one activity line is found;
        otherwise a degraded (severity 1) check — absence of output after a
        fault is suspicious but not fatal on its own.
    """
    activity_re = re.compile(
        "|".join([
            r"frame",
            r"CSI",
            r"mock_csi",
            r"iq_data",
            r"subcarrier",
            r"csi_collector",
            r"enqueue",
            r"presence",
            r"vitals",
            r"breathing",
        ]),
        re.IGNORECASE,
    )

    activity_lines = sum(1 for line in lines if activity_re.search(line))

    if activity_lines > 0:
        return HealthCheck(
            name="Frame activity",
            passed=True,
            message=f"Firmware producing output ({activity_lines} activity lines)",
            severity=0,
        )
    return HealthCheck(
        name="Frame activity",
        passed=False,
        message="No frame/CSI activity detected after fault",
        severity=1,  # Degraded, not fatal
    )
|
||||
|
||||
|
||||
def run_health_checks(
    log_path: Path,
    fault_name: str,
    tail_lines: int = 200,
) -> int:
    """Run all health checks over the tail of *log_path* and print a report.

    Args:
        log_path: Log file (or segment) captured after the fault injection.
        fault_name: Name of the injected fault, used only for reporting.
        tail_lines: Number of trailing lines to analyze.

    Returns:
        0 = healthy, 1 = degraded, 2 = unhealthy (max severity across checks).
    """
    if not log_path.exists():
        print(f" ERROR: Log file not found: {log_path}", file=sys.stderr)
        return 2

    text = log_path.read_text(encoding="utf-8", errors="replace")
    all_lines = text.splitlines()

    # Use last N lines (most recent, after fault injection)
    lines = all_lines[-tail_lines:] if len(all_lines) > tail_lines else all_lines

    if not lines:
        # Fixed F541: this message has no placeholders, so no f-prefix.
        print(" WARNING: Log file is empty (fault may have killed output)")
        # Empty log after fault is degraded, not necessarily unhealthy
        return 1

    print(f" Health check after fault: {fault_name}")
    print(f" Log lines analyzed: {len(lines)} (of {len(all_lines)} total)")
    print()

    # Run checks
    checks = [
        check_no_crash(lines),
        check_no_heap_errors(lines),
        check_no_stack_overflow(lines),
        check_frame_activity(lines),
    ]

    max_severity = 0
    for check in checks:
        if check.passed:
            icon = green("PASS")
        elif check.severity == 1:
            icon = yellow("WARN")
        else:
            icon = red("FAIL")

        print(f" [{icon}] {check.name}: {check.message}")
        max_severity = max(max_severity, check.severity)

    print()

    # Summary (F541 fixed: plain string literals inside the color helpers)
    passed = sum(1 for c in checks if c.passed)
    total = len(checks)

    if max_severity == 0:
        print(f" {green('HEALTHY')} — {passed}/{total} checks passed")
    elif max_severity == 1:
        print(f" {yellow('DEGRADED')} — {passed}/{total} checks passed")
    else:
        print(f" {red('UNHEALTHY')} — {passed}/{total} checks passed")

    return max_severity
|
||||
|
||||
|
||||
def main():
    """CLI entry point: parse arguments and exit with the health status code."""
    parser = argparse.ArgumentParser(
        description="QEMU Post-Fault Health Checker — ADR-061 Layer 9",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        # NOTE(review): this epilog example shows a different report format
        # than run_health_checks() actually prints — confirm and update.
        epilog=(
            "Example output:\n"
            " [HEALTHY] t=30s frames=150 (5.0 fps) crashes=0 heap_err=0 wdt=0 reboots=0\n"
            " \n"
            " VERDICT: Firmware is healthy. No critical issues detected."
        ),
    )
    parser.add_argument(
        "--log", required=True,
        help="Path to the log file (or log segment) to check",
    )
    parser.add_argument(
        "--after-fault", required=True,
        help="Name of the fault that was injected (for reporting)",
    )
    parser.add_argument(
        "--tail", type=int, default=200,
        help="Number of lines from end of log to analyze (default: 200)",
    )
    args = parser.parse_args()

    # Exit code mirrors health severity: 0 healthy, 1 degraded, 2 unhealthy,
    # so shell callers (qemu-chaos-test.sh) can branch on $?.
    exit_code = run_health_checks(
        log_path=Path(args.log),
        fault_name=args.after_fault,
        tail_lines=args.tail,
    )
    sys.exit(exit_code)


if __name__ == "__main__":
    main()
|
||||
430
scripts/generate_nvs_matrix.py
Normal file
430
scripts/generate_nvs_matrix.py
Normal file
|
|
@ -0,0 +1,430 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
NVS Test Matrix Generator (ADR-061)
|
||||
|
||||
Generates NVS partition binaries for 14 test configurations using the
|
||||
provision.py script's CSV builder and NVS binary generator. Each binary
|
||||
can be injected into a QEMU flash image at offset 0x9000 for automated
|
||||
firmware testing under different NVS configurations.
|
||||
|
||||
Usage:
|
||||
python3 generate_nvs_matrix.py --output-dir build/nvs_matrix
|
||||
|
||||
# Generate only specific configs:
|
||||
python3 generate_nvs_matrix.py --output-dir build/nvs_matrix --only default,full-adr060
|
||||
|
||||
Requirements:
|
||||
- esp_idf_nvs_partition_gen (pip install) or ESP-IDF nvs_partition_gen.py
|
||||
- Python 3.8+
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import csv
|
||||
import io
|
||||
import os
|
||||
import sys
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
|
||||
|
||||
# NVS partition size must match partitions_display.csv: 0x6000 = 24576 bytes
|
||||
NVS_PARTITION_SIZE = 0x6000
|
||||
|
||||
|
||||
@dataclass
class NvsEntry:
    """A single NVS key-value entry (one CSV row for nvs_partition_gen)."""
    key: str       # NVS key name
    type: str      # "data" or "namespace"
    encoding: str  # "string", "u8", "u16", "u32", "hex2bin", or "" for namespaces
    value: str     # value as a string, written verbatim into the CSV
|
||||
|
||||
@dataclass
class NvsConfig:
    """A named NVS configuration with a list of entries."""
    name: str
    description: str
    entries: List[NvsEntry] = field(default_factory=list)

    def to_csv(self) -> str:
        """Render this configuration as NVS CSV: header row, the "csi_cfg"
        namespace row, then one row per entry."""
        out = io.StringIO()
        rows = [
            ["key", "type", "encoding", "value"],
            ["csi_cfg", "namespace", "", ""],
        ]
        rows.extend([e.key, e.type, e.encoding, e.value] for e in self.entries)
        csv.writer(out).writerows(rows)
        return out.getvalue()
|
||||
|
||||
|
||||
def define_configs() -> List[NvsConfig]:
    """Define all 14 NVS test configurations.

    Each configuration becomes one NVS partition binary in the QEMU test
    matrix; the ``name`` doubles as the output file name. WiFi credentials
    and target_ip 10.0.2.2 (QEMU's host-side gateway) are repeated in most
    configs so the firmware can "connect" under emulation.
    """
    configs = []

    # 1. default - no NVS entries (firmware uses Kconfig defaults)
    configs.append(NvsConfig(
        name="default",
        description="No NVS entries; firmware uses Kconfig defaults",
        entries=[],
    ))

    # 2. wifi-only - just WiFi credentials
    configs.append(NvsConfig(
        name="wifi-only",
        description="WiFi SSID and password only",
        entries=[
            NvsEntry("ssid", "data", "string", "TestNetwork"),
            NvsEntry("password", "data", "string", "testpass123"),
        ],
    ))

    # 3. full-adr060 - channel override + MAC filter
    configs.append(NvsConfig(
        name="full-adr060",
        description="ADR-060: channel override + MAC filter + full config",
        entries=[
            NvsEntry("ssid", "data", "string", "TestNetwork"),
            NvsEntry("password", "data", "string", "testpass123"),
            NvsEntry("target_ip", "data", "string", "10.0.2.2"),
            NvsEntry("target_port", "data", "u16", "5005"),
            NvsEntry("node_id", "data", "u8", "1"),
            NvsEntry("csi_channel", "data", "u8", "6"),
            NvsEntry("filter_mac", "data", "hex2bin", "aabbccddeeff"),
        ],
    ))

    # 4. edge-tier0 - raw passthrough (no DSP)
    configs.append(NvsConfig(
        name="edge-tier0",
        description="Edge tier 0: raw CSI passthrough, no on-device DSP",
        entries=[
            NvsEntry("ssid", "data", "string", "TestNetwork"),
            NvsEntry("password", "data", "string", "testpass123"),
            NvsEntry("target_ip", "data", "string", "10.0.2.2"),
            NvsEntry("edge_tier", "data", "u8", "0"),
        ],
    ))

    # 5. edge-tier1 - basic presence/motion detection
    configs.append(NvsConfig(
        name="edge-tier1",
        description="Edge tier 1: basic presence and motion detection",
        entries=[
            NvsEntry("ssid", "data", "string", "TestNetwork"),
            NvsEntry("password", "data", "string", "testpass123"),
            NvsEntry("target_ip", "data", "string", "10.0.2.2"),
            NvsEntry("edge_tier", "data", "u8", "1"),
            NvsEntry("pres_thresh", "data", "u16", "50"),
        ],
    ))

    # 6. edge-tier2-custom - full pipeline with custom thresholds
    configs.append(NvsConfig(
        name="edge-tier2-custom",
        description="Edge tier 2: full pipeline with custom thresholds",
        entries=[
            NvsEntry("ssid", "data", "string", "TestNetwork"),
            NvsEntry("password", "data", "string", "testpass123"),
            NvsEntry("target_ip", "data", "string", "10.0.2.2"),
            NvsEntry("edge_tier", "data", "u8", "2"),
            NvsEntry("pres_thresh", "data", "u16", "100"),
            NvsEntry("fall_thresh", "data", "u16", "3000"),
            NvsEntry("vital_win", "data", "u16", "256"),
            NvsEntry("vital_int", "data", "u16", "500"),
            NvsEntry("subk_count", "data", "u8", "16"),
        ],
    ))

    # 7. tdm-3node - TDM mesh with 3 nodes (slot 0)
    configs.append(NvsConfig(
        name="tdm-3node",
        description="TDM mesh: 3-node schedule, this node is slot 0",
        entries=[
            NvsEntry("ssid", "data", "string", "TestNetwork"),
            NvsEntry("password", "data", "string", "testpass123"),
            NvsEntry("target_ip", "data", "string", "10.0.2.2"),
            NvsEntry("node_id", "data", "u8", "0"),
            NvsEntry("tdm_slot", "data", "u8", "0"),
            NvsEntry("tdm_nodes", "data", "u8", "3"),
        ],
    ))

    # 8. wasm-signed - WASM runtime with signature verification
    configs.append(NvsConfig(
        name="wasm-signed",
        description="WASM runtime enabled with Ed25519 signature verification",
        entries=[
            NvsEntry("ssid", "data", "string", "TestNetwork"),
            NvsEntry("password", "data", "string", "testpass123"),
            NvsEntry("target_ip", "data", "string", "10.0.2.2"),
            NvsEntry("edge_tier", "data", "u8", "2"),
            # wasm_verify=1 + a 32-byte dummy Ed25519 pubkey
            NvsEntry("wasm_verify", "data", "u8", "1"),
            NvsEntry("wasm_pubkey", "data", "hex2bin",
                     "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef"),
        ],
    ))

    # 9. wasm-unsigned - WASM runtime without signature verification
    configs.append(NvsConfig(
        name="wasm-unsigned",
        description="WASM runtime with signature verification disabled",
        entries=[
            NvsEntry("ssid", "data", "string", "TestNetwork"),
            NvsEntry("password", "data", "string", "testpass123"),
            NvsEntry("target_ip", "data", "string", "10.0.2.2"),
            NvsEntry("edge_tier", "data", "u8", "2"),
            NvsEntry("wasm_verify", "data", "u8", "0"),
            NvsEntry("wasm_max", "data", "u8", "2"),
        ],
    ))

    # 10. 5ghz-channel - 5 GHz channel override
    configs.append(NvsConfig(
        name="5ghz-channel",
        description="ADR-060: 5 GHz channel 36 override",
        entries=[
            NvsEntry("ssid", "data", "string", "TestNetwork5G"),
            NvsEntry("password", "data", "string", "testpass123"),
            NvsEntry("target_ip", "data", "string", "10.0.2.2"),
            NvsEntry("csi_channel", "data", "u8", "36"),
        ],
    ))

    # 11. boundary-max - maximum VALID values for all numeric fields
    # Uses firmware-validated max ranges (not raw u8/u16 max):
    # vital_win: 32-256, top_k: 1-32, power_duty: 10-100
    configs.append(NvsConfig(
        name="boundary-max",
        description="Boundary test: maximum valid values per firmware validation ranges",
        entries=[
            NvsEntry("ssid", "data", "string", "TestNetwork"),
            NvsEntry("password", "data", "string", "testpass123"),
            NvsEntry("target_ip", "data", "string", "10.0.2.2"),
            NvsEntry("target_port", "data", "u16", "65535"),
            NvsEntry("node_id", "data", "u8", "255"),
            NvsEntry("edge_tier", "data", "u8", "2"),
            NvsEntry("pres_thresh", "data", "u16", "65535"),
            NvsEntry("fall_thresh", "data", "u16", "65535"),
            NvsEntry("vital_win", "data", "u16", "256"),  # max validated
            NvsEntry("vital_int", "data", "u16", "10000"),
            NvsEntry("subk_count", "data", "u8", "32"),
            NvsEntry("power_duty", "data", "u8", "100"),
        ],
    ))

    # 12. boundary-min - minimum VALID values for all numeric fields
    configs.append(NvsConfig(
        name="boundary-min",
        description="Boundary test: minimum valid values per firmware validation ranges",
        entries=[
            NvsEntry("ssid", "data", "string", "TestNetwork"),
            NvsEntry("password", "data", "string", "testpass123"),
            NvsEntry("target_ip", "data", "string", "10.0.2.2"),
            NvsEntry("target_port", "data", "u16", "1024"),
            NvsEntry("node_id", "data", "u8", "0"),
            NvsEntry("edge_tier", "data", "u8", "0"),
            NvsEntry("pres_thresh", "data", "u16", "1"),
            NvsEntry("fall_thresh", "data", "u16", "100"),  # min valid (0.1 rad/s²)
            NvsEntry("vital_win", "data", "u16", "32"),  # min validated
            NvsEntry("vital_int", "data", "u16", "100"),
            NvsEntry("subk_count", "data", "u8", "1"),
            NvsEntry("power_duty", "data", "u8", "10"),
        ],
    ))

    # 13. power-save - low power duty cycle configuration
    configs.append(NvsConfig(
        name="power-save",
        description="Power-save mode: 10% duty cycle for battery-powered nodes",
        entries=[
            NvsEntry("ssid", "data", "string", "TestNetwork"),
            NvsEntry("password", "data", "string", "testpass123"),
            NvsEntry("target_ip", "data", "string", "10.0.2.2"),
            NvsEntry("edge_tier", "data", "u8", "1"),
            NvsEntry("power_duty", "data", "u8", "10"),
        ],
    ))

    # 14. empty-strings - empty SSID/password to test fallback to Kconfig
    configs.append(NvsConfig(
        name="empty-strings",
        description="Empty SSID and password to verify Kconfig fallback",
        entries=[
            NvsEntry("ssid", "data", "string", ""),
            NvsEntry("password", "data", "string", ""),
            NvsEntry("target_ip", "data", "string", "10.0.2.2"),
        ],
    ))

    return configs
|
||||
|
||||
|
||||
def generate_nvs_binary(csv_content: str, size: int) -> bytes:
    """Generate an NVS partition binary from CSV content.

    Tries multiple methods to find nvs_partition_gen:
      1. esp_idf_nvs_partition_gen pip package
      2. Legacy nvs_partition_gen pip package
      3. ESP-IDF bundled script (via IDF_PATH)
      4. Module invocation

    Args:
        csv_content: NVS CSV text (header + namespace + entry rows).
        size: Partition size in bytes; must match the partition-table entry.

    Returns:
        The generated partition image as bytes.

    Raises:
        RuntimeError: When no generator tool could be located.
    """
    import subprocess
    import tempfile

    # delete=False so the generator tool can reopen the file by path
    # (an open NamedTemporaryFile cannot be reopened on Windows).
    with tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False) as f_csv:
        f_csv.write(csv_content)
        csv_path = f_csv.name

    bin_path = csv_path.replace(".csv", ".bin")

    try:
        # Try pip-installed version first
        try:
            from esp_idf_nvs_partition_gen import nvs_partition_gen
            # NOTE(review): assumes a generate(csv, out, size) call signature —
            # confirm against the installed package version.
            nvs_partition_gen.generate(csv_path, bin_path, size)
            with open(bin_path, "rb") as f:
                return f.read()
        except ImportError:
            pass

        # Try legacy import
        try:
            import nvs_partition_gen
            nvs_partition_gen.generate(csv_path, bin_path, size)
            with open(bin_path, "rb") as f:
                return f.read()
        except ImportError:
            pass

        # Try ESP-IDF bundled script
        idf_path = os.environ.get("IDF_PATH", "")
        gen_script = os.path.join(
            idf_path, "components", "nvs_flash",
            "nvs_partition_generator", "nvs_partition_gen.py"
        )
        if os.path.isfile(gen_script):
            subprocess.check_call([
                sys.executable, gen_script, "generate",
                csv_path, bin_path, hex(size)
            ])
            with open(bin_path, "rb") as f:
                return f.read()

        # Last resort: try as a module
        try:
            subprocess.check_call([
                sys.executable, "-m", "nvs_partition_gen", "generate",
                csv_path, bin_path, hex(size)
            ])
            with open(bin_path, "rb") as f:
                return f.read()
        except (subprocess.CalledProcessError, FileNotFoundError):
            print("ERROR: NVS partition generator tool not found.", file=sys.stderr)
            print("Install: pip install esp-idf-nvs-partition-gen", file=sys.stderr)
            print("Or set IDF_PATH to your ESP-IDF installation", file=sys.stderr)
            raise RuntimeError(
                "NVS partition generator not available. "
                "Install: pip install esp-idf-nvs-partition-gen"
            )

    finally:
        # Best-effort cleanup of both temp files on every exit path.
        for p in set((csv_path, bin_path)):  # deduplicate in case paths are identical
            if os.path.isfile(p):
                os.unlink(p)
|
||||
|
||||
|
||||
def main():
    """CLI entry point: emit NVS partition CSVs/binaries for the test matrix."""
    parser = argparse.ArgumentParser(
        description="Generate NVS partition binaries for QEMU firmware test matrix (ADR-061)",
    )
    parser.add_argument(
        "--output-dir", required=True,
        help="Directory to write NVS binary files",
    )
    parser.add_argument(
        "--only", type=str, default=None,
        help="Comma-separated list of config names to generate (default: all)",
    )
    parser.add_argument(
        "--csv-only", action="store_true",
        help="Only generate CSV files, skip binary generation",
    )
    parser.add_argument(
        "--list", action="store_true", dest="list_configs",
        help="List all available configurations and exit",
    )
    args = parser.parse_args()

    catalog = define_configs()

    # --list: print the catalog and stop.
    if args.list_configs:
        print(f"{'Name':<20} {'Description'}")
        print("-" * 70)
        for cfg in catalog:
            print(f"{cfg.name:<20} {cfg.description}")
        sys.exit(0)

    # Narrow to the requested subset when --only was given.
    if args.only:
        wanted = set(args.only.split(","))
        configs = [c for c in catalog if c.name in wanted]
        unknown = wanted - {c.name for c in configs}
        if unknown:
            print(f"WARNING: Unknown config names: {', '.join(sorted(unknown))}",
                  file=sys.stderr)
    else:
        configs = catalog

    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    print(f"Generating {len(configs)} NVS configurations in {output_dir}/")
    print()

    n_ok = 0
    n_fail = 0
    for cfg in configs:
        csv_content = cfg.to_csv()

        # The CSV is always written for reference, even in binary mode.
        csv_path = output_dir / f"nvs_{cfg.name}.csv"
        csv_path.write_text(csv_content)

        if cfg.name == "default" and not cfg.entries:
            # "default" means no NVS — emit an all-0xFF (erased-flash) image.
            print(f" [{cfg.name}] No NVS entries (uses Kconfig defaults)")
            bin_path = output_dir / f"nvs_{cfg.name}.bin"
            bin_path.write_bytes(b"\xff" * NVS_PARTITION_SIZE)
            n_ok += 1
            continue

        if args.csv_only:
            print(f" [{cfg.name}] CSV only: {csv_path}")
            n_ok += 1
            continue

        try:
            nvs_bin = generate_nvs_binary(csv_content, NVS_PARTITION_SIZE)
            bin_path = output_dir / f"nvs_{cfg.name}.bin"
            bin_path.write_bytes(nvs_bin)
            print(f" [{cfg.name}] {len(nvs_bin)} bytes -> {bin_path}")
            n_ok += 1
        except Exception as e:
            print(f" [{cfg.name}] ERROR: {e}", file=sys.stderr)
            n_fail += 1

    print()
    print(f"Done: {n_ok} succeeded, {n_fail} failed")

    if n_fail > 0:
        sys.exit(1)


if __name__ == "__main__":
    main()
|
||||
258
scripts/inject_fault.py
Executable file
258
scripts/inject_fault.py
Executable file
|
|
@ -0,0 +1,258 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
QEMU Fault Injector — ADR-061 Layer 9
|
||||
|
||||
Connects to a QEMU monitor socket and injects a specified fault type.
|
||||
Used by qemu-chaos-test.sh to stress-test firmware resilience.
|
||||
|
||||
Supported faults:
|
||||
wifi_kill - Pause/resume VM (simulates WiFi reconnect)
|
||||
ring_flood - Send 1000 rapid commands to stress ring buffer
|
||||
heap_exhaust - Write to heap metadata region to simulate OOM
|
||||
timer_starvation - Pause VM for 500ms to starve FreeRTOS timers
|
||||
corrupt_frame - Write bad magic bytes to CSI frame buffer area
|
||||
nvs_corrupt - Write garbage to NVS flash region (offset 0x9000)
|
||||
|
||||
Usage:
|
||||
python3 inject_fault.py --socket /path/to/qemu.sock --fault wifi_kill
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import random
|
||||
import socket
|
||||
import sys
|
||||
import time
|
||||
|
||||
|
||||
# Timeout for each monitor command (seconds); also the default for
# connect_monitor()/send_cmd() and the CLI --timeout flag.
CMD_TIMEOUT = 5.0

# Maximum bytes read per recv() from the QEMU monitor socket.
RECV_BUFSIZE = 4096
|
||||
|
||||
|
||||
def connect_monitor(sock_path: str, timeout: float = CMD_TIMEOUT) -> socket.socket:
    """Open a connection to the QEMU monitor Unix domain socket.

    Exits the process with status 2 if the socket cannot be reached;
    otherwise drains the monitor greeting and returns the connected socket.
    """
    conn = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
    conn.settimeout(timeout)
    try:
        conn.connect(sock_path)
    except (socket.error, FileNotFoundError) as exc:
        print(f"ERROR: Cannot connect to QEMU monitor at {sock_path}: {exc}",
              file=sys.stderr)
        sys.exit(2)

    # Consume the initial banner so later reads start at a clean prompt;
    # an empty or missing banner is only worth a warning.
    try:
        greeting = conn.recv(RECV_BUFSIZE).decode("utf-8", errors="replace")
        if not greeting:
            print(f"WARNING: Connected to {sock_path} but received no banner data. "
                  f"QEMU monitor may not be ready.", file=sys.stderr)
    except socket.timeout:
        print(f"WARNING: Connected to {sock_path} but timed out waiting for banner "
              f"after {timeout}s. QEMU monitor may be unresponsive.", file=sys.stderr)

    return conn
||||
|
||||
|
||||
def send_cmd(s: socket.socket, cmd: str, timeout: float = CMD_TIMEOUT) -> str:
    """Send a command to the QEMU monitor and return the response text.

    Args:
        s: Connected monitor socket (see connect_monitor).
        cmd: Monitor command, without trailing newline.
        timeout: Per-recv timeout in seconds.

    Returns:
        The accumulated (possibly multi-line) response; empty string if the
        connection was lost while sending.
    """
    s.settimeout(timeout)
    try:
        s.sendall((cmd + "\n").encode("utf-8"))
    except (BrokenPipeError, ConnectionResetError) as e:
        print(f"ERROR: Lost connection to QEMU monitor: {e}", file=sys.stderr)
        return ""

    # Read until the "(qemu) " prompt reappears, EOF, or timeout.
    response = ""
    try:
        while True:
            chunk = s.recv(RECV_BUFSIZE).decode("utf-8", errors="replace")
            if not chunk:
                break
            response += chunk
            # Check the accumulated text, not just the last chunk, so a
            # prompt split across two recv() calls is still detected.
            if "(qemu)" in response:
                break
    except socket.timeout:
        pass  # Response may not have a clean prompt

    return response
||||
|
||||
|
||||
def fault_wifi_kill(s: socket.socket) -> None:
    """Pause the VM for two seconds, then resume it.

    From the guest's perspective this resembles a WiFi drop followed by a
    reconnect, exercising the firmware's reconnection path.
    """
    for action, label in (("stop", "Pausing"), ("cont", "Resuming")):
        print(f"[wifi_kill] {label} VM...")
        send_cmd(s, action)
        if action == "stop":
            time.sleep(2.0)
    print("[wifi_kill] Injected: 2s pause/resume cycle")
||||
|
||||
|
||||
def fault_ring_flood(s: socket.socket) -> None:
    """Stress the ring buffer with 1000 back-to-back NMI triggers.

    On real hardware, scenario 7 is a high-rate CSI burst. Under QEMU we
    approximate it by firing rapid NMIs, which the mock CSI handler
    processes as frame events.
    """
    print("[ring_flood] Sending 1000 rapid commands...")
    delivered = 0
    for _ in range(1000):
        try:
            # 'nmi' triggers the interrupt handler (mock CSI frame path).
            s.sendall(b"nmi\n")
        except (BrokenPipeError, ConnectionResetError):
            print(f"[ring_flood] Connection lost after {delivered} commands")
            break
        delivered += 1

    # Discard whatever monitor responses piled up during the flood.
    s.settimeout(1.0)
    try:
        while s.recv(RECV_BUFSIZE):
            pass
    except socket.timeout:
        pass

    print(f"[ring_flood] Injected: {delivered}/1000 rapid NMI triggers")
|
||||
|
||||
|
||||
def fault_heap_exhaust(s: socket.socket, flash_path: str = None) -> None:
    """Approximate memory pressure with a heap probe plus a 3s VM pause.

    Real heap corruption needs a GDB stub (-gdb tcp::1234); this routine
    only reads the heap region via the monitor and stalls the VM, which
    stresses heap management in a safe, reproducible way.
    NOTE(review): flash_path is accepted for dispatch symmetry but unused.
    """
    heap_addr = 0x3FC88000  # address probed as the heap header below
    print("[heap_exhaust] Probing heap region...")
    header = send_cmd(s, f"xp /4xw 0x{heap_addr:08x}")
    print(f"[heap_exhaust] Heap header: {header.strip()}")

    # Stall execution to stress heap/watchdog handling.
    print("[heap_exhaust] Pausing VM for 3s to stress heap management...")
    send_cmd(s, "stop")
    time.sleep(3.0)
    send_cmd(s, "cont")
    print("[heap_exhaust] WARNING: Actual heap corruption requires GDB stub (-gdb tcp::1234)")
    print("[heap_exhaust] Injected: 3s VM pause (simulates memory pressure)")
||||
|
||||
|
||||
def fault_timer_starvation(s: socket.socket) -> None:
    """Starve FreeRTOS tick and timer callbacks with a 500ms VM halt."""
    pause_s = 0.5
    print("[timer_starvation] Pausing VM for 500ms...")
    send_cmd(s, "stop")
    time.sleep(pause_s)
    send_cmd(s, "cont")
    print("[timer_starvation] Injected: 500ms execution pause")
|
||||
|
||||
|
||||
def fault_corrupt_frame(s: socket.socket, flash_path: str = None) -> None:
    """Disrupt CSI frame handling with a probe plus a 1s mid-frame pause.

    Actual frame-buffer writes need a GDB stub (-gdb tcp::1234); without
    one we read the buffer via the monitor and stall the VM to perturb
    frame-processing timing.
    NOTE(review): flash_path is accepted for dispatch symmetry but unused.
    """
    frame_addr = 0x3FCA0000  # frame buffer address used by the probe
    print(f"[corrupt_frame] Probing frame buffer at 0x{frame_addr:08X}...")
    dump = send_cmd(s, f"xp /4xb 0x{frame_addr:08x}")
    print(f"[corrupt_frame] Frame buffer: {dump.strip()}")

    # Stall the VM briefly to disrupt frame-processing timing.
    print("[corrupt_frame] Pausing VM for 1s to disrupt frame processing...")
    send_cmd(s, "stop")
    time.sleep(1.0)
    send_cmd(s, "cont")
    print("[corrupt_frame] WARNING: Actual frame corruption requires GDB stub (-gdb tcp::1234)")
    print("[corrupt_frame] Injected: 1s VM pause during frame processing")
|
||||
|
||||
|
||||
def fault_nvs_corrupt(s: socket.socket, flash_path: str = None) -> None:
    """Corrupt the NVS partition, preferably by patching the flash image.

    With a flash image path, 16 random bytes are written at the NVS
    partition offset (0x9000) in the image file. Without one, only a
    read-only monitor probe is performed — nothing is corrupted.
    """
    nvs_off = 0x9000
    if flash_path and os.path.isfile(flash_path):
        junk = bytes(random.randint(0, 255) for _ in range(16))
        with open(flash_path, "r+b") as img:
            img.seek(nvs_off)
            img.write(junk)
        print(f"[nvs_corrupt] Wrote 16 garbage bytes at flash offset 0x{nvs_off:X}")
        print(f"[nvs_corrupt] Flash image: {flash_path}")
    else:
        # Fallback: read-only probe via the monitor, plus a loud warning.
        probe = send_cmd(s, "xp /8xb 0x3C009000")
        print(f"[nvs_corrupt] NVS region (read-only probe): {probe.strip()}")
        print("[nvs_corrupt] WARNING: No --flash path provided; NVS corruption was NOT injected")
        print("[nvs_corrupt] Pass --flash /path/to/flash.bin for actual corruption")
|
||||
|
||||
|
||||
# Map fault names (the CLI --fault choices) to injection functions.
# Every injector takes the monitor socket; some also accept flash_path,
# which main() dispatches via signature inspection.
FAULT_MAP = {
    "wifi_kill": fault_wifi_kill,
    "ring_flood": fault_ring_flood,
    "heap_exhaust": fault_heap_exhaust,
    "timer_starvation": fault_timer_starvation,
    "corrupt_frame": fault_corrupt_frame,
    "nvs_corrupt": fault_nvs_corrupt,
}
|
||||
|
||||
|
||||
def main():
    """Parse CLI arguments, connect to the QEMU monitor, and run one fault."""
    parser = argparse.ArgumentParser(
        description="QEMU Fault Injector — ADR-061 Layer 9",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__,
    )
    parser.add_argument(
        "--socket", required=True,
        help="Path to QEMU monitor Unix domain socket",
    )
    parser.add_argument(
        "--fault", required=True, choices=list(FAULT_MAP.keys()),
        help="Fault type to inject",
    )
    parser.add_argument(
        "--timeout", type=float, default=CMD_TIMEOUT,
        help=f"Per-command timeout in seconds (default: {CMD_TIMEOUT})",
    )
    parser.add_argument(
        "--flash", default=None,
        help="Path to flash image (for nvs_corrupt direct file writes)",
    )
    args = parser.parse_args()

    print(f"[inject_fault] Connecting to {args.socket}...")
    mon = connect_monitor(args.socket, timeout=args.timeout)

    print(f"[inject_fault] Injecting fault: {args.fault}")
    try:
        injector = FAULT_MAP[args.fault]
        # Only some injectors accept flash_path — inspect the signature
        # instead of hard-coding which ones do.
        import inspect
        if "flash_path" in inspect.signature(injector).parameters:
            injector(mon, flash_path=args.flash)
        else:
            injector(mon)
    except Exception as e:
        print(f"ERROR: Fault injection failed: {e}", file=sys.stderr)
        mon.close()
        sys.exit(1)

    mon.close()
    print(f"[inject_fault] Complete: {args.fault}")


if __name__ == "__main__":
    main()
|
||||
337
scripts/install-qemu.sh
Normal file
337
scripts/install-qemu.sh
Normal file
|
|
@ -0,0 +1,337 @@
|
|||
#!/bin/bash
# install-qemu.sh — Install QEMU with ESP32-S3 support (Espressif fork)
# Usage: bash scripts/install-qemu.sh [OPTIONS]
#
# Clones the Espressif QEMU fork, builds the xtensa-softmmu target, symlinks
# the binary into ~/.local/bin, and installs helper Python packages.
set -euo pipefail

# ── Colors ────────────────────────────────────────────────────────────────────
RED='\033[0;31m'; GREEN='\033[0;32m'; YELLOW='\033[1;33m'
BLUE='\033[0;34m'; CYAN='\033[0;36m'; BOLD='\033[1m'; NC='\033[0m'

# Logging helpers — consistent, colorized prefixes for script output.
info() { echo -e "${BLUE}[INFO]${NC} $*"; }
ok() { echo -e "${GREEN}[OK]${NC} $*"; }
warn() { echo -e "${YELLOW}[WARN]${NC} $*"; }
err() { echo -e "${RED}[ERROR]${NC} $*"; }
step() { echo -e "\n${CYAN}${BOLD}▶ $*${NC}"; }

# ── Defaults ──────────────────────────────────────────────────────────────────
INSTALL_DIR="$HOME/.espressif/qemu"                 # clone/build location
BRANCH="esp-develop"                                # QEMU branch or tag to build
JOBS=""                                             # empty => auto-detect (nproc) below
SKIP_DEPS=false                                     # --skip-deps
UNINSTALL=false                                     # --uninstall
CHECK_ONLY=false                                    # --check
QEMU_REPO="https://github.com/espressif/qemu.git"
||||
# ── Usage ─────────────────────────────────────────────────────────────────────
|
||||
# Print the help text. The heredoc delimiter is unquoted so the ${BOLD}/${NC}
# color variables expand inside the output.
usage() {
  cat <<EOF
${BOLD}install-qemu.sh${NC} — Install QEMU with ESP32-S3 support (Espressif fork)

${BOLD}USAGE${NC}
 bash scripts/install-qemu.sh [OPTIONS]

${BOLD}OPTIONS${NC}
 --install-dir DIR Installation directory (default: ~/.espressif/qemu)
 --branch TAG QEMU branch or tag to build (default: esp-develop)
 --jobs N Parallel build jobs (default: nproc)
 --skip-deps Skip system dependency installation
 --uninstall Remove QEMU installation
 --check Verify existing installation and exit
 -h, --help Show this help

${BOLD}EXIT CODES${NC}
 0 Success
 1 Dependency installation failed
 2 Build failed
 3 Unsupported OS

${BOLD}EXAMPLES${NC}
 bash scripts/install-qemu.sh
 bash scripts/install-qemu.sh --install-dir /opt/qemu-esp --jobs 8
 bash scripts/install-qemu.sh --check
 bash scripts/install-qemu.sh --uninstall
EOF
}
|
||||
|
||||
# ── Parse args ────────────────────────────────────────────────────────────────
# Value options consume the following argument (shift 2); flags set globals.
while [[ $# -gt 0 ]]; do
  case "$1" in
    --install-dir) INSTALL_DIR="$2"; shift 2 ;;
    --branch) BRANCH="$2"; shift 2 ;;
    --jobs) JOBS="$2"; shift 2 ;;
    --skip-deps) SKIP_DEPS=true; shift ;;
    --uninstall) UNINSTALL=true; shift ;;
    --check) CHECK_ONLY=true; shift ;;
    -h|--help) usage; exit 0 ;;
    *) err "Unknown option: $1"; usage; exit 1 ;;
  esac
done
|
||||
|
||||
# ── OS detection ──────────────────────────────────────────────────────────────
# Sets OS (linux/macos), DISTRO (package-manager family), and IS_WSL.
# Exits with code 3 on native Windows or unknown kernels — the build needs a
# POSIX toolchain (use WSL, Docker, or a pre-built release instead).
detect_os() {
  OS="unknown"
  DISTRO="unknown"
  IS_WSL=false

  case "$(uname -s)" in
    Linux)
      OS="linux"
      # WSL kernels advertise "microsoft" in /proc/version.
      if grep -qi microsoft /proc/version 2>/dev/null; then
        IS_WSL=true
      fi
      if [ -f /etc/os-release ]; then
        # shellcheck disable=SC1091
        . /etc/os-release
        # Collapse derivative distros onto their package-manager family.
        case "$ID" in
          ubuntu|debian|pop|linuxmint|elementary) DISTRO="debian" ;;
          fedora|rhel|centos|rocky|alma) DISTRO="fedora" ;;
          arch|manjaro|endeavouros) DISTRO="arch" ;;
          opensuse*|sles) DISTRO="suse" ;;
          *) DISTRO="$ID" ;;
        esac
      fi
      ;;
    Darwin) OS="macos"; DISTRO="macos" ;;
    MINGW*|MSYS*)
      err "Native Windows/MINGW detected."
      err "QEMU ESP32-S3 must be built on Linux or macOS."
      err "Options:"
      err " 1. Use WSL: wsl bash scripts/install-qemu.sh"
      err " 2. Use Docker: docker run -it ubuntu:22.04 bash"
      err " 3. Download pre-built: https://github.com/espressif/qemu/releases"
      exit 3
      ;;
    *) err "Unsupported OS: $(uname -s)"; exit 3 ;;
  esac

  info "Detected: OS=${OS} Distro=${DISTRO} WSL=${IS_WSL}"
}
|
||||
|
||||
# ── Check existing installation ───────────────────────────────────────────────
# Returns 0 if a working qemu-system-xtensa is found (build dir first, then
# anywhere on PATH); 1 otherwise.
check_installation() {
  local qemu_bin="$INSTALL_DIR/build/qemu-system-xtensa"
  if [ -x "$qemu_bin" ]; then
    local version
    # `|| true` keeps `set -e` from aborting the script if --version fails.
    version=$("$qemu_bin" --version 2>/dev/null | head -1) || true
    if [ -n "$version" ]; then
      ok "QEMU installed: $version"
      ok "Binary: $qemu_bin"
      return 0
    fi
  fi
  # Fall back to whatever qemu-system-xtensa is on PATH.
  if command -v qemu-system-xtensa &>/dev/null; then
    local version
    version=$(qemu-system-xtensa --version 2>/dev/null | head -1) || true
    ok "QEMU found in PATH: $version"
    return 0
  fi
  warn "QEMU with ESP32-S3 support not found"
  return 1
}
|
||||
|
||||
# --check: report installation status and exit (0 = found, 1 = missing).
if $CHECK_ONLY; then
  detect_os
  if check_installation; then exit 0; else exit 1; fi
fi

# ── Uninstall ─────────────────────────────────────────────────────────────────
# Removes the clone/build tree and the ~/.local/bin symlink, then exits 0.
if $UNINSTALL; then
  step "Uninstalling QEMU from $INSTALL_DIR"
  if [ -d "$INSTALL_DIR" ]; then
    rm -rf "$INSTALL_DIR"
    ok "Removed $INSTALL_DIR"
  else
    warn "Directory not found: $INSTALL_DIR"
  fi
  # Remove the symlink only if it really is a symlink (-L).
  local_bin="$HOME/.local/bin/qemu-system-xtensa"
  if [ -L "$local_bin" ]; then
    rm -f "$local_bin"
    ok "Removed symlink $local_bin"
  fi
  ok "Uninstall complete"
  exit 0
fi
|
||||
|
||||
# ── Main install flow ─────────────────────────────────────────────────────────
detect_os

# Default jobs = CPU count (nproc on Linux, sysctl on macOS, else 4).
if [ -z "$JOBS" ]; then
  if command -v nproc &>/dev/null; then
    JOBS=$(nproc)
  elif command -v sysctl &>/dev/null; then
    JOBS=$(sysctl -n hw.ncpu 2>/dev/null || echo 4)
  else
    JOBS=4
  fi
fi
info "Build parallelism: $JOBS jobs"
|
||||
|
||||
# ── Step 1: Install dependencies ──────────────────────────────────────────────
# Installs the QEMU build toolchain and libraries via the detected distro's
# package manager. Returns non-zero (handled by the caller) on unknown distros.
install_deps() {
  step "Installing build dependencies"

  case "$DISTRO" in
    debian)
      info "Using apt (Debian/Ubuntu)"
      sudo apt-get update -qq
      sudo apt-get install -y -qq \
        git build-essential python3 python3-pip python3-venv \
        ninja-build pkg-config libglib2.0-dev libpixman-1-dev \
        libslirp-dev libgcrypt-dev
      ;;
    fedora)
      info "Using dnf (Fedora/RHEL)"
      sudo dnf install -y \
        git gcc gcc-c++ make python3 python3-pip \
        ninja-build pkgconfig glib2-devel pixman-devel \
        libslirp-devel libgcrypt-devel
      ;;
    arch)
      info "Using pacman (Arch)"
      sudo pacman -S --needed --noconfirm \
        git base-devel python python-pip \
        ninja pkgconf glib2 pixman libslirp libgcrypt
      ;;
    suse)
      info "Using zypper (openSUSE)"
      sudo zypper install -y \
        git gcc gcc-c++ make python3 python3-pip \
        ninja pkg-config glib2-devel libpixman-1-0-devel \
        libslirp-devel libgcrypt-devel
      ;;
    macos)
      info "Using Homebrew"
      if ! command -v brew &>/dev/null; then
        err "Homebrew not found. Install from https://brew.sh"
        exit 1
      fi
      # `|| true`: brew exits non-zero when packages are already installed.
      brew install glib pixman ninja pkg-config libslirp libgcrypt || true
      ;;
    *)
      warn "Unknown distro '$DISTRO' — install these manually:"
      warn " git, gcc/g++, python3, ninja, pkg-config, glib2-dev, pixman-dev, libslirp-dev"
      return 1
      ;;
  esac
  ok "Dependencies installed"
}

# Dependency installation is skippable for pre-provisioned machines/CI.
if ! $SKIP_DEPS; then
  install_deps || { err "Dependency installation failed"; exit 1; }
else
  info "Skipping dependency installation (--skip-deps)"
fi
|
||||
|
||||
# ── Step 2: Clone Espressif QEMU fork ─────────────────────────────────────────
# Shallow clone (--depth=1) keeps the download small; an existing checkout is
# updated in place instead of re-cloned.
step "Cloning Espressif QEMU fork"

SRC_DIR="$INSTALL_DIR"
if [ -d "$SRC_DIR/.git" ]; then
  info "Repository already exists at $SRC_DIR"
  info "Fetching latest changes on branch $BRANCH"
  git -C "$SRC_DIR" fetch origin "$BRANCH" --depth=1
  # The branch may not exist locally after a shallow fetch — fall back to
  # the remote-tracking ref.
  git -C "$SRC_DIR" checkout "$BRANCH" 2>/dev/null || git -C "$SRC_DIR" checkout "origin/$BRANCH"
  ok "Updated to latest $BRANCH"
else
  info "Cloning $QEMU_REPO (branch: $BRANCH)"
  mkdir -p "$(dirname "$SRC_DIR")"
  git clone --depth=1 --branch "$BRANCH" "$QEMU_REPO" "$SRC_DIR"
  ok "Cloned to $SRC_DIR"
fi
|
||||
|
||||
# ── Step 3: Configure and build ───────────────────────────────────────────────
# Only the xtensa-softmmu target is needed for ESP32-S3 emulation;
# --enable-slirp provides user-mode networking.
step "Configuring QEMU (target: xtensa-softmmu)"

BUILD_DIR="$SRC_DIR/build"
mkdir -p "$BUILD_DIR"
cd "$SRC_DIR"

# `tail` keeps CI logs readable; `set -o pipefail` still propagates failures.
./configure \
  --target-list=xtensa-softmmu \
  --enable-slirp \
  --enable-gcrypt \
  --prefix="$INSTALL_DIR/dist" \
  2>&1 | tail -5

step "Building QEMU ($JOBS parallel jobs)"
make -j"$JOBS" -C "$BUILD_DIR" 2>&1 | tail -20

# Belt-and-braces: verify the binary actually exists before declaring success.
if [ ! -x "$BUILD_DIR/qemu-system-xtensa" ]; then
  err "Build failed — qemu-system-xtensa binary not found"
  err "Troubleshooting:"
  err " 1. Check build output above for errors"
  err " 2. Ensure all dependencies are installed: re-run without --skip-deps"
  err " 3. Try with fewer jobs: --jobs 1"
  err " 4. On macOS, ensure Xcode CLT: xcode-select --install"
  exit 2
fi
ok "Build succeeded: $BUILD_DIR/qemu-system-xtensa"
|
||||
|
||||
# ── Step 4: Create symlink / add to PATH ──────────────────────────────────────
step "Setting up PATH access"

LOCAL_BIN="$HOME/.local/bin"
mkdir -p "$LOCAL_BIN"
# -sf overwrites a stale symlink left by a previous install.
ln -sf "$BUILD_DIR/qemu-system-xtensa" "$LOCAL_BIN/qemu-system-xtensa"
ok "Symlinked to $LOCAL_BIN/qemu-system-xtensa"

# Warn if ~/.local/bin is not on PATH (exact match per PATH component).
if ! echo "$PATH" | tr ':' '\n' | grep -qx "$LOCAL_BIN"; then
  warn "$LOCAL_BIN is not in your PATH"
  warn "Add this to your shell profile (~/.bashrc or ~/.zshrc):"
  echo -e " ${BOLD}export PATH=\"\$HOME/.local/bin:\$PATH\"${NC}"
fi

# ── Step 5: Verify ────────────────────────────────────────────────────────────
step "Verifying installation"

QEMU_VERSION=$("$BUILD_DIR/qemu-system-xtensa" --version | head -1)
ok "$QEMU_VERSION"

# Check ESP32-S3 machine support (informational — missing is not fatal).
if "$BUILD_DIR/qemu-system-xtensa" -machine help 2>/dev/null | grep -q esp32s3; then
  ok "ESP32-S3 machine type available"
else
  warn "ESP32-S3 machine type not listed (may still work with newer builds)"
fi

# ── Step 6: Install Python packages ──────────────────────────────────────────
step "Installing Python packages (esptool, pyyaml, nvs-partition-gen)"

# Fall back to `python3 -m pip` when there is no pip3 on PATH;
# PIP_CMD is intentionally unquoted below so it word-splits.
PIP_CMD="pip3"
if ! command -v pip3 &>/dev/null; then
  PIP_CMD="python3 -m pip"
fi

# Package failures here are non-fatal — QEMU itself is already installed.
$PIP_CMD install --user --quiet \
  esptool \
  pyyaml \
  esp-idf-nvs-partition-gen \
  2>&1 || warn "Some Python packages failed to install (non-fatal)"

ok "Python packages installed"

# ── Done ──────────────────────────────────────────────────────────────────────
echo ""
echo -e "${GREEN}${BOLD}Installation complete!${NC}"
echo ""
echo -e "${BOLD}Next steps:${NC}"
echo ""
echo " 1. Run a smoke test:"
echo -e " ${CYAN}qemu-system-xtensa -nographic -machine esp32s3 \\${NC}"
echo -e " ${CYAN} -drive file=firmware.bin,if=mtd,format=raw \\${NC}"
echo -e " ${CYAN} -serial mon:stdio${NC}"
echo ""
echo " 2. Run the project QEMU tests:"
# NOTE: the ${CYAN} opened in the next line is closed by the ${NC} in the
# line after it, coloring the two-line command as one block.
echo -e " ${CYAN}cd $(dirname "$0")/.."
echo -e " pytest firmware/esp32-csi-node/tests/qemu/ -v${NC}"
echo ""
echo " 3. Binary location:"
echo -e " ${CYAN}$BUILD_DIR/qemu-system-xtensa${NC}"
echo ""
echo -e " 4. Uninstall:"
echo -e " ${CYAN}bash scripts/install-qemu.sh --uninstall${NC}"
echo ""
||||
397
scripts/qemu-chaos-test.sh
Executable file
397
scripts/qemu-chaos-test.sh
Executable file
|
|
@ -0,0 +1,397 @@
|
|||
#!/bin/bash
# QEMU Chaos / Fault Injection Test Runner — ADR-061 Layer 9
#
# Boots the firmware under QEMU, then injects a sequence of faults to
# exercise its resilience. Faults go in through the QEMU monitor socket
# (or GDB stub); each one is followed by a recovery window and a health
# check of the UART output.
#
# Fault types:
#   1. wifi_kill        — Pause/resume VM to simulate WiFi reconnect
#   2. ring_flood       — Inject 1000 rapid mock frames (ring buffer stress)
#   3. heap_exhaust     — Write to heap metadata to simulate low memory
#   4. timer_starvation — Pause VM for 500ms to starve FreeRTOS timers
#   5. corrupt_frame    — Inject a CSI frame with bad magic bytes
#   6. nvs_corrupt      — Write garbage to NVS flash region
#
# Environment variables:
#   QEMU_PATH    - Path to qemu-system-xtensa (default: qemu-system-xtensa)
#   QEMU_TIMEOUT - Boot timeout in seconds (default: 15)
#   FLASH_IMAGE  - Path to merged flash image (default: build/qemu_flash.bin)
#   FAULT_WAIT   - Seconds to wait after fault injection (default: 5)
#
# Exit codes:
#   0  PASS  — all checks passed
#   1  WARN  — non-critical checks failed
#   2  FAIL  — critical checks failed
#   3  FATAL — build error, crash, or infrastructure failure

# ── Help ──────────────────────────────────────────────────────────────
# Print the usage screen and exit 0; only reached via the -h/--help guard.
usage() {
    cat <<'HELP'
Usage: qemu-chaos-test.sh [OPTIONS]

Launch firmware under QEMU and inject a series of faults to verify the
firmware's resilience. Each fault is injected via the QEMU monitor socket,
followed by a recovery window and health check.

Fault types:
  wifi_kill         Pause/resume VM to simulate WiFi reconnect
  ring_flood        Inject 1000 rapid mock frames (ring buffer stress)
  heap_exhaust      Write to heap metadata to simulate low memory
  timer_starvation  Pause VM for 500ms to starve FreeRTOS timers
  corrupt_frame     Inject a CSI frame with bad magic bytes
  nvs_corrupt       Write garbage to NVS flash region

Options:
  -h, --help        Show this help message and exit

Environment variables:
  QEMU_PATH     Path to qemu-system-xtensa (default: qemu-system-xtensa)
  QEMU_TIMEOUT  Boot timeout in seconds (default: 15)
  FLASH_IMAGE   Path to merged flash image (default: build/qemu_flash.bin)
  FAULT_WAIT    Seconds to wait after injection (default: 5)

Examples:
  ./qemu-chaos-test.sh
  QEMU_TIMEOUT=30 FAULT_WAIT=10 ./qemu-chaos-test.sh
  FLASH_IMAGE=/path/to/image.bin ./qemu-chaos-test.sh

Exit codes:
  0  PASS  — all checks passed
  1  WARN  — non-critical checks failed
  2  FAIL  — critical checks failed
  3  FATAL — build error, crash, or infrastructure failure
HELP
    exit 0
}

# Handle help before `set -euo pipefail` so it works in any environment.
if [ "${1:-}" = "-h" ] || [ "${1:-}" = "--help" ]; then
    usage
fi
|
||||
|
||||
set -euo pipefail

# Resolve repo-relative paths from this script's own location.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"

FIRMWARE_DIR="$PROJECT_ROOT/firmware/esp32-csi-node"
BUILD_DIR="$FIRMWARE_DIR/build"

# Tunables — each can be overridden from the environment (see header).
QEMU_BIN="${QEMU_PATH:-qemu-system-xtensa}"
FLASH_IMAGE="${FLASH_IMAGE:-$BUILD_DIR/qemu_flash.bin}"
BOOT_TIMEOUT="${QEMU_TIMEOUT:-15}"
FAULT_WAIT="${FAULT_WAIT:-5}"

# Runtime artifacts produced by this run.
MONITOR_SOCK="$BUILD_DIR/qemu-chaos.sock"
LOG_DIR="$BUILD_DIR/chaos-tests"
UART_LOG="$LOG_DIR/qemu_uart.log"
QEMU_PID=""

# Ordered fault schedule, and per-fault outcomes recorded as "name:exitcode".
FAULTS=("wifi_kill" "ring_flood" "heap_exhaust" "timer_starvation" "corrupt_frame" "nvs_corrupt")
declare -a FAULT_RESULTS=()

# ──────────────────────────────────────────────────────────────────────
# Cleanup
# ──────────────────────────────────────────────────────────────────────

# Tear down on any exit path: stop QEMU if it is still alive and remove
# the monitor socket. Registered for EXIT/INT/TERM below.
cleanup() {
    echo ""
    echo "[cleanup] Shutting down QEMU and removing socket..."
    if [ -n "$QEMU_PID" ] && kill -0 "$QEMU_PID" 2>/dev/null; then
        kill "$QEMU_PID" 2>/dev/null || true
        wait "$QEMU_PID" 2>/dev/null || true
    fi
    rm -f "$MONITOR_SOCK"
    echo "[cleanup] Done."
}
trap cleanup EXIT INT TERM
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
# Helpers
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
# Send one command to the QEMU monitor over its UNIX socket.
#   $1 = monitor command (e.g. "stop", "cont")
#   $2 = socat connect timeout in seconds (default: 5)
#
# FIX: never propagate failure. This script runs under `set -e`, so a
# failed socat (dead socket / crashed QEMU mid-run) would previously
# abort the entire chaos loop instead of letting the per-fault health
# check and the final liveness check report the problem.
monitor_cmd() {
    local cmd="$1"
    local timeout="${2:-5}"
    echo "$cmd" | socat - "UNIX-CONNECT:$MONITOR_SOCK,connect-timeout=$timeout" 2>/dev/null || true
}
|
||||
|
||||
# Number of lines currently in the UART log (0 if it does not exist yet).
log_line_count() {
    wc -l < "$UART_LOG" 2>/dev/null || echo 0
}

# Poll the UART log for a boot indicator, up to BOOT_TIMEOUT seconds.
# Returns 0 once a known boot marker appears, 1 on timeout.
wait_for_boot() {
    local tick
    for (( tick = 0; tick < BOOT_TIMEOUT; tick++ )); do
        if [ -f "$UART_LOG" ] && grep -qE "app_main|main_task|ESP32-S3|mock_csi" "$UART_LOG" 2>/dev/null; then
            return 0
        fi
        sleep 1
    done
    return 1
}
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────────
# Fault injection functions
# ──────────────────────────────────────────────────────────────────────

# Shared wrapper: run the Python fault injector against the live monitor
# socket, forwarding any extra arguments (e.g. --fault, --flash).
_injector() {
    python3 "$SCRIPT_DIR/inject_fault.py" --socket "$MONITOR_SOCK" "$@"
}

# WiFi disconnect/reconnect, approximated by a 2 s VM pause/resume.
# The firmware should absorb the wall-clock gap gracefully.
inject_wifi_kill() {
    echo " [inject] Pausing VM for 2s (simulating WiFi disconnect)..."
    monitor_cmd "stop"
    sleep 2
    echo " [inject] Resuming VM (simulating WiFi reconnect)..."
    monitor_cmd "cont"
}

# Ring-buffer stress: 1000 rapid mock-frame triggers to test backpressure.
inject_ring_flood() {
    echo " [inject] Flooding ring buffer with 1000 rapid frame triggers..."
    _injector --fault ring_flood
}

# Low-memory simulation (real heap metadata writes would need the GDB stub).
inject_heap_exhaust() {
    echo " [inject] Simulating heap pressure via VM pause..."
    _injector --fault heap_exhaust
}

# Starve FreeRTOS timer callbacks with a 500 ms pause; exercises watchdog
# recovery and timer resilience.
inject_timer_starvation() {
    echo " [inject] Starving timers (500ms pause)..."
    monitor_cmd "stop"
    sleep 0.5
    monitor_cmd "cont"
}

# Feed the parser a CSI frame with bad magic bytes; it must reject the
# frame without crashing.
inject_corrupt_frame() {
    echo " [inject] Injecting corrupt CSI frame (bad magic)..."
    _injector --fault corrupt_frame
}

# Scribble over the NVS flash region (offset 0x9000, via direct file
# write); the firmware should detect corruption and fall back to defaults.
inject_nvs_corrupt() {
    echo " [inject] Corrupting NVS flash region..."
    _injector --fault nvs_corrupt --flash "$FLASH_IMAGE"
}
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────────
# Pre-flight checks
# ──────────────────────────────────────────────────────────────────────

echo "=== QEMU Chaos Test Runner — ADR-061 Layer 9 ==="
echo "QEMU binary: $QEMU_BIN"
echo "Flash image: $FLASH_IMAGE"
echo "Boot timeout: ${BOOT_TIMEOUT}s"
echo "Fault wait: ${FAULT_WAIT}s"
echo "Faults: ${FAULTS[*]}"
echo ""

# QEMU, socat and python3 are all hard requirements; a missing tool is an
# infrastructure failure (exit 3), not a test failure.
if ! command -v "$QEMU_BIN" &>/dev/null; then
    echo "ERROR: QEMU binary not found: $QEMU_BIN"
    echo " Install: sudo apt install qemu-system-misc # Debian/Ubuntu"
    echo " Install: brew install qemu # macOS"
    echo " Or set QEMU_PATH to the qemu-system-xtensa binary."
    exit 3
fi

if ! command -v socat &>/dev/null; then
    echo "ERROR: socat not found (needed for QEMU monitor communication)."
    echo " Install: sudo apt install socat # Debian/Ubuntu"
    echo " Install: brew install socat # macOS"
    exit 3
fi

if ! command -v python3 &>/dev/null; then
    echo "ERROR: python3 not found (needed for fault injection scripts)."
    echo " Install: sudo apt install python3 # Debian/Ubuntu"
    echo " Install: brew install python # macOS"
    exit 3
fi

if [ ! -f "$FLASH_IMAGE" ]; then
    echo "ERROR: Flash image not found: $FLASH_IMAGE"
    echo "Run qemu-esp32s3-test.sh first to build the flash image."
    exit 3
fi

mkdir -p "$LOG_DIR"

# ──────────────────────────────────────────────────────────────────────
# Launch QEMU
# ──────────────────────────────────────────────────────────────────────

echo "── Launching QEMU ──"
echo ""

# Start from a clean monitor socket and an empty UART log.
rm -f "$MONITOR_SOCK"
> "$UART_LOG"

QEMU_ARGS=(
    -machine esp32s3
    -nographic
    -drive "file=$FLASH_IMAGE,if=mtd,format=raw"
    -serial "file:$UART_LOG"
    -no-reboot
    -monitor "unix:$MONITOR_SOCK,server,nowait"
)

"$QEMU_BIN" "${QEMU_ARGS[@]}" &
QEMU_PID=$!
echo "[qemu] PID=$QEMU_PID"

# Give QEMU up to 10 s to create the monitor socket.
waited=0
while [ ! -S "$MONITOR_SOCK" ] && [ "$waited" -lt 10 ]; do
    sleep 1
    waited=$((waited + 1))
done

if [ ! -S "$MONITOR_SOCK" ]; then
    echo "ERROR: QEMU monitor socket did not appear after 10s"
    exit 3
fi

# Boot is best-effort: a missing indicator is logged but not fatal, since
# the per-fault health checks will surface a dead firmware anyway.
echo "[boot] Waiting for firmware boot (up to ${BOOT_TIMEOUT}s)..."
if wait_for_boot; then
    echo "[boot] Firmware booted successfully."
else
    echo "[boot] No boot indicator found (continuing anyway)."
fi

# Let the firmware settle before the first fault.
echo "[boot] Stabilizing (3s)..."
sleep 3
echo ""
||||
# ──────────────────────────────────────────────────────────────────────
# Fault injection loop
# ──────────────────────────────────────────────────────────────────────

echo "── Fault Injection ──"
echo ""

# Worst (highest) per-fault exit code seen so far; becomes the script's exit.
MAX_EXIT=0

for fault in "${FAULTS[@]}"; do
    echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
    echo " Fault: $fault"
    echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"

    # Remember where the UART log ends so post-fault output can be isolated.
    pre_lines=$(log_line_count)

    # A dead emulator before injection is an infrastructure failure; stop here.
    if ! kill -0 "$QEMU_PID" 2>/dev/null; then
        echo " ERROR: QEMU process died before fault injection"
        FAULT_RESULTS+=("${fault}:3")
        MAX_EXIT=3
        break
    fi

    # Dispatch to the matching inject_* function.
    case "$fault" in
        wifi_kill)        inject_wifi_kill ;;
        ring_flood)       inject_ring_flood ;;
        heap_exhaust)     inject_heap_exhaust ;;
        timer_starvation) inject_timer_starvation ;;
        corrupt_frame)    inject_corrupt_frame ;;
        nvs_corrupt)      inject_nvs_corrupt ;;
        *)
            echo " ERROR: Unknown fault type: $fault"
            FAULT_RESULTS+=("${fault}:2")
            continue
            ;;
    esac

    # Give the firmware a recovery window before judging it.
    echo " [recovery] Waiting ${FAULT_WAIT}s for recovery..."
    sleep "$FAULT_WAIT"

    # Slice out the log lines produced since the fault (or the last 50
    # lines as context if the firmware emitted nothing new).
    post_lines=$(log_line_count)
    new_lines=$((post_lines - pre_lines))
    fault_log="$LOG_DIR/fault_${fault}.log"

    if [ "$new_lines" -gt 0 ]; then
        tail -n "$new_lines" "$UART_LOG" > "$fault_log"
    else
        tail -n 50 "$UART_LOG" > "$fault_log"
    fi

    echo " [check] Captured $new_lines new log lines"

    # The health checker's exit code is the per-fault verdict (0/1/2+).
    fault_exit=0
    python3 "$SCRIPT_DIR/check_health.py" \
        --log "$fault_log" \
        --after-fault "$fault" || fault_exit=$?

    case "$fault_exit" in
        0) echo " [result] HEALTHY — firmware recovered gracefully" ;;
        1) echo " [result] DEGRADED — firmware running but with issues" ;;
        *) echo " [result] UNHEALTHY — firmware in bad state" ;;
    esac

    FAULT_RESULTS+=("${fault}:${fault_exit}")
    if [ "$fault_exit" -gt "$MAX_EXIT" ]; then
        MAX_EXIT=$fault_exit
    fi

    echo ""
done

# ──────────────────────────────────────────────────────────────────────
# Summary
# ──────────────────────────────────────────────────────────────────────

echo "── Chaos Test Results ──"
echo ""

PASS=0
DEGRADED=0
FAIL=0

# Each entry is "name:exitcode"; split on the colon and tally.
for result in "${FAULT_RESULTS[@]}"; do
    name="${result%%:*}"
    code="${result##*:}"
    case "$code" in
        0) echo " [PASS] $name"; PASS=$((PASS + 1)) ;;
        1) echo " [DEGRADED] $name"; DEGRADED=$((DEGRADED + 1)) ;;
        *) echo " [FAIL] $name"; FAIL=$((FAIL + 1)) ;;
    esac
done

echo ""
echo " $PASS passed, $DEGRADED degraded, $FAIL failed out of ${#FAULTS[@]} faults"
echo ""

# A dead QEMU at the end is fatal regardless of per-fault verdicts.
if kill -0 "$QEMU_PID" 2>/dev/null; then
    echo " QEMU process survived all fault injections."
else
    echo " WARNING: QEMU process died during fault injection."
    if [ "$MAX_EXIT" -lt 3 ]; then
        MAX_EXIT=3
    fi
fi

echo ""
echo "=== Chaos Test Complete (exit code: $MAX_EXIT) ==="
exit "$MAX_EXIT"
|
||||
362
scripts/qemu-cli.sh
Normal file
362
scripts/qemu-cli.sh
Normal file
|
|
@ -0,0 +1,362 @@
|
|||
#!/usr/bin/env bash
# ============================================================================
# qemu-cli.sh — Unified QEMU ESP32-S3 testing CLI (ADR-061)
# Version: 1.0.0
#
# Single entry point for all QEMU testing operations.
# Run `qemu-cli.sh help` or `qemu-cli.sh --help` for usage.
# ============================================================================
set -euo pipefail

VERSION="1.0.0"

# --- Colors ----------------------------------------------------------------
# Only colorize when stdout is a terminal; otherwise emit plain text.
if [[ -t 1 ]]; then
    RED='\033[0;31m'; GREEN='\033[0;32m'; YELLOW='\033[1;33m'
    BLUE='\033[0;34m'; CYAN='\033[0;36m'; BOLD='\033[1m'; RST='\033[0m'
else
    RED=''; GREEN=''; YELLOW=''; BLUE=''; CYAN=''; BOLD=''; RST=''
fi

# --- Resolve paths ---------------------------------------------------------
# Everything is located relative to this script so the CLI works from any cwd.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
FIRMWARE_DIR="$PROJECT_ROOT/firmware/esp32-csi-node"
FUZZ_DIR="$FIRMWARE_DIR/test"
|
||||
|
||||
# --- Helpers ---------------------------------------------------------------
# Uniform, color-coded log prefixes; err goes to stderr, die exits 1.
info() { echo -e "${BLUE}[INFO]${RST} $*"; }
ok() { echo -e "${GREEN}[OK]${RST} $*"; }
warn() { echo -e "${YELLOW}[WARN]${RST} $*"; }
err() { echo -e "${RED}[ERROR]${RST} $*" >&2; }
die() { err "$@"; exit 1; }

# Abort with an install hint unless a usable QEMU binary can be located.
need_qemu() {
    detect_qemu >/dev/null 2>&1 || \
        die "QEMU not found. Install with: ${CYAN}qemu-cli.sh install${RST}"
}

# Locate qemu-system-xtensa and print its path; return 1 when absent.
# Search order: $QEMU_PATH override → PATH → Espressif default build dir.
detect_qemu() {
    # 1. Explicit env var
    if [[ -n "${QEMU_PATH:-}" ]] && [[ -x "$QEMU_PATH" ]]; then
        echo "$QEMU_PATH"
        return 0
    fi
    # 2. On PATH
    local on_path
    on_path="$(command -v qemu-system-xtensa 2>/dev/null || true)"
    if [[ -n "$on_path" ]]; then
        echo "$on_path"
        return 0
    fi
    # 3. Espressif default build location
    # NOTE(review): cmd_install's manual steps put binaries under
    # ~/.espressif/qemu/build/bin — confirm whether this fallback should
    # include the trailing /bin component.
    local espressif_qemu="$HOME/.espressif/qemu/build/qemu-system-xtensa"
    if [[ -x "$espressif_qemu" ]]; then
        echo "$espressif_qemu"
        return 0
    fi
    return 1
}

# First available Python interpreter; falls back to the literal "python3"
# so callers still produce a sensible "command not found" error.
detect_python() {
    command -v python3 2>/dev/null || command -v python 2>/dev/null || echo "python3"
}
|
||||
|
||||
# --- Command: help ---------------------------------------------------------
# Print the top-level usage screen. The heredoc is deliberately unquoted
# so the ${BOLD}/${CYAN}/${RST} color variables expand; the one literal
# `\$(...)` in the tab-completion example is escaped to survive expansion.
cmd_help() {
    cat <<EOF
${BOLD}qemu-cli.sh${RST} v${VERSION} — Unified QEMU ESP32-S3 testing CLI

${BOLD}USAGE${RST}
  qemu-cli.sh <command> [options]

${BOLD}COMMANDS${RST}
  ${CYAN}install${RST}              Install QEMU with ESP32-S3 support
  ${CYAN}test${RST}                 Run single-node firmware test
  ${CYAN}mesh${RST} [N]             Run multi-node mesh test (default: 3 nodes)
  ${CYAN}swarm${RST} [args]         Run swarm configurator (qemu_swarm.py)
  ${CYAN}snapshot${RST} [args]      Run snapshot-based tests
  ${CYAN}chaos${RST} [args]         Run chaos / fault injection tests
  ${CYAN}fuzz${RST} [--duration N]  Run all 3 fuzz targets (clang libFuzzer)
  ${CYAN}nvs${RST} [args]           Generate NVS test matrix
  ${CYAN}health${RST} <logfile>     Check firmware health from QEMU log
  ${CYAN}status${RST}               Show installation status and versions
  ${CYAN}help${RST}                 Show this help message

${BOLD}EXAMPLES${RST}
  qemu-cli.sh install                  # Install QEMU
  qemu-cli.sh test                     # Run basic firmware test
  qemu-cli.sh test --timeout 120       # Test with longer timeout
  qemu-cli.sh swarm --preset smoke     # Quick swarm test
  qemu-cli.sh swarm --preset standard  # Standard 3-node test
  qemu-cli.sh swarm --list-presets     # List available presets
  qemu-cli.sh mesh 3                   # 3-node mesh test
  qemu-cli.sh chaos                    # Run chaos tests
  qemu-cli.sh fuzz --duration 60       # Fuzz for 60 seconds
  qemu-cli.sh nvs --list               # List NVS configs
  qemu-cli.sh health build/qemu_output.log
  qemu-cli.sh status                   # Show what's installed

${BOLD}TAB COMPLETION${RST}
  Source the completions in your shell:
    eval "\$(qemu-cli.sh --completions)"

${BOLD}ENVIRONMENT${RST}
  QEMU_PATH       Path to qemu-system-xtensa binary (auto-detected)
  FUZZ_DURATION   Override fuzz duration in seconds (default: 30)
  FUZZ_JOBS       Parallel fuzzing jobs (default: 1)

EOF
}
|
||||
|
||||
# --- Command: install ------------------------------------------------------
# Delegate to scripts/install-qemu.sh when it exists; otherwise print the
# manual build steps. The heredoc escapes \$HOME and \$(nproc) so they are
# shown literally for the user to run, not expanded here.
cmd_install() {
    if [[ "${1:-}" == "-h" || "${1:-}" == "--help" ]]; then
        echo "Usage: qemu-cli.sh install"
        echo "Install QEMU with Espressif ESP32-S3 support."
        return 0
    fi
    local installer="$SCRIPT_DIR/install-qemu.sh"
    if [[ -f "$installer" ]]; then
        info "Running install-qemu.sh ..."
        bash "$installer" "$@"
    else
        info "No install-qemu.sh found. Showing manual install steps."
        cat <<EOF

${BOLD}Manual QEMU ESP32-S3 installation:${RST}
  1. git clone https://github.com/espressif/qemu.git ~/.espressif/qemu-src
  2. cd ~/.espressif/qemu-src
  3. ./configure --target-list=xtensa-softmmu --prefix=\$HOME/.espressif/qemu/build \\
       --enable-gcrypt --disable-bsd-user --disable-docs
  4. make -j\$(nproc) && make install
  5. Add to PATH: export PATH="\$HOME/.espressif/qemu/build/bin:\$PATH"

EOF
    fi
}
|
||||
|
||||
# --- Command: test ----------------------------------------------------------
# Thin wrapper around qemu-esp32s3-test.sh; all arguments pass through.
cmd_test() {
    case "${1:-}" in
        -h|--help)
            echo "Usage: qemu-cli.sh test [--timeout N] [extra args...]"
            echo "Run single-node QEMU ESP32-S3 firmware test."
            return 0
            ;;
    esac
    need_qemu
    info "Running single-node firmware test ..."
    bash "$SCRIPT_DIR/qemu-esp32s3-test.sh" "$@"
}
|
||||
|
||||
# --- Command: mesh ----------------------------------------------------------
# Run the multi-node mesh test. The optional first argument is the node
# count (default: 3); all remaining arguments pass through untouched.
#
# FIX: only consume $1 as the node count when it is actually numeric.
# Previously `nodes="${1:-3}"` swallowed whatever came first, so
# `qemu-cli.sh mesh --some-flag` silently used "--some-flag" as the node
# count and dropped it from the forwarded arguments.
cmd_mesh() {
    if [[ "${1:-}" == "-h" || "${1:-}" == "--help" ]]; then
        echo "Usage: qemu-cli.sh mesh [N] [extra args...]"
        echo "Run multi-node mesh test. N = number of nodes (default: 3)."
        return 0
    fi
    need_qemu
    local nodes=3
    if [[ "${1:-}" =~ ^[0-9]+$ ]]; then
        nodes="$1"
        shift
    fi
    info "Running ${nodes}-node mesh test ..."
    bash "$SCRIPT_DIR/qemu-mesh-test.sh" "$nodes" "$@"
}
|
||||
|
||||
# --- Command: swarm ---------------------------------------------------------
# Delegate to the Python swarm configurator; all arguments pass through.
cmd_swarm() {
    case "${1:-}" in
        -h|--help)
            echo "Usage: qemu-cli.sh swarm [--preset NAME] [--list-presets] [args...]"
            echo "Run QEMU swarm configurator (qemu_swarm.py)."
            echo ""
            echo "Presets: smoke, standard, full, stress"
            echo "List: qemu-cli.sh swarm --list-presets"
            return 0
            ;;
    esac
    need_qemu
    local py
    py="$(detect_python)"
    info "Running swarm configurator ..."
    "$py" "$SCRIPT_DIR/qemu_swarm.py" "$@"
}

# --- Command: snapshot ------------------------------------------------------
# Delegate to the snapshot test runner; all arguments pass through.
cmd_snapshot() {
    case "${1:-}" in
        -h|--help)
            echo "Usage: qemu-cli.sh snapshot [args...]"
            echo "Run snapshot-based QEMU tests."
            return 0
            ;;
    esac
    need_qemu
    info "Running snapshot tests ..."
    bash "$SCRIPT_DIR/qemu-snapshot-test.sh" "$@"
}

# --- Command: chaos ---------------------------------------------------------
# Delegate to the chaos / fault-injection runner; all arguments pass through.
cmd_chaos() {
    case "${1:-}" in
        -h|--help)
            echo "Usage: qemu-cli.sh chaos [args...]"
            echo "Run chaos / fault injection tests."
            return 0
            ;;
    esac
    need_qemu
    info "Running chaos tests ..."
    bash "$SCRIPT_DIR/qemu-chaos-test.sh" "$@"
}
|
||||
|
||||
# --- Command: fuzz ----------------------------------------------------------
# Build and run all 3 libFuzzer targets via the firmware test/ Makefile.
# Duration comes from --duration, falling back to $FUZZ_DURATION (30 s).
#
# FIXES:
#  * A bare `--duration` with no value previously died under `set -u`
#    with an unhelpful unbound-variable error on "$2"; now reported cleanly.
#  * The documented FUZZ_JOBS env var was never forwarded to make (only
#    FUZZ_DURATION was). It is now passed as a command-line make variable,
#    which — unlike an inherited env var — also overrides any default
#    assignment inside the Makefile.
cmd_fuzz() {
    local duration="${FUZZ_DURATION:-30}"
    local jobs="${FUZZ_JOBS:-1}"
    if [[ "${1:-}" == "-h" || "${1:-}" == "--help" ]]; then
        echo "Usage: qemu-cli.sh fuzz [--duration N]"
        echo "Build and run all 3 fuzz targets (clang libFuzzer)."
        echo "Requires: clang with libFuzzer support."
        return 0
    fi
    while [[ $# -gt 0 ]]; do
        case "$1" in
            --duration)
                [[ $# -ge 2 ]] || die "--duration requires a value"
                duration="$2"
                shift 2
                ;;
            *) warn "Unknown fuzz option: $1"; shift ;;
        esac
    done
    if ! command -v clang >/dev/null 2>&1; then
        die "clang not found. Fuzz targets require clang with libFuzzer."
    fi
    info "Building and running fuzz targets (${duration}s each) ..."
    make -C "$FUZZ_DIR" run_all FUZZ_DURATION="$duration" FUZZ_JOBS="$jobs"
    ok "Fuzz testing complete."
}
|
||||
|
||||
# --- Command: nvs -----------------------------------------------------------
# Delegate to the NVS matrix generator (pure Python; no QEMU required).
cmd_nvs() {
    case "${1:-}" in
        -h|--help)
            echo "Usage: qemu-cli.sh nvs [--list] [args...]"
            echo "Generate NVS test configuration matrix."
            return 0
            ;;
    esac
    local py
    py="$(detect_python)"
    info "Running NVS matrix generator ..."
    "$py" "$SCRIPT_DIR/generate_nvs_matrix.py" "$@"
}

# --- Command: health --------------------------------------------------------
# Analyze a QEMU UART log with check_health.py ("manual" fault context).
cmd_health() {
    case "${1:-}" in
        -h|--help)
            echo "Usage: qemu-cli.sh health <logfile>"
            echo "Analyze firmware health from a QEMU output log."
            return 0
            ;;
    esac
    local logfile="${1:-}"
    [[ -n "$logfile" ]] || die "Usage: qemu-cli.sh health <logfile>"
    [[ -f "$logfile" ]] || die "Log file not found: $logfile"
    local py
    py="$(detect_python)"
    info "Analyzing health from: $logfile"
    "$py" "$SCRIPT_DIR/check_health.py" --log "$logfile" --after-fault manual
}
|
||||
|
||||
# --- Command: status --------------------------------------------------------
# Report what is installed/built (QEMU, ESP-IDF, Python, clang, firmware,
# swarm presets). Purely informational: errexit is disabled for the body
# so a missing tool never aborts the report.
cmd_status() {
    set +e
    echo -e "${BOLD}=== QEMU ESP32-S3 Testing Status ===${RST}"
    echo ""

    # QEMU
    local qemu_bin
    qemu_bin="$(detect_qemu 2>/dev/null)"
    if [[ -n "$qemu_bin" ]]; then
        local qemu_ver
        qemu_ver="$("$qemu_bin" --version 2>/dev/null | head -1 || echo "unknown")"
        ok "QEMU: ${GREEN}installed${RST} ($qemu_ver)"
        echo " Path: $qemu_bin"
    else
        warn "QEMU: ${YELLOW}not found${RST} (run: qemu-cli.sh install)"
    fi

    # ESP-IDF
    if [[ -n "${IDF_PATH:-}" ]] && [[ -d "$IDF_PATH" ]]; then
        ok "ESP-IDF: ${GREEN}available${RST} ($IDF_PATH)"
    else
        warn "ESP-IDF: ${YELLOW}IDF_PATH not set${RST}"
    fi

    # Python
    local py; py="$(detect_python)"
    if command -v "$py" >/dev/null 2>&1; then
        ok "Python: ${GREEN}$("$py" --version 2>&1)${RST}"
    else
        warn "Python: ${YELLOW}not found${RST}"
    fi

    # Clang (for fuzz)
    if command -v clang >/dev/null 2>&1; then
        ok "Clang: ${GREEN}$(clang --version 2>/dev/null | head -1)${RST}"
    else
        warn "Clang: ${YELLOW}not found${RST} (needed for fuzz targets only)"
    fi

    # Firmware binary (GNU stat first, BSD/macOS stat as fallback)
    local fw_bin="$FIRMWARE_DIR/build/esp32-csi-node.bin"
    if [[ -f "$fw_bin" ]]; then
        local fw_size
        fw_size="$(stat -c%s "$fw_bin" 2>/dev/null || stat -f%z "$fw_bin" 2>/dev/null || echo "?")"
        ok "Firmware: ${GREEN}built${RST} ($fw_bin, ${fw_size} bytes)"
    else
        warn "Firmware: ${YELLOW}not built${RST} (expected at $fw_bin)"
    fi

    # Swarm presets
    local preset_dir="$SCRIPT_DIR/swarm_presets"
    if [[ -d "$preset_dir" ]]; then
        local presets
        # FIX: the previous pipeline used GNU-only `\|` alternation in basic
        # regex (silently fails on BSD/macOS sed, which this script supports)
        # and `tr '\n' ', '` (tr only uses the first char of SET2, so names
        # were joined with bare commas). Use two plain BRE substitutions and
        # an explicit ", " join instead.
        presets="$(ls "$preset_dir"/ 2>/dev/null | \
            sed -e 's/\.yaml$//' -e 's/\.json$//' | sort -u | \
            tr '\n' ',' | sed -e 's/,$//' -e 's/,/, /g')"
        if [[ -n "$presets" ]]; then
            ok "Presets: ${GREEN}${presets}${RST}"
        else
            warn "Presets: ${YELLOW}none found${RST} in $preset_dir"
        fi
    fi

    echo ""
    set -e
}
|
||||
|
||||
# --- Completions output -----------------------------------------------------
# Emit a bash completion function for `eval "$(qemu-cli.sh --completions)"`.
# The heredoc is quoted so the completion code is printed verbatim.
print_completions() {
    cat <<'COMP'
_qemu_cli_completions() {
    local cmds="install test mesh swarm snapshot chaos fuzz nvs health status help"
    local cur="${COMP_WORDS[COMP_CWORD]}"
    if [[ $COMP_CWORD -eq 1 ]]; then
        COMPREPLY=( $(compgen -W "$cmds" -- "$cur") )
    fi
}
complete -F _qemu_cli_completions qemu-cli.sh
COMP
}

# --- Main dispatch ----------------------------------------------------------
# Route the first argument to its cmd_* handler; everything after it is
# forwarded untouched. No argument defaults to the help screen.
main() {
    local cmd="${1:-help}"
    shift 2>/dev/null || true

    case "$cmd" in
        install)        cmd_install "$@" ;;
        test)           cmd_test "$@" ;;
        mesh)           cmd_mesh "$@" ;;
        swarm)          cmd_swarm "$@" ;;
        snapshot)       cmd_snapshot "$@" ;;
        chaos)          cmd_chaos "$@" ;;
        fuzz)           cmd_fuzz "$@" ;;
        nvs)            cmd_nvs "$@" ;;
        health)         cmd_health "$@" ;;
        status)         cmd_status "$@" ;;
        help|-h|--help) cmd_help ;;
        --version)      echo "qemu-cli.sh v${VERSION}" ;;
        --completions)  print_completions ;;
        *)
            err "Unknown command: ${BOLD}${cmd}${RST}"
            echo ""
            cmd_help
            exit 1
            ;;
    esac
}

main "$@"
|
||||
212
scripts/qemu-esp32s3-test.sh
Executable file
212
scripts/qemu-esp32s3-test.sh
Executable file
|
|
@ -0,0 +1,212 @@
|
|||
#!/bin/bash
|
||||
# QEMU ESP32-S3 Firmware Test Runner (ADR-061)
|
||||
#
|
||||
# Builds the firmware with mock CSI enabled, merges binaries into a single
|
||||
# flash image, optionally injects a pre-provisioned NVS partition, runs the
|
||||
# image under QEMU with a timeout, and validates the UART output.
|
||||
#
|
||||
# Environment variables:
|
||||
# QEMU_PATH - Path to qemu-system-xtensa (default: qemu-system-xtensa)
|
||||
# QEMU_TIMEOUT - Timeout in seconds (default: 60)
|
||||
# SKIP_BUILD - Set to "1" to skip the idf.py build step
|
||||
# NVS_BIN - Path to a pre-built NVS binary to inject (optional)
|
||||
#
|
||||
# Exit codes:
|
||||
# 0 PASS — all checks passed
|
||||
# 1 WARN — non-critical checks failed
|
||||
# 2 FAIL — critical checks failed
|
||||
# 3 FATAL — build error, crash, or infrastructure failure
|
||||
|
||||
# ── Help ──────────────────────────────────────────────────────────────
# Print usage on stdout and exit 0 (reached only via -h/--help below).
usage() {
    cat <<'HELP'
Usage: qemu-esp32s3-test.sh [OPTIONS]

Build ESP32-S3 firmware with mock CSI, merge binaries into a single flash
image, run under QEMU with a timeout, and validate the UART output.

Options:
  -h, --help     Show this help message and exit

Environment variables:
  QEMU_PATH      Path to qemu-system-xtensa (default: qemu-system-xtensa)
  QEMU_TIMEOUT   Timeout in seconds (default: 60)
  SKIP_BUILD     Set to "1" to skip idf.py build (default: unset)
  NVS_BIN        Path to pre-built NVS binary (optional)
  QEMU_NET       Set to "0" to disable networking (default: 1)

Examples:
  ./qemu-esp32s3-test.sh
  SKIP_BUILD=1 ./qemu-esp32s3-test.sh
  QEMU_PATH=/opt/qemu/bin/qemu-system-xtensa QEMU_TIMEOUT=120 ./qemu-esp32s3-test.sh

Exit codes:
  0  PASS  — all checks passed
  1  WARN  — non-critical checks failed
  2  FAIL  — critical checks failed
  3  FATAL — build error, crash, or infrastructure failure
HELP
    exit 0
}

# Handle -h/--help before `set -euo pipefail` is enabled further down.
case "${1:-}" in -h|--help) usage ;; esac
|
||||
|
||||
set -euo pipefail

# Resolve the repository layout relative to this script's location.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"

FIRMWARE_DIR="$PROJECT_ROOT/firmware/esp32-csi-node"
BUILD_DIR="$FIRMWARE_DIR/build"
QEMU_BIN="${QEMU_PATH:-qemu-system-xtensa}"
FLASH_IMAGE="$BUILD_DIR/qemu_flash.bin"   # merged single-image output
LOG_FILE="$BUILD_DIR/qemu_output.log"     # captured UART output for validation
TIMEOUT_SEC="${QEMU_TIMEOUT:-60}"

echo "=== QEMU ESP32-S3 Firmware Test (ADR-061) ==="
echo "Firmware dir: $FIRMWARE_DIR"
echo "QEMU binary: $QEMU_BIN"
echo "Timeout: ${TIMEOUT_SEC}s"
echo ""

# ── Prerequisite checks ───────────────────────────────────────────────
if ! command -v "$QEMU_BIN" &>/dev/null; then
    echo "ERROR: QEMU binary not found: $QEMU_BIN"
    echo " Install: sudo apt install qemu-system-misc # Debian/Ubuntu"
    echo " Install: brew install qemu # macOS"
    echo " Or set QEMU_PATH to the qemu-system-xtensa binary."
    exit 3
fi

if ! command -v python3 &>/dev/null; then
    echo "ERROR: python3 not found."
    echo " Install: sudo apt install python3 # Debian/Ubuntu"
    echo " Install: brew install python # macOS"
    exit 3
fi

# NOTE(review): `&>/dev/null 2>&1` is redundant (&> already redirects
# stderr); harmless, left unchanged here.
if ! python3 -m esptool version &>/dev/null 2>&1; then
    echo "ERROR: esptool not found (needed to merge flash binaries)."
    echo " Install: pip install esptool"
    exit 3
fi

# ── SKIP_BUILD precheck ──────────────────────────────────────────────
if [ "${SKIP_BUILD:-}" = "1" ] && [ ! -f "$BUILD_DIR/esp32-csi-node.bin" ]; then
    echo "ERROR: SKIP_BUILD=1 but flash image not found: $BUILD_DIR/esp32-csi-node.bin"
    echo "Build the firmware first: ./qemu-esp32s3-test.sh (without SKIP_BUILD)"
    echo "Or unset SKIP_BUILD to build automatically."
    exit 3
fi

# 1. Build with mock CSI enabled (skip if already built)
if [ "${SKIP_BUILD:-}" != "1" ]; then
    echo "[1/4] Building firmware (mock CSI mode)..."
    idf.py -C "$FIRMWARE_DIR" \
        -D SDKCONFIG_DEFAULTS="sdkconfig.defaults;sdkconfig.qemu" \
        build
    echo ""
else
    echo "[1/4] Skipping build (SKIP_BUILD=1)"
    echo ""
fi

# Verify build artifacts exist
for artifact in \
    "$BUILD_DIR/bootloader/bootloader.bin" \
    "$BUILD_DIR/partition_table/partition-table.bin" \
    "$BUILD_DIR/esp32-csi-node.bin"; do
    if [ ! -f "$artifact" ]; then
        echo "ERROR: Build artifact not found: $artifact"
        echo "Run without SKIP_BUILD=1 or build the firmware first."
        exit 3
    fi
done

# 2. Merge binaries into single flash image
echo "[2/4] Creating merged flash image..."

# Check for ota_data_initial.bin; some builds don't produce it
OTA_DATA_ARGS=""
if [ -f "$BUILD_DIR/ota_data_initial.bin" ]; then
    OTA_DATA_ARGS="0xf000 $BUILD_DIR/ota_data_initial.bin"
fi

# $OTA_DATA_ARGS is deliberately unquoted so that an empty value expands
# to zero arguments (word splitting); paths with spaces would break here.
python3 -m esptool --chip esp32s3 merge_bin -o "$FLASH_IMAGE" \
    --flash_mode dio --flash_freq 80m --flash_size 8MB \
    0x0 "$BUILD_DIR/bootloader/bootloader.bin" \
    0x8000 "$BUILD_DIR/partition_table/partition-table.bin" \
    $OTA_DATA_ARGS \
    0x20000 "$BUILD_DIR/esp32-csi-node.bin"

echo "Flash image: $FLASH_IMAGE ($(stat -c%s "$FLASH_IMAGE" 2>/dev/null || stat -f%z "$FLASH_IMAGE") bytes)"

# 2b. Optionally inject pre-provisioned NVS partition
NVS_FILE="${NVS_BIN:-$BUILD_DIR/nvs_test.bin}"
if [ -f "$NVS_FILE" ]; then
    echo "[2b] Injecting NVS partition from: $NVS_FILE"
    # NVS partition offset = 0x9000 = 36864
    dd if="$NVS_FILE" of="$FLASH_IMAGE" \
        bs=1 seek=$((0x9000)) conv=notrunc 2>/dev/null
    echo "NVS injected ($(stat -c%s "$NVS_FILE" 2>/dev/null || stat -f%z "$NVS_FILE") bytes at 0x9000)"
fi
echo ""

# 3. Run in QEMU with timeout, capture UART output
echo "[3/4] Running QEMU (timeout: ${TIMEOUT_SEC}s)..."
echo "------- QEMU UART output -------"

# Use timeout command; fall back to gtimeout on macOS
TIMEOUT_CMD="timeout"
if ! command -v timeout &>/dev/null; then
    if command -v gtimeout &>/dev/null; then
        TIMEOUT_CMD="gtimeout"
    else
        echo "WARNING: 'timeout' command not found. QEMU may run indefinitely."
        TIMEOUT_CMD=""
    fi
fi

QEMU_EXIT=0

# Common QEMU arguments
QEMU_ARGS=(
    -machine esp32s3
    -nographic
    -drive "file=$FLASH_IMAGE,if=mtd,format=raw"
    -serial mon:stdio
    -no-reboot
)

# Enable SLIRP user-mode networking for UDP if available
if [ "${QEMU_NET:-1}" != "0" ]; then
    QEMU_ARGS+=(-nic "user,model=open_eth,net=10.0.2.0/24,host=10.0.2.2")
fi

# With `pipefail` active, `|| QEMU_EXIT=$?` captures QEMU's (or timeout's)
# nonzero status even though QEMU is on the left side of the pipe.
if [ -n "$TIMEOUT_CMD" ]; then
    $TIMEOUT_CMD "$TIMEOUT_SEC" "$QEMU_BIN" "${QEMU_ARGS[@]}" \
        2>&1 | tee "$LOG_FILE" || QEMU_EXIT=$?
else
    "$QEMU_BIN" "${QEMU_ARGS[@]}" \
        2>&1 | tee "$LOG_FILE" || QEMU_EXIT=$?
fi

echo "------- End QEMU output -------"
echo ""

# timeout returns 124 when the process is killed by timeout — that's expected
if [ "$QEMU_EXIT" -eq 124 ]; then
    echo "QEMU exited via timeout (expected for firmware that loops forever)."
elif [ "$QEMU_EXIT" -ne 0 ]; then
    echo "WARNING: QEMU exited with code $QEMU_EXIT"
fi
echo ""
|
||||
|
||||
# 4. Validate expected output
echo "[4/4] Validating output..."
# Capture the validator's exit code without tripping `set -e`: a plain
# invocation would abort the whole script on any nonzero validator exit,
# so VALIDATE_EXIT would never be assigned for WARN (1) / FAIL (2)
# results and the documented exit codes would not propagate.
VALIDATE_EXIT=0
python3 "$SCRIPT_DIR/validate_qemu_output.py" "$LOG_FILE" || VALIDATE_EXIT=$?

echo ""
echo "=== Test Complete (exit code: $VALIDATE_EXIT) ==="
exit $VALIDATE_EXIT
|
||||
414
scripts/qemu-mesh-test.sh
Normal file
414
scripts/qemu-mesh-test.sh
Normal file
|
|
@ -0,0 +1,414 @@
|
|||
#!/bin/bash
|
||||
# QEMU ESP32-S3 Multi-Node Mesh Simulation (ADR-061 Layer 3)
|
||||
#
|
||||
# Spawns N ESP32-S3 QEMU instances connected via a Linux bridge, each with
|
||||
# unique NVS provisioning (node ID, TDM slot), and a Rust aggregator that
|
||||
# collects frames from all nodes. After a configurable timeout the script
|
||||
# tears everything down and runs validate_mesh_test.py.
|
||||
#
|
||||
# Usage:
|
||||
# sudo ./qemu-mesh-test.sh [N_NODES]
|
||||
#
|
||||
# Environment variables:
|
||||
# QEMU_PATH - Path to qemu-system-xtensa (default: qemu-system-xtensa)
|
||||
# QEMU_TIMEOUT - Timeout in seconds (default: 45)
|
||||
# MESH_TIMEOUT - Deprecated alias for QEMU_TIMEOUT
|
||||
# SKIP_BUILD - Set to "1" to skip the idf.py build step
|
||||
# BRIDGE_NAME - Bridge interface name (default: qemu-br0)
|
||||
# BRIDGE_SUBNET - Bridge IP/mask (default: 10.0.0.1/24)
|
||||
# AGGREGATOR_PORT - UDP port the aggregator listens on (default: 5005)
|
||||
#
|
||||
# Prerequisites:
|
||||
# - Linux with bridge-utils and iproute2
|
||||
# - QEMU with ESP32-S3 machine support (qemu-system-xtensa)
|
||||
# - provision.py capable of --dry-run NVS generation
|
||||
# - Rust workspace with wifi-densepose-hardware crate (aggregator binary)
|
||||
#
|
||||
# Exit codes:
|
||||
# 0 PASS — all checks passed
|
||||
# 1 WARN — non-critical checks failed
|
||||
# 2 FAIL — critical checks failed
|
||||
# 3 FATAL — build error, crash, or infrastructure failure
|
||||
|
||||
# ── Help ──────────────────────────────────────────────────────────────
# Print usage on stdout and exit 0 (reached only via -h/--help below).
usage() {
    cat <<'HELP'
Usage: sudo ./qemu-mesh-test.sh [OPTIONS] [N_NODES]

Spawn N ESP32-S3 QEMU instances connected via a Linux bridge, each with
unique NVS provisioning (node ID, TDM slot), and a Rust aggregator that
collects frames from all nodes.

NOTE: Requires root/sudo for TAP/bridge creation.

Options:
  -h, --help       Show this help message and exit

Positional:
  N_NODES          Number of mesh nodes (default: 3, minimum: 2)

Environment variables:
  QEMU_PATH        Path to qemu-system-xtensa (default: qemu-system-xtensa)
  QEMU_TIMEOUT     Timeout in seconds (default: 45)
  MESH_TIMEOUT     Alias for QEMU_TIMEOUT (deprecated)(default: 45)
  SKIP_BUILD       Set to "1" to skip idf.py build (default: unset)
  BRIDGE_NAME      Bridge interface name (default: qemu-br0)
  BRIDGE_SUBNET    Bridge IP/mask (default: 10.0.0.1/24)
  AGGREGATOR_PORT  UDP port for aggregator (default: 5005)

Examples:
  sudo ./qemu-mesh-test.sh
  sudo QEMU_TIMEOUT=90 ./qemu-mesh-test.sh 5
  sudo SKIP_BUILD=1 ./qemu-mesh-test.sh 4

Exit codes:
  0  PASS  — all checks passed
  1  WARN  — non-critical checks failed
  2  FAIL  — critical checks failed
  3  FATAL — build error, crash, or infrastructure failure
HELP
    exit 0
}

# Handle -h/--help before `set -euo pipefail` is enabled further down.
case "${1:-}" in -h|--help) usage ;; esac
|
||||
|
||||
set -euo pipefail

# ---------------------------------------------------------------------------
# Paths
# ---------------------------------------------------------------------------
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"

FIRMWARE_DIR="$PROJECT_ROOT/firmware/esp32-csi-node"
BUILD_DIR="$FIRMWARE_DIR/build"
RUST_DIR="$PROJECT_ROOT/rust-port/wifi-densepose-rs"
PROVISION_SCRIPT="$FIRMWARE_DIR/provision.py"
VALIDATE_SCRIPT="$SCRIPT_DIR/validate_mesh_test.py"

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
N_NODES="${1:-3}"
QEMU_BIN="${QEMU_PATH:-qemu-system-xtensa}"
# QEMU_TIMEOUT wins; MESH_TIMEOUT is honored as a deprecated alias.
TIMEOUT="${QEMU_TIMEOUT:-${MESH_TIMEOUT:-45}}"
BRIDGE="${BRIDGE_NAME:-qemu-br0}"
BRIDGE_IP="${BRIDGE_SUBNET:-10.0.0.1/24}"
AGG_PORT="${AGGREGATOR_PORT:-5005}"
RESULTS_FILE="$BUILD_DIR/mesh_test_results.json"

echo "=== QEMU Multi-Node Mesh Test (ADR-061 Layer 3) ==="
echo "Nodes: $N_NODES"
echo "Bridge: $BRIDGE ($BRIDGE_IP)"
echo "Aggregator: 0.0.0.0:$AGG_PORT"
echo "QEMU binary: $QEMU_BIN"
echo "Timeout: ${TIMEOUT}s"
echo ""

# ---------------------------------------------------------------------------
# Preflight checks
# ---------------------------------------------------------------------------
if [ "$N_NODES" -lt 2 ]; then
    echo "ERROR: Need at least 2 nodes for mesh simulation (got $N_NODES)"
    exit 3
fi

if ! command -v "$QEMU_BIN" &>/dev/null; then
    echo "ERROR: QEMU binary not found: $QEMU_BIN"
    echo " Install: sudo apt install qemu-system-misc # Debian/Ubuntu"
    echo " Install: brew install qemu # macOS"
    echo " Or set QEMU_PATH to the qemu-system-xtensa binary."
    exit 3
fi

if ! command -v python3 &>/dev/null; then
    echo "ERROR: python3 not found."
    echo " Install: sudo apt install python3 # Debian/Ubuntu"
    echo " Install: brew install python # macOS"
    exit 3
fi

if ! command -v ip &>/dev/null; then
    echo "ERROR: 'ip' command not found."
    echo " Install: sudo apt install iproute2 # Debian/Ubuntu"
    exit 3
fi

# brctl is optional; modern iproute2 can create bridges on its own.
if ! command -v brctl &>/dev/null && ! ip link help bridge &>/dev/null 2>&1; then
    echo "WARNING: bridge-utils not found; will use 'ip link' for bridge creation."
fi

if command -v socat &>/dev/null; then
    true # optional, available
else
    echo "NOTE: socat not found (optional, used for advanced monitor communication)."
    echo " Install: sudo apt install socat # Debian/Ubuntu"
    echo " Install: brew install socat # macOS"
fi

if ! command -v cargo &>/dev/null; then
    echo "ERROR: cargo not found (needed to build the Rust aggregator)."
    echo " Install: curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh"
    exit 3
fi

# TAP/bridge manipulation requires CAP_NET_ADMIN; simplest check is uid 0.
if [ "$(id -u)" -ne 0 ]; then
    echo "ERROR: This script must be run as root (for TAP/bridge creation)."
    echo "Usage: sudo $0 [N_NODES]"
    exit 3
fi

mkdir -p "$BUILD_DIR"
|
||||
|
||||
# ---------------------------------------------------------------------------
# Cleanup trap — runs on EXIT regardless of success/failure
# ---------------------------------------------------------------------------
QEMU_PIDS=()   # PIDs of spawned QEMU node processes
AGG_PID=""     # PID of the Rust aggregator (empty until started)

# Tear down child processes and network interfaces. Every step tolerates
# already-gone resources, so it is safe to run at any point of the script.
cleanup() {
    echo ""
    echo "--- Cleaning up ---"

    # Kill QEMU instances.
    # Guard the expansion: under `set -u`, "${QEMU_PIDS[@]}" on an EMPTY
    # array raises "unbound variable" on bash < 4.4, which would abort
    # cleanup before the bridge/TAP teardown below ever runs.
    if [ "${#QEMU_PIDS[@]}" -gt 0 ]; then
        for pid in "${QEMU_PIDS[@]}"; do
            if kill -0 "$pid" 2>/dev/null; then
                kill "$pid" 2>/dev/null || true
                wait "$pid" 2>/dev/null || true
            fi
        done
    fi

    # Kill aggregator
    if [ -n "$AGG_PID" ] && kill -0 "$AGG_PID" 2>/dev/null; then
        kill "$AGG_PID" 2>/dev/null || true
        wait "$AGG_PID" 2>/dev/null || true
    fi

    # Tear down TAP interfaces and bridge
    local i tap
    for i in $(seq 0 $((N_NODES - 1))); do
        tap="tap${i}"
        if ip link show "$tap" &>/dev/null; then
            ip link set "$tap" down 2>/dev/null || true
            ip link delete "$tap" 2>/dev/null || true
        fi
    done

    if ip link show "$BRIDGE" &>/dev/null; then
        ip link set "$BRIDGE" down 2>/dev/null || true
        ip link delete "$BRIDGE" type bridge 2>/dev/null || true
    fi

    echo "Cleanup complete."
}

trap cleanup EXIT
|
||||
|
||||
# ---------------------------------------------------------------------------
# 1. Build flash image (if not already built)
# ---------------------------------------------------------------------------
if [ "${SKIP_BUILD:-}" != "1" ]; then
    echo "[1/6] Building firmware (mock CSI + QEMU overlay)..."
    idf.py -C "$FIRMWARE_DIR" \
        -D SDKCONFIG_DEFAULTS="sdkconfig.defaults;sdkconfig.qemu" \
        build
    echo ""
else
    echo "[1/6] Skipping build (SKIP_BUILD=1)"
    echo ""
fi

# Verify build artifacts
FLASH_IMAGE_BASE="$BUILD_DIR/qemu_flash_base.bin"
for artifact in \
    "$BUILD_DIR/bootloader/bootloader.bin" \
    "$BUILD_DIR/partition_table/partition-table.bin" \
    "$BUILD_DIR/esp32-csi-node.bin"; do
    if [ ! -f "$artifact" ]; then
        echo "ERROR: Build artifact not found: $artifact"
        echo "Run without SKIP_BUILD=1 or build the firmware first."
        exit 3
    fi
done

# Merge into base flash image
echo "[2/6] Creating base flash image..."
OTA_DATA_ARGS=""
if [ -f "$BUILD_DIR/ota_data_initial.bin" ]; then
    OTA_DATA_ARGS="0xf000 $BUILD_DIR/ota_data_initial.bin"
fi

# $OTA_DATA_ARGS intentionally unquoted: empty expands to no arguments.
python3 -m esptool --chip esp32s3 merge_bin -o "$FLASH_IMAGE_BASE" \
    --flash_mode dio --flash_freq 80m --flash_size 8MB \
    0x0 "$BUILD_DIR/bootloader/bootloader.bin" \
    0x8000 "$BUILD_DIR/partition_table/partition-table.bin" \
    $OTA_DATA_ARGS \
    0x20000 "$BUILD_DIR/esp32-csi-node.bin"

echo "Base flash image: $FLASH_IMAGE_BASE ($(stat -c%s "$FLASH_IMAGE_BASE" 2>/dev/null || stat -f%z "$FLASH_IMAGE_BASE") bytes)"
echo ""

# ---------------------------------------------------------------------------
# 3. Generate per-node NVS and flash images
# ---------------------------------------------------------------------------
echo "[3/6] Generating per-node NVS images..."

# Extract the aggregator IP from the bridge subnet (first host)
AGG_IP="${BRIDGE_IP%%/*}"

for i in $(seq 0 $((N_NODES - 1))); do
    NVS_BIN="$BUILD_DIR/nvs_node${i}.bin"
    NODE_FLASH="$BUILD_DIR/qemu_flash_node${i}.bin"

    # Generate NVS with provision.py --dry-run
    # --port is required by argparse but unused in dry-run; pass a dummy
    python3 "$PROVISION_SCRIPT" \
        --port /dev/null \
        --dry-run \
        --node-id "$i" \
        --tdm-slot "$i" \
        --tdm-total "$N_NODES" \
        --target-ip "$AGG_IP" \
        --target-port "$AGG_PORT"

    # provision.py --dry-run writes to nvs_provision.bin in CWD
    # NOTE(review): this depends on the caller's working directory (the
    # script never cd's) — presumably provision.py writes into whatever
    # directory the user launched sudo from; verify against provision.py.
    if [ -f "nvs_provision.bin" ]; then
        mv "nvs_provision.bin" "$NVS_BIN"
    else
        echo "ERROR: provision.py did not produce nvs_provision.bin for node $i"
        exit 3
    fi

    # Copy base image and inject NVS at 0x9000
    cp "$FLASH_IMAGE_BASE" "$NODE_FLASH"
    dd if="$NVS_BIN" of="$NODE_FLASH" \
        bs=1 seek=$((0x9000)) conv=notrunc 2>/dev/null

    echo " Node $i: flash=$NODE_FLASH nvs=$NVS_BIN (TDM slot $i/$N_NODES)"
done
echo ""

# ---------------------------------------------------------------------------
# 4. Create bridge and TAP interfaces
# ---------------------------------------------------------------------------
echo "[4/6] Setting up network bridge and TAP interfaces..."

# Create bridge (|| true so re-runs tolerate an existing bridge)
ip link add name "$BRIDGE" type bridge 2>/dev/null || true
ip addr add "$BRIDGE_IP" dev "$BRIDGE" 2>/dev/null || true
ip link set "$BRIDGE" up

# Create TAP interfaces and attach to bridge
for i in $(seq 0 $((N_NODES - 1))); do
    TAP="tap${i}"
    ip tuntap add dev "$TAP" mode tap 2>/dev/null || true
    ip link set "$TAP" master "$BRIDGE"
    ip link set "$TAP" up
    echo " $TAP -> $BRIDGE"
done
echo ""

# ---------------------------------------------------------------------------
# 5. Start aggregator and QEMU instances
# ---------------------------------------------------------------------------
echo "[5/6] Starting aggregator and $N_NODES QEMU nodes..."

# Start Rust aggregator in background
echo " Starting aggregator: listen=0.0.0.0:$AGG_PORT expect-nodes=$N_NODES"
cargo run --manifest-path "$RUST_DIR/Cargo.toml" \
    -p wifi-densepose-hardware --bin aggregator -- \
    --listen "0.0.0.0:$AGG_PORT" \
    --expect-nodes "$N_NODES" \
    --output "$RESULTS_FILE" \
    > "$BUILD_DIR/aggregator.log" 2>&1 &
AGG_PID=$!
echo " Aggregator PID: $AGG_PID"

# Give aggregator a moment to bind
sleep 1

if ! kill -0 "$AGG_PID" 2>/dev/null; then
    echo "ERROR: Aggregator failed to start. Check $BUILD_DIR/aggregator.log"
    cat "$BUILD_DIR/aggregator.log" 2>/dev/null || true
    exit 3
fi

# Launch QEMU instances
for i in $(seq 0 $((N_NODES - 1))); do
    TAP="tap${i}"
    NODE_FLASH="$BUILD_DIR/qemu_flash_node${i}.bin"
    NODE_LOG="$BUILD_DIR/qemu_node${i}.log"
    # Locally-administered MAC, unique per node index.
    NODE_MAC=$(printf "52:54:00:00:00:%02x" "$i")

    echo " Starting QEMU node $i (tap=$TAP, mac=$NODE_MAC)..."

    "$QEMU_BIN" \
        -machine esp32s3 \
        -nographic \
        -drive "file=$NODE_FLASH,if=mtd,format=raw" \
        -serial "file:$NODE_LOG" \
        -no-reboot \
        -nic "tap,ifname=$TAP,script=no,downscript=no,mac=$NODE_MAC" \
        > /dev/null 2>&1 &

    QEMU_PIDS+=($!)
    echo " PID: ${QEMU_PIDS[-1]}, log: $NODE_LOG"
done

echo ""
echo "All nodes launched. Waiting ${TIMEOUT}s for mesh simulation..."
echo ""

# ---------------------------------------------------------------------------
# Wait for timeout
# ---------------------------------------------------------------------------
sleep "$TIMEOUT"

echo "Timeout reached. Stopping all processes..."

# Kill QEMU instances (aggregator killed in cleanup)
for pid in "${QEMU_PIDS[@]}"; do
    if kill -0 "$pid" 2>/dev/null; then
        kill "$pid" 2>/dev/null || true
    fi
done

# Give aggregator a moment to flush results
sleep 2

# Kill aggregator
if [ -n "$AGG_PID" ] && kill -0 "$AGG_PID" 2>/dev/null; then
    kill "$AGG_PID" 2>/dev/null || true
    wait "$AGG_PID" 2>/dev/null || true
fi

echo ""
|
||||
|
||||
# ---------------------------------------------------------------------------
# 6. Validate results
# ---------------------------------------------------------------------------
echo "[6/6] Validating mesh test results..."

VALIDATE_ARGS=("--nodes" "$N_NODES")

# Pass results file if it was produced
if [ -f "$RESULTS_FILE" ]; then
    VALIDATE_ARGS+=("--results" "$RESULTS_FILE")
else
    echo "WARNING: Aggregator results file not found: $RESULTS_FILE"
    echo "Validation will rely on node logs only."
fi

# Pass node log files
for i in $(seq 0 $((N_NODES - 1))); do
    NODE_LOG="$BUILD_DIR/qemu_node${i}.log"
    if [ -f "$NODE_LOG" ]; then
        VALIDATE_ARGS+=("--log" "$NODE_LOG")
    fi
done

# Capture the validator's exit code without tripping `set -e`: a plain
# invocation would abort the script on any nonzero validator exit, so
# VALIDATE_EXIT would never be assigned for WARN (1) / FAIL (2) results
# and the documented exit codes would not propagate to the caller.
VALIDATE_EXIT=0
python3 "$VALIDATE_SCRIPT" "${VALIDATE_ARGS[@]}" || VALIDATE_EXIT=$?

echo ""
echo "=== Mesh Test Complete (exit code: $VALIDATE_EXIT) ==="
exit $VALIDATE_EXIT
|
||||
373
scripts/qemu-snapshot-test.sh
Executable file
373
scripts/qemu-snapshot-test.sh
Executable file
|
|
@ -0,0 +1,373 @@
|
|||
#!/bin/bash
|
||||
# QEMU Snapshot-Based Test Runner — ADR-061 Layer 8
|
||||
#
|
||||
# Uses QEMU VM snapshots to accelerate repeated test runs.
|
||||
# Instead of rebooting and re-initializing for each test scenario,
|
||||
# we snapshot the VM state after boot and after the first CSI frame,
|
||||
# then restore from the snapshot for each individual test.
|
||||
#
|
||||
# This dramatically reduces per-test wall time from ~15s (full boot)
|
||||
# to ~2s (snapshot restore + execution).
|
||||
#
|
||||
# Environment variables:
|
||||
# QEMU_PATH - Path to qemu-system-xtensa (default: qemu-system-xtensa)
|
||||
# QEMU_TIMEOUT - Per-test timeout in seconds (default: 10)
|
||||
# FLASH_IMAGE - Path to merged flash image (default: build/qemu_flash.bin)
|
||||
# SKIP_SNAPSHOT - Set to "1" to run without snapshots (baseline timing)
|
||||
#
|
||||
# Exit codes:
|
||||
# 0 PASS — all checks passed
|
||||
# 1 WARN — non-critical checks failed
|
||||
# 2 FAIL — critical checks failed
|
||||
# 3 FATAL — build error, crash, or infrastructure failure
|
||||
|
||||
# ── Help ──────────────────────────────────────────────────────────────
# Print usage on stdout and exit 0 (reached only via -h/--help below).
usage() {
    cat <<'HELP'
Usage: qemu-snapshot-test.sh [OPTIONS]

Use QEMU VM snapshots to accelerate repeated test runs. Snapshots the VM
state after boot and after the first CSI frame, then restores from the
snapshot for each individual test (~2s vs ~15s per test).

Options:
  -h, --help     Show this help message and exit

Environment variables:
  QEMU_PATH      Path to qemu-system-xtensa (default: qemu-system-xtensa)
  QEMU_TIMEOUT   Per-test timeout in seconds (default: 10)
  FLASH_IMAGE    Path to merged flash image (default: build/qemu_flash.bin)
  SKIP_SNAPSHOT  Set to "1" to run without snapshots (baseline timing)

Examples:
  ./qemu-snapshot-test.sh
  QEMU_TIMEOUT=20 ./qemu-snapshot-test.sh
  FLASH_IMAGE=/path/to/image.bin ./qemu-snapshot-test.sh

Exit codes:
  0  PASS  — all checks passed
  1  WARN  — non-critical checks failed
  2  FAIL  — critical checks failed
  3  FATAL — build error, crash, or infrastructure failure
HELP
    exit 0
}

# Handle -h/--help before `set -euo pipefail` is enabled further down.
case "${1:-}" in -h|--help) usage ;; esac
|
||||
|
||||
set -euo pipefail

# Resolve the repository layout relative to this script's location.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"

FIRMWARE_DIR="$PROJECT_ROOT/firmware/esp32-csi-node"
BUILD_DIR="$FIRMWARE_DIR/build"
QEMU_BIN="${QEMU_PATH:-qemu-system-xtensa}"
FLASH_IMAGE="${FLASH_IMAGE:-$BUILD_DIR/qemu_flash.bin}"
TIMEOUT_SEC="${QEMU_TIMEOUT:-10}"
MONITOR_SOCK="$BUILD_DIR/qemu-monitor.sock"   # QEMU monitor UNIX socket
LOG_DIR="$BUILD_DIR/snapshot-tests"           # per-test logs live here
QEMU_PID=""                                   # set by start_qemu

# Timing accumulators
SNAPSHOT_TOTAL_MS=0
BASELINE_TOTAL_MS=0

# Track test results: array of "test_name:exit_code"
declare -a TEST_RESULTS=()
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────────
# Cleanup
# ──────────────────────────────────────────────────────────────────────

# Stop the background QEMU instance (if one is running) and remove the
# monitor socket. Registered for normal exit and for INT/TERM signals.
cleanup() {
    echo ""
    echo "[cleanup] Shutting down QEMU and removing socket..."
    if [ -n "$QEMU_PID" ]; then
        if kill -0 "$QEMU_PID" 2>/dev/null; then
            kill "$QEMU_PID" 2>/dev/null || true
            wait "$QEMU_PID" 2>/dev/null || true
        fi
    fi
    rm -f "$MONITOR_SOCK"
    echo "[cleanup] Done."
}
trap cleanup EXIT INT TERM
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
# Helpers
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
# Current wall-clock time in whole milliseconds.
# Prefers GNU `date +%s%N`; on platforms whose date(1) lacks %N (the
# literal "N" comes back in the output), falls back to perl, then to
# plain one-second resolution.
now_ms() {
    local nanos
    nanos=$(date +%s%N 2>/dev/null)
    if [[ "$nanos" =~ ^[0-9]+$ ]]; then
        echo $(( nanos / 1000000 ))
        return
    fi
    perl -MTime::HiRes=time -e 'printf "%d\n", time()*1000' 2>/dev/null || \
        echo $(( $(date +%s) * 1000 ))
}
|
||||
|
||||
# Send one command to the QEMU monitor over its UNIX socket.
# $1 = monitor command line, $2 = socat connect timeout in seconds (default 5).
# Prints the monitor's reply on stdout; returns 1 when socat is missing.
monitor_cmd() {
    local line="$1"
    local tmo="${2:-5}"
    if ! command -v socat &>/dev/null; then
        echo "ERROR: socat not found (required for QEMU monitor)" >&2
        return 1
    fi
    printf '%s\n' "$line" | socat - "UNIX-CONNECT:$MONITOR_SOCK,connect-timeout=$tmo" 2>/dev/null
}
|
||||
|
||||
# Poll a log file once per second until a grep pattern appears.
# $1 = log file path, $2 = pattern (grep BRE), $3 = timeout in seconds.
# Returns 0 on first match, 1 when the timeout elapses without a match.
wait_for_pattern() {
    local file="$1"
    local pattern="$2"
    local limit="$3"
    local waited=0
    until [ "$waited" -ge "$limit" ]; do
        if [ -f "$file" ] && grep -q "$pattern" "$file" 2>/dev/null; then
            return 0
        fi
        sleep 1
        waited=$((waited + 1))
    done
    return 1
}
|
||||
|
||||
# Launch QEMU in the background with a monitor UNIX socket.
# Sets the global QEMU_PID; UART output is redirected to
# $LOG_DIR/qemu_uart.log. Returns nonzero if the monitor socket never
# appears within 10s or the QEMU process dies early.
start_qemu() {
    # Launch QEMU in background with monitor socket
    echo "[qemu] Launching QEMU with monitor socket..."

    # Remove any stale socket from a previous run.
    rm -f "$MONITOR_SOCK"

    local qemu_args=(
        -machine esp32s3
        -nographic
        -drive "file=$FLASH_IMAGE,if=mtd,format=raw"
        -serial "file:$LOG_DIR/qemu_uart.log"
        -no-reboot
        -monitor "unix:$MONITOR_SOCK,server,nowait"
    )

    "$QEMU_BIN" "${qemu_args[@]}" &
    QEMU_PID=$!
    echo "[qemu] PID=$QEMU_PID"

    # Wait for monitor socket to appear (up to 10s)
    local waited=0
    while [ ! -S "$MONITOR_SOCK" ] && [ "$waited" -lt 10 ]; do
        sleep 1
        waited=$((waited + 1))
    done

    if [ ! -S "$MONITOR_SOCK" ]; then
        echo "ERROR: QEMU monitor socket did not appear after 10s"
        return 1
    fi

    # Verify QEMU is still running
    if ! kill -0 "$QEMU_PID" 2>/dev/null; then
        echo "ERROR: QEMU process exited prematurely"
        return 1
    fi

    echo "[qemu] Monitor socket ready: $MONITOR_SOCK"
}
|
||||
|
||||
# Persist the current VM state under the given snapshot name (savevm).
save_snapshot() {
    local snap="$1"
    echo "[snapshot] Saving snapshot: $snap"
    monitor_cmd "savevm $snap" 5
    echo "[snapshot] Saved: $snap"
}

# Roll the VM back to a previously saved snapshot (loadvm).
restore_snapshot() {
    local snap="$1"
    echo "[snapshot] Restoring snapshot: $snap"
    monitor_cmd "loadvm $snap" 5
    echo "[snapshot] Restored: $snap"
}
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────────
# Pre-flight checks
# ──────────────────────────────────────────────────────────────────────

echo "=== QEMU Snapshot Test Runner — ADR-061 Layer 8 ==="
echo "QEMU binary: $QEMU_BIN"
echo "Flash image: $FLASH_IMAGE"
echo "Timeout/test: ${TIMEOUT_SEC}s"
echo ""

if ! command -v "$QEMU_BIN" &>/dev/null; then
    echo "ERROR: QEMU binary not found: $QEMU_BIN"
    echo " Install: sudo apt install qemu-system-misc # Debian/Ubuntu"
    echo " Install: brew install qemu # macOS"
    echo " Or set QEMU_PATH to the qemu-system-xtensa binary."
    exit 3
fi

if ! command -v qemu-img &>/dev/null; then
    echo "ERROR: qemu-img not found (needed for snapshot disk management)."
    echo " Install: sudo apt install qemu-utils # Debian/Ubuntu"
    echo " Install: brew install qemu # macOS"
    exit 3
fi

if ! command -v socat &>/dev/null; then
    echo "ERROR: socat not found (needed for QEMU monitor communication)."
    echo " Install: sudo apt install socat # Debian/Ubuntu"
    echo " Install: brew install socat # macOS"
    exit 3
fi

if [ ! -f "$FLASH_IMAGE" ]; then
    echo "ERROR: Flash image not found: $FLASH_IMAGE"
    echo "Run qemu-esp32s3-test.sh first to build the flash image."
    exit 3
fi

mkdir -p "$LOG_DIR"

# ──────────────────────────────────────────────────────────────────────
# Phase 1: Boot and create snapshots
# ──────────────────────────────────────────────────────────────────────

echo "── Phase 1: Boot and snapshot creation ──"
echo ""

# Clear any previous UART log
> "$LOG_DIR/qemu_uart.log"

start_qemu

# Wait for boot (look for boot indicators, max 5s)
echo "[boot] Waiting for firmware boot (up to 5s)..."
if wait_for_pattern "$LOG_DIR/qemu_uart.log" "app_main\|main_task\|ESP32-S3" 5; then
    echo "[boot] Firmware booted successfully."
else
    # Best-effort: snapshots are still taken even without the indicator.
    echo "[boot] No boot indicator found after 5s (continuing anyway)."
fi

# Save post-boot snapshot
save_snapshot "post_boot"
echo ""

# Wait for first mock CSI frame (additional 5s)
echo "[frame] Waiting for first CSI frame (up to 5s)..."
if wait_for_pattern "$LOG_DIR/qemu_uart.log" "frame\|CSI\|mock_csi\|iq_data\|subcarrier" 5; then
    echo "[frame] First CSI frame detected."
else
    echo "[frame] No frame indicator found after 5s (continuing anyway)."
fi

# Save post-first-frame snapshot
save_snapshot "post_first_frame"
echo ""
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
# Phase 2: Run tests from snapshot
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
echo "── Phase 2: Running tests from snapshot ──"
|
||||
echo ""
|
||||
|
||||
TESTS=("test_presence" "test_fall" "test_multi_person")
|
||||
MAX_EXIT=0
|
||||
|
||||
for test_name in "${TESTS[@]}"; do
|
||||
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
|
||||
echo " Test: $test_name"
|
||||
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
|
||||
|
||||
test_log="$LOG_DIR/${test_name}.log"
|
||||
t_start=$(now_ms)
|
||||
|
||||
# Restore to post_first_frame state
|
||||
restore_snapshot "post_first_frame"
|
||||
|
||||
# Record current log length so we can extract only new lines
|
||||
pre_lines=$(wc -l < "$LOG_DIR/qemu_uart.log" 2>/dev/null || echo 0)
|
||||
|
||||
# Let execution continue for TIMEOUT_SEC seconds
|
||||
echo "[test] Running for ${TIMEOUT_SEC}s..."
|
||||
sleep "$TIMEOUT_SEC"
|
||||
|
||||
# Capture only the new log lines produced during this test
|
||||
tail -n +$((pre_lines + 1)) "$LOG_DIR/qemu_uart.log" > "$test_log"
|
||||
|
||||
t_end=$(now_ms)
|
||||
elapsed_ms=$((t_end - t_start))
|
||||
SNAPSHOT_TOTAL_MS=$((SNAPSHOT_TOTAL_MS + elapsed_ms))
|
||||
|
||||
echo "[test] Captured $(wc -l < "$test_log") lines in ${elapsed_ms}ms"
|
||||
|
||||
# Validate
|
||||
echo "[test] Validating..."
|
||||
test_exit=0
|
||||
python3 "$SCRIPT_DIR/validate_qemu_output.py" "$test_log" || test_exit=$?
|
||||
|
||||
TEST_RESULTS+=("${test_name}:${test_exit}")
|
||||
if [ "$test_exit" -gt "$MAX_EXIT" ]; then
|
||||
MAX_EXIT=$test_exit
|
||||
fi
|
||||
|
||||
echo ""
|
||||
done
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
# Phase 3: Baseline timing (without snapshots) for comparison
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
echo "── Phase 3: Timing comparison ──"
|
||||
echo ""
|
||||
|
||||
# Estimate baseline: full boot (5s) + frame wait (5s) + test run per test
|
||||
BASELINE_PER_TEST=$((5 + 5 + TIMEOUT_SEC))
|
||||
BASELINE_TOTAL_MS=$((BASELINE_PER_TEST * ${#TESTS[@]} * 1000))
|
||||
SNAPSHOT_PER_TEST=$((SNAPSHOT_TOTAL_MS / ${#TESTS[@]}))
|
||||
|
||||
echo "Timing Summary:"
|
||||
echo " Tests run: ${#TESTS[@]}"
|
||||
echo " With snapshots:"
|
||||
echo " Total wall time: ${SNAPSHOT_TOTAL_MS}ms"
|
||||
echo " Per-test average: ${SNAPSHOT_PER_TEST}ms"
|
||||
echo " Without snapshots (estimated):"
|
||||
echo " Total wall time: ${BASELINE_TOTAL_MS}ms"
|
||||
echo " Per-test average: $((BASELINE_PER_TEST * 1000))ms"
|
||||
echo ""
|
||||
|
||||
if [ "$SNAPSHOT_TOTAL_MS" -gt 0 ] && [ "$BASELINE_TOTAL_MS" -gt 0 ]; then
|
||||
SPEEDUP=$((BASELINE_TOTAL_MS * 100 / SNAPSHOT_TOTAL_MS))
|
||||
echo " Speedup: ${SPEEDUP}% (${SPEEDUP}x/100)"
|
||||
else
|
||||
echo " Speedup: N/A (insufficient data)"
|
||||
fi
|
||||
|
||||
echo ""
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
# Summary
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
echo "── Test Results Summary ──"
|
||||
echo ""
|
||||
PASS_COUNT=0
|
||||
FAIL_COUNT=0
|
||||
for result in "${TEST_RESULTS[@]}"; do
|
||||
name="${result%%:*}"
|
||||
code="${result##*:}"
|
||||
if [ "$code" -le 1 ]; then
|
||||
echo " [PASS] $name (exit=$code)"
|
||||
PASS_COUNT=$((PASS_COUNT + 1))
|
||||
else
|
||||
echo " [FAIL] $name (exit=$code)"
|
||||
FAIL_COUNT=$((FAIL_COUNT + 1))
|
||||
fi
|
||||
done
|
||||
|
||||
echo ""
|
||||
echo " $PASS_COUNT passed, $FAIL_COUNT failed out of ${#TESTS[@]} tests"
|
||||
echo ""
|
||||
echo "=== Snapshot Test Complete (exit code: $MAX_EXIT) ==="
|
||||
exit "$MAX_EXIT"
|
||||
1134
scripts/qemu_swarm.py
Normal file
1134
scripts/qemu_swarm.py
Normal file
File diff suppressed because it is too large
Load diff
671
scripts/swarm_health.py
Normal file
671
scripts/swarm_health.py
Normal file
|
|
@ -0,0 +1,671 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
QEMU Swarm Health Oracle (ADR-062)
|
||||
|
||||
Validates collective health of a multi-node ESP32-S3 QEMU swarm.
|
||||
Checks cross-node assertions like TDM ordering, inter-node communication,
|
||||
and swarm-level frame rates.
|
||||
|
||||
Usage:
|
||||
python3 swarm_health.py --config swarm_config.yaml --log-dir build/swarm_logs/
|
||||
python3 swarm_health.py --log-dir build/swarm_logs/ --assertions all_nodes_boot no_crashes
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import re
|
||||
import sys
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
try:
|
||||
import yaml
|
||||
except ImportError:
|
||||
yaml = None # type: ignore[assignment]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# ANSI helpers (disabled when not a TTY)
|
||||
# ---------------------------------------------------------------------------
|
||||
USE_COLOR = sys.stdout.isatty()
|
||||
|
||||
|
||||
def _color(text: str, code: str) -> str:
|
||||
return f"\033[{code}m{text}\033[0m" if USE_COLOR else text
|
||||
|
||||
|
||||
def green(t: str) -> str:
|
||||
return _color(t, "32")
|
||||
|
||||
|
||||
def yellow(t: str) -> str:
|
||||
return _color(t, "33")
|
||||
|
||||
|
||||
def red(t: str) -> str:
|
||||
return _color(t, "1;31")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Data types
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@dataclass
class AssertionResult:
    """Result of a single swarm-level assertion."""
    # Assertion identifier (an ASSERTION_REGISTRY key, or a parameterized
    # name such as "fall_detected_node_2").
    name: str
    # True when the assertion held.
    passed: bool
    # Human-readable summary printed by print_report().
    message: str
    severity: int  # 0 = pass, 1 = warn, 2 = fail
|
||||
|
||||
|
||||
@dataclass
class NodeLog:
    """Parsed log for a single QEMU node."""
    # Zero-based node index (matches the qemu_node{i}.log filename).
    node_id: int
    # Log split into lines (trailing newlines stripped by splitlines()).
    lines: List[str]
    # Full raw log text; empty string when the log file was missing.
    text: str
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Log loading
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def load_logs(log_dir: Path, node_count: int) -> List[NodeLog]:
    """Load qemu_node{i}.log (or node_{i}.log fallback) from *log_dir*."""
    parsed: List[NodeLog] = []
    for idx in range(node_count):
        primary = log_dir / f"qemu_node{idx}.log"
        chosen = primary if primary.exists() else log_dir / f"node_{idx}.log"
        # A missing log becomes an empty NodeLog rather than an error, so
        # downstream assertions can report the silent node explicitly.
        if chosen.exists():
            contents = chosen.read_text(encoding="utf-8", errors="replace")
        else:
            contents = ""
        parsed.append(
            NodeLog(node_id=idx, lines=contents.splitlines(), text=contents)
        )
    return parsed
|
||||
|
||||
|
||||
def _node_count_from_dir(log_dir: Path) -> int:
|
||||
"""Auto-detect node count by scanning for qemu_node*.log (or node_*.log) files."""
|
||||
count = 0
|
||||
while (log_dir / f"qemu_node{count}.log").exists() or (log_dir / f"node_{count}.log").exists():
|
||||
count += 1
|
||||
return count
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Individual assertions
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Indicators that the firmware reached application start-up.
_BOOT_PATTERNS = [
    r"app_main\(\)", r"main_task:", r"main:", r"ESP32-S3 CSI Node",
]

# Fatal-error signatures: ESP-IDF panic handler output and CPU exceptions.
_CRASH_PATTERNS = [
    r"Guru Meditation", r"assert failed", r"abort\(\)", r"panic",
    r"LoadProhibited", r"StoreProhibited", r"InstrFetchProhibited",
    r"IllegalInstruction", r"Unhandled debug exception", r"Fatal exception",
]

# Heap/OOM error signatures.
# NOTE(review): "multi_heap" and "heap_lock" are broad substrings that may
# also match routine heap-component log lines — confirm against real
# firmware output before trusting a failure from these two alone.
_HEAP_PATTERNS = [
    r"HEAP_ERROR", r"out of memory", r"heap_caps_alloc.*failed",
    r"malloc.*fail", r"heap corruption", r"CORRUPT HEAP",
    r"multi_heap", r"heap_lock",
]

# CSI frame-activity indicators (matched case-insensitively by callers).
_FRAME_PATTERNS = [
    r"frame", r"CSI", r"mock_csi", r"iq_data", r"subcarrier",
    r"csi_collector", r"enqueue",
]

# Fall-detection event indicators.
_FALL_PATTERNS = [r"fall[=: ]+1", r"fall detected", r"fall_event"]
|
||||
|
||||
|
||||
def assert_all_nodes_boot(logs: List[NodeLog], timeout_s: float = 10.0) -> AssertionResult:
    """Check each node's log for boot patterns.

    NOTE: *timeout_s* is informational only — it is echoed in the pass
    message; any actual waiting happened before the logs were collected.
    """
    missing = [
        nl.node_id
        for nl in logs
        if not any(re.search(pat, nl.text) for pat in _BOOT_PATTERNS)
    ]

    if missing:
        return AssertionResult(
            name="all_nodes_boot",
            passed=False,
            message=f"Nodes missing boot indicator: {missing}",
            severity=2,
        )
    return AssertionResult(
        name="all_nodes_boot",
        passed=True,
        message=f"All {len(logs)} nodes booted (timeout={timeout_s}s)",
        severity=0,
    )
|
||||
|
||||
|
||||
def assert_no_crashes(logs: List[NodeLog]) -> AssertionResult:
    """Check no node has crash patterns."""
    crashed: List[str] = []
    for nl in logs:
        # Record at most one crash per node — the first matching line.
        for line in nl.lines:
            if any(re.search(pat, line) for pat in _CRASH_PATTERNS):
                crashed.append(f"node_{nl.node_id}: {line.strip()[:100]}")
                break

    if crashed:
        extra = f" (+{len(crashed)-1} more)" if len(crashed) > 1 else ""
        return AssertionResult(
            name="no_crashes",
            passed=False,
            message=f"Crashes found: {crashed[0]}" + extra,
            severity=2,
        )
    return AssertionResult(
        name="no_crashes",
        passed=True,
        message="No crash indicators in any node",
        severity=0,
    )
|
||||
|
||||
|
||||
def assert_tdm_no_collision(logs: List[NodeLog]) -> AssertionResult:
    """Parse TDM slot assignments from logs, verify uniqueness.

    Only each node's FIRST reported slot is considered; a node that later
    re-reports a different slot is not re-counted.
    """
    slot_map: Dict[int, List[int]] = {}  # slot -> [node_ids]
    tdm_pat = re.compile(r"tdm[_ ]?slot[=: ]+(\d+)", re.IGNORECASE)

    for nl in logs:
        for line in nl.lines:
            m = tdm_pat.search(line)
            if m:
                slot = int(m.group(1))
                slot_map.setdefault(slot, [])
                # Guard against the same node matching twice before break.
                if nl.node_id not in slot_map[slot]:
                    slot_map[slot].append(nl.node_id)
                break  # first occurrence per node

    # A collision is any slot claimed by more than one node.
    collisions = {s: nids for s, nids in slot_map.items() if len(nids) > 1}

    if not slot_map:
        # No TDM output at all passes: single-node or non-TDM firmware.
        return AssertionResult(
            name="tdm_no_collision", passed=True,
            message="No TDM slot assignments found (may be N/A)",
            severity=0,
        )
    if not collisions:
        return AssertionResult(
            name="tdm_no_collision", passed=True,
            message=f"TDM slots unique across {len(slot_map)} assignments",
            severity=0,
        )
    return AssertionResult(
        name="tdm_no_collision", passed=False,
        message=f"TDM collisions: {collisions}",
        severity=2,
    )
|
||||
|
||||
|
||||
def assert_all_nodes_produce_frames(
    logs: List[NodeLog],
    sensor_ids: Optional[List[int]] = None,
) -> AssertionResult:
    """Each sensor node has CSI frame output.

    Args:
        logs: Parsed node logs.
        sensor_ids: If provided, only check these node IDs (skip coordinators).
            If None, check all nodes (legacy behavior).
    """
    silent = [
        nl.node_id
        for nl in logs
        if (sensor_ids is None or nl.node_id in sensor_ids)
        and not any(
            re.search(pat, line, re.IGNORECASE)
            for line in nl.lines
            for pat in _FRAME_PATTERNS
        )
    ]

    # Report the number of *requested* nodes, matching the original count
    # semantics even if some requested IDs have no log present.
    checked = len(logs) if sensor_ids is None else len(sensor_ids)

    if silent:
        return AssertionResult(
            name="all_nodes_produce_frames", passed=False,
            message=f"Nodes with no frame activity: {silent}",
            severity=1,
        )
    return AssertionResult(
        name="all_nodes_produce_frames", passed=True,
        message=f"All {checked} checked nodes show frame activity",
        severity=0,
    )
|
||||
|
||||
|
||||
def assert_coordinator_receives_from_all(
    logs: List[NodeLog],
    coordinator_id: int = 0,
    sensor_ids: Optional[List[int]] = None,
) -> AssertionResult:
    """Coordinator log shows frames from each sensor's node_id.

    When *sensor_ids* is None, every non-coordinator node is expected.
    """
    # Locate the coordinator's own log.
    coord_log = None
    for nl in logs:
        if nl.node_id == coordinator_id:
            coord_log = nl
            break

    if coord_log is None:
        return AssertionResult(
            name="coordinator_receives_from_all", passed=False,
            message=f"Coordinator node_{coordinator_id} log not found",
            severity=2,
        )

    if sensor_ids is None:
        sensor_ids = [nl.node_id for nl in logs if nl.node_id != coordinator_id]

    missing: List[int] = []
    # NOTE(review): this matches any "from/node_id/src: N" text, not just
    # frame-receive lines, so unrelated log lines can mark a sensor as
    # "received" — confirm against the coordinator's actual log format.
    recv_pat = re.compile(r"(from|node_id|src)[=: ]+(\d+)", re.IGNORECASE)
    received_ids: set = set()
    for line in coord_log.lines:
        m = recv_pat.search(line)
        if m:
            # group(2) is the numeric ID following the keyword.
            received_ids.add(int(m.group(2)))

    for sid in sensor_ids:
        if sid not in received_ids:
            missing.append(sid)

    if not missing:
        return AssertionResult(
            name="coordinator_receives_from_all", passed=True,
            message=f"Coordinator received from all sensors: {sensor_ids}",
            severity=0,
        )
    return AssertionResult(
        name="coordinator_receives_from_all", passed=False,
        message=f"Coordinator missing frames from nodes: {missing}",
        severity=1,
    )
|
||||
|
||||
|
||||
def assert_fall_detected(logs: List[NodeLog], node_id: int) -> AssertionResult:
    """Specific node reports fall detection."""
    target = next((nl for nl in logs if nl.node_id == node_id), None)

    if target is None:
        # Missing log entirely is a hard failure (severity 2).
        return AssertionResult(
            name=f"fall_detected_node_{node_id}", passed=False,
            message=f"Node {node_id} log not found",
            severity=2,
        )

    detected = any(
        re.search(pat, line, re.IGNORECASE)
        for line in target.lines
        for pat in _FALL_PATTERNS
    )
    if detected:
        return AssertionResult(
            name=f"fall_detected_node_{node_id}", passed=True,
            message=f"Node {node_id} reported fall event",
            severity=0,
        )
    # Present-but-silent is only a warning (severity 1).
    return AssertionResult(
        name=f"fall_detected_node_{node_id}", passed=False,
        message=f"Node {node_id} did not report fall event",
        severity=1,
    )
|
||||
|
||||
|
||||
def assert_frame_rate_above(logs: List[NodeLog], min_fps: float = 10.0) -> AssertionResult:
    """Each node meets minimum frame rate.

    A node with no recognizable fps/frame-count output is NOT flagged —
    only nodes with measurable data below *min_fps* fail (frame presence
    is checked separately by all_nodes_produce_frames).
    """
    fps_pat = re.compile(r"(?:fps|frame.?rate)[=: ]+([0-9.]+)", re.IGNORECASE)
    count_pat = re.compile(r"(?:frame[_ ]?count|frames)[=: ]+(\d+)", re.IGNORECASE)
    below: List[str] = []

    for nl in logs:
        best_fps: Optional[float] = None
        # Try explicit FPS: keep the highest reading seen in the log.
        for line in nl.lines:
            m = fps_pat.search(line)
            if m:
                try:
                    best_fps = max(best_fps or 0.0, float(m.group(1)))
                except ValueError:
                    # The [0-9.]+ pattern also matches strings float()
                    # rejects (e.g. "..."); skip those.
                    pass
        # Fallback: estimate from frame count (assume 1-second intervals)
        if best_fps is None:
            counts = []
            for line in nl.lines:
                m = count_pat.search(line)
                if m:
                    try:
                        counts.append(int(m.group(1)))
                    except ValueError:
                        pass
            if len(counts) >= 2:
                # NOTE(review): assumes counter lines appear about once per
                # second and the counter is monotonic — confirm against the
                # firmware's logging cadence (a counter reset would yield a
                # negative estimate).
                best_fps = float(counts[-1] - counts[0]) / max(len(counts) - 1, 1)

        if best_fps is not None and best_fps < min_fps:
            below.append(f"node_{nl.node_id}={best_fps:.1f}")

    if not below:
        return AssertionResult(
            name="frame_rate_above", passed=True,
            message=f"All nodes meet minimum {min_fps} fps",
            severity=0,
        )
    return AssertionResult(
        name="frame_rate_above", passed=False,
        message=f"Nodes below {min_fps} fps: {', '.join(below)}",
        severity=1,
    )
|
||||
|
||||
|
||||
def assert_max_boot_time(logs: List[NodeLog], max_seconds: float = 10.0) -> AssertionResult:
    """All nodes boot within N seconds (based on timestamp in log).

    Extracts the parenthesized number from the first boot-indicator line —
    presumably the ESP-IDF "(12345)" millisecond log timestamp; confirm
    against the actual firmware log format. Nodes with no boot indicator
    at all are reported as "no_boot".
    """
    boot_time_pat = re.compile(r"\((\d+)\)\s", re.IGNORECASE)
    slow: List[str] = []

    for nl in logs:
        boot_found = False
        for line in nl.lines:
            if any(re.search(p, line) for p in _BOOT_PATTERNS):
                boot_found = True
                m = boot_time_pat.search(line)
                if m:
                    ms = int(m.group(1))
                    if ms > max_seconds * 1000:
                        slow.append(f"node_{nl.node_id}={ms}ms")
                # Only the first boot-indicator line per node is considered;
                # a boot line without a timestamp passes silently.
                break
        if not boot_found:
            slow.append(f"node_{nl.node_id}=no_boot")

    if not slow:
        return AssertionResult(
            name="max_boot_time", passed=True,
            message=f"All nodes booted within {max_seconds}s",
            severity=0,
        )
    return AssertionResult(
        name="max_boot_time", passed=False,
        message=f"Slow/missing boot: {', '.join(slow)}",
        severity=1,
    )
|
||||
|
||||
|
||||
def assert_no_heap_errors(logs: List[NodeLog]) -> AssertionResult:
    """No OOM/heap errors in any log."""
    findings: List[str] = []
    for nl in logs:
        # Record at most one heap error per node — the first matching line.
        for line in nl.lines:
            if any(re.search(pat, line, re.IGNORECASE) for pat in _HEAP_PATTERNS):
                findings.append(f"node_{nl.node_id}: {line.strip()[:100]}")
                break

    if findings:
        extra = f" (+{len(findings)-1} more)" if len(findings) > 1 else ""
        return AssertionResult(
            name="no_heap_errors",
            passed=False,
            message=f"Heap errors: {findings[0]}" + extra,
            severity=2,
        )
    return AssertionResult(
        name="no_heap_errors",
        passed=True,
        message="No heap errors in any node",
        severity=0,
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Assertion registry & dispatcher
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Maps assertion names (as written in YAML presets or on the CLI) to their
# check functions. Assertions that need extra parameters (fall_detected,
# coordinator/sensor IDs) are dispatched explicitly in run_assertions().
ASSERTION_REGISTRY: Dict[str, Any] = {
    "all_nodes_boot": assert_all_nodes_boot,
    "no_crashes": assert_no_crashes,
    "tdm_no_collision": assert_tdm_no_collision,
    "all_nodes_produce_frames": assert_all_nodes_produce_frames,
    "coordinator_receives_from_all": assert_coordinator_receives_from_all,
    "frame_rate_above": assert_frame_rate_above,
    "max_boot_time": assert_max_boot_time,
    "no_heap_errors": assert_no_heap_errors,
    # fall_detected is parameterized, handled separately
}
|
||||
|
||||
|
||||
def _parse_assertion_spec(spec: Any) -> tuple:
|
||||
"""Parse a YAML assertion entry into (name, kwargs).
|
||||
|
||||
Supported forms:
|
||||
- "all_nodes_boot" -> ("all_nodes_boot", {})
|
||||
- {"frame_rate_above": 15} -> ("frame_rate_above", {"min_fps": 15})
|
||||
- "fall_detected_by_node_2" -> ("fall_detected", {"node_id": 2})
|
||||
- {"max_boot_time_s": 10} -> ("max_boot_time", {"max_seconds": 10})
|
||||
"""
|
||||
if isinstance(spec, str):
|
||||
# Check for fall_detected_by_node_N pattern
|
||||
m = re.match(r"fall_detected_by_node_(\d+)", spec)
|
||||
if m:
|
||||
return ("fall_detected", {"node_id": int(m.group(1))})
|
||||
return (spec, {})
|
||||
|
||||
if isinstance(spec, dict):
|
||||
for key, val in spec.items():
|
||||
m = re.match(r"fall_detected_by_node_(\d+)", str(key))
|
||||
if m:
|
||||
return ("fall_detected", {"node_id": int(m.group(1))})
|
||||
if key == "frame_rate_above":
|
||||
return ("frame_rate_above", {"min_fps": float(val)})
|
||||
if key == "max_boot_time_s":
|
||||
return ("max_boot_time", {"max_seconds": float(val)})
|
||||
if key == "coordinator_receives_from_all":
|
||||
return ("coordinator_receives_from_all", {})
|
||||
return (str(key), {})
|
||||
|
||||
return (str(spec), {})
|
||||
|
||||
|
||||
def run_assertions(
    logs: List[NodeLog],
    assertion_specs: List[Any],
    config: Optional[Dict] = None,
) -> List[AssertionResult]:
    """Run all requested assertions against loaded logs.

    *config*, when given, supplies the node roles used to target the
    coordinator/sensor-aware assertions.
    """
    results: List[AssertionResult] = []

    # Derive coordinator/sensor IDs from config if available
    coordinator_id = 0
    sensor_ids: Optional[List[int]] = None
    if config and "nodes" in config:
        for node_def in config["nodes"]:
            if node_def.get("role") == "coordinator":
                coordinator_id = node_def.get("node_id", 0)
                # NOTE: sensor_ids is only populated when a coordinator
                # entry exists; with several coordinators the last wins.
                sensor_ids = [
                    n["node_id"] for n in config["nodes"]
                    if n.get("role") == "sensor"
                ]

    for spec in assertion_specs:
        name, kwargs = _parse_assertion_spec(spec)

        # Parameterized / config-aware assertions are dispatched explicitly;
        # everything else goes through ASSERTION_REGISTRY.
        if name == "fall_detected":
            results.append(assert_fall_detected(logs, **kwargs))
        elif name == "coordinator_receives_from_all":
            results.append(assert_coordinator_receives_from_all(
                logs, coordinator_id=coordinator_id, sensor_ids=sensor_ids,
            ))
        elif name == "all_nodes_produce_frames":
            results.append(assert_all_nodes_produce_frames(
                logs, sensor_ids=sensor_ids, **kwargs,
            ))
        elif name in ASSERTION_REGISTRY:
            fn = ASSERTION_REGISTRY[name]
            results.append(fn(logs, **kwargs))
        else:
            # Unknown names become a warning result instead of an error, so
            # a typo in a preset does not abort the whole report.
            results.append(AssertionResult(
                name=name, passed=False,
                message=f"Unknown assertion: {name}",
                severity=1,
            ))

    return results
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Report printing
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def print_report(results: List[AssertionResult], swarm_name: str = "") -> int:
    """Print the assertion report and return max severity."""
    title = "QEMU Swarm Health Report (ADR-062)"
    if swarm_name:
        title = f"{title} - {swarm_name}"

    rule = "=" * 60
    print()
    print(rule)
    print(f" {title}")
    print(rule)
    print()

    # severity -> (color function, status label); anything unexpected
    # falls back to FAIL, same as severity 2.
    badge = {0: (green, "PASS"), 1: (yellow, "WARN")}
    max_sev = 0
    for res in results:
        paint, label = badge.get(res.severity, (red, "FAIL"))
        print(f" [{paint(label)}] {res.name}: {res.message}")
        if res.severity > max_sev:
            max_sev = res.severity

    print()
    ok = sum(1 for res in results if res.passed)
    summary = f" {ok}/{len(results)} assertions passed"

    if max_sev == 0:
        print(green(summary))
    elif max_sev == 1:
        print(yellow(summary + " (with warnings)"))
    else:
        print(red(summary + " (with failures)"))

    print()
    return max_sev
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Main
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def main():
    """CLI entry point: parse args, load config and logs, run assertions.

    Exits with the maximum assertion severity (0 pass, 1 warn, 2 fail),
    or 2 on setup errors (missing log dir/config, PyYAML unavailable).
    """
    parser = argparse.ArgumentParser(
        description="QEMU Swarm Health Oracle (ADR-062)",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=(
            "Example:\n"
            " python3 swarm_health.py --config scripts/swarm_presets/standard.yaml \\\n"
            " --log-dir build/swarm_logs/\n"
            "\n"
            " python3 swarm_health.py --log-dir build/swarm_logs/ \\\n"
            " --assertions all_nodes_boot no_crashes\n"
            "\n"
            "Example output:\n"
            " ============================================================\n"
            " QEMU Swarm Health Report (ADR-062) - standard\n"
            " ============================================================\n"
            "\n"
            " [PASS] all_nodes_boot: All 3 nodes booted (timeout=10.0s)\n"
            " [PASS] no_crashes: No crash indicators in any node\n"
            " [PASS] tdm_no_collision: TDM slots unique across 3 assignments\n"
            " [PASS] all_nodes_produce_frames: All 3 nodes show frame activity\n"
            " [PASS] coordinator_receives_from_all: Coordinator received from all\n"
            " [WARN] fall_detected_node_2: Node 2 did not report fall event\n"
            " [PASS] frame_rate_above: All nodes meet minimum 15.0 fps\n"
            "\n"
            " 6/7 assertions passed (with warnings)\n"
        ),
    )
    parser.add_argument(
        "--config", type=str, default=None,
        help="Path to swarm YAML config (defines nodes and assertions)",
    )
    parser.add_argument(
        "--log-dir", type=str, required=True,
        help="Directory containing node_0.log, node_1.log, etc.",
    )
    parser.add_argument(
        "--assertions", nargs="*", default=None,
        help="Override assertions (space-separated). Ignores YAML assertion list.",
    )
    parser.add_argument(
        "--node-count", type=int, default=None,
        help="Number of nodes (auto-detected from log files if omitted)",
    )
    args = parser.parse_args()

    log_dir = Path(args.log_dir)
    if not log_dir.is_dir():
        print(f"ERROR: Log directory not found: {log_dir}", file=sys.stderr)
        sys.exit(2)

    # Load YAML config if provided
    config: Optional[Dict] = None
    swarm_name = ""
    yaml_assertions: List[Any] = []

    if args.config:
        if yaml is None:
            print("ERROR: PyYAML is required for --config. Install with: pip install pyyaml",
                  file=sys.stderr)
            sys.exit(2)
        config_path = Path(args.config)
        if not config_path.exists():
            print(f"ERROR: Config file not found: {config_path}", file=sys.stderr)
            sys.exit(2)
        with open(config_path, "r", encoding="utf-8") as f:
            # BUGFIX: yaml.safe_load returns None for an empty file; fall
            # back to {} so the .get() calls below don't raise
            # AttributeError on a blank/commented-out config.
            config = yaml.safe_load(f) or {}
        # "or {}" also guards a present-but-null "swarm:" key.
        swarm_name = (config.get("swarm") or {}).get("name", "")
        yaml_assertions = config.get("assertions") or []

    # Determine node count: explicit flag > config node list > log scan.
    if args.node_count is not None:
        node_count = args.node_count
    elif config and "nodes" in config:
        node_count = len(config["nodes"])
    else:
        node_count = _node_count_from_dir(log_dir)

    if node_count == 0:
        print("ERROR: No node logs found and node count not specified.", file=sys.stderr)
        sys.exit(2)

    # Load logs
    logs = load_logs(log_dir, node_count)

    # Determine which assertions to run: CLI override > YAML > default set.
    if args.assertions is not None:
        assertion_specs = args.assertions
    elif yaml_assertions:
        assertion_specs = yaml_assertions
    else:
        # Default set
        assertion_specs = ["all_nodes_boot", "no_crashes", "no_heap_errors"]

    # Run assertions
    results = run_assertions(logs, assertion_specs, config)

    # Print report and exit with the worst severity observed.
    max_sev = print_report(results, swarm_name)
    sys.exit(max_sev)


if __name__ == "__main__":
    main()
|
||||
31
scripts/swarm_presets/ci_matrix.yaml
Normal file
31
scripts/swarm_presets/ci_matrix.yaml
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
# CI-optimized preset: 3 nodes, star topology, 30s, minimal assertions
|
||||
swarm:
|
||||
name: ci-matrix
|
||||
duration_s: 30
|
||||
topology: star
|
||||
aggregator_port: 5005
|
||||
|
||||
nodes:
|
||||
- role: coordinator
|
||||
node_id: 0
|
||||
scenario: 0
|
||||
channel: 6
|
||||
edge_tier: 1
|
||||
|
||||
- role: sensor
|
||||
node_id: 1
|
||||
scenario: 1
|
||||
channel: 6
|
||||
tdm_slot: 1
|
||||
|
||||
- role: sensor
|
||||
node_id: 2
|
||||
scenario: 2
|
||||
channel: 6
|
||||
tdm_slot: 2
|
||||
|
||||
assertions:
|
||||
- all_nodes_boot
|
||||
- no_crashes
|
||||
- tdm_no_collision
|
||||
- max_boot_time_s: 10
|
||||
49
scripts/swarm_presets/heterogeneous.yaml
Normal file
49
scripts/swarm_presets/heterogeneous.yaml
Normal file
|
|
@ -0,0 +1,49 @@
|
|||
# Mixed scenarios: 5 nodes with different CSI scenarios, star topology, 90s
|
||||
swarm:
|
||||
name: heterogeneous
|
||||
duration_s: 90
|
||||
topology: star
|
||||
aggregator_port: 5005
|
||||
|
||||
nodes:
|
||||
- role: coordinator
|
||||
node_id: 0
|
||||
scenario: 0
|
||||
channel: 6
|
||||
edge_tier: 2
|
||||
is_gateway: true
|
||||
|
||||
- role: sensor
|
||||
node_id: 1
|
||||
scenario: 1
|
||||
channel: 6
|
||||
tdm_slot: 1
|
||||
|
||||
- role: sensor
|
||||
node_id: 2
|
||||
scenario: 2
|
||||
channel: 6
|
||||
tdm_slot: 2
|
||||
|
||||
- role: sensor
|
||||
node_id: 3
|
||||
scenario: 3
|
||||
channel: 6
|
||||
tdm_slot: 3
|
||||
|
||||
- role: sensor
|
||||
node_id: 4
|
||||
scenario: 5
|
||||
channel: 11
|
||||
tdm_slot: 4
|
||||
|
||||
assertions:
|
||||
- all_nodes_boot
|
||||
- no_crashes
|
||||
- tdm_no_collision
|
||||
- all_nodes_produce_frames
|
||||
- coordinator_receives_from_all
|
||||
- fall_detected_by_node_3
|
||||
- no_heap_errors
|
||||
- frame_rate_above: 12
|
||||
- max_boot_time_s: 12
|
||||
54
scripts/swarm_presets/large_mesh.yaml
Normal file
54
scripts/swarm_presets/large_mesh.yaml
Normal file
|
|
@ -0,0 +1,54 @@
|
|||
# Scale test: 6 fully-connected nodes in mesh topology, 90s
|
||||
swarm:
|
||||
name: large-mesh
|
||||
duration_s: 90
|
||||
topology: mesh
|
||||
aggregator_port: 5005
|
||||
|
||||
nodes:
|
||||
- role: coordinator
|
||||
node_id: 0
|
||||
scenario: 0
|
||||
channel: 6
|
||||
edge_tier: 2
|
||||
is_gateway: true
|
||||
|
||||
- role: sensor
|
||||
node_id: 1
|
||||
scenario: 1
|
||||
channel: 6
|
||||
tdm_slot: 1
|
||||
|
||||
- role: sensor
|
||||
node_id: 2
|
||||
scenario: 2
|
||||
channel: 6
|
||||
tdm_slot: 2
|
||||
|
||||
- role: sensor
|
||||
node_id: 3
|
||||
scenario: 3
|
||||
channel: 6
|
||||
tdm_slot: 3
|
||||
|
||||
- role: sensor
|
||||
node_id: 4
|
||||
scenario: 4
|
||||
channel: 6
|
||||
tdm_slot: 4
|
||||
|
||||
- role: sensor
|
||||
node_id: 5
|
||||
scenario: 5
|
||||
channel: 6
|
||||
tdm_slot: 5
|
||||
|
||||
assertions:
|
||||
- all_nodes_boot
|
||||
- no_crashes
|
||||
- tdm_no_collision
|
||||
- all_nodes_produce_frames
|
||||
- coordinator_receives_from_all
|
||||
- no_heap_errors
|
||||
- frame_rate_above: 10
|
||||
- max_boot_time_s: 15
|
||||
39
scripts/swarm_presets/line_relay.yaml
Normal file
39
scripts/swarm_presets/line_relay.yaml
Normal file
|
|
@ -0,0 +1,39 @@
|
|||
# Multi-hop relay chain: 4 nodes in line topology, 60s
|
||||
swarm:
|
||||
name: line-relay
|
||||
duration_s: 60
|
||||
topology: line
|
||||
aggregator_port: 5005
|
||||
|
||||
nodes:
|
||||
- role: gateway
|
||||
node_id: 0
|
||||
scenario: 0
|
||||
channel: 6
|
||||
edge_tier: 2
|
||||
is_gateway: true
|
||||
|
||||
- role: coordinator
|
||||
node_id: 1
|
||||
scenario: 0
|
||||
channel: 6
|
||||
edge_tier: 1
|
||||
|
||||
- role: sensor
|
||||
node_id: 2
|
||||
scenario: 2
|
||||
channel: 6
|
||||
tdm_slot: 2
|
||||
|
||||
- role: sensor
|
||||
node_id: 3
|
||||
scenario: 1
|
||||
channel: 6
|
||||
tdm_slot: 3
|
||||
|
||||
assertions:
|
||||
- all_nodes_boot
|
||||
- no_crashes
|
||||
- tdm_no_collision
|
||||
- all_nodes_produce_frames
|
||||
- max_boot_time_s: 12
|
||||
41
scripts/swarm_presets/ring_fault.yaml
Normal file
41
scripts/swarm_presets/ring_fault.yaml
Normal file
|
|
@ -0,0 +1,41 @@
|
|||
# Ring topology with fault injection: 4 nodes, 75s
swarm:
  name: ring-fault
  duration_s: 75          # longer run to leave room for fault injection + recovery
  topology: ring
  aggregator_port: 5005   # port the host aggregator listens on — presumably UDP, confirm in qemu_swarm.py

  nodes:
    - role: coordinator
      node_id: 0
      scenario: 0
      channel: 6
      edge_tier: 2
      is_gateway: true

    - role: sensor
      node_id: 1
      scenario: 1
      channel: 6
      tdm_slot: 1

    - role: sensor
      node_id: 2
      scenario: 2
      channel: 6
      tdm_slot: 2

    - role: sensor
      node_id: 3
      scenario: 3
      channel: 6
      tdm_slot: 3

  # Evaluated by the swarm runner after the run completes.
  assertions:
    - all_nodes_boot
    - no_crashes
    - tdm_no_collision
    - all_nodes_produce_frames
    - coordinator_receives_from_all
    - no_heap_errors
    - max_boot_time_s: 12
|
||||
24
scripts/swarm_presets/smoke.yaml
Normal file
24
scripts/swarm_presets/smoke.yaml
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
# Quick CI smoke test: 2 nodes, star topology, 15s duration
swarm:
  name: smoke
  duration_s: 15          # kept short: this preset gates every CI run
  topology: star
  aggregator_port: 5005   # port the host aggregator listens on — presumably UDP, confirm in qemu_swarm.py

  nodes:
    # Hub of the star.
    - role: coordinator
      node_id: 0
      scenario: 0
      channel: 6
      edge_tier: 1

    - role: sensor
      node_id: 1
      scenario: 1
      channel: 6
      tdm_slot: 1

  # Minimal assertion set — deeper checks belong to standard/full presets.
  assertions:
    - all_nodes_boot
    - no_crashes
    - max_boot_time_s: 10
|
||||
36
scripts/swarm_presets/standard.yaml
Normal file
36
scripts/swarm_presets/standard.yaml
Normal file
|
|
@ -0,0 +1,36 @@
|
|||
# Standard 3-node test: 2 sensors + 1 coordinator, star topology, 60s
swarm:
  name: standard
  duration_s: 60
  topology: star
  aggregator_port: 5005   # port the host aggregator listens on — presumably UDP, confirm in qemu_swarm.py

  nodes:
    - role: coordinator
      node_id: 0
      scenario: 0
      channel: 6
      edge_tier: 2
      is_gateway: true

    - role: sensor
      node_id: 1
      scenario: 2
      channel: 6
      tdm_slot: 1

    # NOTE(review): assertion below expects a fall event from this node —
    # presumably scenario 3 is the fall scenario; confirm scenario table.
    - role: sensor
      node_id: 2
      scenario: 3
      channel: 6
      tdm_slot: 2

  # Evaluated by the swarm runner after the run completes.
  assertions:
    - all_nodes_boot
    - no_crashes
    - tdm_no_collision
    - all_nodes_produce_frames
    - coordinator_receives_from_all
    - fall_detected_by_node_2
    - frame_rate_above: 15
    - max_boot_time_s: 10
|
||||
504
scripts/validate_mesh_test.py
Normal file
504
scripts/validate_mesh_test.py
Normal file
|
|
@ -0,0 +1,504 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
QEMU Multi-Node Mesh Validation (ADR-061 Layer 3)
|
||||
|
||||
Validates the output of a multi-node mesh simulation run by qemu-mesh-test.sh.
|
||||
Parses the aggregator results JSON and per-node UART logs, then runs 6 checks:
|
||||
|
||||
1. All nodes booted - every node log contains a boot indicator
|
||||
2. TDM ordering - slot assignments are sequential 0..N-1
|
||||
3. No slot collision - no two nodes share a TDM slot
|
||||
4. Frame count balance - per-node frame counts within +/-10%
|
||||
5. ADR-018 compliance - magic 0xC5110001 present in frames
|
||||
6. Vitals per node - each node produced vitals output
|
||||
|
||||
Usage:
|
||||
python3 validate_mesh_test.py --nodes N [results.json] [--log node0.log] ...
|
||||
|
||||
Exit codes:
|
||||
0 All checks passed (or only SKIP-level)
|
||||
1 Warnings (non-critical checks failed)
|
||||
2 Errors (critical checks failed)
|
||||
3 Fatal (crash or missing nodes)
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import re
|
||||
import sys
|
||||
from dataclasses import dataclass, field
|
||||
from enum import IntEnum
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Severity / reporting (matches validate_qemu_output.py pattern)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class Severity(IntEnum):
    """Check outcome level, ordered worst-last so max() picks the worst.

    PASS/SKIP are treated as success; main() maps PASS/SKIP -> exit 0,
    WARN -> 1, ERROR -> 2, FATAL -> 3.
    """
    PASS = 0
    SKIP = 1
    WARN = 2
    ERROR = 3
    FATAL = 4
|
||||
|
||||
|
||||
# Colorize only when stdout is an interactive terminal.
USE_COLOR = sys.stdout.isatty()


def color(text: str, code: str) -> str:
    """Wrap *text* in the given ANSI SGR *code* when color output is enabled."""
    return f"\033[{code}m{text}\033[0m" if USE_COLOR else text


def green(text: str) -> str:
    """Render *text* in green (PASS)."""
    return color(text, "32")


def yellow(text: str) -> str:
    """Render *text* in yellow (SKIP/WARN)."""
    return color(text, "33")


def red(text: str) -> str:
    """Render *text* in red (FAIL)."""
    return color(text, "31")


def bold_red(text: str) -> str:
    """Render *text* in bold red (FATAL)."""
    return color(text, "1;31")
|
||||
|
||||
|
||||
@dataclass
class CheckResult:
    """Outcome of one validation check."""
    name: str            # short check name shown in the report
    severity: Severity   # PASS/SKIP count as success
    message: str         # human-readable detail line
    count: int = 0       # optional occurrence count; 0 means "not shown"


@dataclass
class ValidationReport:
    """Accumulates CheckResults and renders the final report."""
    checks: List[CheckResult] = field(default_factory=list)

    def add(self, name: str, severity: Severity, message: str, count: int = 0):
        """Append one check outcome to the report."""
        self.checks.append(CheckResult(name, severity, message, count))

    @property
    def max_severity(self) -> Severity:
        """Worst severity across all checks (PASS for an empty report)."""
        if not self.checks:
            return Severity.PASS
        return max(c.severity for c in self.checks)

    def print_report(self):
        """Print the per-check lines and a colored summary to stdout."""
        print("\n" + "=" * 60)
        print(" Multi-Node Mesh Validation Report (ADR-061 Layer 3)")
        print("=" * 60 + "\n")

        for check in self.checks:
            # Map severity to a colored 4-letter tag.
            if check.severity == Severity.PASS:
                icon = green("PASS")
            elif check.severity == Severity.SKIP:
                icon = yellow("SKIP")
            elif check.severity == Severity.WARN:
                icon = yellow("WARN")
            elif check.severity == Severity.ERROR:
                icon = red("FAIL")
            else:
                icon = bold_red("FATAL")

            count_str = f" (count={check.count})" if check.count > 0 else ""
            print(f"  [{icon}] {check.name}: {check.message}{count_str}")

        print()

        # SKIP counts as "passed" for the summary line.
        passed = sum(1 for c in self.checks if c.severity <= Severity.SKIP)
        total = len(self.checks)
        summary = f" {passed}/{total} checks passed"

        max_sev = self.max_severity
        if max_sev <= Severity.SKIP:
            print(green(summary))
        elif max_sev == Severity.WARN:
            print(yellow(summary + " (with warnings)"))
        elif max_sev == Severity.ERROR:
            print(red(summary + " (with errors)"))
        else:
            print(bold_red(summary + " (FATAL issues detected)"))

        print()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Log parsing helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def check_node_booted(log_text: str) -> bool:
    """Return True if the log shows a boot indicator."""
    # Any one of these markers is sufficient evidence that app_main() ran.
    for marker in (r"app_main\(\)", r"main_task:", r"main:", r"ESP32-S3 CSI Node"):
        if re.search(marker, log_text):
            return True
    return False
|
||||
|
||||
|
||||
def check_node_crashed(log_text: str) -> Optional[str]:
    """Return first crash line or None."""
    # ESP-IDF crash markers: panic handler banner, asserts, and the
    # Xtensa exception cause names.
    fatal_markers = (
        r"Guru Meditation", r"assert failed", r"abort\(\)",
        r"panic", r"LoadProhibited", r"StoreProhibited",
        r"InstrFetchProhibited", r"IllegalInstruction",
    )
    # First offending line wins; truncate to keep report lines readable.
    return next(
        (ln.strip()[:120]
         for ln in log_text.splitlines()
         if any(re.search(marker, ln) for marker in fatal_markers)),
        None,
    )
|
||||
|
||||
|
||||
def extract_node_id_from_log(log_text: str) -> Optional[int]:
    """Try to extract the node_id from UART log lines."""
    # Ordered by reliability: explicit node_id first, TDM slot as a proxy last.
    probes = [re.compile(p, re.IGNORECASE) for p in (
        r"node_id[=: ]+(\d+)",
        r"Node ID[=: ]+(\d+)",
        r"TDM slot[=: ]+(\d+)",
    )]
    for line in log_text.splitlines():
        for probe in probes:
            if (m := probe.search(line)) is not None:
                try:
                    return int(m.group(1))
                except (ValueError, IndexError):
                    pass
    return None
|
||||
|
||||
|
||||
def check_vitals_in_log(log_text: str) -> bool:
    """Return True if the log contains vitals output."""
    # The keywords are plain substrings (no anchors or newlines), so
    # scanning the whole text is equivalent to the per-line search.
    keywords = (r"vitals", r"breathing", r"breathing_bpm",
                r"heart_rate", r"heartrate")
    return any(re.search(kw, log_text, re.IGNORECASE) for kw in keywords)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Validation
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def validate_mesh(
    n_nodes: int,
    results_path: Optional[Path],
    log_paths: List[Path],
) -> ValidationReport:
    """Run all 6 mesh validation checks.

    Checks: (1) all nodes booted, (2) TDM ordering, (3) no slot collision,
    (4) frame count balance, (5) ADR-018 magic present, (6) vitals per node.

    Args:
        n_nodes: Expected node count; node ids are 0..n_nodes-1.
        results_path: Optional aggregator results JSON; a missing or
            unparsable file adds a WARN/ERROR check instead of aborting.
        log_paths: Per-node UART logs. The list index is used as the node
            id; a missing file is treated as an empty log for that node.

    Returns:
        ValidationReport with one CheckResult per check (plus an optional
        "Results JSON" entry when a results path was supplied).
    """
    report = ValidationReport()

    # Load aggregator results if available
    results: Optional[dict] = None
    if results_path:
        if not results_path.exists():
            print(f"WARNING: Aggregator results file not found: {results_path}",
                  file=sys.stderr)
            report.add("Results JSON", Severity.WARN,
                       f"Results file not found: {results_path}")
        else:
            try:
                results = json.loads(results_path.read_text(encoding="utf-8"))
            except (json.JSONDecodeError, OSError) as exc:
                report.add("Results JSON", Severity.ERROR,
                           f"Failed to parse results: {exc}")

    # Load per-node logs (list index == node id)
    node_logs: Dict[int, str] = {}
    for idx, lp in enumerate(log_paths):
        if lp.exists():
            node_logs[idx] = lp.read_text(encoding="utf-8", errors="replace")
        else:
            node_logs[idx] = ""

    # ---- Check 1: All nodes booted ----
    booted = []
    not_booted = []
    crashed = []
    for idx in range(n_nodes):
        log_text = node_logs.get(idx, "")
        if not log_text.strip():
            not_booted.append(idx)
            continue
        crash_line = check_node_crashed(log_text)
        if crash_line:
            crashed.append((idx, crash_line))
        # A node that crashed after booting appears in both `crashed` and
        # `booted`; the crash branch below takes reporting precedence.
        if check_node_booted(log_text):
            booted.append(idx)
        else:
            not_booted.append(idx)

    if crashed:
        crash_desc = "; ".join(f"node {i}: {msg}" for i, msg in crashed)
        report.add("All nodes booted", Severity.FATAL,
                   f"Crash detected: {crash_desc}", count=len(crashed))
    elif len(booted) == n_nodes:
        report.add("All nodes booted", Severity.PASS,
                   f"All {n_nodes} nodes booted successfully", count=n_nodes)
    elif len(booted) == 0:
        report.add("All nodes booted", Severity.FATAL,
                   f"No nodes booted (expected {n_nodes})")
    else:
        missing = ", ".join(str(i) for i in not_booted)
        report.add("All nodes booted", Severity.ERROR,
                   f"{len(booted)}/{n_nodes} booted; missing: [{missing}]",
                   count=len(booted))

    # ---- Check 2: TDM ordering ----
    # Extract TDM slots either from aggregator results or from logs
    tdm_slots: Dict[int, int] = {}

    # Try aggregator results first
    if results and "nodes" in results:
        for node_entry in results["nodes"]:
            nid = node_entry.get("node_id")
            slot = node_entry.get("tdm_slot")
            if nid is not None and slot is not None:
                tdm_slots[int(nid)] = int(slot)

    # Fall back to log extraction
    # NOTE(review): extract_node_id_from_log can match a "node_id" line
    # rather than a "TDM slot" line, so the fallback assumes slot == node
    # id — confirm against firmware log format.
    if not tdm_slots:
        for idx in range(n_nodes):
            log_text = node_logs.get(idx, "")
            nid = extract_node_id_from_log(log_text)
            if nid is not None:
                tdm_slots[idx] = nid

    if len(tdm_slots) == n_nodes:
        expected = list(range(n_nodes))
        actual = [tdm_slots.get(i, -1) for i in range(n_nodes)]
        if actual == expected:
            report.add("TDM ordering", Severity.PASS,
                       f"Slots sequential 0..{n_nodes - 1}")
        else:
            report.add("TDM ordering", Severity.ERROR,
                       f"Expected slots {expected}, got {actual}")
    elif len(tdm_slots) > 0:
        report.add("TDM ordering", Severity.WARN,
                   f"Only {len(tdm_slots)}/{n_nodes} TDM slots detected",
                   count=len(tdm_slots))
    else:
        report.add("TDM ordering", Severity.SKIP,
                   "No TDM slot info found in results or logs")

    # ---- Check 3: No slot collision ----
    if tdm_slots:
        # Invert the map: slot -> list of nodes claiming it.
        slot_to_nodes: Dict[int, List[int]] = {}
        for nid, slot in tdm_slots.items():
            slot_to_nodes.setdefault(slot, []).append(nid)

        collisions = {s: nodes for s, nodes in slot_to_nodes.items() if len(nodes) > 1}
        if not collisions:
            report.add("No slot collision", Severity.PASS,
                       f"All {len(tdm_slots)} slots unique")
        else:
            desc = "; ".join(f"slot {s}: nodes {ns}" for s, ns in collisions.items())
            report.add("No slot collision", Severity.ERROR,
                       f"Slot collisions: {desc}", count=len(collisions))
    else:
        report.add("No slot collision", Severity.SKIP,
                   "No TDM slot data to check for collisions")

    # ---- Check 4: Frame count balance (within +/-10%) ----
    frame_counts: Dict[int, int] = {}

    # Try aggregator results
    if results and "nodes" in results:
        for node_entry in results["nodes"]:
            nid = node_entry.get("node_id")
            fc = node_entry.get("frame_count", node_entry.get("frames", 0))
            if nid is not None:
                frame_counts[int(nid)] = int(fc)

    # Fall back to log extraction (take the max counter seen per node,
    # since the firmware logs a running total).
    if not frame_counts:
        for idx in range(n_nodes):
            log_text = node_logs.get(idx, "")
            frame_pats = [
                r"frame[_ ]count[=: ]+(\d+)",
                r"frames?[=: ]+(\d+)",
                r"emitted[=: ]+(\d+)",
            ]
            max_fc = 0
            for line in log_text.splitlines():
                for pat in frame_pats:
                    m = re.search(pat, line, re.IGNORECASE)
                    if m:
                        try:
                            max_fc = max(max_fc, int(m.group(1)))
                        except (ValueError, IndexError):
                            pass
            if max_fc > 0:
                frame_counts[idx] = max_fc

    if len(frame_counts) >= 2:
        counts = list(frame_counts.values())
        avg = sum(counts) / len(counts)
        if avg > 0:
            # Largest relative deviation from the mean decides the verdict:
            # <=10% PASS, <=25% WARN, otherwise ERROR.
            max_deviation = max(abs(c - avg) / avg for c in counts)
            details = ", ".join(f"node {nid}={fc}" for nid, fc in sorted(frame_counts.items()))
            if max_deviation <= 0.10:
                report.add("Frame count balance", Severity.PASS,
                           f"Within +/-10% (avg={avg:.0f}): {details}",
                           count=int(avg))
            elif max_deviation <= 0.25:
                report.add("Frame count balance", Severity.WARN,
                           f"Deviation {max_deviation:.0%} exceeds 10%: {details}",
                           count=int(avg))
            else:
                report.add("Frame count balance", Severity.ERROR,
                           f"Severe imbalance {max_deviation:.0%}: {details}",
                           count=int(avg))
        else:
            report.add("Frame count balance", Severity.ERROR,
                       "All frame counts are zero")
    elif len(frame_counts) == 1:
        report.add("Frame count balance", Severity.WARN,
                   f"Only 1 node reported frames: {frame_counts}")
    else:
        report.add("Frame count balance", Severity.WARN,
                   "No frame count data found")

    # ---- Check 5: ADR-018 compliance (magic 0xC5110001) ----
    ADR018_MAGIC = "c5110001"
    magic_found = False

    # Check aggregator results (substring scan of the serialized JSON,
    # then dedicated fields, then per-node entries).
    if results:
        results_str = json.dumps(results).lower()
        if ADR018_MAGIC in results_str or "0xc5110001" in results_str:
            magic_found = True
        # Also check a dedicated field
        if results.get("adr018_magic") or results.get("magic"):
            magic_found = True
        # Check per-node entries
        if "nodes" in results:
            for node_entry in results["nodes"]:
                magic = node_entry.get("magic", "")
                if isinstance(magic, str) and ADR018_MAGIC in magic.lower():
                    magic_found = True
                elif isinstance(magic, int) and magic == 0xC5110001:
                    magic_found = True

    # Check logs for serialization/ADR-018 markers
    if not magic_found:
        for idx in range(n_nodes):
            log_text = node_logs.get(idx, "")
            adr018_pats = [
                r"0xC5110001",
                r"c5110001",
                r"ADR-018",
                r"magic[=: ]+0x[Cc]5110001",
            ]
            if any(re.search(p, log_text, re.IGNORECASE) for p in adr018_pats):
                magic_found = True
                break

    if magic_found:
        report.add("ADR-018 compliance", Severity.PASS,
                   "Magic 0xC5110001 found in frame data")
    else:
        # WARN rather than ERROR: the magic may only be visible in raw
        # frame bytes, not in text logs.
        report.add("ADR-018 compliance", Severity.WARN,
                   "Magic 0xC5110001 not found (may require deeper frame inspection)")

    # ---- Check 6: Vitals per node ----
    vitals_nodes = []
    no_vitals_nodes = []
    for idx in range(n_nodes):
        log_text = node_logs.get(idx, "")
        if check_vitals_in_log(log_text):
            vitals_nodes.append(idx)
        else:
            no_vitals_nodes.append(idx)

    # Also check aggregator results for vitals data (a node counts as
    # having vitals if either source shows them).
    if results and "nodes" in results:
        for node_entry in results["nodes"]:
            nid = node_entry.get("node_id")
            has_vitals = (
                node_entry.get("vitals") is not None
                or node_entry.get("breathing_bpm") is not None
                or node_entry.get("heart_rate") is not None
            )
            if has_vitals and nid is not None and int(nid) not in vitals_nodes:
                vitals_nodes.append(int(nid))
                if int(nid) in no_vitals_nodes:
                    no_vitals_nodes.remove(int(nid))

    if len(vitals_nodes) == n_nodes:
        report.add("Vitals per node", Severity.PASS,
                   f"All {n_nodes} nodes produced vitals output",
                   count=n_nodes)
    elif len(vitals_nodes) > 0:
        missing = ", ".join(str(i) for i in no_vitals_nodes)
        report.add("Vitals per node", Severity.WARN,
                   f"{len(vitals_nodes)}/{n_nodes} nodes have vitals; "
                   f"missing: [{missing}]",
                   count=len(vitals_nodes))
    else:
        report.add("Vitals per node", Severity.WARN,
                   "No vitals output found from any node")

    return report
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Main
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def main():
    """CLI entry point: parse args, run mesh validation, exit with status.

    Exit codes: 0 pass/skip, 1 warnings, 2 errors, 3 fatal/bad usage.
    """
    parser = argparse.ArgumentParser(
        description="Validate multi-node mesh QEMU test output (ADR-061 Layer 3)",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=(
            "Examples:\n"
            "  python3 validate_mesh_test.py --nodes 3 --results mesh_results.json\n"
            "  python3 validate_mesh_test.py --nodes 4 --log node0.log --log node1.log"
        ),
    )
    parser.add_argument("--results", default=None,
                        help="Path to mesh_test_results.json from aggregator")
    parser.add_argument("--nodes", "-n", type=int, required=True,
                        help="Expected number of mesh nodes")
    parser.add_argument("--log", action="append", default=[],
                        help="Path to a per-node QEMU log (can be repeated)")

    args = parser.parse_args()

    # A mesh needs at least two participants.
    if args.nodes < 2:
        print("ERROR: --nodes must be >= 2", file=sys.stderr)
        sys.exit(3)

    results_path = Path(args.results) if args.results else None
    log_paths = [Path(lp) for lp in args.log]

    # If no log files given, fall back to the conventional paths.
    # BUG FIX: previously a missing build/qemu_nodeK.log was silently
    # skipped, which shifted every later log to the wrong node index
    # (validate_mesh treats the list index as the node id). Append the
    # candidate for every node unconditionally instead — validate_mesh
    # already treats a missing file as an empty log for that node.
    if not log_paths:
        log_paths = [Path(f"build/qemu_node{i}.log") for i in range(args.nodes)]

    report = validate_mesh(args.nodes, results_path, log_paths)
    report.print_report()

    # Map max severity to exit code
    max_sev = report.max_severity
    if max_sev <= Severity.SKIP:
        sys.exit(0)
    elif max_sev == Severity.WARN:
        sys.exit(1)
    elif max_sev == Severity.ERROR:
        sys.exit(2)
    else:
        sys.exit(3)


if __name__ == "__main__":
    main()
|
||||
408
scripts/validate_qemu_output.py
Normal file
408
scripts/validate_qemu_output.py
Normal file
|
|
@ -0,0 +1,408 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
QEMU ESP32-S3 UART Output Validator (ADR-061)
|
||||
|
||||
Parses the UART log captured from a QEMU firmware run and validates
|
||||
16 checks covering boot, NVS, mock CSI, edge processing, vitals,
|
||||
presence/fall detection, serialization, crash indicators, scenario
|
||||
completion, and frame rate sanity.
|
||||
|
||||
Usage:
|
||||
python3 validate_qemu_output.py <log_file>
|
||||
|
||||
Exit codes:
|
||||
0 All checks passed (or only INFO-level skips)
|
||||
1 Warnings (non-critical checks failed)
|
||||
2 Errors (critical checks failed)
|
||||
3 Fatal (crash or corruption detected)
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import re
|
||||
import sys
|
||||
from dataclasses import dataclass, field
|
||||
from enum import IntEnum
|
||||
from pathlib import Path
|
||||
from typing import List, Optional
|
||||
|
||||
|
||||
class Severity(IntEnum):
    """Check outcome level, ordered worst-last so max() picks the worst.

    PASS/SKIP are treated as success; main() maps PASS/SKIP -> exit 0,
    WARN -> 1, ERROR -> 2, FATAL -> 3.
    """
    PASS = 0
    SKIP = 1
    WARN = 2
    ERROR = 3
    FATAL = 4
|
||||
|
||||
|
||||
# ANSI color codes (disabled if not a TTY)
|
||||
USE_COLOR = sys.stdout.isatty()
|
||||
|
||||
|
||||
def color(text: str, code: str) -> str:
|
||||
if not USE_COLOR:
|
||||
return text
|
||||
return f"\033[{code}m{text}\033[0m"
|
||||
|
||||
|
||||
def green(text: str) -> str:
|
||||
return color(text, "32")
|
||||
|
||||
|
||||
def yellow(text: str) -> str:
|
||||
return color(text, "33")
|
||||
|
||||
|
||||
def red(text: str) -> str:
|
||||
return color(text, "31")
|
||||
|
||||
|
||||
def bold_red(text: str) -> str:
|
||||
return color(text, "1;31")
|
||||
|
||||
|
||||
@dataclass
class CheckResult:
    """Outcome of one validation check."""
    name: str            # short check name shown in the report
    severity: Severity   # PASS/SKIP count as success
    message: str         # human-readable detail line
    count: int = 0       # optional occurrence count; 0 means "not shown"


@dataclass
class ValidationReport:
    """Accumulates CheckResults and renders the final report."""
    checks: List[CheckResult] = field(default_factory=list)

    def add(self, name: str, severity: Severity, message: str, count: int = 0):
        """Append one check outcome to the report."""
        self.checks.append(CheckResult(name, severity, message, count))

    @property
    def max_severity(self) -> Severity:
        """Worst severity across all checks (PASS for an empty report)."""
        if not self.checks:
            return Severity.PASS
        return max(c.severity for c in self.checks)

    def print_report(self):
        """Print the per-check lines and a colored summary to stdout."""
        print("\n" + "=" * 60)
        print(" QEMU Firmware Validation Report (ADR-061)")
        print("=" * 60 + "\n")

        for check in self.checks:
            # Map severity to a colored 4-letter tag.
            if check.severity == Severity.PASS:
                icon = green("PASS")
            elif check.severity == Severity.SKIP:
                icon = yellow("SKIP")
            elif check.severity == Severity.WARN:
                icon = yellow("WARN")
            elif check.severity == Severity.ERROR:
                icon = red("FAIL")
            else:
                icon = bold_red("FATAL")

            count_str = f" (count={check.count})" if check.count > 0 else ""
            print(f"  [{icon}] {check.name}: {check.message}{count_str}")

        print()

        # SKIP counts as "passed" for the summary line.
        passed = sum(1 for c in self.checks if c.severity <= Severity.SKIP)
        total = len(self.checks)
        summary = f" {passed}/{total} checks passed"

        max_sev = self.max_severity
        if max_sev <= Severity.SKIP:
            print(green(summary))
        elif max_sev == Severity.WARN:
            print(yellow(summary + " (with warnings)"))
        elif max_sev == Severity.ERROR:
            print(red(summary + " (with errors)"))
        else:
            print(bold_red(summary + " (FATAL issues detected)"))

        print()
|
||||
|
||||
|
||||
def validate_log(log_text: str) -> ValidationReport:
    """Run all 16 validation checks against the UART log text.

    Args:
        log_text: Complete UART capture from a single-node QEMU run.

    Returns:
        ValidationReport with exactly one CheckResult per check; the
        caller maps the worst severity to the process exit code.
    """
    report = ValidationReport()
    lines = log_text.splitlines()
    log_lower = log_text.lower()

    # ---- Check 1: Boot ----
    # Look for app_main() entry or main_task: tag
    boot_patterns = [r"app_main\(\)", r"main_task:", r"main:", r"ESP32-S3 CSI Node"]
    boot_found = any(re.search(p, log_text) for p in boot_patterns)
    if boot_found:
        report.add("Boot", Severity.PASS, "Firmware booted successfully")
    else:
        report.add("Boot", Severity.FATAL, "No boot indicator found (app_main / main_task)")

    # ---- Check 2: NVS load ----
    nvs_patterns = [r"nvs_config:", r"nvs_config_load", r"NVS", r"csi_cfg"]
    nvs_found = any(re.search(p, log_text) for p in nvs_patterns)
    if nvs_found:
        report.add("NVS load", Severity.PASS, "NVS configuration loaded")
    else:
        report.add("NVS load", Severity.WARN, "No NVS load indicator found")

    # ---- Check 3: Mock CSI init ----
    mock_patterns = [r"mock_csi:", r"mock_csi_init", r"Mock CSI", r"MOCK_CSI"]
    mock_found = any(re.search(p, log_text) for p in mock_patterns)
    if mock_found:
        report.add("Mock CSI init", Severity.PASS, "Mock CSI generator initialized")
    else:
        # This is only expected when mock is enabled
        report.add("Mock CSI init", Severity.SKIP,
                   "No mock CSI indicator (expected if mock not enabled)")

    # ---- Check 4: Frame generation ----
    # Count frame-related log lines; patterns with a capture group yield a
    # running counter (take the max), others just prove activity (count 1).
    frame_patterns = [
        r"frame[_ ]count[=: ]+(\d+)",
        r"frames?[=: ]+(\d+)",
        r"emitted[=: ]+(\d+)",
        r"mock_csi:.*frame",
        r"csi_collector:.*frame",
        r"CSI frame",
    ]
    frame_count = 0
    for line in lines:
        for pat in frame_patterns:
            m = re.search(pat, line, re.IGNORECASE)
            if m:
                if m.lastindex and m.lastindex >= 1:
                    try:
                        frame_count = max(frame_count, int(m.group(1)))
                    except (ValueError, IndexError):
                        frame_count = max(frame_count, 1)
                else:
                    frame_count = max(frame_count, 1)

    if frame_count > 0:
        report.add("Frame generation", Severity.PASS,
                   f"Frames detected", count=frame_count)
    else:
        # Also count lines mentioning IQ data or subcarriers
        iq_lines = sum(1 for line in lines
                       if re.search(r"(iq_data|subcarrier|I/Q|enqueue)", line, re.IGNORECASE))
        if iq_lines > 0:
            report.add("Frame generation", Severity.PASS,
                       "I/Q data activity detected", count=iq_lines)
        else:
            report.add("Frame generation", Severity.WARN,
                       "No frame generation activity detected")

    # ---- Check 5: Edge pipeline ----
    edge_patterns = [r"edge_processing:", r"DSP task", r"edge_init", r"edge_tier"]
    edge_found = any(re.search(p, log_text) for p in edge_patterns)
    if edge_found:
        report.add("Edge pipeline", Severity.PASS, "Edge processing pipeline active")
    else:
        report.add("Edge pipeline", Severity.WARN,
                   "No edge processing indicator found")

    # ---- Check 6: Vitals output ----
    vitals_patterns = [r"vitals", r"breathing", r"presence", r"heartrate",
                       r"breathing_bpm", r"heart_rate"]
    vitals_count = sum(1 for line in lines
                       if any(re.search(p, line, re.IGNORECASE) for p in vitals_patterns))
    if vitals_count > 0:
        report.add("Vitals output", Severity.PASS,
                   "Vitals/breathing/presence output detected", count=vitals_count)
    else:
        report.add("Vitals output", Severity.WARN,
                   "No vitals output lines found")

    # ---- Check 7: Presence detection ----
    # presence_score lines must report a value > 0 to count as a detection.
    presence_patterns = [
        r"presence[=: ]+1",
        r"presence_score[=: ]+([0-9.]+)",
        r"presence detected",
    ]
    presence_found = False
    for line in lines:
        for pat in presence_patterns:
            m = re.search(pat, line, re.IGNORECASE)
            if m:
                if m.lastindex and m.lastindex >= 1:
                    try:
                        score = float(m.group(1))
                        if score > 0:
                            presence_found = True
                    except (ValueError, IndexError):
                        presence_found = True
                else:
                    presence_found = True

    if presence_found:
        report.add("Presence detection", Severity.PASS, "Presence detected in output")
    else:
        report.add("Presence detection", Severity.WARN,
                   "No presence=1 or presence_score>0 found")

    # ---- Check 8: Fall detection ----
    fall_patterns = [r"fall[=: ]+1", r"fall detected", r"fall_event"]
    fall_found = any(
        re.search(p, line, re.IGNORECASE)
        for line in lines for p in fall_patterns
    )
    if fall_found:
        report.add("Fall detection", Severity.PASS, "Fall event detected in output")
    else:
        report.add("Fall detection", Severity.SKIP,
                   "No fall event (expected if fall scenario not run)")

    # ---- Check 9: MAC filter ----
    mac_patterns = [r"MAC filter", r"mac_filter", r"dropped.*MAC",
                    r"filter_mac", r"filtered"]
    mac_found = any(
        re.search(p, line, re.IGNORECASE)
        for line in lines for p in mac_patterns
    )
    if mac_found:
        report.add("MAC filter", Severity.PASS, "MAC filter activity detected")
    else:
        report.add("MAC filter", Severity.SKIP,
                   "No MAC filter activity (expected if filter scenario not run)")

    # ---- Check 10: ADR-018 serialize ----
    # NOTE: case-sensitive search here (unlike most checks); [Ss]erializ
    # covers the common casings explicitly.
    serialize_patterns = [r"[Ss]erializ", r"ADR-018", r"stream_sender",
                          r"UDP.*send", r"udp.*sent"]
    serialize_count = sum(1 for line in lines
                          if any(re.search(p, line) for p in serialize_patterns))
    if serialize_count > 0:
        report.add("ADR-018 serialize", Severity.PASS,
                   "Serialization/streaming activity detected", count=serialize_count)
    else:
        report.add("ADR-018 serialize", Severity.WARN,
                   "No serialization activity detected")

    # ---- Check 11: No crash ----
    crash_patterns = [r"Guru Meditation", r"assert failed", r"abort\(\)",
                      r"panic", r"LoadProhibited", r"StoreProhibited",
                      r"InstrFetchProhibited", r"IllegalInstruction"]
    crash_found = []
    for line in lines:
        for pat in crash_patterns:
            if re.search(pat, line):
                crash_found.append(line.strip()[:120])

    if not crash_found:
        report.add("No crash", Severity.PASS, "No crash indicators found")
    else:
        report.add("No crash", Severity.FATAL,
                   f"Crash detected: {crash_found[0]}",
                   count=len(crash_found))

    # ---- Check 12: Heap OK ----
    heap_patterns = [r"HEAP_ERROR", r"out of memory", r"heap_caps_alloc.*failed",
                     r"malloc.*fail", r"heap corruption"]
    heap_errors = [line.strip()[:120] for line in lines
                   if any(re.search(p, line, re.IGNORECASE) for p in heap_patterns)]
    if not heap_errors:
        report.add("Heap OK", Severity.PASS, "No heap errors found")
    else:
        report.add("Heap OK", Severity.ERROR,
                   f"Heap error: {heap_errors[0]}",
                   count=len(heap_errors))

    # ---- Check 13: Stack OK ----
    stack_patterns = [r"[Ss]tack overflow", r"stack_overflow",
                      r"vApplicationStackOverflowHook"]
    stack_errors = [line.strip()[:120] for line in lines
                    if any(re.search(p, line) for p in stack_patterns)]
    if not stack_errors:
        report.add("Stack OK", Severity.PASS, "No stack overflow detected")
    else:
        report.add("Stack OK", Severity.FATAL,
                   f"Stack overflow: {stack_errors[0]}",
                   count=len(stack_errors))

    # ---- Check 14: Clean exit ----
    # NOTE(review): "rst:0x" also appears in the normal power-on reset
    # banner on real hardware, which would make this check always WARN
    # there — presumably acceptable under QEMU; confirm.
    reboot_patterns = [r"Rebooting\.\.\.", r"rst:0x"]
    reboot_found = any(
        re.search(p, line)
        for line in lines for p in reboot_patterns
    )
    if not reboot_found:
        report.add("Clean exit", Severity.PASS,
                   "No unexpected reboot detected")
    else:
        report.add("Clean exit", Severity.WARN,
                   "Reboot detected (may indicate crash or watchdog)")

    # ---- Check 15: Scenario completion (when running all scenarios) ----
    all_scenarios_pattern = r"All (\d+) scenarios complete"
    scenario_match = re.search(all_scenarios_pattern, log_text)
    if scenario_match:
        n_scenarios = int(scenario_match.group(1))
        report.add("Scenario completion", Severity.PASS,
                   f"All {n_scenarios} scenarios completed", count=n_scenarios)
    else:
        # Check if individual scenario started indicators exist
        scenario_starts = re.findall(r"=== Scenario (\d+) started ===", log_text)
        if scenario_starts:
            report.add("Scenario completion", Severity.WARN,
                       f"Started {len(scenario_starts)} scenarios but no completion marker",
                       count=len(scenario_starts))
        else:
            report.add("Scenario completion", Severity.SKIP,
                       "No scenario tracking (single scenario or mock not enabled)")

    # ---- Check 16: Frame rate sanity ----
    # Extract scenario frame counts and check they're reasonable
    frame_reports = re.findall(r"scenario=\d+ frames=(\d+)", log_text)
    if frame_reports:
        max_frames = max(int(f) for f in frame_reports)
        if max_frames > 0:
            report.add("Frame rate", Severity.PASS,
                       f"Peak frame counter: {max_frames}", count=max_frames)
        else:
            report.add("Frame rate", Severity.ERROR,
                       "Frame counters are all zero")
    else:
        report.add("Frame rate", Severity.SKIP,
                   "No periodic frame reports found")

    return report
|
||||
|
||||
|
||||
def main():
    """CLI entry point: validate a QEMU UART log and exit with a status code.

    Exit codes: 0 pass/skip, 1 warnings, 2 errors, 3 fatal/missing input.
    """
    parser = argparse.ArgumentParser(
        description="Validate QEMU ESP32-S3 UART output (ADR-061)",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="Example: python3 validate_qemu_output.py build/qemu_output.log",
    )
    parser.add_argument(
        "log_file",
        help="Path to QEMU UART log file",
    )
    args = parser.parse_args()

    log_path = Path(args.log_file)
    if not log_path.exists():
        print(f"ERROR: Log file not found: {log_path}", file=sys.stderr)
        sys.exit(3)

    # Tolerate stray bytes in the UART capture rather than failing to read.
    log_text = log_path.read_text(encoding="utf-8", errors="replace")

    if not log_text.strip():
        print("ERROR: Log file is empty. QEMU may have failed to start.",
              file=sys.stderr)
        sys.exit(3)

    report = validate_log(log_text)
    report.print_report()

    # Map worst severity to exit code via a dispatch table.
    severity_to_exit = {
        Severity.PASS: 0,
        Severity.SKIP: 0,
        Severity.WARN: 1,
        Severity.ERROR: 2,
        Severity.FATAL: 3,
    }
    sys.exit(severity_to_exit[report.max_severity])


if __name__ == "__main__":
    main()
|
||||
Loading…
Add table
Add a link
Reference in a new issue