mirror of
https://github.com/ruvnet/RuVector.git
synced 2026-05-23 12:55:26 +00:00
Training pipeline: - generate-deobfuscation-data.mjs: 1,200+ training pairs from fixtures + synthetic - train-deobfuscator.py: 6M param transformer (3 layers, 4 heads, 128 embed) - export-to-rvf.py: PyTorch → ONNX → GGUF Q4 → RVF OVERLAY - launch-gpu-training.sh: GCloud L4 GPU (--local, --cloud-run, --spot) - Dockerfile.deobfuscator: pytorch/pytorch:2.2.0-cuda12.1 Decompiler integration: - NeuralInferrer behind optional `neural` feature flag - model_path in DecompileConfig - Falls through to pattern-based when model unavailable - Zero binary impact without feature flag All tests pass, cargo check clean with and without neural feature. Co-Authored-By: claude-flow <ruv@ruv.net>
58 lines
1.6 KiB
Text
58 lines
1.6 KiB
Text
# syntax=docker/dockerfile:1
# The directive above must remain the very first line of the Dockerfile. It pins
# the BuildKit Dockerfile frontend to the stable 1.x channel, which is required
# for the `COPY <<'ENTRYPOINT_SH'` here-document used further down (heredocs need
# frontend >= 1.4; older built-in frontends reject them).

# Base image: PyTorch 2.2.0 runtime with CUDA 12.1 / cuDNN 8.
FROM pytorch/pytorch:2.2.0-cuda12.1-cudnn8-runtime

# All scripts live in, and are run from, /app.
WORKDIR /app

# Install additional Python dependencies on top of the PyTorch provided by the
# base image (the training step is invoked with --export-onnx).
RUN pip install --no-cache-dir onnx onnxruntime numpy

# Copy training and export scripts into /app.
COPY train-deobfuscator.py .
COPY export-to-rvf.py .
|
|
|
|
# Entrypoint: optionally download data from GCS, train, export, and optionally
# upload the results.
# Environment variables (all optional):
#   DATA_PATH   training data file            (default: /tmp/data.jsonl)
#   OUTPUT_DIR  directory for model artifacts (default: /tmp/model)
#   GCS_BUCKET  gs://bucket[/prefix] base URL; when empty, no GCS download or
#               upload is attempted           (default: empty)
COPY <<'ENTRYPOINT_SH' /app/entrypoint.sh
#!/bin/bash
set -euo pipefail

DATA_PATH="${DATA_PATH:-/tmp/data.jsonl}"
OUTPUT_DIR="${OUTPUT_DIR:-/tmp/model}"
GCS_BUCKET="${GCS_BUCKET:-}"

# Make sure the google-cloud-storage Python client is importable, installing it
# on demand. pip's output is deliberately NOT silenced so that an installation
# failure is visible in the logs instead of surfacing later as a confusing error.
ensure_gcs_client() {
    if ! python -c 'import google.cloud.storage' >/dev/null 2>&1; then
        echo "[entrypoint] Installing google-cloud-storage client..."
        pip install --no-cache-dir google-cloud-storage
    fi
}

# Copy one object between GCS and the local filesystem.
#   gcs_copy download <gs-url> <local-file>
#   gcs_copy upload   <local-file> <gs-url>
# The transfer uses the google-cloud-storage client rather than the gsutil CLI:
# the CLI is not provided by the pip package this script installs, so calling it
# would abort the pipeline with "command not found".
gcs_copy() {
    ensure_gcs_client
    python - "$@" <<'PY'
import sys

from google.cloud import storage


def split_gs_url(url):
    """Split 'gs://bucket/path/to/object' into (bucket, object_name)."""
    path = url[len("gs://"):] if url.startswith("gs://") else url
    bucket_name, _, object_name = path.partition("/")
    if not bucket_name or not object_name:
        sys.exit(f"[entrypoint] Invalid GCS URL: {url!r}")
    return bucket_name, object_name


mode, src, dst = sys.argv[1:4]
client = storage.Client()

if mode == "download":
    bucket_name, object_name = split_gs_url(src)
    client.bucket(bucket_name).blob(object_name).download_to_filename(dst)
elif mode == "upload":
    bucket_name, object_name = split_gs_url(dst)
    client.bucket(bucket_name).blob(object_name).upload_from_filename(src)
else:
    sys.exit(f"[entrypoint] Unknown gcs_copy mode: {mode!r}")
PY
}

echo "[entrypoint] Starting deobfuscator training pipeline"

# Download data from GCS if a bucket is set and the data file is not already present.
if [ -n "$GCS_BUCKET" ] && [ ! -f "$DATA_PATH" ]; then
    echo "[entrypoint] Downloading training data from ${GCS_BUCKET}..."
    # The default /tmp always exists, but a custom DATA_PATH may point into a
    # directory that has not been created yet.
    mkdir -p "$(dirname "$DATA_PATH")"
    gcs_copy download "${GCS_BUCKET}/deobfuscation-data.jsonl" "$DATA_PATH"
fi

# Train.
echo "[entrypoint] Training model..."
python train-deobfuscator.py \
    --data "$DATA_PATH" \
    --output "$OUTPUT_DIR" \
    --epochs 30 \
    --batch-size 64 \
    --export-onnx

# Export to GGUF Q4 + RVF.
echo "[entrypoint] Exporting to GGUF Q4..."
python export-to-rvf.py \
    --checkpoint "${OUTPUT_DIR}/best_model.pt" \
    --output "${OUTPUT_DIR}/deobfuscator" \
    --quantize q4

# Upload to GCS if bucket is set.
if [ -n "$GCS_BUCKET" ]; then
    echo "[entrypoint] Uploading results to ${GCS_BUCKET}/models/deobfuscator/..."
    # Upload every regular file directly inside OUTPUT_DIR (sub-directories are
    # skipped, matching a non-recursive copy of "${OUTPUT_DIR}"/*).
    for artifact in "${OUTPUT_DIR}"/*; do
        [ -f "$artifact" ] || continue
        gcs_copy upload "$artifact" \
            "${GCS_BUCKET}/models/deobfuscator/$(basename "$artifact")"
    done
fi

echo "[entrypoint] Done."
ENTRYPOINT_SH
|
|
|
|
# Mark the entrypoint script written to /app/entrypoint.sh as executable.
RUN ["chmod", "+x", "/app/entrypoint.sh"]

# Default command: run the pipeline script. Because this is CMD rather than
# ENTRYPOINT, a command passed to `docker run <image> ...` replaces it.
CMD ["/app/entrypoint.sh"]
|