# ruvector/examples/google-cloud/cloudrun.yaml

# =============================================================================
# RuVector Cloud Run Service Configuration
# Multi-service deployment with GPU, Raft, and Replication support
# =============================================================================

# -----------------------------------------------------------------------------
# Benchmark Service (GPU-enabled)
# -----------------------------------------------------------------------------
# Cloud Run service that runs the RuVector image with RUVECTOR_MODE=benchmark
# on one NVIDIA L4 GPU. Scales between 0 and 10 instances.
apiVersion: serving.knative.dev/v1
kind: Service
metadata:
  name: ruvector-benchmark
  labels:
    app: ruvector
    component: benchmark
  annotations:
    run.googleapis.com/description: "RuVector GPU Benchmark Service"
    run.googleapis.com/launch-stage: BETA
spec:
  template:
    metadata:
      annotations:
        # Execution environment
        run.googleapis.com/execution-environment: gen2

        # Scaling Configuration
        autoscaling.knative.dev/minScale: "0"
        autoscaling.knative.dev/maxScale: "10"

        # Performance Configuration
        # CPU stays allocated outside of request handling.
        run.googleapis.com/cpu-throttling: "false"
        run.googleapis.com/startup-cpu-boost: "true"
    spec:
      containerConcurrency: 80
      timeoutSeconds: 3600
      # NOTE(review): Cloud Run documents this field as the service account's
      # email address; confirm the short name resolves in the target project.
      serviceAccountName: ruvector-sa
      # GPU Configuration: the GPU type is selected with a nodeSelector and
      # the GPU count with the nvidia.com/gpu resource limit below.
      nodeSelector:
        run.googleapis.com/accelerator: nvidia-l4
      containers:
        - name: ruvector
          # PROJECT_ID is a placeholder that must be replaced before deploying.
          image: gcr.io/PROJECT_ID/ruvector-benchmark:latest
          ports:
            - containerPort: 8080
          resources:
            limits:
              # An L4 GPU requires at least 4 CPU and 16Gi of memory.
              cpu: "4"
              memory: "16Gi"
              nvidia.com/gpu: "1"
          env:
            - name: RUVECTOR_GPU_ENABLED
              value: "true"
            - name: RUST_LOG
              value: "info"
            - name: RUVECTOR_MODE
              value: "benchmark"
          # All three probes poll GET /health on the serving port.
          startupProbe:
            httpGet:
              path: /health
              port: 8080
            initialDelaySeconds: 10
            periodSeconds: 10
            failureThreshold: 3
          livenessProbe:
            httpGet:
              path: /health
              port: 8080
            periodSeconds: 30
          readinessProbe:
            httpGet:
              path: /health
              port: 8080
            periodSeconds: 10

---
# -----------------------------------------------------------------------------
# Attention/GNN Service (High Memory GPU)
# -----------------------------------------------------------------------------
# Cloud Run service that runs the RuVector image with RUVECTOR_MODE=attention
# on one NVIDIA L4 GPU. Keeps at least 1 instance warm, up to 5.
apiVersion: serving.knative.dev/v1
kind: Service
metadata:
  name: ruvector-attention
  labels:
    app: ruvector
    component: attention
  annotations:
    run.googleapis.com/description: "RuVector Attention/GNN Inference Service"
spec:
  template:
    metadata:
      annotations:
        run.googleapis.com/execution-environment: gen2
        autoscaling.knative.dev/minScale: "1"
        autoscaling.knative.dev/maxScale: "5"
        # CPU stays allocated outside of request handling.
        run.googleapis.com/cpu-throttling: "false"
    spec:
      containerConcurrency: 20
      timeoutSeconds: 3600
      # GPU Configuration: the GPU type is selected with a nodeSelector and
      # the GPU count with the nvidia.com/gpu resource limit below.
      nodeSelector:
        run.googleapis.com/accelerator: nvidia-l4
      containers:
        - name: ruvector
          # Same image as the benchmark service; RUVECTOR_MODE selects the role.
          # PROJECT_ID is a placeholder that must be replaced before deploying.
          image: gcr.io/PROJECT_ID/ruvector-benchmark:latest
          ports:
            - containerPort: 8080
          resources:
            limits:
              cpu: "8"
              memory: "16Gi"
              nvidia.com/gpu: "1"
          env:
            - name: RUVECTOR_MODE
              value: "attention"
            # GNN settings passed to the container as strings.
            - name: RUVECTOR_GNN_LAYERS
              value: "3"
            - name: RUVECTOR_GNN_HEADS
              value: "8"
            - name: RUVECTOR_GNN_HIDDEN_DIM
              value: "512"
            - name: RUST_LOG
              value: "info"

---
# -----------------------------------------------------------------------------
# Raft Consensus Node (Stateful)
# -----------------------------------------------------------------------------
# Cloud Run service for a single Raft node (RUVECTOR_NODE_ID=0), pinned to
# exactly one instance via minScale = maxScale = 1.
apiVersion: serving.knative.dev/v1
kind: Service
metadata:
  name: ruvector-raft-node-1
  labels:
    app: ruvector
    component: raft
    # NOTE(review): the service name ends in "-1" while the node id is "0";
    # presumably names are 1-based and ids 0-based. Confirm.
    raft-node-id: "0"
  annotations:
    run.googleapis.com/description: "RuVector Raft Consensus Node"
spec:
  template:
    metadata:
      annotations:
        autoscaling.knative.dev/minScale: "1"
        autoscaling.knative.dev/maxScale: "1"
        # CPU stays allocated outside of request handling.
        run.googleapis.com/cpu-throttling: "false"
    spec:
      containerConcurrency: 100
      timeoutSeconds: 3600
      containers:
        - name: ruvector
          # PROJECT_ID is a placeholder that must be replaced before deploying.
          image: gcr.io/PROJECT_ID/ruvector-benchmark:latest
          ports:
            - containerPort: 8080
          resources:
            limits:
              cpu: "2"
              memory: "4Gi"
          env:
            - name: RUVECTOR_MODE
              value: "raft"
            - name: RUVECTOR_NODE_ID
              value: "0"
            - name: RUVECTOR_CLUSTER_SIZE
              value: "3"
            # NOTE(review): units are not stated here; presumably milliseconds.
            - name: RUVECTOR_RAFT_ELECTION_TIMEOUT
              value: "150"
            - name: RUVECTOR_RAFT_HEARTBEAT_INTERVAL
              value: "50"
            - name: RUST_LOG
              value: "info,raft=debug"
          volumeMounts:
            - name: raft-data
              mountPath: /data/raft
      volumes:
        # In-memory volume: contents are lost whenever the instance stops, so
        # this is NOT durable Raft storage. Its usage counts against the
        # container memory limit, so sizeLimit is kept below the 4Gi limit.
        - name: raft-data
          emptyDir:
            medium: Memory
            sizeLimit: "2Gi"

---
# -----------------------------------------------------------------------------
# Replication Primary Node
# -----------------------------------------------------------------------------
# Cloud Run service for the replication primary (RUVECTOR_MODE=primary) on one
# NVIDIA L4 GPU, pinned to exactly one instance via minScale = maxScale = 1.
apiVersion: serving.knative.dev/v1
kind: Service
metadata:
  name: ruvector-primary
  labels:
    app: ruvector
    component: replication
    role: primary
  annotations:
    run.googleapis.com/description: "RuVector Primary Node (Replication)"
spec:
  template:
    metadata:
      annotations:
        run.googleapis.com/execution-environment: gen2
        autoscaling.knative.dev/minScale: "1"
        autoscaling.knative.dev/maxScale: "1"
        # CPU stays allocated outside of request handling.
        run.googleapis.com/cpu-throttling: "false"
    spec:
      containerConcurrency: 100
      timeoutSeconds: 3600
      # GPU Configuration: the GPU type is selected with a nodeSelector and
      # the GPU count with the nvidia.com/gpu resource limit below.
      nodeSelector:
        run.googleapis.com/accelerator: nvidia-l4
      containers:
        - name: ruvector
          # PROJECT_ID is a placeholder that must be replaced before deploying.
          image: gcr.io/PROJECT_ID/ruvector-benchmark:latest
          ports:
            - containerPort: 8080
          resources:
            limits:
              # An L4 GPU requires at least 4 CPU and 16Gi of memory.
              cpu: "4"
              memory: "16Gi"
              nvidia.com/gpu: "1"
          env:
            - name: RUVECTOR_MODE
              value: "primary"
            - name: RUVECTOR_REPLICATION_FACTOR
              value: "3"
            - name: RUVECTOR_SYNC_MODE
              value: "async"
            - name: RUST_LOG
              value: "info"

---
# -----------------------------------------------------------------------------
# Replication Replica Node
# -----------------------------------------------------------------------------
# Cloud Run service for replication replicas (RUVECTOR_MODE=replica), each on
# one NVIDIA L4 GPU. Scales between 2 and 5 instances.
apiVersion: serving.knative.dev/v1
kind: Service
metadata:
  name: ruvector-replica
  labels:
    app: ruvector
    component: replication
    role: replica
  annotations:
    run.googleapis.com/description: "RuVector Replica Node (Replication)"
spec:
  template:
    metadata:
      annotations:
        run.googleapis.com/execution-environment: gen2
        autoscaling.knative.dev/minScale: "2"
        autoscaling.knative.dev/maxScale: "5"
        # CPU stays allocated outside of request handling.
        run.googleapis.com/cpu-throttling: "false"
    spec:
      containerConcurrency: 100
      timeoutSeconds: 3600
      # GPU Configuration: the GPU type is selected with a nodeSelector and
      # the GPU count with the nvidia.com/gpu resource limit below.
      nodeSelector:
        run.googleapis.com/accelerator: nvidia-l4
      containers:
        - name: ruvector
          # PROJECT_ID is a placeholder that must be replaced before deploying.
          image: gcr.io/PROJECT_ID/ruvector-benchmark:latest
          ports:
            - containerPort: 8080
          resources:
            limits:
              # An L4 GPU requires at least 4 CPU and 16Gi of memory.
              cpu: "4"
              memory: "16Gi"
              nvidia.com/gpu: "1"
          env:
            - name: RUVECTOR_MODE
              value: "replica"
            # Placeholder: replace with the deployed URL of the
            # ruvector-primary service before applying.
            - name: RUVECTOR_PRIMARY_URL
              value: "https://ruvector-primary-HASH.run.app"
            - name: RUST_LOG
              value: "info"

---
# -----------------------------------------------------------------------------
# Service Account
# -----------------------------------------------------------------------------
# Service account named ruvector-sa, declared as an IAMServiceAccount resource
# in the iam.cnrm.cloud.google.com API group.
# NOTE(review): this kind is not a Knative Service, so it presumably has to be
# applied separately from the services in this file. Confirm the deploy flow.
apiVersion: iam.cnrm.cloud.google.com/v1beta1
kind: IAMServiceAccount
metadata:
  name: ruvector-sa
spec:
  # Human-readable label for the account.
  displayName: RuVector Cloud Run Service Account