mirror of
https://github.com/rcourtman/Pulse.git
synced 2026-04-28 03:20:11 +00:00
Update docs and helm chart for agent health endpoints
- Add health-addr config option to UNIFIED_AGENT.md
- Document /healthz, /readyz, /metrics endpoints
- Add Kubernetes probe examples to docs
- Add liveness/readiness probes to helm chart agent template
- Add healthPort, livenessProbe, readinessProbe to values.yaml
- Update values.schema.json with new agent probe options
This commit is contained in:
parent
7fc15417e4
commit
da43588189
4 changed files with 133 additions and 0 deletions
|
|
@ -96,6 +96,26 @@ spec:
|
|||
resources:
|
||||
{{- toYaml . | nindent 12 }}
|
||||
{{- end }}
|
||||
{{- /*
Liveness probe for the agent container.
Rendered only when agent.livenessProbe.enabled is true. Issues an HTTP GET
against agent.livenessProbe.path on agent.healthPort. The path is piped
through `quote` so that a value starting with a YAML indicator, or an empty
value, still renders as a string.
*/}}
{{- if .Values.agent.livenessProbe.enabled }}
livenessProbe:
  httpGet:
    path: {{ .Values.agent.livenessProbe.path | quote }}
    port: {{ .Values.agent.healthPort }}
  initialDelaySeconds: {{ .Values.agent.livenessProbe.initialDelaySeconds }}
  periodSeconds: {{ .Values.agent.livenessProbe.periodSeconds }}
  timeoutSeconds: {{ .Values.agent.livenessProbe.timeoutSeconds }}
  failureThreshold: {{ .Values.agent.livenessProbe.failureThreshold }}
{{- end }}
|
||||
{{- /*
Readiness probe for the agent container.
Rendered only when agent.readinessProbe.enabled is true. Issues an HTTP GET
against agent.readinessProbe.path on agent.healthPort. The path is piped
through `quote` so that a value starting with a YAML indicator, or an empty
value, still renders as a string.
*/}}
{{- if .Values.agent.readinessProbe.enabled }}
readinessProbe:
  httpGet:
    path: {{ .Values.agent.readinessProbe.path | quote }}
    port: {{ .Values.agent.healthPort }}
  initialDelaySeconds: {{ .Values.agent.readinessProbe.initialDelaySeconds }}
  periodSeconds: {{ .Values.agent.readinessProbe.periodSeconds }}
  timeoutSeconds: {{ .Values.agent.readinessProbe.timeoutSeconds }}
  failureThreshold: {{ .Values.agent.readinessProbe.failureThreshold }}
{{- end }}
|
||||
{{- if or .Values.agent.dockerSocket.enabled .Values.agent.extraVolumes }}
|
||||
volumes:
|
||||
{{- if .Values.agent.dockerSocket.enabled }}
|
||||
|
|
|
|||
|
|
@ -159,6 +159,62 @@
|
|||
"type": "string",
|
||||
"enum": ["DaemonSet", "Deployment"],
|
||||
"description": "Agent deployment type"
|
||||
},
|
||||
"healthPort": {
  "description": "Health/metrics server port",
  "type": "integer",
  "minimum": 1,
  "maximum": 65535
},
|
||||
"livenessProbe": {
  "type": "object",
  "description": "HTTP liveness probe settings for the agent container",
  "properties": {
    "enabled": {
      "type": "boolean",
      "description": "Enable liveness probe"
    },
    "path": {
      "type": "string",
      "description": "Liveness probe path"
    },
    "initialDelaySeconds": {
      "type": "integer",
      "minimum": 0,
      "description": "Seconds to wait after container start before the first liveness probe"
    },
    "periodSeconds": {
      "type": "integer",
      "minimum": 0,
      "description": "Seconds between liveness probes"
    },
    "timeoutSeconds": {
      "type": "integer",
      "minimum": 0,
      "description": "Seconds after which a liveness probe times out"
    },
    "failureThreshold": {
      "type": "integer",
      "minimum": 0,
      "description": "Consecutive liveness probe failures before the container is restarted"
    }
  }
},
|
||||
"readinessProbe": {
  "type": "object",
  "description": "HTTP readiness probe settings for the agent container",
  "properties": {
    "enabled": {
      "type": "boolean",
      "description": "Enable readiness probe"
    },
    "path": {
      "type": "string",
      "description": "Readiness probe path"
    },
    "initialDelaySeconds": {
      "type": "integer",
      "minimum": 0,
      "description": "Seconds to wait after container start before the first readiness probe"
    },
    "periodSeconds": {
      "type": "integer",
      "minimum": 0,
      "description": "Seconds between readiness probes"
    },
    "timeoutSeconds": {
      "type": "integer",
      "minimum": 0,
      "description": "Seconds after which a readiness probe times out"
    },
    "failureThreshold": {
      "type": "integer",
      "minimum": 0,
      "description": "Consecutive readiness probe failures before the pod is marked not ready"
    }
  }
}
|
||||
}
|
||||
},
|
||||
|
|
|
|||
|
|
@ -138,6 +138,21 @@ agent:
|
|||
hostPathType: Socket
|
||||
extraVolumes: []
|
||||
extraVolumeMounts: []
|
||||
# Health/metrics server port; the liveness and readiness probes target this port.
healthPort: 9191
|
||||
# Liveness probe: HTTP GET against `path` on `healthPort`.
livenessProbe:
  # Set to false to omit the probe from the agent pod spec.
  enabled: true
  path: /healthz
  # Seconds to wait after container start before the first probe.
  initialDelaySeconds: 5
  # Seconds between probes.
  periodSeconds: 10
  # Seconds after which a single probe attempt times out.
  timeoutSeconds: 3
  # Consecutive failures before the container is restarted.
  failureThreshold: 3
|
||||
# Readiness probe: HTTP GET against `path` on `healthPort`.
readinessProbe:
  # Set to false to omit the probe from the agent pod spec.
  enabled: true
  path: /readyz
  # Seconds to wait after container start before the first probe.
  initialDelaySeconds: 5
  # Seconds between probes.
  periodSeconds: 5
  # Seconds after which a single probe attempt times out.
  timeoutSeconds: 3
  # Consecutive failures before the pod is marked not ready.
  failureThreshold: 3
|
||||
|
||||
# Monitoring configuration
|
||||
monitoring:
|
||||
|
|
|
|||
|
|
@ -45,6 +45,7 @@ curl -fsSL http://<pulse-ip>:7655/install.sh | \
|
|||
| `--insecure` | `PULSE_INSECURE_SKIP_VERIFY` | Skip TLS verification | `false` |
|
||||
| `--hostname` | `PULSE_HOSTNAME` | Override hostname | *(OS hostname)* |
|
||||
| `--agent-id` | `PULSE_AGENT_ID` | Unique agent identifier | *(machine-id)* |
|
||||
| `--health-addr` | `PULSE_HEALTH_ADDR` | Health/metrics server address | `:9191` |
|
||||
|
||||
## Installation Options
|
||||
|
||||
|
|
@ -105,6 +106,47 @@ The install script automatically removes legacy agents when installing the unifi
|
|||
|
||||
No manual cleanup is required.
|
||||
|
||||
## Health Checks & Metrics
|
||||
|
||||
The agent exposes HTTP endpoints for health checks and Prometheus metrics on port 9191 by default.
|
||||
|
||||
### Endpoints
|
||||
|
||||
| Endpoint | Description |
|
||||
|----------|-------------|
|
||||
| `/healthz` | Liveness probe - returns 200 if the agent is running |
|
||||
| `/readyz` | Readiness probe - returns 200 when agents are initialized |
|
||||
| `/metrics` | Prometheus metrics |
|
||||
|
||||
### Prometheus Metrics
|
||||
|
||||
| Metric | Type | Description |
|
||||
|--------|------|-------------|
|
||||
| `pulse_agent_info` | Gauge | Agent info with version, host_enabled, docker_enabled labels |
|
||||
| `pulse_agent_up` | Gauge | 1 when running, 0 when shutting down |
|
||||
|
||||
### Kubernetes Probes
|
||||
|
||||
```yaml
livenessProbe:
  httpGet:
    path: /healthz
    port: 9191
  initialDelaySeconds: 5
  periodSeconds: 10

readinessProbe:
  httpGet:
    path: /readyz
    port: 9191
  initialDelaySeconds: 5
  periodSeconds: 5
```
|
||||
|
||||
### Disable Health Server
|
||||
|
||||
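Set `--health-addr=""` or `PULSE_HEALTH_ADDR=""` to disable the health/metrics server. When the server is disabled, also disable any Kubernetes liveness/readiness probes that target it (in the Helm chart: `agent.livenessProbe.enabled=false` and `agent.readinessProbe.enabled=false`); otherwise the probes will fail.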
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Agent Not Updating
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue