Pulse/pkg/metrics/store_query_plan_test.go
2026-04-10 19:09:12 +01:00

439 lines
16 KiB
Go

package metrics
import (
"database/sql"
"fmt"
"net/url"
"strings"
"testing"
_ "modernc.org/sqlite"
)
// TestQueryPlansUseIndexes guards the metrics store's critical SQL against
// index regressions. Every case is passed through EXPLAIN QUERY PLAN against
// the database built by newPlanTestDB, and the resulting plan text is checked
// for how it reaches the metrics table.
//
// Acceptance rules, from strictest to most lenient:
//   - default: at least one plan line must be a SEARCH on metrics.
//   - allowCoveringIndexScan: a SEARCH or a "SCAN ... USING COVERING INDEX"
//     on metrics is accepted.
//   - allowIndexScan: a SEARCH, a covering-index scan, or a
//     "SCAN ... USING INDEX" on metrics is accepted.
//
// Independently of those rules, a scan of metrics that names no index (a full
// table scan) always fails the case, and a non-empty wantIndex must appear on
// the same plan line as a SEARCH on metrics.
//
// NOTE: per the original authors, the schema and SQL are deliberately copied
// from store.go. When store.go changes, this table has to change with it or
// the test ends up validating stale SQL.
func TestQueryPlansUseIndexes(t *testing.T) {
	// farFuture is the upper timestamp bound used by the range predicates.
	// It is typed int64 so the literal is also valid on 32-bit platforms.
	const farFuture int64 = 9999999999

	// planCase describes one query and the access pattern its plan must show.
	type planCase struct {
		name  string
		query string
		args  []any
		// wantIndex, when set, must be named on a plan line that also
		// performs a SEARCH on the metrics table.
		wantIndex string
		// allowCoveringIndexScan additionally accepts
		// "SCAN metrics USING COVERING INDEX ...".
		allowCoveringIndexScan bool
		// allowIndexScan additionally accepts "SCAN metrics USING INDEX ..."
		// (an index-ordered scan that may still read table rows).
		allowIndexScan bool
	}

	cases := []planCase{
		{
			name: "single metric lookup",
			query: `SELECT timestamp, value, COALESCE(min_value, value), COALESCE(max_value, value)
FROM metrics
WHERE resource_type = ? AND resource_id = ? AND metric_type = ? AND tier = ?
AND timestamp >= ? AND timestamp <= ?
ORDER BY timestamp ASC`,
			args:      []any{"vm", "vm-1", "cpu", "raw", int64(0), farFuture},
			wantIndex: "idx_metrics_lookup",
		},
		{
			name: "single metric with downsampling",
			query: `SELECT
(timestamp / ?) * ? + (? / 2) as bucket_ts,
AVG(value),
MIN(COALESCE(min_value, value)),
MAX(COALESCE(max_value, value))
FROM metrics
WHERE resource_type = ? AND resource_id = ? AND metric_type = ? AND tier = ?
AND timestamp >= ? AND timestamp <= ?
GROUP BY bucket_ts
ORDER BY bucket_ts ASC`,
			args:      []any{int64(60), int64(60), int64(60), "vm", "vm-1", "cpu", "raw", int64(0), farFuture},
			wantIndex: "idx_metrics_lookup",
		},
		{
			name: "multi-metric lookup (QueryAll)",
			query: `SELECT metric_type, timestamp, value, COALESCE(min_value, value), COALESCE(max_value, value)
FROM metrics
WHERE resource_type = ? AND resource_id = ? AND tier = ?
AND timestamp >= ? AND timestamp <= ?
ORDER BY metric_type, timestamp ASC`,
			args: []any{"vm", "vm-1", "raw", int64(0), farFuture},
			// No wantIndex: depending on statistics the planner may pick
			// idx_metrics_query_all, idx_metrics_unique, or idx_metrics_lookup.
			// Any of them is fine as long as the access is a SEARCH.
		},
		{
			name: "multi-metric with downsampling (QueryAll)",
			query: `SELECT
metric_type,
(timestamp / ?) * ? + (? / 2) as bucket_ts,
AVG(value),
MIN(COALESCE(min_value, value)),
MAX(COALESCE(max_value, value))
FROM metrics
WHERE resource_type = ? AND resource_id = ? AND tier = ?
AND timestamp >= ? AND timestamp <= ?
GROUP BY metric_type, bucket_ts
ORDER BY metric_type, bucket_ts ASC`,
			args: []any{int64(60), int64(60), int64(60), "vm", "vm-1", "raw", int64(0), farFuture},
		},
		{
			name: "multi-resource multi-metric lookup (QueryAllBatch)",
			query: `SELECT resource_id, metric_type, timestamp, value, COALESCE(min_value, value), COALESCE(max_value, value)
FROM metrics
WHERE resource_type = ? AND resource_id IN (?, ?, ?) AND tier = ?
AND timestamp >= ? AND timestamp <= ?
ORDER BY resource_id, metric_type, timestamp ASC`,
			args: []any{"vm", "vm-1", "vm-2", "vm-3", "raw", int64(0), farFuture},
			// QueryAllBatch is the anti-N+1 dashboard path. As with QueryAll,
			// the chosen index may vary with statistics; only an indexed
			// SEARCH (rather than a full table scan) is required.
		},
		{
			name: "multi-resource multi-metric with downsampling (QueryAllBatch)",
			query: `SELECT
resource_id,
metric_type,
(timestamp / ?) * ? + (? / 2) as bucket_ts,
AVG(value),
MIN(COALESCE(min_value, value)),
MAX(COALESCE(max_value, value))
FROM metrics
WHERE resource_type = ? AND resource_id IN (?, ?, ?) AND tier = ?
AND timestamp >= ? AND timestamp <= ?
GROUP BY resource_id, metric_type, bucket_ts
ORDER BY resource_id, metric_type, bucket_ts ASC`,
			args: []any{int64(60), int64(60), int64(60), "vm", "vm-1", "vm-2", "vm-3", "raw", int64(0), farFuture},
		},
		{
			name: "multi-resource filtered-metric lookup (QueryMetricTypesBatch)",
			query: `SELECT resource_id, metric_type, timestamp, value, COALESCE(min_value, value), COALESCE(max_value, value)
FROM metrics
WHERE resource_type = ? AND resource_id IN (?, ?, ?) AND metric_type IN (?, ?) AND tier = ?
AND timestamp >= ? AND timestamp <= ?
ORDER BY resource_id, metric_type, timestamp ASC`,
			args: []any{"vm", "vm-1", "vm-2", "vm-3", "cpu", "memory", "raw", int64(0), farFuture},
		},
		{
			name:      "retention delete by tier+time",
			query:     `DELETE FROM metrics WHERE tier = ? AND timestamp < ?`,
			args:      []any{"raw", farFuture},
			wantIndex: "idx_metrics_tier_time",
		},
		{
			name: "rollup candidate discovery (legacy per-candidate path)",
			query: `SELECT DISTINCT resource_type, resource_id, metric_type
FROM metrics
WHERE tier = ? AND timestamp >= ? AND timestamp < ?`,
			args: []any{"raw", int64(0), farFuture},
			// The filter columns (tier, timestamp) do not lead most of the
			// indexes, so a covering-index scan that reads only the index
			// and never the table is an acceptable plan here.
			allowCoveringIndexScan: true,
		},
		{
			name: "rollup aggregation insert (legacy per-candidate path)",
			query: `INSERT OR IGNORE INTO metrics (resource_type, resource_id, metric_type, value, min_value, max_value, timestamp, tier)
SELECT
resource_type, resource_id, metric_type,
AVG(value) as value,
MIN(value) as min_value,
MAX(value) as max_value,
(timestamp / ?) * ? as bucket_ts,
?
FROM metrics
WHERE resource_type = ? AND resource_id = ? AND metric_type = ?
AND tier = ? AND timestamp >= ? AND timestamp < ?
GROUP BY resource_type, resource_id, metric_type, bucket_ts`,
			args:      []any{int64(60), int64(60), "minute", "vm", "vm-1", "cpu", "raw", int64(0), farFuture},
			wantIndex: "idx_metrics_lookup",
		},
		{
			name: "batched rollup aggregation insert",
			query: `INSERT OR IGNORE INTO metrics (resource_type, resource_id, metric_type, value, min_value, max_value, timestamp, tier)
SELECT
resource_type,
resource_id,
metric_type,
AVG(value) as value,
MIN(value) as min_value,
MAX(value) as max_value,
(timestamp / ?) * ? as bucket_ts,
?
FROM metrics
WHERE tier = ? AND timestamp >= ? AND timestamp < ?
GROUP BY resource_type, resource_id, metric_type, bucket_ts`,
			args: []any{int64(60), int64(60), "minute", "raw", int64(0), farFuture},
			// The SELECT filters only on (tier, timestamp). The original
			// authors observed SQLite answering it with an index-ordered
			// scan (SCAN USING INDEX) on idx_metrics_unique, possibly with a
			// TEMP B-TREE for the GROUP BY, while the INSERT side uses
			// idx_metrics_unique for conflict handling.
			allowIndexScan: true,
		},
		{
			name:      "max timestamp for tier (rollup scheduling)",
			query:     `SELECT MAX(timestamp) FROM metrics WHERE tier = ?`,
			args:      []any{"raw"},
			wantIndex: "idx_metrics_tier_time",
		},
		{
			name:  "tier count stats (GetStats)",
			query: `SELECT tier, COUNT(*) FROM metrics GROUP BY tier`,
			args:  []any{},
			// GROUP BY tier visits every row; a covering-index scan on
			// idx_metrics_tier_time (tier, timestamp) reads only the index
			// and avoids fetching table rows. A SEARCH is accepted too.
			allowCoveringIndexScan: true,
		},
	}

	db := newPlanTestDB(t)

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			plan := explainQueryPlan(t, db, tc.query, tc.args)

			// An index-less scan of metrics is never acceptable.
			if containsFullTableScan(plan) {
				t.Errorf("query uses full table scan on metrics; expected indexed access\nPlan:\n%s", plan)
			}

			switch {
			case tc.allowIndexScan:
				// SEARCH, covering-index scan, or index-ordered scan.
				if !(containsMetricsSearch(plan) || containsCoveringIndexScan(plan) || containsIndexScan(plan)) {
					t.Errorf("query plan has neither SEARCH on metrics nor index scan\nPlan:\n%s", plan)
				}
			case tc.allowCoveringIndexScan:
				// SEARCH or covering-index scan.
				if !(containsMetricsSearch(plan) || containsCoveringIndexScan(plan)) {
					t.Errorf("query plan has neither SEARCH on metrics nor covering-index scan\nPlan:\n%s", plan)
				}
			default:
				// Strict mode: a SEARCH on the metrics table itself. Scoping
				// to "metrics" keeps SEARCH nodes that only serve constraint
				// handling in INSERT plans from producing a false pass.
				if !containsMetricsSearch(plan) {
					t.Errorf("query plan does not contain SEARCH on metrics table; may not be using an index\nPlan:\n%s", plan)
				}
			}

			if tc.wantIndex == "" {
				return
			}
			// The expected index must sit on the very line that SEARCHes
			// metrics; otherwise one node could supply the SEARCH and an
			// unrelated node the index name.
			if !containsMetricsSearchWithIndex(plan, tc.wantIndex) {
				t.Errorf("expected SEARCH on metrics using index %q not found in plan\nPlan:\n%s", tc.wantIndex, plan)
			}
		})
	}
}
// lineRefersToMetrics reports whether a plan line names the "metrics" table
// as a standalone identifier. SQLite may emit "SEARCH metrics USING ...",
// "SEARCH TABLE metrics ...", "SCAN metrics ...", or "SCAN TABLE metrics ...".
//
// An occurrence of "metrics" counts only when the bytes on both sides are not
// identifier bytes (ASCII letters, digits, '_', or any non-ASCII byte) or the
// occurrence touches the start/end of the line. As a result:
//   - "metrics_meta" never matches (followed by '_');
//   - index names such as "idx_metrics_lookup" or a trailing "idx_metrics"
//     never match (preceded by '_');
//   - other tables whose names merely end in "metrics" (e.g. "raw_metrics")
//     never match;
//   - a line that mentions both "metrics" and "metrics_meta" still matches on
//     the standalone "metrics" occurrence.
func lineRefersToMetrics(line string) bool {
	const table = "metrics"

	// isIdentByte reports whether c can be part of an unquoted identifier.
	// Non-ASCII bytes are treated as identifier bytes to stay conservative.
	isIdentByte := func(c byte) bool {
		return c == '_' || c >= 0x80 ||
			('0' <= c && c <= '9') ||
			('a' <= c && c <= 'z') ||
			('A' <= c && c <= 'Z')
	}

	for offset := 0; offset < len(line); {
		i := strings.Index(line[offset:], table)
		if i < 0 {
			break
		}
		start := offset + i
		end := start + len(table)

		leftOK := start == 0 || !isIdentByte(line[start-1])
		rightOK := end == len(line) || !isIdentByte(line[end])
		if leftOK && rightOK {
			return true
		}
		// Not a standalone match; keep looking further along the line.
		offset = start + 1
	}
	return false
}
// containsMetricsSearch reports whether at least one line of the plan is a
// SEARCH step that targets the metrics table, for example
// "SEARCH metrics USING INDEX ..." or "SEARCH TABLE metrics USING INDEX ...".
// The plan is examined line by line; the SEARCH keyword and the table
// reference must occur on the same line.
func containsMetricsSearch(plan string) bool {
	steps := strings.Split(plan, "\n")
	for i := range steps {
		if !strings.Contains(steps[i], "SEARCH") {
			continue
		}
		if lineRefersToMetrics(steps[i]) {
			return true
		}
	}
	return false
}
// containsMetricsSearchWithIndex reports whether some single plan line is a
// SEARCH on the metrics table and also mentions indexName. Requiring all
// three on one line ties the index to the metrics read path instead of
// letting a different plan node contribute the index name.
func containsMetricsSearchWithIndex(plan, indexName string) bool {
	steps := strings.Split(plan, "\n")
	for i := range steps {
		step := steps[i]
		if !strings.Contains(step, "SEARCH") {
			continue
		}
		if !lineRefersToMetrics(step) {
			continue
		}
		if strings.Contains(step, indexName) {
			return true
		}
	}
	return false
}
// containsFullTableScan reports whether the plan has a SCAN step on the
// metrics table that is not qualified by an index, i.e. "SCAN TABLE metrics"
// or "SCAN metrics" on a line that lacks both the words "USING" and "INDEX".
// Scan lines that do carry "USING" and "INDEX" (covering or not) are not
// treated as full table scans.
func containsFullTableScan(plan string) bool {
	for _, step := range strings.Split(plan, "\n") {
		scansMetrics := strings.Contains(step, "SCAN") && lineRefersToMetrics(step)
		if !scansMetrics {
			continue
		}
		// An index-backed scan is tolerated; anything else is a bare scan.
		viaIndex := strings.Contains(step, "USING") && strings.Contains(step, "INDEX")
		if !viaIndex {
			return true
		}
	}
	return false
}
// containsCoveringIndexScan reports whether some plan line is a SCAN on the
// metrics table that contains the text "COVERING INDEX", such as
// "SCAN metrics USING COVERING INDEX ...".
func containsCoveringIndexScan(plan string) bool {
	steps := strings.Split(plan, "\n")
	for i := range steps {
		step := steps[i]
		if !strings.Contains(step, "SCAN") || !lineRefersToMetrics(step) {
			continue
		}
		if strings.Contains(step, "COVERING INDEX") {
			return true
		}
	}
	return false
}
// containsIndexScan reports whether some plan line is a SCAN on the metrics
// table that also contains both "USING" and "INDEX", for example
// "SCAN metrics USING INDEX idx_metrics_unique". Such a scan walks the table
// in index order rather than performing an index-less full scan.
//
// Because the check only looks for the two words, a covering-index line
// ("... USING COVERING INDEX ...") satisfies this predicate as well.
func containsIndexScan(plan string) bool {
	for _, step := range strings.Split(plan, "\n") {
		if !strings.Contains(step, "SCAN") {
			continue
		}
		if !lineRefersToMetrics(step) {
			continue
		}
		if strings.Contains(step, "USING") && strings.Contains(step, "INDEX") {
			return true
		}
	}
	return false
}
// newPlanTestDB returns an in-memory SQLite database prepared for query-plan
// assertions: the metrics and metrics_meta tables plus the four metrics
// indexes are created, 100 seed rows are inserted, and ANALYZE is run so the
// planner works from real statistics. According to the original authors the
// schema mirrors store.initSchema().
//
// Any setup failure aborts the calling test via t.Fatalf. The database is
// closed automatically through t.Cleanup.
func newPlanTestDB(t *testing.T) *sql.DB {
	t.Helper()

	pragmas := url.Values{}
	pragmas.Add("_pragma", "busy_timeout(5000)")
	pragmas.Add("_pragma", "journal_mode(WAL)")

	db, err := sql.Open("sqlite", ":memory:?"+pragmas.Encode())
	if err != nil {
		t.Fatalf("open in-memory db: %v", err)
	}
	// A single pooled connection is mandatory: every additional connection
	// to :memory: would see its own empty database and fail with
	// "no such table".
	db.SetMaxOpenConns(1)
	t.Cleanup(func() { db.Close() })

	const schema = `
CREATE TABLE IF NOT EXISTS metrics (
id INTEGER PRIMARY KEY AUTOINCREMENT,
resource_type TEXT NOT NULL,
resource_id TEXT NOT NULL,
metric_type TEXT NOT NULL,
value REAL NOT NULL,
min_value REAL,
max_value REAL,
timestamp INTEGER NOT NULL,
tier TEXT NOT NULL DEFAULT 'raw'
);
CREATE INDEX IF NOT EXISTS idx_metrics_lookup
ON metrics(resource_type, resource_id, metric_type, tier, timestamp);
CREATE INDEX IF NOT EXISTS idx_metrics_tier_time
ON metrics(tier, timestamp);
CREATE INDEX IF NOT EXISTS idx_metrics_query_all
ON metrics(resource_type, resource_id, tier, timestamp, metric_type);
CREATE UNIQUE INDEX IF NOT EXISTS idx_metrics_unique
ON metrics(resource_type, resource_id, metric_type, timestamp, tier);
CREATE TABLE IF NOT EXISTS metrics_meta (
key TEXT PRIMARY KEY,
value TEXT NOT NULL
);
`
	if _, err := db.Exec(schema); err != nil {
		t.Fatalf("create schema: %v", err)
	}

	// Seed a modest data set: 100 "cpu" samples in the "raw" tier spread over
	// ten VM ids, spaced five units apart in time. With an empty table the
	// planner may settle on different plans.
	const seedInsert = `INSERT INTO metrics (resource_type, resource_id, metric_type, value, timestamp, tier) VALUES (?, ?, ?, ?, ?, ?)`
	for n := 0; n < 100; n++ {
		resourceID := fmt.Sprintf("vm-%d", n%10)
		ts := int64(1000000 + n*5)
		if _, err := db.Exec(seedInsert, "vm", resourceID, "cpu", float64(n), ts, "raw"); err != nil {
			t.Fatalf("seed data: %v", err)
		}
	}

	// Gather statistics so plan choices reflect the seeded data.
	if _, err := db.Exec("ANALYZE"); err != nil {
		t.Fatalf("analyze: %v", err)
	}
	return db
}
// explainQueryPlan prefixes query with "EXPLAIN QUERY PLAN", executes it with
// args on db, and returns the detail column of every result row joined by
// newlines. Each row is scanned as three integers followed by the detail
// text; the integers are read but not used.
//
// A failure to run the statement, scan a row, or finish iterating aborts the
// calling test via t.Fatalf.
func explainQueryPlan(t *testing.T, db *sql.DB, query string, args []any) string {
	t.Helper()

	rows, err := db.Query("EXPLAIN QUERY PLAN "+query, args...)
	if err != nil {
		t.Fatalf("EXPLAIN QUERY PLAN: %v\nQuery: %s", err, query)
	}
	defer rows.Close()

	var plan strings.Builder
	for n := 0; rows.Next(); n++ {
		var (
			id, parent, unused int
			detail             string
		)
		if err := rows.Scan(&id, &parent, &unused, &detail); err != nil {
			t.Fatalf("scan explain row: %v", err)
		}
		// Separate rows with a newline, but never lead with one.
		if n > 0 {
			plan.WriteByte('\n')
		}
		plan.WriteString(detail)
	}
	if err := rows.Err(); err != nil {
		t.Fatalf("explain rows: %v", err)
	}
	return plan.String()
}