flush sqlite wal on graceful shutdown

This commit is contained in:
R.D. 2026-04-01 20:14:31 -04:00
parent b82f26366c
commit 69bbc0a2a1
9 changed files with 128 additions and 13 deletions

View file

@ -979,6 +979,7 @@ OmniRoute is available as a public Docker image on [Docker Hub](https://hub.dock
docker run -d \
--name omniroute \
--restart unless-stopped \
--stop-timeout 40 \
-p 20128:20128 \
-v omniroute-data:/app/data \
diegosouzapw/omniroute:latest
@ -993,6 +994,7 @@ cp .env.example .env
docker run -d \
--name omniroute \
--restart unless-stopped \
--stop-timeout 40 \
--env-file .env \
-p 20128:20128 \
-v omniroute-data:/app/data \
@ -1016,6 +1018,8 @@ Notes:
- Quick Tunnel URLs are temporary and change after every restart.
- Managed install currently supports Linux, macOS, and Windows on `x64` / `arm64`.
- Docker images bundle system CA roots and pass them to managed `cloudflared`, which avoids TLS trust failures when the tunnel bootstraps inside the container.
- SQLite runs in WAL mode. `docker stop` should be allowed to finish so OmniRoute can checkpoint the latest changes back into `storage.sqlite`.
- The bundled Compose files already set a 40s stop grace period. If you run the image directly, keep `--stop-timeout 40` (or similar) so manual stops do not cut off shutdown cleanup.
- Set `CLOUDFLARED_BIN=/absolute/path/to/cloudflared` if you want OmniRoute to use an existing binary instead of downloading one.
**Using Docker Compose with Caddy (HTTPS Auto-TLS):**

View file

@ -19,6 +19,7 @@ services:
target: runner-cli
image: omniroute:prod
restart: unless-stopped
stop_grace_period: 40s
env_file: .env
environment:
- NODE_ENV=production

View file

@ -17,6 +17,7 @@
x-common: &common
restart: unless-stopped
stop_grace_period: 40s
env_file: .env
environment:
- DATA_DIR=/app/data # Must match the volume mount below

View file

@ -983,6 +983,7 @@ OmniRoute is available as a public Docker image on [Docker Hub](https://hub.dock
docker run -d \
--name omniroute \
--restart unless-stopped \
--stop-timeout 40 \
-p 20128:20128 \
-v omniroute-data:/app/data \
diegosouzapw/omniroute:latest
@ -997,6 +998,7 @@ cp .env.example .env
docker run -d \
--name omniroute \
--restart unless-stopped \
--stop-timeout 40 \
--env-file .env \
-p 20128:20128 \
-v omniroute-data:/app/data \
@ -1020,6 +1022,8 @@ Notes:
- Quick Tunnel URLs are temporary and change after every restart.
- Managed install currently supports Linux, macOS, and Windows on `x64` / `arm64`.
- Docker images bundle system CA roots and pass them to managed `cloudflared`, which avoids TLS trust failures when the tunnel bootstraps inside the container.
- SQLite uses WAL mode. Let `docker stop` finish cleanly so OmniRoute can checkpoint the latest changes back into `storage.sqlite`.
- The bundled Compose files already use a 40s stop grace period. If you run the image directly, keep `--stop-timeout 40` (or similar) so manual stops do not interrupt shutdown cleanup.
- Set `CLOUDFLARED_BIN=/absolute/path/to/cloudflared` if you want OmniRoute to use an existing binary instead of downloading one.
**Using Docker Compose with Caddy (HTTPS Auto-TLS):**

View file

@ -1,9 +1,9 @@
import { NextResponse } from "next/server";
export async function POST() {
// Graceful restart: exit with code 0 so the process manager (pm2/systemd) restarts
// Graceful restart: SIGTERM flows through the shutdown handler before the process manager restarts
setTimeout(() => {
process.exit(0);
process.kill(process.pid, "SIGTERM");
}, 500);
return NextResponse.json({ status: "restarting" });

View file

@ -4,7 +4,7 @@ export async function POST() {
const response = NextResponse.json({ success: true, message: "Shutting down..." });
setTimeout(() => {
process.exit(0);
process.kill(process.pid, "SIGTERM");
}, 500);
return response;

View file

@ -12,6 +12,7 @@ import { runMigrations } from "./migrationRunner";
type SqliteDatabase = import("better-sqlite3").Database;
type JsonRecord = Record<string, unknown>;
type CheckpointMode = "PASSIVE" | "FULL" | "RESTART" | "TRUNCATE";
// ──────────────── Environment Detection ────────────────
@ -323,6 +324,12 @@ function setDb(db: SqliteDatabase | null): void {
}
}
/**
 * Run `PRAGMA wal_checkpoint(<mode>)` on the given connection.
 *
 * @param db - Connection on which the checkpoint pragma is executed.
 * @param mode - Checkpoint mode interpolated into the pragma; defaults to "TRUNCATE".
 * @returns `true` when the pragma ran and SQLite did not report it as blocked;
 *   `false` when the checkpoint was skipped (`isCloud` or `isBuildPhase` is set,
 *   or `SQLITE_FILE` is empty) or when SQLite reported the checkpoint as busy.
 */
function checkpointDb(db: SqliteDatabase, mode: CheckpointMode = "TRUNCATE"): boolean {
  if (isCloud || isBuildPhase || !SQLITE_FILE) return false;

  // wal_checkpoint does not throw when it is blocked: it returns one row whose
  // first column (`busy`) is 1 if a FULL/RESTART/TRUNCATE checkpoint could not
  // complete. `simple: true` yields exactly that first column.
  const busy: unknown = db.pragma(`wal_checkpoint(${mode})`, { simple: true });

  // Number() also normalizes a bigint result (connections using safe integers).
  if (Number(busy) === 1) {
    console.warn(`[DB] SQLite WAL checkpoint (${mode}) was blocked and did not complete.`);
    return false;
  }
  return true;
}
function ensureProviderConnectionsColumns(db: SqliteDatabase) {
try {
const columns = db.prepare("PRAGMA table_info(provider_connections)").all() as Array<{
@ -523,15 +530,39 @@ export function getDbInstance(): SqliteDatabase {
return db;
}
/**
 * Checkpoint (optionally) and close the database instance returned by `getDb()`,
 * then clear the stored instance via `setDb(null)`.
 *
 * @param options - Optional settings.
 *   `checkpointMode`: WAL checkpoint mode to run before closing. Omit it to use
 *   "TRUNCATE"; pass `null` to close without checkpointing.
 * @returns `false` when there is no instance to close, `true` otherwise.
 * @throws Whatever `db.close()` throws; the stored instance is cleared regardless.
 */
export function closeDbInstance(options?: { checkpointMode?: CheckpointMode | null }): boolean {
  const db = getDb();
  if (!db) return false;

  // Default only when the option is absent. Using `??` here would also replace
  // an explicit `null`, making the "skip checkpoint" opt-out impossible.
  const requestedMode = options?.checkpointMode;
  const checkpointMode: CheckpointMode | null =
    requestedMode === undefined ? "TRUNCATE" : requestedMode;

  if (checkpointMode !== null) {
    // Best effort: a failed checkpoint must not prevent the connection from closing.
    try {
      if (checkpointDb(db, checkpointMode)) {
        console.log(`[DB] SQLite WAL checkpoint completed (${checkpointMode}).`);
      }
    } catch (error: unknown) {
      const message = error instanceof Error ? error.message : String(error);
      console.warn(`[DB] WAL checkpoint failed during close (${checkpointMode}):`, message);
    }
  }

  try {
    if (db.open) db.close();
  } finally {
    // Drop the reference even if close() throws so a stale handle is never reused.
    setDb(null);
  }
  return true;
}
/**
 * Reset the singleton (used by restore).
 *
 * Delegates entirely to `closeDbInstance()`, which runs its WAL checkpoint
 * before closing. Closing the handle directly here first would leave
 * `closeDbInstance()` with no instance to act on, so no checkpoint would run.
 */
export function resetDbInstance(): void {
  closeDbInstance();
}
// ──────────────── JSON → SQLite Migration ────────────────

View file

@ -96,11 +96,9 @@ async function waitForDrain(): Promise<void> {
*/
async function cleanup(): Promise<void> {
try {
const { getDbInstance } = await import("@/lib/db/core");
const db = getDbInstance();
if (db && typeof db.close === "function") {
db.close();
console.log("[Shutdown] SQLite database closed.");
const { closeDbInstance } = await import("@/lib/db/core");
if (closeDbInstance()) {
console.log("[Shutdown] SQLite database checkpointed and closed.");
}
} catch (err) {
console.error("[Shutdown] Error during cleanup:", (err as Error).message);

View file

@ -19,6 +19,8 @@ const proxyFetch = await import("../../open-sse/utils/proxyFetch.ts");
const proxyDispatcher = await import("../../open-sse/utils/proxyDispatcher.ts");
const proxySettingsRoute = await import("../../src/app/api/settings/proxy/route.ts");
const proxyTestRoute = await import("../../src/app/api/settings/proxy/test/route.ts");
const shutdownRoute = await import("../../src/app/api/shutdown/route.ts");
const restartRoute = await import("../../src/app/api/restart/route.ts");
async function withEnv(name, value, fn) {
const previous = process.env[name];
@ -141,6 +143,80 @@ test(
}
);
// Verifies that a row written before closeDbInstance() is readable from a copy
// of the main SQLite file alone (no -wal / -shm files are copied).
test("closeDbInstance checkpoints WAL changes into the primary SQLite file", async () => {
  await resetStorage();

  const liveDb = core.getDbInstance();
  const timestamp = new Date().toISOString();
  const insertConnection = liveDb.prepare(
    "INSERT INTO provider_connections (id, provider, auth_type, name, is_active, created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?)"
  );
  insertConnection.run(
    "checkpoint-test-conn",
    "openai",
    "apikey",
    "checkpoint-test",
    1,
    timestamp,
    timestamp
  );

  core.closeDbInstance();

  // Snapshot only the primary database file after the close.
  const copyPath = path.join(TEST_DATA_DIR, "storage-snapshot.sqlite");
  fs.copyFileSync(core.SQLITE_FILE, copyPath);

  const { default: Database } = await import("better-sqlite3");
  const copyDb = new Database(copyPath, { readonly: true });
  try {
    const lookup = copyDb.prepare("SELECT name FROM provider_connections WHERE id = ?");
    const found = lookup.get("checkpoint-test-conn");
    assert.equal(found?.name, "checkpoint-test");
  } finally {
    copyDb.close();
  }
});
// Stubs process.kill and setTimeout so the shutdown route's deferred signal is
// delivered synchronously and recorded instead of actually signalling the process.
test("shutdown route uses SIGTERM for graceful shutdown", async () => {
  const realKill = process.kill;
  const realSetTimeout = globalThis.setTimeout;
  const killCalls = [];

  process.kill = (pid, signal) => {
    killCalls.push({ pid, signal });
    return true;
  };
  // Run the scheduled callback immediately; the delay argument is ignored.
  globalThis.setTimeout = (fn) => {
    fn();
    return 0;
  };

  try {
    const res = await shutdownRoute.POST();
    assert.equal(res.status, 200);
    assert.deepEqual(killCalls, [{ pid: process.pid, signal: "SIGTERM" }]);
  } finally {
    // Always restore the real globals so later tests are unaffected.
    process.kill = realKill;
    globalThis.setTimeout = realSetTimeout;
  }
});
// Stubs process.kill and setTimeout so the restart route's deferred signal is
// delivered synchronously and recorded instead of actually signalling the process.
test("restart route uses SIGTERM for graceful restart", async () => {
  const realKill = process.kill;
  const realSetTimeout = globalThis.setTimeout;
  const killCalls = [];

  process.kill = (pid, signal) => {
    killCalls.push({ pid, signal });
    return true;
  };
  // Run the scheduled callback immediately; the delay argument is ignored.
  globalThis.setTimeout = (fn) => {
    fn();
    return 0;
  };

  try {
    const res = await restartRoute.POST();
    assert.equal(res.status, 200);
    assert.deepEqual(killCalls, [{ pid: process.pid, signal: "SIGTERM" }]);
  } finally {
    // Always restore the real globals so later tests are unaffected.
    process.kill = realKill;
    globalThis.setTimeout = realSetTimeout;
  }
});
test("unlinkFileWithRetry retries EBUSY/EPERM and eventually succeeds", async () => {
const target = path.join(TEST_DATA_DIR, "retry-target.tmp");
fs.writeFileSync(target, "retry-me");