diff --git a/README.md b/README.md index 016444ee..4500a337 100644 --- a/README.md +++ b/README.md @@ -979,6 +979,7 @@ OmniRoute is available as a public Docker image on [Docker Hub](https://hub.dock docker run -d \ --name omniroute \ --restart unless-stopped \ + --stop-timeout 40 \ -p 20128:20128 \ -v omniroute-data:/app/data \ diegosouzapw/omniroute:latest @@ -993,6 +994,7 @@ cp .env.example .env docker run -d \ --name omniroute \ --restart unless-stopped \ + --stop-timeout 40 \ --env-file .env \ -p 20128:20128 \ -v omniroute-data:/app/data \ @@ -1016,6 +1018,8 @@ Notes: - Quick Tunnel URLs are temporary and change after every restart. - Managed install currently supports Linux, macOS, and Windows on `x64` / `arm64`. - Docker images bundle system CA roots and pass them to managed `cloudflared`, which avoids TLS trust failures when the tunnel bootstraps inside the container. +- SQLite runs in WAL mode. `docker stop` should be allowed to finish so OmniRoute can checkpoint the latest changes back into `storage.sqlite`. +- The bundled Compose files already set a 40s stop grace period. If you run the image directly, keep `--stop-timeout 40` (or similar) so manual stops do not cut off shutdown cleanup. - Set `CLOUDFLARED_BIN=/absolute/path/to/cloudflared` if you want OmniRoute to use an existing binary instead of downloading one. **Using Docker Compose with Caddy (HTTPS Auto-TLS):** diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml index 5674b10c..cb86dc57 100644 --- a/docker-compose.prod.yml +++ b/docker-compose.prod.yml @@ -19,6 +19,7 @@ services: target: runner-cli image: omniroute:prod restart: unless-stopped + stop_grace_period: 40s env_file: .env environment: - NODE_ENV=production diff --git a/docker-compose.yml b/docker-compose.yml index 8bc975b3..96e5be2c 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -17,6 +17,7 @@ x-common: &common restart: unless-stopped + stop_grace_period: 40s env_file: .env environment: - DATA_DIR=/app/data # Must match the volume mount below diff --git a/docs/i18n/zh-CN/README.md b/docs/i18n/zh-CN/README.md index a28f829e..56779901 100644 --- a/docs/i18n/zh-CN/README.md +++ b/docs/i18n/zh-CN/README.md @@ -983,6 +983,7 @@ OmniRoute is available as a public Docker image on [Docker Hub](https://hub.dock docker run -d \ --name omniroute \ --restart unless-stopped \ + --stop-timeout 40 \ -p 20128:20128 \ -v omniroute-data:/app/data \ diegosouzapw/omniroute:latest @@ -997,6 +998,7 @@ cp .env.example .env docker run -d \ --name omniroute \ --restart unless-stopped \ + --stop-timeout 40 \ --env-file .env \ -p 20128:20128 \ -v omniroute-data:/app/data \ @@ -1020,6 +1022,8 @@ Notes: - Quick Tunnel URLs are temporary and change after every restart. - Managed install currently supports Linux, macOS, and Windows on `x64` / `arm64`. - Docker images bundle system CA roots and pass them to managed `cloudflared`, which avoids TLS trust failures when the tunnel bootstraps inside the container. +- SQLite uses WAL mode. Let `docker stop` finish cleanly so OmniRoute can checkpoint the latest changes back into `storage.sqlite`. +- The bundled Compose files already use a 40s stop grace period. If you run the image directly, keep `--stop-timeout 40` (or similar) so manual stops do not interrupt shutdown cleanup. - Set `CLOUDFLARED_BIN=/absolute/path/to/cloudflared` if you want OmniRoute to use an existing binary instead of downloading one. **Using Docker Compose with Caddy (HTTPS Auto-TLS):** diff --git a/src/app/api/restart/route.ts b/src/app/api/restart/route.ts index 909ccf44..74f1d3f9 100644 --- a/src/app/api/restart/route.ts +++ b/src/app/api/restart/route.ts @@ -1,9 +1,9 @@ import { NextResponse } from "next/server"; export async function POST() { - // Graceful restart: exit with code 0 so the process manager (pm2/systemd) restarts + // Graceful restart: SIGTERM flows through the shutdown handler before the process manager restarts setTimeout(() => { - process.exit(0); + process.kill(process.pid, "SIGTERM"); }, 500); return NextResponse.json({ status: "restarting" }); diff --git a/src/app/api/shutdown/route.ts b/src/app/api/shutdown/route.ts index ce713401..eddf43c4 100644 --- a/src/app/api/shutdown/route.ts +++ b/src/app/api/shutdown/route.ts @@ -4,7 +4,7 @@ export async function POST() { const response = NextResponse.json({ success: true, message: "Shutting down..." }); setTimeout(() => { - process.exit(0); + process.kill(process.pid, "SIGTERM"); }, 500); return response; diff --git a/src/lib/db/core.ts b/src/lib/db/core.ts index 14d9ee84..90e66d44 100644 --- a/src/lib/db/core.ts +++ b/src/lib/db/core.ts @@ -12,6 +12,7 @@ import { runMigrations } from "./migrationRunner"; type SqliteDatabase = import("better-sqlite3").Database; type JsonRecord = Record; +type CheckpointMode = "PASSIVE" | "FULL" | "RESTART" | "TRUNCATE"; // ──────────────── Environment Detection ──────────────── @@ -323,6 +324,12 @@ function setDb(db: SqliteDatabase | null): void { } } +function checkpointDb(db: SqliteDatabase, mode: CheckpointMode = "TRUNCATE"): boolean { + if (isCloud || isBuildPhase || !SQLITE_FILE) return false; + db.pragma(`wal_checkpoint(${mode})`); + return true; +} + function ensureProviderConnectionsColumns(db: SqliteDatabase) { try { const columns = db.prepare("PRAGMA table_info(provider_connections)").all() as Array<{ @@ -523,15 +530,39 @@ export function getDbInstance(): SqliteDatabase { return db; } +export function closeDbInstance(options?: { checkpointMode?: CheckpointMode | null }): boolean { + const db = getDb(); + if (!db) return false; + + const checkpointMode = options?.checkpointMode ?? "TRUNCATE"; + + try { + if (checkpointMode) { + try { + if (checkpointDb(db, checkpointMode)) { + console.log(`[DB] SQLite WAL checkpoint completed (${checkpointMode}).`); + } + } catch (error: unknown) { + const message = error instanceof Error ? error.message : String(error); + console.warn(`[DB] WAL checkpoint failed during close (${checkpointMode}):`, message); + } + } + } finally { + try { + if (db.open) db.close(); + } finally { + setDb(null); + } + } + + return true; +} + /** * Reset the singleton (used by restore). */ export function resetDbInstance() { - const db = getDb(); - if (db) { - db.close(); - setDb(null); - } + closeDbInstance(); } // ──────────────── JSON → SQLite Migration ──────────────── diff --git a/src/lib/gracefulShutdown.ts b/src/lib/gracefulShutdown.ts index 88291777..8bc09f5d 100644 --- a/src/lib/gracefulShutdown.ts +++ b/src/lib/gracefulShutdown.ts @@ -96,11 +96,9 @@ async function waitForDrain(): Promise { */ async function cleanup(): Promise { try { - const { getDbInstance } = await import("@/lib/db/core"); - const db = getDbInstance(); - if (db && typeof db.close === "function") { - db.close(); - console.log("[Shutdown] SQLite database closed."); + const { closeDbInstance } = await import("@/lib/db/core"); + if (closeDbInstance()) { + console.log("[Shutdown] SQLite database checkpointed and closed."); } } catch (err) { console.error("[Shutdown] Error during cleanup:", (err as Error).message); diff --git a/tests/unit/fixes-p1.test.mjs b/tests/unit/fixes-p1.test.mjs index 636543b0..69e2c7e8 100644 --- a/tests/unit/fixes-p1.test.mjs +++ b/tests/unit/fixes-p1.test.mjs @@ -19,6 +19,8 @@ const proxyFetch = await import("../../open-sse/utils/proxyFetch.ts"); const proxyDispatcher = await import("../../open-sse/utils/proxyDispatcher.ts"); const proxySettingsRoute = await import("../../src/app/api/settings/proxy/route.ts"); const proxyTestRoute = await import("../../src/app/api/settings/proxy/test/route.ts"); +const shutdownRoute = await import("../../src/app/api/shutdown/route.ts"); +const restartRoute = await import("../../src/app/api/restart/route.ts"); async function withEnv(name, value, fn) { const previous = process.env[name]; @@ -141,6 +143,80 @@ test( } ); +test("closeDbInstance checkpoints WAL changes into the primary SQLite file", async () => { + await resetStorage(); + + const db = core.getDbInstance(); + const now = new Date().toISOString(); + db.prepare( + "INSERT INTO provider_connections (id, provider, auth_type, name, is_active, created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?)" + ).run("checkpoint-test-conn", "openai", "apikey", "checkpoint-test", 1, now, now); + + core.closeDbInstance(); + + const snapshotPath = path.join(TEST_DATA_DIR, "storage-snapshot.sqlite"); + fs.copyFileSync(core.SQLITE_FILE, snapshotPath); + + const Database = (await import("better-sqlite3")).default; + const snapshotDb = new Database(snapshotPath, { readonly: true }); + try { + const row = snapshotDb + .prepare("SELECT name FROM provider_connections WHERE id = ?") + .get("checkpoint-test-conn"); + assert.equal(row?.name, "checkpoint-test"); + } finally { + snapshotDb.close(); + } +}); + +test("shutdown route uses SIGTERM for graceful shutdown", async () => { + const originalKill = process.kill; + const originalSetTimeout = globalThis.setTimeout; + const calls = []; + + process.kill = (pid, signal) => { + calls.push({ pid, signal }); + return true; + }; + globalThis.setTimeout = (callback) => { + callback(); + return 0; + }; + + try { + const response = await shutdownRoute.POST(); + assert.equal(response.status, 200); + assert.deepEqual(calls, [{ pid: process.pid, signal: "SIGTERM" }]); + } finally { + process.kill = originalKill; + globalThis.setTimeout = originalSetTimeout; + } +}); + +test("restart route uses SIGTERM for graceful restart", async () => { + const originalKill = process.kill; + const originalSetTimeout = globalThis.setTimeout; + const calls = []; + + process.kill = (pid, signal) => { + calls.push({ pid, signal }); + return true; + }; + globalThis.setTimeout = (callback) => { + callback(); + return 0; + }; + + try { + const response = await restartRoute.POST(); + assert.equal(response.status, 200); + assert.deepEqual(calls, [{ pid: process.pid, signal: "SIGTERM" }]); + } finally { + process.kill = originalKill; + globalThis.setTimeout = originalSetTimeout; + } +}); + test("unlinkFileWithRetry retries EBUSY/EPERM and eventually succeeds", async () => { const target = path.join(TEST_DATA_DIR, "retry-target.tmp"); fs.writeFileSync(target, "retry-me");