diff --git a/plugins/ms-ai-architect/scripts/kb-update/lib/lock-file.mjs b/plugins/ms-ai-architect/scripts/kb-update/lib/lock-file.mjs
new file mode 100644
index 0000000..850562d
--- /dev/null
+++ b/plugins/ms-ai-architect/scripts/kb-update/lib/lock-file.mjs
@@ -0,0 +1,199 @@
+// lock-file.mjs — Exclusive lock with PID + mtime stale-detection.
+// Zero dependencies. Uses fs.writeFileSync('wx') for atomic exclusive create.
+// Stale-detection is OR-based: stale if PID is dead OR mtime exceeds threshold.
+// Either condition alone is enough to break the lock — handles SIGKILL orphans
+// (mtime alone) and PID-reuse races (mtime alone) and crashed-then-replaced
+// runs (PID alone). Long runs may opt-in to mtime refresh via refreshIntervalMs.
+
+import { writeFileSync, readFileSync, statSync, unlinkSync, utimesSync } from 'node:fs';
+import { hostname } from 'node:os';
+import { join } from 'node:path';
+import { getCacheDir } from './cross-platform-paths.mjs';
+
+const DEFAULT_STALE_THRESHOLD_MS = 60 * 60 * 1000; // 1 hour
+const DEFAULT_LOCK_NAME = 'kb-update.lock';
+
+/**
+ * Check whether a PID identifies a live process.
+ * @param {number} pid — POSIX process id
+ * @returns {boolean} false for anything that is not a positive integer
+ */
+export function isPidAlive(pid) {
+  if (!Number.isInteger(pid) || pid <= 0) {
+    return false;
+  }
+  try {
+    // Signal 0 performs the existence/permission check without delivering a signal.
+    process.kill(pid, 0);
+    return true;
+  } catch (err) {
+    // EPERM means the process exists but we lack signal permission — still alive.
+    return err?.code === 'EPERM';
+  }
+}
+
+/** Parse the lock file as JSON; null when it cannot be read or parsed. */
+function safeReadLock(lockPath) {
+  try {
+    return JSON.parse(readFileSync(lockPath, 'utf8'));
+  } catch {
+    return null;
+  }
+}
+
+/** Modification time of the lock file in ms; null when stat fails. */
+function lockMtimeMs(lockPath) {
+  try {
+    return statSync(lockPath).mtimeMs;
+  } catch {
+    return null;
+  }
+}
+
+/** Create the lock file exclusively; the 'wx' flag fails with EEXIST if it exists. */
+function writeLockFile(lockPath) {
+  writeFileSync(
+    lockPath,
+    JSON.stringify({
+      pid: process.pid,
+      started: Date.now(),
+      host: hostname(),
+      version: 1,
+    }),
+    { flag: 'wx', encoding: 'utf8' }
+  );
+}
+
+/** Build the ELOCKED error describing the holder recorded in `data` (may be null). */
+function lockedError(data) {
+  const holderPid = typeof data?.pid === 'number' ? data.pid : null;
+  const e = new Error(
+    `Lock held by PID ${holderPid} (started ${data?.started ?? 'unknown'})`
+  );
+  e.code = 'ELOCKED';
+  e.holderPid = holderPid;
+  return e;
+}
+
+/**
+ * Acquire an exclusive lock. Throws ELOCKED if held by a live, fresh holder.
+ * Cleans up stale locks (dead PID OR mtime older than staleThresholdMs).
+ *
+ * @param {string} [lockPath] — absolute lock-file path; defaults to
+ *   kb-update.lock inside getCacheDir('ms-ai-architect')
+ * @param {object} [opts]
+ * @param {number} [opts.staleThresholdMs] — default 3600000 (1h)
+ * @param {number} [opts.refreshIntervalMs] — if > 0, periodically utimes the lock
+ * @param {boolean} [opts.registerCleanup] — default true; install exit/signal handlers
+ *   (they are removed again by release())
+ * @returns {{lockPath: string, release: () => void}}
+ * @throws {Error} code 'ELOCKED' (with holderPid) when the lock cannot be taken
+ */
+export function acquireLock(lockPath, opts = {}) {
+  const staleThresholdMs = opts.staleThresholdMs ?? DEFAULT_STALE_THRESHOLD_MS;
+  const refreshIntervalMs = opts.refreshIntervalMs ?? 0;
+  const registerCleanup = opts.registerCleanup ?? true;
+  const path = lockPath || join(getCacheDir('ms-ai-architect'), DEFAULT_LOCK_NAME);
+
+  try {
+    writeLockFile(path);
+  } catch (err) {
+    if (!err || err.code !== 'EEXIST') throw err;
+
+    const data = safeReadLock(path);
+    const mtime = lockMtimeMs(path);
+    const holderPid = typeof data?.pid === 'number' ? data.pid : null;
+    const pidAlive = holderPid != null ? isPidAlive(holderPid) : false;
+    const ageMs = mtime != null ? Date.now() - mtime : Infinity;
+    const stale = !pidAlive || ageMs > staleThresholdMs;
+
+    if (!stale) {
+      throw lockedError(data);
+    }
+
+    try {
+      unlinkSync(path);
+    } catch {
+      // best-effort
+    }
+    try {
+      writeLockFile(path); // retry once
+    } catch (retryErr) {
+      if (!retryErr || retryErr.code !== 'EEXIST') throw retryErr;
+      // The file still exists: either the unlink failed or another process
+      // re-created the lock first. Surface that as the documented ELOCKED
+      // contention error instead of leaking a raw EEXIST.
+      throw lockedError(safeReadLock(path));
+    }
+  }
+
+  let refreshTimer = null;
+  let released = false;
+  // [event, handler] pairs installed on `process`, so release() can remove them.
+  const cleanupHandlers = [];
+
+  const release = () => {
+    if (released) return;
+    released = true;
+    if (refreshTimer) {
+      clearInterval(refreshTimer);
+      refreshTimer = null;
+    }
+    // Uninstall our handlers so repeated acquire/release cycles do not pile up
+    // listeners and a released lock no longer hijacks signal handling.
+    for (const [event, handler] of cleanupHandlers) {
+      process.removeListener(event, handler);
+    }
+    cleanupHandlers.length = 0;
+    try {
+      const data = safeReadLock(path);
+      // Only delete a file that is ours (or unreadable); never another holder's.
+      if (!data || data.pid === process.pid) {
+        unlinkSync(path);
+      }
+    } catch {
+      // best-effort
+    }
+  };
+
+  if (refreshIntervalMs > 0) {
+    refreshTimer = setInterval(() => {
+      try {
+        const now = new Date();
+        utimesSync(path, now, now);
+      } catch {
+        // best-effort
+      }
+    }, refreshIntervalMs);
+    // Do not let the refresh timer keep the event loop alive on its own.
+    if (typeof refreshTimer.unref === 'function') {
+      refreshTimer.unref();
+    }
+  }
+
+  if (registerCleanup) {
+    const exitAfterRelease = (exitCode) => () => {
+      release();
+      process.exit(exitCode);
+    };
+    cleanupHandlers.push(
+      ['exit', () => release()],
+      ['SIGINT', exitAfterRelease(130)],
+      ['SIGTERM', exitAfterRelease(143)],
+      ['SIGHUP', exitAfterRelease(129)],
+      [
+        'uncaughtException',
+        (err) => {
+          release();
+          console.error(err);
+          process.exit(1);
+        },
+      ]
+    );
+    for (const [event, handler] of cleanupHandlers) {
+      process.once(event, handler);
+    }
+  }
+
+  return { lockPath: path, release };
+}
diff --git a/plugins/ms-ai-architect/tests/kb-update/test-lock-file.test.mjs b/plugins/ms-ai-architect/tests/kb-update/test-lock-file.test.mjs
new file mode 100644
index 0000000..37ae57b
--- /dev/null
+++ b/plugins/ms-ai-architect/tests/kb-update/test-lock-file.test.mjs
@@ -0,0 +1,212 @@
+// tests/kb-update/test-lock-file.test.mjs
+// Unit tests for scripts/kb-update/lib/lock-file.mjs
+
+import { test } from 'node:test';
+import assert from 'node:assert/strict';
+import {
+  mkdtempSync,
+  rmSync,
+  writeFileSync,
+  readFileSync,
+  existsSync,
+  utimesSync,
+} from 'node:fs';
+import { tmpdir } from 'node:os';
+import { join } from 'node:path';
+import {
+  acquireLock,
+  isPidAlive,
+} from '../../scripts/kb-update/lib/lock-file.mjs';
+
+const DEAD_PID = 99999999; // far above typical PID_MAX; reliably non-existent
+
+function withTmp(fn) {
+  const dir = mkdtempSync(join(tmpdir(), 'lf-test-'));
+  try {
+    return fn(dir);
+  } finally {
+    rmSync(dir, { recursive: true, force: true });
+  }
+}
+
+function writeFakeLock(path, { pid, started, host = 'test-host', ageMs = 0 }) {
+  writeFileSync(
+    path,
+    JSON.stringify({
+      pid,
+      started: started ?? Date.now() - ageMs,
+      host,
+      version: 1,
+    }),
+    'utf8'
+  );
+  if (ageMs > 0) {
+    const past = new Date(Date.now() - ageMs);
+    utimesSync(path, past, past);
+  }
+}
+
+test('isPidAlive — current process is alive', () => {
+  assert.equal(isPidAlive(process.pid), true);
+});
+
+test('isPidAlive — non-existent PID is dead', () => {
+  assert.equal(isPidAlive(DEAD_PID), false);
+});
+
+test('isPidAlive — invalid input is dead', () => {
+  assert.equal(isPidAlive(0), false);
+  assert.equal(isPidAlive(-1), false);
+  assert.equal(isPidAlive(NaN), false);
+  assert.equal(isPidAlive(undefined), false);
+});
+
+test('acquireLock — creates lock file with current PID metadata', () => {
+  withTmp((dir) => {
+    const path = join(dir, 'test.lock');
+    const lock = acquireLock(path, { registerCleanup: false });
+    try {
+      assert.equal(lock.lockPath, path);
+      assert.equal(existsSync(path), true);
+      const data = JSON.parse(readFileSync(path, 'utf8'));
+      assert.equal(data.pid, process.pid);
+      assert.equal(data.version, 1);
+      assert.equal(typeof data.started, 'number');
+      assert.equal(typeof data.host, 'string');
+    } finally {
+      lock.release();
+    }
+  });
+});
+
+test('acquireLock — second call same process throws ELOCKED', () => {
+  withTmp((dir) => {
+    const path = join(dir, 'test.lock');
+    const lock = acquireLock(path, { registerCleanup: false });
+    try {
+      assert.throws(
+        () => acquireLock(path, { registerCleanup: false }),
+        (err) => err.code === 'ELOCKED' && err.holderPid === process.pid
+      );
+    } finally {
+      lock.release();
+    }
+  });
+});
+
+test('acquireLock — concurrent live holder (fixture lock-file) throws ELOCKED', () => {
+  withTmp((dir) => {
+    const path = join(dir, 'test.lock');
+    // Pre-write a lock as if held by another live process (we use process.pid
+    // as a stand-in for "guaranteed alive" without forking).
+    writeFakeLock(path, { pid: process.pid, ageMs: 0 });
+    assert.throws(
+      () => acquireLock(path, { registerCleanup: false }),
+      (err) => err.code === 'ELOCKED'
+    );
+  });
+});
+
+test('acquireLock — release deletes the lock file', () => {
+  withTmp((dir) => {
+    const path = join(dir, 'test.lock');
+    const lock = acquireLock(path, { registerCleanup: false });
+    assert.equal(existsSync(path), true);
+    lock.release();
+    assert.equal(existsSync(path), false);
+  });
+});
+
+test('acquireLock — release on already-released lock is a no-op', () => {
+  withTmp((dir) => {
+    const path = join(dir, 'test.lock');
+    const lock = acquireLock(path, { registerCleanup: false });
+    lock.release();
+    // Second release must not throw.
+    lock.release();
+    assert.equal(existsSync(path), false);
+  });
+});
+
+test('acquireLock — stale lock with dead PID + old mtime is cleaned', () => {
+  withTmp((dir) => {
+    const path = join(dir, 'test.lock');
+    writeFakeLock(path, { pid: DEAD_PID, ageMs: 2 * 60 * 60 * 1000 });
+    const lock = acquireLock(path, { registerCleanup: false });
+    try {
+      const data = JSON.parse(readFileSync(path, 'utf8'));
+      assert.equal(data.pid, process.pid);
+    } finally {
+      lock.release();
+    }
+  });
+});
+
+test('acquireLock — stale lock with live PID but old mtime is also cleaned', () => {
+  withTmp((dir) => {
+    const path = join(dir, 'test.lock');
+    // Live PID (us) but mtime older than default 1h threshold.
+    writeFakeLock(path, { pid: process.pid, ageMs: 2 * 60 * 60 * 1000 });
+    const lock = acquireLock(path, { registerCleanup: false });
+    try {
+      const data = JSON.parse(readFileSync(path, 'utf8'));
+      assert.equal(data.pid, process.pid);
+      // started is rewritten to fresh wallclock
+      assert.ok(Date.now() - data.started < 5000);
+    } finally {
+      lock.release();
+    }
+  });
+});
+
+test('acquireLock — fresh lock with live PID is NOT cleaned', () => {
+  withTmp((dir) => {
+    const path = join(dir, 'test.lock');
+    writeFakeLock(path, { pid: process.pid, ageMs: 0 });
+    assert.throws(
+      () => acquireLock(path, { registerCleanup: false }),
+      (err) => err.code === 'ELOCKED' && err.holderPid === process.pid
+    );
+  });
+});
+
+test('acquireLock — staleThresholdMs is honored', () => {
+  withTmp((dir) => {
+    const path = join(dir, 'test.lock');
+    // 5s-old, live PID. Default 1h threshold → not stale → ELOCKED.
+    writeFakeLock(path, { pid: process.pid, ageMs: 5_000 });
+    assert.throws(
+      () => acquireLock(path, { registerCleanup: false }),
+      (err) => err.code === 'ELOCKED'
+    );
+
+    // Same fixture but threshold 1s → stale → cleaned.
+    writeFakeLock(path, { pid: process.pid, ageMs: 5_000 });
+    const lock = acquireLock(path, {
+      registerCleanup: false,
+      staleThresholdMs: 1_000,
+    });
+    lock.release();
+    assert.equal(existsSync(path), false);
+  });
+});
+
+test('acquireLock — release removes the process handlers it installed', () => {
+  withTmp((dir) => {
+    const path = join(dir, 'test.lock');
+    const events = ['exit', 'SIGINT', 'SIGTERM', 'SIGHUP', 'uncaughtException'];
+    const countListeners = () => events.map((name) => process.listenerCount(name));
+    const before = countListeners();
+    const lock = acquireLock(path); // registerCleanup defaults to true
+    try {
+      assert.deepEqual(
+        countListeners(),
+        before.map((count) => count + 1)
+      );
+    } finally {
+      lock.release();
+    }
+    assert.deepEqual(countListeners(), before);
+    assert.equal(existsSync(path), false);
+  });
+});