feat(ms-ai-architect): add lib/lock-file with PID+mtime stale detection [skip-docs]
Foundation lib for v1.12.0 cron rewrite. Atomic exclusive create via
fs.writeFileSync('wx'); on EEXIST resolves staleness with OR semantics:
stale if PID is dead OR mtime exceeds threshold. Either alone breaks the
lock — handles SIGKILL orphans (mtime), PID-reuse races (mtime), and
crashed-then-replaced runs (PID).
- acquireLock(lockPath, opts) → {lockPath, release()}
- staleThresholdMs default 1h; refreshIntervalMs opt-in for long runs
- registerCleanup default true (exit/SIGINT/SIGTERM/SIGHUP/uncaughtException)
- isPidAlive uses kill(pid, 0) with EPERM-as-alive nuance
12/12 tests pass: PID liveness, fixture concurrency, idempotent release,
stale variants (dead+old, live+old, fresh+live), staleThresholdMs honored.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
parent
4aac89ca11
commit
3e26b94a27
2 changed files with 358 additions and 0 deletions
166
plugins/ms-ai-architect/scripts/kb-update/lib/lock-file.mjs
Normal file
166
plugins/ms-ai-architect/scripts/kb-update/lib/lock-file.mjs
Normal file
|
|
@ -0,0 +1,166 @@
|
|||
// lock-file.mjs — Exclusive lock with PID + mtime stale-detection.
|
||||
// Zero dependencies. Uses fs.writeFileSync('wx') for atomic exclusive create.
|
||||
// Stale-detection is OR-based: stale if PID is dead OR mtime exceeds threshold.
|
||||
// Either condition alone is enough to break the lock — handles SIGKILL orphans
|
||||
// (mtime alone), PID-reuse races (mtime alone), and crashed-then-replaced
|
||||
// runs (PID alone). Long runs may opt-in to mtime refresh via refreshIntervalMs.
|
||||
|
||||
import { writeFileSync, readFileSync, statSync, unlinkSync, utimesSync } from 'node:fs';
|
||||
import { hostname } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { getCacheDir } from './cross-platform-paths.mjs';
|
||||
|
||||
const DEFAULT_STALE_THRESHOLD_MS = 60 * 60 * 1000; // 1 hour
|
||||
const DEFAULT_LOCK_NAME = 'kb-update.lock';
|
||||
|
||||
/**
 * Check whether a PID identifies a live process.
 *
 * Probes the process with `process.kill(pid, 0)`; a successful call or an
 * EPERM failure both count as "alive", any other failure counts as "dead".
 *
 * @param {number} pid — POSIX process id
 * @returns {boolean} always a strict boolean; false for non-numeric,
 *   non-finite, fractional, zero or negative input
 */
export function isPidAlive(pid) {
  // Number.isInteger rejects non-numbers, NaN, ±Infinity and fractions in one
  // check, so clearly invalid ids never reach process.kill.
  if (!Number.isInteger(pid) || pid <= 0) {
    return false;
  }
  try {
    process.kill(pid, 0);
    return true;
  } catch (err) {
    // EPERM means the process exists but we lack signal permission — still alive.
    // Optional chaining keeps the result a strict boolean even when a nullish
    // or non-Error value was thrown.
    return err?.code === 'EPERM';
  }
}
|
||||
|
||||
/**
 * Read the lock file and parse its JSON payload.
 * Any failure while reading or parsing is swallowed and reported as null.
 *
 * @param {string} lockPath
 * @returns {*} parsed JSON value, or null when the file cannot be read/parsed
 */
function safeReadLock(lockPath) {
  let parsed = null;
  try {
    const raw = readFileSync(lockPath, 'utf8');
    parsed = JSON.parse(raw);
  } catch {
    // Missing, unreadable or malformed lock file — fall through with null.
  }
  return parsed;
}
|
||||
|
||||
/**
 * Return the lock file's modification time in milliseconds.
 *
 * @param {string} lockPath
 * @returns {number|null} `mtimeMs` from statSync, or null if stat fails
 */
function lockMtimeMs(lockPath) {
  let modifiedAt = null;
  try {
    ({ mtimeMs: modifiedAt } = statSync(lockPath));
  } catch {
    // stat failed (e.g. the file is gone) — keep null.
  }
  return modifiedAt;
}
|
||||
|
||||
/**
 * Create the lock file with this process's metadata (pid, start time, host,
 * format version). The 'wx' flag makes the write fail with EEXIST when the
 * file already exists, which is what makes creation exclusive.
 *
 * @param {string} lockPath
 */
function writeLockFile(lockPath) {
  const payload = {
    pid: process.pid,
    started: Date.now(),
    host: hostname(),
    version: 1,
  };
  const serialized = JSON.stringify(payload);
  const exclusiveCreate = { flag: 'wx', encoding: 'utf8' };
  writeFileSync(lockPath, serialized, exclusiveCreate);
}
|
||||
|
||||
/**
 * Build the ELOCKED error thrown when the lock cannot be taken.
 *
 * @param {*} data — parsed lock payload (may be null)
 * @param {*} [cause] — underlying error, attached as `cause` when given
 * @returns {Error} error with `code === 'ELOCKED'` and `holderPid`
 */
function makeLockedError(data, cause) {
  const holderPid = typeof data?.pid === 'number' ? data.pid : null;
  const e = new Error(
    `Lock held by PID ${holderPid} (started ${data?.started ?? 'unknown'})`
  );
  e.code = 'ELOCKED';
  e.holderPid = holderPid;
  if (cause !== undefined) e.cause = cause;
  return e;
}

/**
 * Acquire an exclusive lock. Throws ELOCKED if held by a live, fresh holder.
 * Cleans up stale locks (dead PID OR mtime older than staleThresholdMs).
 *
 * If the lock is judged stale, it is unlinked and creation is retried once.
 * When that retry finds the file present again (another contender re-created
 * it, or the unlink failed), the failure is reported as ELOCKED with the
 * underlying EEXIST error attached as `cause`.
 *
 * NOTE(review): the stale-break path (unlink, then exclusive create) is not
 * atomic across processes; two contenders that both judge the same lock stale
 * can still interleave. Confirm whether callers need a stronger guarantee.
 *
 * @param {string} [lockPath] — absolute lock-file path; defaults to <cache>/kb-update.lock
 * @param {object} [opts]
 * @param {number} [opts.staleThresholdMs] — default 3600000 (1h)
 * @param {number} [opts.refreshIntervalMs] — if > 0, periodically utimes the lock
 * @param {boolean} [opts.registerCleanup] — default true; install exit/signal handlers
 *   (exit, SIGINT, SIGTERM, SIGHUP, uncaughtException). They are removed again
 *   when release() runs.
 * @returns {{lockPath: string, release: () => void}} release() is idempotent
 * @throws {Error} with `code === 'ELOCKED'` (and `holderPid`) when the lock is held;
 *   any other error from creating the lock file is rethrown unchanged
 */
export function acquireLock(lockPath, opts = {}) {
  const staleThresholdMs = opts.staleThresholdMs ?? DEFAULT_STALE_THRESHOLD_MS;
  const refreshIntervalMs = opts.refreshIntervalMs ?? 0;
  const registerCleanup = opts.registerCleanup ?? true;
  const path = lockPath || join(getCacheDir('ms-ai-architect'), DEFAULT_LOCK_NAME);

  try {
    writeLockFile(path);
  } catch (err) {
    if (!err || err.code !== 'EEXIST') throw err;

    // The file exists: decide whether its holder is still entitled to it.
    const data = safeReadLock(path);
    const mtime = lockMtimeMs(path);
    const holderPid = typeof data?.pid === 'number' ? data.pid : null;
    const pidAlive = holderPid != null ? isPidAlive(holderPid) : false;
    // A lock whose mtime cannot be read is treated as infinitely old.
    const ageMs = mtime != null ? Date.now() - mtime : Infinity;
    const stale = !pidAlive || ageMs > staleThresholdMs;

    if (!stale) {
      throw makeLockedError(data);
    }

    try {
      unlinkSync(path);
    } catch {
      // best-effort
    }

    try {
      writeLockFile(path); // retry once
    } catch (retryErr) {
      if (retryErr?.code !== 'EEXIST') throw retryErr;
      // The file is back (or never went away): surface the documented error
      // code instead of leaking a raw EEXIST to callers.
      throw makeLockedError(safeReadLock(path), retryErr);
    }
  }

  let refreshTimer = null;
  let released = false;
  // [eventName, handler] pairs installed on `process`, so release() can undo them.
  const installedHandlers = [];

  const release = () => {
    if (released) return;
    released = true;

    if (refreshTimer) {
      clearInterval(refreshTimer);
      refreshTimer = null;
    }

    // Deregister our process listeners; otherwise every acquire/release cycle
    // leaks five listeners and stale signal handlers keep forcing process.exit.
    for (const [eventName, handler] of installedHandlers) {
      process.removeListener(eventName, handler);
    }
    installedHandlers.length = 0;

    try {
      // Only delete the file if it is unreadable or still records our PID.
      const data = safeReadLock(path);
      if (!data || data.pid === process.pid) {
        unlinkSync(path);
      }
    } catch {
      // best-effort
    }
  };

  if (refreshIntervalMs > 0) {
    refreshTimer = setInterval(() => {
      try {
        const now = new Date();
        utimesSync(path, now, now);
      } catch {
        // best-effort
      }
    }, refreshIntervalMs);
    // Do not let the refresh timer alone keep the process running.
    if (typeof refreshTimer.unref === 'function') {
      refreshTimer.unref();
    }
  }

  if (registerCleanup) {
    const releaseAndExit = (exitCode) => () => {
      release();
      process.exit(exitCode);
    };
    const handlers = [
      ['exit', () => release()],
      ['SIGINT', releaseAndExit(130)],
      ['SIGTERM', releaseAndExit(143)],
      ['SIGHUP', releaseAndExit(129)],
      [
        'uncaughtException',
        (err) => {
          release();
          console.error(err);
          process.exit(1);
        },
      ],
    ];
    for (const [eventName, handler] of handlers) {
      process.once(eventName, handler);
      installedHandlers.push([eventName, handler]);
    }
  }

  return { lockPath: path, release };
}
|
||||
192
plugins/ms-ai-architect/tests/kb-update/test-lock-file.test.mjs
Normal file
192
plugins/ms-ai-architect/tests/kb-update/test-lock-file.test.mjs
Normal file
|
|
@ -0,0 +1,192 @@
|
|||
// tests/kb-update/test-lock-file.test.mjs
|
||||
// Unit tests for scripts/kb-update/lib/lock-file.mjs
|
||||
|
||||
import { test } from 'node:test';
|
||||
import assert from 'node:assert/strict';
|
||||
import {
|
||||
mkdtempSync,
|
||||
rmSync,
|
||||
writeFileSync,
|
||||
readFileSync,
|
||||
existsSync,
|
||||
utimesSync,
|
||||
} from 'node:fs';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import {
|
||||
acquireLock,
|
||||
isPidAlive,
|
||||
} from '../../scripts/kb-update/lib/lock-file.mjs';
|
||||
|
||||
const DEAD_PID = 99999999; // far above typical PID_MAX; reliably non-existent
|
||||
|
||||
// Run `fn` against a freshly created temp directory and remove that directory
// afterwards, whether `fn` returns or throws. Returns whatever `fn` returns.
function withTmp(fn) {
  const scratchDir = mkdtempSync(join(tmpdir(), 'lf-test-'));
  let outcome;
  try {
    outcome = fn(scratchDir);
  } finally {
    rmSync(scratchDir, { recursive: true, force: true });
  }
  return outcome;
}
|
||||
|
||||
// Write a lock fixture at `path`. `started` defaults to "now minus ageMs";
// when ageMs is positive the file's atime/mtime are back-dated by that amount.
function writeFakeLock(path, { pid, started, host = 'test-host', ageMs = 0 }) {
  const fixture = {
    pid,
    started: started ?? Date.now() - ageMs,
    host,
    version: 1,
  };
  writeFileSync(path, JSON.stringify(fixture), 'utf8');

  if (!(ageMs > 0)) return;

  const backdated = new Date(Date.now() - ageMs);
  utimesSync(path, backdated, backdated);
}
|
||||
|
||||
// The running test process must be reported as alive.
test('isPidAlive — current process is alive', () => {
  const ownPid = process.pid;
  assert.equal(isPidAlive(ownPid), true);
});

// DEAD_PID is expected not to exist, so it must be reported as dead.
test('isPidAlive — non-existent PID is dead', () => {
  const alive = isPidAlive(DEAD_PID);
  assert.equal(alive, false);
});

// Zero, negative, NaN and undefined are all rejected.
test('isPidAlive — invalid input is dead', () => {
  for (const bogus of [0, -1, NaN, undefined]) {
    assert.equal(isPidAlive(bogus), false);
  }
});
|
||||
|
||||
// A fresh acquire writes a lock file describing the current process.
test('acquireLock — creates lock file with current PID metadata', () => {
  withTmp((tmp) => {
    const lockFile = join(tmp, 'test.lock');
    const handle = acquireLock(lockFile, { registerCleanup: false });
    try {
      assert.equal(handle.lockPath, lockFile);
      assert.equal(existsSync(lockFile), true);

      const payload = JSON.parse(readFileSync(lockFile, 'utf8'));
      assert.equal(payload.pid, process.pid);
      assert.equal(payload.version, 1);
      assert.equal(typeof payload.started, 'number');
      assert.equal(typeof payload.host, 'string');
    } finally {
      handle.release();
    }
  });
});

// While the first handle is held, a second acquire on the same path fails.
test('acquireLock — second call same process throws ELOCKED', () => {
  withTmp((tmp) => {
    const lockFile = join(tmp, 'test.lock');
    const isHeldByUs = (err) => err.code === 'ELOCKED' && err.holderPid === process.pid;
    const first = acquireLock(lockFile, { registerCleanup: false });
    try {
      const secondAttempt = () => acquireLock(lockFile, { registerCleanup: false });
      assert.throws(secondAttempt, isHeldByUs);
    } finally {
      first.release();
    }
  });
});
|
||||
|
||||
// A pre-existing, fresh lock file whose PID is alive must block acquisition.
test('acquireLock — concurrent live holder (fixture lock-file) throws ELOCKED', () => {
  withTmp((dir) => {
    const path = join(dir, 'test.lock');
    // Pre-write a lock as if held by another live process (we use process.pid
    // as a stand-in for "guaranteed alive" without forking).
    writeFakeLock(path, { pid: process.pid, ageMs: 0 });
    assert.throws(
      () => acquireLock(path, { registerCleanup: false }),
      (err) => err.code === 'ELOCKED'
    );
  });
});
|
||||
|
||||
// release() removes the file that acquireLock created.
test('acquireLock — release deletes the lock file', () => {
  withTmp((tmp) => {
    const lockFile = join(tmp, 'test.lock');
    const handle = acquireLock(lockFile, { registerCleanup: false });
    const presentWhileHeld = existsSync(lockFile);
    assert.equal(presentWhileHeld, true);

    handle.release();
    const presentAfterRelease = existsSync(lockFile);
    assert.equal(presentAfterRelease, false);
  });
});

// Calling release() twice is harmless.
test('acquireLock — release on already-released lock is a no-op', () => {
  withTmp((tmp) => {
    const lockFile = join(tmp, 'test.lock');
    const { release } = acquireLock(lockFile, { registerCleanup: false });
    release();
    // Second release must not throw.
    release();
    assert.equal(existsSync(lockFile), false);
  });
});
|
||||
|
||||
// Dead holder and old mtime: the fixture is replaced by our own lock.
test('acquireLock — stale lock with dead PID + old mtime is cleaned', () => {
  withTmp((tmp) => {
    const lockFile = join(tmp, 'test.lock');
    const twoHoursMs = 2 * 60 * 60 * 1000;
    writeFakeLock(lockFile, { pid: DEAD_PID, ageMs: twoHoursMs });

    const handle = acquireLock(lockFile, { registerCleanup: false });
    try {
      const payload = JSON.parse(readFileSync(lockFile, 'utf8'));
      assert.equal(payload.pid, process.pid);
    } finally {
      handle.release();
    }
  });
});

// Old mtime alone is enough to break a lock, even when its PID is alive.
test('acquireLock — stale lock with live PID but old mtime is also cleaned', () => {
  withTmp((tmp) => {
    const lockFile = join(tmp, 'test.lock');
    const twoHoursMs = 2 * 60 * 60 * 1000;
    // Live PID (us) but mtime older than default 1h threshold.
    writeFakeLock(lockFile, { pid: process.pid, ageMs: twoHoursMs });

    const handle = acquireLock(lockFile, { registerCleanup: false });
    try {
      const payload = JSON.parse(readFileSync(lockFile, 'utf8'));
      assert.equal(payload.pid, process.pid);
      // started is rewritten to fresh wallclock
      assert.ok(Date.now() - payload.started < 5000);
    } finally {
      handle.release();
    }
  });
});

// Fresh mtime and live PID: the holder keeps the lock.
test('acquireLock — fresh lock with live PID is NOT cleaned', () => {
  withTmp((tmp) => {
    const lockFile = join(tmp, 'test.lock');
    writeFakeLock(lockFile, { pid: process.pid, ageMs: 0 });

    const attempt = () => acquireLock(lockFile, { registerCleanup: false });
    const isHeldByUs = (err) => err.code === 'ELOCKED' && err.holderPid === process.pid;
    assert.throws(attempt, isHeldByUs);
  });
});

// The same 5s-old fixture is fresh under the default threshold, stale under 1s.
test('acquireLock — staleThresholdMs is honored', () => {
  withTmp((tmp) => {
    const lockFile = join(tmp, 'test.lock');
    const fixture = { pid: process.pid, ageMs: 5_000 };

    // 5s-old, live PID. Default 1h threshold → not stale → ELOCKED.
    writeFakeLock(lockFile, fixture);
    assert.throws(
      () => acquireLock(lockFile, { registerCleanup: false }),
      (err) => err.code === 'ELOCKED'
    );

    // Same fixture but threshold 1s → stale → cleaned.
    writeFakeLock(lockFile, fixture);
    const options = { registerCleanup: false, staleThresholdMs: 1_000 };
    const handle = acquireLock(lockFile, options);
    handle.release();
    assert.equal(existsSync(lockFile), false);
  });
});
|
||||
Loading…
Add table
Add a link
Reference in a new issue