feat(ms-ai-architect): add lib/lock-file with PID+mtime stale detection [skip-docs]
Foundation lib for v1.12.0 cron rewrite. Atomic exclusive create via
fs.writeFileSync('wx'); on EEXIST resolves staleness with OR semantics:
stale if PID is dead OR mtime exceeds threshold. Either alone breaks the
lock — handles SIGKILL orphans (mtime), PID-reuse races (mtime), and
crashed-then-replaced runs (PID).
- acquireLock(lockPath, opts) → {lockPath, release()}
- staleThresholdMs default 1h; refreshIntervalMs opt-in for long runs
- registerCleanup default true (exit/SIGINT/SIGTERM/SIGHUP/uncaughtException)
- isPidAlive uses kill(pid, 0) with EPERM-as-alive nuance
12/12 tests pass: PID liveness, fixture concurrency, idempotent release,
stale variants (dead+old, live+old, fresh+live), staleThresholdMs honored.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
parent
4aac89ca11
commit
3e26b94a27
2 changed files with 358 additions and 0 deletions
166
plugins/ms-ai-architect/scripts/kb-update/lib/lock-file.mjs
Normal file
166
plugins/ms-ai-architect/scripts/kb-update/lib/lock-file.mjs
Normal file
|
|
@ -0,0 +1,166 @@
|
||||||
|
// lock-file.mjs — Exclusive lock with PID + mtime stale-detection.
|
||||||
|
// Zero dependencies. Uses fs.writeFileSync('wx') for atomic exclusive create.
|
||||||
|
// Stale-detection is OR-based: stale if PID is dead OR mtime exceeds threshold.
|
||||||
|
// Either condition alone is enough to break the lock — handles SIGKILL orphans
|
||||||
|
// (mtime alone) and PID-reuse races (mtime alone) and crashed-then-replaced
|
||||||
|
// runs (PID alone). Long runs may opt-in to mtime refresh via refreshIntervalMs.
|
||||||
|
|
||||||
|
import { writeFileSync, readFileSync, statSync, unlinkSync, utimesSync } from 'node:fs';
|
||||||
|
import { hostname } from 'node:os';
|
||||||
|
import { join } from 'node:path';
|
||||||
|
import { getCacheDir } from './cross-platform-paths.mjs';
|
||||||
|
|
||||||
|
// Default lock age (measured from the lock file's mtime) beyond which a lock
// is treated as stale: one hour, expressed in milliseconds.
const DEFAULT_STALE_THRESHOLD_MS = 1000 * 60 * 60;

// File name used for the lock inside the cache directory when the caller does
// not pass an explicit lock path.
const DEFAULT_LOCK_NAME = 'kb-update.lock';
|
||||||
|
|
||||||
|
/**
 * Check whether a PID identifies a live process.
 *
 * Probes the process with signal 0 through process.kill and interprets the
 * outcome: a successful probe or an EPERM failure counts as alive, any other
 * failure counts as dead.
 *
 * @param {number} pid — POSIX process id
 * @returns {boolean} always a strict boolean; false for anything that is not
 *   a positive integer
 */
export function isPidAlive(pid) {
  // Non-numbers, NaN, ±Infinity, fractions, zero and negatives are rejected
  // up front instead of relying on process.kill to throw for them.
  if (!Number.isInteger(pid) || pid <= 0) {
    return false;
  }
  try {
    process.kill(pid, 0);
    return true;
  } catch (err) {
    // EPERM means the process exists but we lack signal permission — still alive.
    // Optional chaining keeps the result a real boolean even if the thrown
    // value is null/undefined.
    return err?.code === 'EPERM';
  }
}
|
||||||
|
|
||||||
|
// Read the lock file at lockPath and parse it as JSON.
// Returns the parsed value, or null when the file cannot be read or its
// content is not valid JSON.
function safeReadLock(lockPath) {
  let parsed = null;
  try {
    const raw = readFileSync(lockPath, 'utf8');
    parsed = JSON.parse(raw);
  } catch {
    parsed = null;
  }
  return parsed;
}
|
||||||
|
|
||||||
|
// Modification time of the file at lockPath in epoch milliseconds, or null
// when stat fails (for example because the file does not exist).
function lockMtimeMs(lockPath) {
  try {
    const { mtimeMs } = statSync(lockPath);
    return mtimeMs;
  } catch {
    return null;
  }
}
|
||||||
|
|
||||||
|
// Create the lock file and store the holder's metadata in it as JSON.
// The 'wx' flag makes the write fail with EEXIST when the file already
// exists, which is what gives the lock its exclusivity.
function writeLockFile(lockPath) {
  const holder = {
    pid: process.pid,
    started: Date.now(),
    host: hostname(),
    version: 1,
  };
  const payload = JSON.stringify(holder);
  writeFileSync(lockPath, payload, { flag: 'wx', encoding: 'utf8' });
}
|
||||||
|
|
||||||
|
/**
 * Acquire an exclusive lock. Throws ELOCKED if held by a live, fresh holder.
 * Cleans up stale locks (dead PID OR mtime older than staleThresholdMs).
 *
 * After breaking a stale lock the create is retried exactly once. If that
 * retry finds the file present again (another process won the race, or the
 * stale file could not be removed), the failure is reported as ELOCKED with
 * the underlying EEXIST error attached as `cause`.
 *
 * Cleanup listeners installed on `process` are removed again by release(),
 * so repeated acquire/release cycles do not accumulate listeners.
 *
 * @param {string} [lockPath] — absolute lock-file path; defaults to <cache>/kb-update.lock
 * @param {object} [opts]
 * @param {number} [opts.staleThresholdMs] — default 3600000 (1h)
 * @param {number} [opts.refreshIntervalMs] — if > 0, periodically utimes the lock
 * @param {boolean} [opts.registerCleanup] — default true; install exit/signal handlers
 * @returns {{lockPath: string, release: () => void}}
 * @throws {Error} code 'ELOCKED' (with `holderPid`) when the lock is held;
 *   any other error from creating the lock file is rethrown unchanged.
 */
export function acquireLock(lockPath, opts = {}) {
  const staleThresholdMs = opts.staleThresholdMs ?? DEFAULT_STALE_THRESHOLD_MS;
  const refreshIntervalMs = opts.refreshIntervalMs ?? 0;
  const registerCleanup = opts.registerCleanup ?? true;
  const path = lockPath || join(getCacheDir('ms-ai-architect'), DEFAULT_LOCK_NAME);

  // Build the error reported whenever somebody else holds the lock.
  const lockedError = (holderPid, started, cause) => {
    const e = new Error(
      `Lock held by PID ${holderPid} (started ${started ?? 'unknown'})`
    );
    e.code = 'ELOCKED';
    e.holderPid = holderPid;
    if (cause !== undefined) e.cause = cause;
    return e;
  };

  // Extract a numeric pid from parsed lock data, or null if there is none.
  const pidOf = (data) => (typeof data?.pid === 'number' ? data.pid : null);

  try {
    writeLockFile(path);
  } catch (err) {
    if (!err || err.code !== 'EEXIST') throw err;

    const data = safeReadLock(path);
    const mtime = lockMtimeMs(path);
    const holderPid = pidOf(data);
    // An unreadable lock or one without a numeric pid counts as "dead PID".
    const pidAlive = holderPid != null ? isPidAlive(holderPid) : false;
    // If the file cannot be stat'ed the age is treated as infinite (stale).
    const ageMs = mtime != null ? Date.now() - mtime : Infinity;
    const stale = !pidAlive || ageMs > staleThresholdMs;

    if (!stale) {
      throw lockedError(holderPid, data?.started);
    }

    // NOTE(review): breaking the stale lock and re-creating it is not one
    // atomic step; two processes that both judge the lock stale can still
    // interleave here. The recorded `host` is also not consulted, so the PID
    // probe assumes the holder runs on this machine — confirm the lock never
    // lives on a shared filesystem.
    try {
      unlinkSync(path);
    } catch {
      // best-effort
    }

    try {
      writeLockFile(path); // retry once
    } catch (retryErr) {
      if (!retryErr || retryErr.code !== 'EEXIST') throw retryErr;
      // The file exists again: report it as a held lock, not as raw EEXIST.
      const current = safeReadLock(path);
      throw lockedError(pidOf(current), current?.started, retryErr);
    }
  }

  let refreshTimer = null;
  let released = false;
  // [event, listener] pairs registered on `process`, so release() can undo them.
  const cleanupListeners = [];

  const release = () => {
    if (released) return;
    released = true;
    if (refreshTimer) {
      clearInterval(refreshTimer);
      refreshTimer = null;
    }
    // Detach our handlers; removing one that already fired is a no-op.
    for (const [event, listener] of cleanupListeners) {
      process.removeListener(event, listener);
    }
    cleanupListeners.length = 0;
    try {
      const data = safeReadLock(path);
      // Only delete a lock that is ours or unreadable; a readable lock that
      // names another PID is left alone.
      if (!data || data.pid === process.pid) {
        unlinkSync(path);
      }
    } catch {
      // best-effort
    }
  };

  if (refreshIntervalMs > 0) {
    // NOTE(review): the refresh touches whatever file is at `path` without
    // checking that it is still our lock — confirm that is acceptable.
    refreshTimer = setInterval(() => {
      try {
        const now = new Date();
        utimesSync(path, now, now);
      } catch {
        // best-effort
      }
    }, refreshIntervalMs);
    // Do not let the refresh timer alone keep the process running.
    if (typeof refreshTimer.unref === 'function') {
      refreshTimer.unref();
    }
  }

  if (registerCleanup) {
    const register = (event, listener) => {
      cleanupListeners.push([event, listener]);
      process.once(event, listener);
    };
    // Release, then exit with the given status code.
    const releaseThenExit = (code) => () => {
      release();
      process.exit(code);
    };

    register('exit', () => release());
    register('SIGINT', releaseThenExit(130));
    register('SIGTERM', releaseThenExit(143));
    register('SIGHUP', releaseThenExit(129));
    register('uncaughtException', (err) => {
      release();
      console.error(err);
      process.exit(1);
    });
  }

  return { lockPath: path, release };
}
|
||||||
192
plugins/ms-ai-architect/tests/kb-update/test-lock-file.test.mjs
Normal file
192
plugins/ms-ai-architect/tests/kb-update/test-lock-file.test.mjs
Normal file
|
|
@ -0,0 +1,192 @@
|
||||||
|
// tests/kb-update/test-lock-file.test.mjs
|
||||||
|
// Unit tests for scripts/kb-update/lib/lock-file.mjs
|
||||||
|
|
||||||
|
import { test } from 'node:test';
|
||||||
|
import assert from 'node:assert/strict';
|
||||||
|
import {
|
||||||
|
mkdtempSync,
|
||||||
|
rmSync,
|
||||||
|
writeFileSync,
|
||||||
|
readFileSync,
|
||||||
|
existsSync,
|
||||||
|
utimesSync,
|
||||||
|
} from 'node:fs';
|
||||||
|
import { tmpdir } from 'node:os';
|
||||||
|
import { join } from 'node:path';
|
||||||
|
import {
|
||||||
|
acquireLock,
|
||||||
|
isPidAlive,
|
||||||
|
} from '../../scripts/kb-update/lib/lock-file.mjs';
|
||||||
|
|
||||||
|
// PID used by fixtures to stand for a process that does not exist: far above
// typical PID_MAX, so it is reliably non-existent.
const DEAD_PID = 99_999_999;
|
||||||
|
|
||||||
|
// Run fn with a freshly created temporary directory and return its result.
// The directory is removed recursively afterwards, whether fn returns or throws.
function withTmp(fn) {
  const scratchDir = mkdtempSync(join(tmpdir(), 'lf-test-'));
  let outcome;
  try {
    outcome = fn(scratchDir);
  } finally {
    rmSync(scratchDir, { recursive: true, force: true });
  }
  return outcome;
}
|
||||||
|
|
||||||
|
// Write a fixture lock file at path with the given holder metadata.
// `started` defaults to "now minus ageMs". When ageMs is positive the file's
// atime and mtime are also back-dated by ageMs, so the lock looks that old.
function writeFakeLock(path, { pid, started, host = 'test-host', ageMs = 0 }) {
  const record = {
    pid,
    started: started ?? Date.now() - ageMs,
    host,
    version: 1,
  };
  writeFileSync(path, JSON.stringify(record), 'utf8');

  if (!(ageMs > 0)) return;
  const backdated = new Date(Date.now() - ageMs);
  utimesSync(path, backdated, backdated);
}
|
||||||
|
|
||||||
|
// The running test process itself must be reported as alive.
test('isPidAlive — current process is alive', () => {
  const alive = isPidAlive(process.pid);
  assert.strictEqual(alive, true);
});

// The DEAD_PID fixture must be reported as dead.
test('isPidAlive — non-existent PID is dead', () => {
  const alive = isPidAlive(DEAD_PID);
  assert.strictEqual(alive, false);
});

// Values that cannot be a valid PID are reported as dead, not thrown on.
test('isPidAlive — invalid input is dead', () => {
  for (const bogus of [0, -1, NaN, undefined]) {
    assert.strictEqual(isPidAlive(bogus), false);
  }
});
|
||||||
|
|
||||||
|
// A successful acquire creates the file and records this process as holder.
test('acquireLock — creates lock file with current PID metadata', () => {
  withTmp((dir) => {
    const lockFile = join(dir, 'test.lock');
    const handle = acquireLock(lockFile, { registerCleanup: false });
    try {
      assert.strictEqual(handle.lockPath, lockFile);
      assert.strictEqual(existsSync(lockFile), true);

      const meta = JSON.parse(readFileSync(lockFile, 'utf8'));
      assert.strictEqual(meta.pid, process.pid);
      assert.strictEqual(meta.version, 1);
      assert.strictEqual(typeof meta.started, 'number');
      assert.strictEqual(typeof meta.host, 'string');
    } finally {
      handle.release();
    }
  });
});
|
||||||
|
|
||||||
|
// Acquiring twice from the same process must fail the second time and name
// this process as the holder.
test('acquireLock — second call same process throws ELOCKED', () => {
  withTmp((dir) => {
    const lockFile = join(dir, 'test.lock');
    const handle = acquireLock(lockFile, { registerCleanup: false });
    try {
      const secondAttempt = () => acquireLock(lockFile, { registerCleanup: false });
      const heldByUs = (err) => err.code === 'ELOCKED' && err.holderPid === process.pid;
      assert.throws(secondAttempt, heldByUs);
    } finally {
      handle.release();
    }
  });
});

test('acquireLock — concurrent live holder (fixture lock-fil) throws ELOCKED', () => {
  withTmp((dir) => {
    const lockFile = join(dir, 'test.lock');
    // The fixture is written up front as though a different live process held
    // the lock; process.pid serves as a PID that is guaranteed alive, which
    // avoids having to fork a real second process.
    writeFakeLock(lockFile, { pid: process.pid, ageMs: 0 });

    const attempt = () => acquireLock(lockFile, { registerCleanup: false });
    const isLocked = (err) => err.code === 'ELOCKED';
    assert.throws(attempt, isLocked);
  });
});
|
||||||
|
|
||||||
|
// release() removes the lock file from disk.
test('acquireLock — release deletes the lock file', () => {
  withTmp((dir) => {
    const lockFile = join(dir, 'test.lock');
    const handle = acquireLock(lockFile, { registerCleanup: false });
    assert.strictEqual(existsSync(lockFile), true);

    handle.release();
    assert.strictEqual(existsSync(lockFile), false);
  });
});

test('acquireLock — release on already-released lock is a no-op', () => {
  withTmp((dir) => {
    const lockFile = join(dir, 'test.lock');
    const handle = acquireLock(lockFile, { registerCleanup: false });

    handle.release();
    // Releasing again has to be harmless: no exception the second time.
    handle.release();

    assert.strictEqual(existsSync(lockFile), false);
  });
});
|
||||||
|
|
||||||
|
// Dead holder and old mtime: the lock is taken over by this process.
test('acquireLock — stale lock with dead PID + old mtime is cleaned', () => {
  withTmp((dir) => {
    const lockFile = join(dir, 'test.lock');
    const twoHoursMs = 2 * 60 * 60 * 1000;
    writeFakeLock(lockFile, { pid: DEAD_PID, ageMs: twoHoursMs });

    const handle = acquireLock(lockFile, { registerCleanup: false });
    try {
      const meta = JSON.parse(readFileSync(lockFile, 'utf8'));
      assert.strictEqual(meta.pid, process.pid);
    } finally {
      handle.release();
    }
  });
});

test('acquireLock — stale lock with live PID but old mtime is also cleaned', () => {
  withTmp((dir) => {
    const lockFile = join(dir, 'test.lock');
    // The holder PID is alive (it is this process), yet the mtime is two
    // hours old, which is past the default one-hour threshold.
    const twoHoursMs = 2 * 60 * 60 * 1000;
    writeFakeLock(lockFile, { pid: process.pid, ageMs: twoHoursMs });

    const handle = acquireLock(lockFile, { registerCleanup: false });
    try {
      const meta = JSON.parse(readFileSync(lockFile, 'utf8'));
      assert.strictEqual(meta.pid, process.pid);
      // The take-over writes a new `started` stamp taken from the current clock.
      const sinceStart = Date.now() - meta.started;
      assert.ok(sinceStart < 5000);
    } finally {
      handle.release();
    }
  });
});

// Live holder and fresh mtime: the lock must be respected.
test('acquireLock — fresh lock with live PID is NOT cleaned', () => {
  withTmp((dir) => {
    const lockFile = join(dir, 'test.lock');
    writeFakeLock(lockFile, { pid: process.pid, ageMs: 0 });

    const attempt = () => acquireLock(lockFile, { registerCleanup: false });
    const heldByUs = (err) => err.code === 'ELOCKED' && err.holderPid === process.pid;
    assert.throws(attempt, heldByUs);
  });
});
|
||||||
|
|
||||||
|
test('acquireLock — staleThresholdMs is honored', () => {
  withTmp((dir) => {
    const lockFile = join(dir, 'test.lock');
    const fiveSecondOldLiveLock = { pid: process.pid, ageMs: 5_000 };

    // With the default one-hour threshold a 5 s old lock held by a live PID
    // is not stale, so acquiring must fail with ELOCKED.
    writeFakeLock(lockFile, fiveSecondOldLiveLock);
    const attemptWithDefault = () => acquireLock(lockFile, { registerCleanup: false });
    assert.throws(attemptWithDefault, (err) => err.code === 'ELOCKED');

    // The identical fixture under a 1 s threshold is stale and gets replaced.
    writeFakeLock(lockFile, fiveSecondOldLiveLock);
    const handle = acquireLock(lockFile, {
      registerCleanup: false,
      staleThresholdMs: 1_000,
    });
    handle.release();
    assert.strictEqual(existsSync(lockFile), false);
  });
});
|
||||||
Loading…
Add table
Add a link
Reference in a new issue