feat(ms-ai-architect): add lib/lock-file with PID+mtime stale detection [skip-docs]

Foundation lib for v1.12.0 cron rewrite. Atomic exclusive create via
fs.writeFileSync('wx'); on EEXIST resolves staleness with OR semantics:
stale if PID is dead OR mtime exceeds threshold. Either alone breaks the
lock — handles SIGKILL orphans (mtime), PID-reuse races (mtime), and
crashed-then-replaced runs (PID).

- acquireLock(lockPath, opts) → {lockPath, release()}
- staleThresholdMs default 1h; refreshIntervalMs opt-in for long runs
- registerCleanup default true (exit/SIGINT/SIGTERM/SIGHUP/uncaughtException)
- isPidAlive uses kill(pid, 0) with EPERM-as-alive nuance

12/12 tests pass: PID liveness, fixture concurrency, idempotent release,
stale variants (dead+old, live+old, fresh+live), staleThresholdMs honored.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Kjell Tore Guttormsen 2026-05-05 10:47:05 +02:00
commit 3e26b94a27
2 changed files with 358 additions and 0 deletions

View file

@ -0,0 +1,166 @@
// lock-file.mjs — Exclusive lock with PID + mtime stale-detection.
// Zero dependencies. Uses fs.writeFileSync('wx') for atomic exclusive create.
// Stale-detection is OR-based: stale if PID is dead OR mtime exceeds threshold.
// Either condition alone is enough to break the lock: mtime alone handles
// SIGKILL orphans and PID-reuse races, and a dead PID alone handles
// crashed-then-replaced runs. Long runs may opt in to mtime refresh via refreshIntervalMs.
import { writeFileSync, readFileSync, statSync, unlinkSync, utimesSync } from 'node:fs';
import { hostname } from 'node:os';
import { join } from 'node:path';
import { getCacheDir } from './cross-platform-paths.mjs';
// Default for opts.staleThresholdMs in acquireLock: an existing lock whose
// mtime is older than this is treated as stale.
const DEFAULT_STALE_THRESHOLD_MS = 60 * 60 * 1000; // 1 hour
// File name joined onto the cache directory when acquireLock is called
// without an explicit lockPath.
const DEFAULT_LOCK_NAME = 'kb-update.lock';
/**
 * Report whether `pid` refers to a process that currently exists.
 *
 * Probes with signal 0, which performs the existence/permission check
 * without delivering a signal.
 *
 * @param {number} pid process id to probe
 * @returns {boolean} false for non-numeric, non-finite, or non-positive input
 */
export function isPidAlive(pid) {
  const looksLikePid = typeof pid === 'number' && Number.isFinite(pid) && pid > 0;
  if (!looksLikePid) return false;

  try {
    process.kill(pid, 0);
  } catch (probeError) {
    // EPERM: the process exists but we may not signal it, so it counts as alive.
    // Any other failure is reported as "not alive".
    return probeError && probeError.code === 'EPERM';
  }
  return true;
}
/**
 * Read the lock file and parse it as JSON.
 *
 * @param {string} lockPath path of the lock file
 * @returns {*} the parsed JSON value, or null when the file cannot be read
 *   or its content is not valid JSON
 */
function safeReadLock(lockPath) {
  let parsed = null;
  try {
    const raw = readFileSync(lockPath, 'utf8');
    parsed = JSON.parse(raw);
  } catch {
    // Missing, unreadable and malformed files are all reported as null.
    parsed = null;
  }
  return parsed;
}
/**
 * Look up the lock file's modification time.
 *
 * @param {string} lockPath path of the lock file
 * @returns {number|null} mtime in milliseconds, or null when stat fails
 */
function lockMtimeMs(lockPath) {
  let stats;
  try {
    stats = statSync(lockPath);
  } catch {
    // Any stat failure (e.g. the file vanished) is reported as null.
    return null;
  }
  return stats.mtimeMs;
}
/**
 * Create the lock file with this process's metadata.
 *
 * Uses the 'wx' flag, so the write fails with EEXIST when the file already
 * exists instead of overwriting it.
 *
 * @param {string} lockPath path of the lock file to create
 * @returns {void}
 */
function writeLockFile(lockPath) {
  const record = {
    pid: process.pid,
    started: Date.now(),
    host: hostname(),
    version: 1,
  };
  const payload = JSON.stringify(record);
  writeFileSync(lockPath, payload, { flag: 'wx', encoding: 'utf8' });
}
/**
 * Acquire an exclusive lock file, breaking an existing lock first when it is stale.
 *
 * The lock is created with an exclusive-create write. When the file already
 * exists, the holder is stale if its recorded PID is not alive OR the file's
 * mtime is older than staleThresholdMs (an unreadable lock or a missing mtime
 * counts as stale). A stale lock is unlinked and creation is retried once.
 *
 * @param {string} [lockPath] absolute lock-file path; defaults to <cache>/kb-update.lock
 * @param {object} [opts]
 * @param {number} [opts.staleThresholdMs] default 3600000 (1h)
 * @param {number} [opts.refreshIntervalMs] if > 0, periodically utimes the lock
 * @param {boolean} [opts.registerCleanup] default true; install exit/signal handlers
 *   (exit, SIGINT, SIGTERM, SIGHUP, uncaughtException) that release the lock.
 *   The handlers are uninstalled again by release().
 * @returns {{lockPath: string, release: () => void}} release() is idempotent
 * @throws {Error} with code 'ELOCKED' and holderPid when the lock is held by a
 *   live, fresh holder, or when another process re-creates the lock while a
 *   stale one is being replaced. Other filesystem errors are rethrown as-is.
 */
export function acquireLock(lockPath, opts = {}) {
  const staleThresholdMs = opts.staleThresholdMs ?? DEFAULT_STALE_THRESHOLD_MS;
  const refreshIntervalMs = opts.refreshIntervalMs ?? 0;
  const registerCleanup = opts.registerCleanup ?? true;
  const path = lockPath || join(getCacheDir('ms-ai-architect'), DEFAULT_LOCK_NAME);

  // Extract a numeric PID from parsed lock content, or null when absent/invalid.
  const pidFrom = (data) => (typeof data?.pid === 'number' ? data.pid : null);

  // Build the ELOCKED error reported to callers for a lock we may not take.
  const lockedError = (data, cause) => {
    const holderPid = pidFrom(data);
    const e = new Error(
      `Lock held by PID ${holderPid} (started ${data?.started ?? 'unknown'})`
    );
    e.code = 'ELOCKED';
    e.holderPid = holderPid;
    if (cause) e.cause = cause;
    return e;
  };

  try {
    writeLockFile(path);
  } catch (err) {
    if (!err || err.code !== 'EEXIST') throw err;
    const data = safeReadLock(path);
    const mtime = lockMtimeMs(path);
    const holderPid = pidFrom(data);
    const pidAlive = holderPid != null ? isPidAlive(holderPid) : false;
    const ageMs = mtime != null ? Date.now() - mtime : Infinity;
    const stale = !pidAlive || ageMs > staleThresholdMs;
    if (!stale) {
      throw lockedError(data);
    }
    // NOTE(review): unlink + re-create is not atomic; two processes that both
    // judge the same lock stale can still interleave here. Confirm whether
    // callers need a stronger guarantee.
    try {
      unlinkSync(path);
    } catch {
      // best-effort
    }
    try {
      writeLockFile(path); // retry once
    } catch (retryErr) {
      if (!retryErr || retryErr.code !== 'EEXIST') throw retryErr;
      // The file exists again (another acquirer won the race, or the unlink
      // failed): report it as a held lock rather than leaking a raw EEXIST.
      throw lockedError(safeReadLock(path), retryErr);
    }
  }

  let refreshTimer = null;
  let released = false;
  // [event, listener] pairs installed on `process`, so release() can remove them.
  const installed = [];

  const release = () => {
    if (released) return;
    released = true;
    if (refreshTimer) {
      clearInterval(refreshTimer);
      refreshTimer = null;
    }
    // Uninstall our handlers so repeated acquire/release cycles do not pile up
    // listeners and a released lock no longer forces process.exit on signals.
    for (const [event, listener] of installed) {
      process.removeListener(event, listener);
    }
    installed.length = 0;
    try {
      // Only delete a lock that is unreadable or still records our own PID,
      // so a lock taken over by another process is left alone.
      const data = safeReadLock(path);
      if (!data || data.pid === process.pid) {
        unlinkSync(path);
      }
    } catch {
      // best-effort
    }
  };

  if (refreshIntervalMs > 0) {
    refreshTimer = setInterval(() => {
      try {
        const now = new Date();
        utimesSync(path, now, now);
      } catch {
        // best-effort
      }
    }, refreshIntervalMs);
    // Do not let the refresh timer keep the process alive on its own.
    if (typeof refreshTimer.unref === 'function') {
      refreshTimer.unref();
    }
  }

  if (registerCleanup) {
    const install = (event, listener) => {
      installed.push([event, listener]);
      process.once(event, listener);
    };
    // Signal handlers release the lock, then exit with 128 + signal number.
    const releaseThenExit = (code) => () => {
      release();
      process.exit(code);
    };
    install('exit', () => release());
    install('SIGINT', releaseThenExit(130));
    install('SIGTERM', releaseThenExit(143));
    install('SIGHUP', releaseThenExit(129));
    install('uncaughtException', (err) => {
      release();
      console.error(err);
      process.exit(1);
    });
  }

  return { lockPath: path, release };
}

View file

@ -0,0 +1,192 @@
// tests/kb-update/test-lock-file.test.mjs
// Unit tests for scripts/kb-update/lib/lock-file.mjs
import { test } from 'node:test';
import assert from 'node:assert/strict';
import {
mkdtempSync,
rmSync,
writeFileSync,
readFileSync,
existsSync,
utimesSync,
} from 'node:fs';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import {
acquireLock,
isPidAlive,
} from '../../scripts/kb-update/lib/lock-file.mjs';
// PID used by the tests as a stand-in for a process that does not exist;
// the isPidAlive test below asserts it is reported dead.
const DEAD_PID = 99999999; // far above typical PID_MAX; reliably non-existent
/**
 * Run `fn` with a freshly created temporary directory and remove the
 * directory afterwards, whether `fn` returns or throws.
 *
 * @param {(dir: string) => *} fn callback receiving the directory path
 * @returns {*} whatever `fn` returns
 */
function withTmp(fn) {
  const scratchDir = mkdtempSync(join(tmpdir(), 'lf-test-'));
  let outcome;
  try {
    outcome = fn(scratchDir);
  } finally {
    rmSync(scratchDir, { recursive: true, force: true });
  }
  return outcome;
}
/**
 * Write a lock-file fixture at `path`, overwriting any existing file.
 *
 * @param {string} path fixture location
 * @param {object} fields
 * @param {number} fields.pid PID to record as the holder
 * @param {number} [fields.started] recorded start time; defaults to now minus ageMs
 * @param {string} [fields.host] recorded host name, default 'test-host'
 * @param {number} [fields.ageMs] when > 0, atime/mtime are set this far in the past
 */
function writeFakeLock(path, { pid, started, host = 'test-host', ageMs = 0 }) {
  const record = {
    pid,
    started: started ?? Date.now() - ageMs,
    host,
    version: 1,
  };
  writeFileSync(path, JSON.stringify(record), 'utf8');

  // Only back-date the file when a positive age was requested.
  if (!(ageMs > 0)) return;
  const backdated = new Date(Date.now() - ageMs);
  utimesSync(path, backdated, backdated);
}
// isPidAlive: the test runner's own process must be reported alive.
test('isPidAlive — current process is alive', () => {
  const alive = isPidAlive(process.pid);
  assert.equal(alive, true);
});

// isPidAlive: a PID with no process behind it must be reported dead.
test('isPidAlive — non-existent PID is dead', () => {
  const alive = isPidAlive(DEAD_PID);
  assert.equal(alive, false);
});

// isPidAlive: values that are not usable PIDs are reported dead.
test('isPidAlive — invalid input is dead', () => {
  for (const bogus of [0, -1, NaN, undefined]) {
    assert.equal(isPidAlive(bogus), false);
  }
});
// A fresh acquire writes a lock file describing the current process.
test('acquireLock — creates lock file with current PID metadata', () => {
  withTmp((dir) => {
    const lockFile = join(dir, 'test.lock');
    const handle = acquireLock(lockFile, { registerCleanup: false });
    try {
      assert.equal(handle.lockPath, lockFile);
      assert.equal(existsSync(lockFile), true);
      const { pid, version, started, host } = JSON.parse(readFileSync(lockFile, 'utf8'));
      assert.equal(pid, process.pid);
      assert.equal(version, 1);
      assert.equal(typeof started, 'number');
      assert.equal(typeof host, 'string');
    } finally {
      handle.release();
    }
  });
});

// While a lock is held, acquiring it again from the same process fails.
test('acquireLock — second call same process throws ELOCKED', () => {
  withTmp((dir) => {
    const lockFile = join(dir, 'test.lock');
    const handle = acquireLock(lockFile, { registerCleanup: false });
    const reacquire = () => acquireLock(lockFile, { registerCleanup: false });
    const isOwnLockError = (err) => err.code === 'ELOCKED' && err.holderPid === process.pid;
    try {
      assert.throws(reacquire, isOwnLockError);
    } finally {
      handle.release();
    }
  });
});

// A pre-existing fresh lock recording a live PID blocks acquisition.
test('acquireLock — concurrent live holder (fixture lock-fil) throws ELOCKED', () => {
  withTmp((dir) => {
    const lockFile = join(dir, 'test.lock');
    // The fixture records process.pid: a PID guaranteed to be alive, standing
    // in for another live holder without having to fork a process.
    writeFakeLock(lockFile, { pid: process.pid, ageMs: 0 });
    const attempt = () => acquireLock(lockFile, { registerCleanup: false });
    assert.throws(attempt, (err) => err.code === 'ELOCKED');
  });
});

// release() removes the lock file from disk.
test('acquireLock — release deletes the lock file', () => {
  withTmp((dir) => {
    const lockFile = join(dir, 'test.lock');
    const handle = acquireLock(lockFile, { registerCleanup: false });
    assert.equal(existsSync(lockFile), true);
    handle.release();
    assert.equal(existsSync(lockFile), false);
  });
});

// Calling release() twice is harmless.
test('acquireLock — release on already-released lock is a no-op', () => {
  withTmp((dir) => {
    const lockFile = join(dir, 'test.lock');
    const handle = acquireLock(lockFile, { registerCleanup: false });
    handle.release();
    // The second call must not throw.
    handle.release();
    assert.equal(existsSync(lockFile), false);
  });
});
// Dead holder and old mtime: the lock is taken over by the current process.
test('acquireLock — stale lock with dead PID + old mtime is cleaned', () => {
  withTmp((dir) => {
    const twoHoursMs = 2 * 60 * 60 * 1000;
    const lockFile = join(dir, 'test.lock');
    writeFakeLock(lockFile, { pid: DEAD_PID, ageMs: twoHoursMs });
    const handle = acquireLock(lockFile, { registerCleanup: false });
    try {
      const { pid } = JSON.parse(readFileSync(lockFile, 'utf8'));
      assert.equal(pid, process.pid);
    } finally {
      handle.release();
    }
  });
});

// Live holder but old mtime: age alone is enough to break the lock.
test('acquireLock — stale lock with live PID but old mtime is also cleaned', () => {
  withTmp((dir) => {
    const twoHoursMs = 2 * 60 * 60 * 1000;
    const lockFile = join(dir, 'test.lock');
    // The fixture records our own (live) PID, with an mtime past the default 1h threshold.
    writeFakeLock(lockFile, { pid: process.pid, ageMs: twoHoursMs });
    const handle = acquireLock(lockFile, { registerCleanup: false });
    try {
      const { pid, started } = JSON.parse(readFileSync(lockFile, 'utf8'));
      assert.equal(pid, process.pid);
      // `started` must have been rewritten to the current wall-clock time.
      assert.ok(Date.now() - started < 5000);
    } finally {
      handle.release();
    }
  });
});

// Live holder and fresh mtime: the lock must be respected.
test('acquireLock — fresh lock with live PID is NOT cleaned', () => {
  withTmp((dir) => {
    const lockFile = join(dir, 'test.lock');
    writeFakeLock(lockFile, { pid: process.pid, ageMs: 0 });
    const attempt = () => acquireLock(lockFile, { registerCleanup: false });
    const isOwnLockError = (err) => err.code === 'ELOCKED' && err.holderPid === process.pid;
    assert.throws(attempt, isOwnLockError);
  });
});

// The caller-supplied threshold decides whether a 5s-old lock is stale.
test('acquireLock — staleThresholdMs is honored', () => {
  withTmp((dir) => {
    const lockFile = join(dir, 'test.lock');
    const fiveSecondOldLiveLock = { pid: process.pid, ageMs: 5_000 };

    // With the default 1h threshold the lock is not stale, so ELOCKED.
    writeFakeLock(lockFile, fiveSecondOldLiveLock);
    assert.throws(
      () => acquireLock(lockFile, { registerCleanup: false }),
      (err) => err.code === 'ELOCKED'
    );

    // Same fixture with a 1s threshold: the lock is stale and gets replaced.
    writeFakeLock(lockFile, fiveSecondOldLiveLock);
    const handle = acquireLock(lockFile, {
      registerCleanup: false,
      staleThresholdMs: 1_000,
    });
    handle.release();
    assert.equal(existsSync(lockFile), false);
  });
});