ktg-plugin-marketplace/plugins/config-audit/scanners/claude-md-linter.mjs

/**
 * CML Scanner — CLAUDE.md Linter
 * Checks length, recommended sections, headings, @import paths, HTML comments,
 * repeated lines, and TODO/FIXME/HACK markers (frontmatter is parsed only to isolate the body).
 * Finding IDs: CA-CML-NNN
 */

import { readTextFile } from './lib/file-discovery.mjs';
import { finding, scannerResult, resetCounter } from './lib/output.mjs';
import { SEVERITY } from './lib/severity.mjs';
import { parseFrontmatter, extractSections, findImports } from './lib/yaml-parser.mjs';
import { lineCount, truncate } from './lib/string-utils.mjs';

/** Scanner code attached to every finding and to the scanner result. */
const SCANNER = 'CML';

/** Line count above which a medium-severity length finding is raised. */
const MAX_RECOMMENDED_LINES = 200;

/** Line count above which a high-severity length finding is raised. */
const MAX_ABSOLUTE_LINES = 500;

/**
 * Recommended sections for a project CLAUDE.md.
 * Each entry pairs a case-insensitive heading pattern with the label that is
 * reported when no heading matches that pattern.
 */
const RECOMMENDED_SECTIONS = [
  [/project|overview|description|what/i, 'Project overview'],
  [/command|workflow|how to|getting started|usage/i, 'Commands/Workflows'],
  [/architect|structure|directory|layout/i, 'Architecture'],
  [/convention|pattern|rule|style/i, 'Conventions/Patterns'],
].map(([pattern, label]) => ({ pattern, label }));

/**
 * Run every CLAUDE.md lint rule against the text of one file.
 *
 * Findings are created in a fixed order: length, nearly-empty, recommended
 * sections, headings, @imports, HTML comments, repeated lines, TODO markers.
 * A file with fewer than 3 lines yields only the nearly-empty finding.
 *
 * @param {{ absPath: string, relPath: string, scope?: string }} file
 *   Discovered file; only these fields are read here.
 * @param {string} content - Full text of the file.
 * @returns {object[]} Findings built by `finding()`, in rule order.
 */
function lintClaudeFile(file, content) {
  const findings = [];
  const lines = lineCount(content);

  // --- Length checks ---
  if (lines > MAX_ABSOLUTE_LINES) {
    findings.push(finding({
      scanner: SCANNER,
      severity: SEVERITY.high,
      title: `CLAUDE.md exceeds ${MAX_ABSOLUTE_LINES} lines`,
      description: `${file.relPath} has ${lines} lines. Files over ${MAX_ABSOLUTE_LINES} lines significantly reduce Claude's adherence to instructions.`,
      file: file.absPath,
      evidence: `${lines} lines`,
      recommendation: `Split into @imports and .claude/rules/ files. Keep CLAUDE.md under ${MAX_RECOMMENDED_LINES} lines.`,
      autoFixable: false,
    }));
  } else if (lines > MAX_RECOMMENDED_LINES) {
    findings.push(finding({
      scanner: SCANNER,
      severity: SEVERITY.medium,
      title: `CLAUDE.md exceeds recommended ${MAX_RECOMMENDED_LINES} lines`,
      description: `${file.relPath} has ${lines} lines. Best practice is under ${MAX_RECOMMENDED_LINES} lines for optimal adherence.`,
      file: file.absPath,
      evidence: `${lines} lines`,
      recommendation: 'Consider using @imports or .claude/rules/ for detailed content.',
      autoFixable: false,
    }));
  }

  // --- Empty file ---
  if (lines < 3) {
    findings.push(finding({
      scanner: SCANNER,
      severity: SEVERITY.medium,
      title: 'CLAUDE.md is nearly empty',
      description: `${file.relPath} has only ${lines} lines.`,
      file: file.absPath,
      recommendation: 'Add project overview, commands/workflows, and conventions.',
      autoFixable: false,
    }));
    return findings; // Skip further checks for empty files
  }

  // Parsing happens only after the nearly-empty guard, so empty or trivial
  // content is never handed to the parser helpers.
  const { body } = parseFrontmatter(content);
  const sections = extractSections(body);
  const imports = findImports(content);

  // --- Section checks (only for project/user scope) ---
  if (file.scope === 'project' || file.scope === 'user') {
    const sectionHeadings = sections.map((s) => s.heading);
    const missingSections = RECOMMENDED_SECTIONS
      .filter((rec) => !sectionHeadings.some((h) => rec.pattern.test(h)))
      .map((rec) => rec.label);

    if (missingSections.length > 0) {
      findings.push(finding({
        scanner: SCANNER,
        severity: SEVERITY.low,
        title: 'Missing recommended sections',
        description: `${file.relPath} is missing: ${missingSections.join(', ')}`,
        file: file.absPath,
        evidence: `Present sections: ${sectionHeadings.slice(0, 5).join(', ') || '(none)'}`,
        recommendation: `Add sections for: ${missingSections.join(', ')}`,
        autoFixable: false,
      }));
    }
  }

  // --- No headings at all ---
  if (sections.length === 0 && lines > 10) {
    findings.push(finding({
      scanner: SCANNER,
      severity: SEVERITY.medium,
      title: 'CLAUDE.md has no markdown headings',
      description: `${file.relPath} has ${lines} lines but no ## headings. Structured content with headers improves Claude's ability to find and follow instructions.`,
      file: file.absPath,
      recommendation: 'Add markdown headings (##) to organize content into scannable sections.',
      autoFixable: false,
    }));
  }

  // --- @import checks ---
  // (Full resolution is in import-resolver scanner, here we just flag obvious issues)
  for (const imp of imports) {
    // Splitting on '..' yields one more piece than there are occurrences, so
    // "> 3" means three or more parent traversals.
    if (imp.path.split('..').length > 3) {
      findings.push(finding({
        scanner: SCANNER,
        severity: SEVERITY.low,
        title: '@import with deep relative path',
        description: `${file.relPath}:${imp.line} imports "${truncate(imp.path, 60)}" with multiple parent traversals.`,
        file: file.absPath,
        line: imp.line,
        evidence: `@${imp.path}`,
        recommendation: 'Consider using absolute paths or moving the imported file closer.',
        autoFixable: false,
      }));
    }
  }

  // --- HTML comment info ---
  const htmlComments = (content.match(/<!--[\s\S]*?-->/g) || []).length;
  if (htmlComments > 0) {
    findings.push(finding({
      scanner: SCANNER,
      severity: SEVERITY.info,
      title: 'Uses HTML comments',
      description: `${file.relPath} uses ${htmlComments} HTML comment(s). These are stripped before injection, saving tokens.`,
      file: file.absPath,
      evidence: `${htmlComments} HTML comment(s)`,
    }));
  }

  // --- Duplicate content detection (simple: repeated lines) ---
  const lineArr = content.split('\n');
  const lineCounts = new Map();
  for (const l of lineArr) {
    const trimmed = l.trim();
    // Short lines, headings, table rows and list items are not counted.
    if (trimmed.length > 20 && !trimmed.startsWith('#') && !trimmed.startsWith('|') && !trimmed.startsWith('-')) {
      lineCounts.set(trimmed, (lineCounts.get(trimmed) || 0) + 1);
    }
  }
  const duplicates = [...lineCounts.entries()].filter(([, count]) => count >= 3);
  if (duplicates.length > 0) {
    findings.push(finding({
      scanner: SCANNER,
      severity: SEVERITY.low,
      title: 'Repeated content detected',
      description: `${file.relPath} has ${duplicates.length} line(s) repeated 3+ times.`,
      file: file.absPath,
      evidence: truncate(duplicates[0][0], 80),
      recommendation: 'Extract repeated content into a shared @import or rules file.',
      autoFixable: false,
    }));
  }

  // --- TODO/FIXME markers ---
  const todos = lineArr.filter((l) => /\bTODO\b|\bFIXME\b|\bHACK\b/i.test(l));
  if (todos.length > 0) {
    findings.push(finding({
      scanner: SCANNER,
      severity: SEVERITY.info,
      title: 'Contains TODO/FIXME markers',
      description: `${file.relPath} has ${todos.length} TODO/FIXME/HACK marker(s).`,
      file: file.absPath,
      evidence: truncate(todos[0].trim(), 80),
    }));
  }

  return findings;
}

/**
 * Scan all CLAUDE.md files discovered.
 *
 * Files are read one at a time, in discovery order, and their findings are
 * appended in that same order. When no file of type `claude-md` is present a
 * single high-severity "No CLAUDE.md found" finding is returned.
 *
 * @param {string} targetPath - Not read by this function.
 * @param {{ files: import('./lib/file-discovery.mjs').ConfigFile[] }} discovery
 * @returns {Promise<object>} Result of `scannerResult()`.
 */
export async function scan(targetPath, discovery) {
  const start = Date.now();
  const claudeFiles = discovery.files.filter((f) => f.type === 'claude-md');

  if (claudeFiles.length === 0) {
    return scannerResult(SCANNER, 'ok', [
      finding({
        scanner: SCANNER,
        severity: SEVERITY.high,
        title: 'No CLAUDE.md found',
        description: 'No CLAUDE.md files were discovered. This is the primary configuration surface for Claude Code.',
        recommendation: 'Run `/init` to create a starter CLAUDE.md, or create one manually.',
        autoFixable: false,
      }),
    ], 0, Date.now() - start);
  }

  const findings = [];
  let filesScanned = 0;

  for (const file of claudeFiles) {
    const content = await readTextFile(file.absPath);
    // Skip only when no text came back. An empty string is a real, empty file
    // and must still be counted and reported as nearly empty.
    // NOTE(review): assumes readTextFile signals failure with a non-string
    // value (e.g. null) — confirm against lib/file-discovery.mjs.
    if (typeof content !== 'string') continue;
    filesScanned++;

    findings.push(...lintClaudeFile(file, content));
  }

  return scannerResult(SCANNER, 'ok', findings, filesScanned, Date.now() - start);
}