Adds a deterministic GitHub Actions / Forgejo Actions injection
scanner. Detects ${{ <dangerous-field> }} interpolations inside
`run:` step blocks under privileged or semi-privileged triggers.
Sink-restricted: `if:` / `with:` / `env:` (block-level) are
evaluated by the runner expression engine, not the shell, so they
are NOT injection sinks and are suppressed at parser level.
Why: workflow expression injection is the most prevalent SAST class
on GitHub (CodeQL preview: 800K+ findings across 158K repos). The
graduated severity matrix (HIGH for pull_request_target / discussion
/ workflow_run; MEDIUM for pull_request / workflow_dispatch) is the
community-converged calibration target — uniform HIGH causes alert
fatigue.
Components:
- scanners/lib/workflow-yaml-state.mjs — line-based YAML state
machine. Tracks indentation, parent-context stack, and
`run: |` / `run: >` block-scalar entry/exit. Zero deps.
- scanners/workflow-scanner.mjs — discoverWorkflows() probes
.github/workflows/ and .forgejo/workflows/ directly (file-discovery
has no glob include). 23-field blacklist (GHSL 17 + 6 GlueStack-
class additions). Platform encoded via file path; no schema
extension to finding(). Forgejo-specific: workflow_run advisory
emitted to stderr; recommendation text mentions Forgejo's
server-level token scoping (job-level permissions: is ignored).
- knowledge/workflow-injection-patterns.md — 23-field blacklist,
trigger taxonomy, severity matrix, Forgejo divergences, NVD CVE
corpus.
Tests (47 new):
- tests/lib/workflow-yaml-state.test.mjs (15): trigger forms
(string / inline-list / block-list / block-mapping), single-line
run, block-scalar | and > tracking, env/with sink-mismatch,
multi-line, comment stripping, line-number accuracy.
- tests/scanners/workflow-scanner.test.mjs (14): TP head_ref
pull_request_target, TP discussion.title gluestack pattern,
TP comment.body pull_request, TP issue.body block-scalar,
FP if-context, FP env-block, INFO numeric, Forgejo TP, Forgejo
workflow_run advisory, envelope shape, WFL prefix.
- 9 fixtures in tests/fixtures/workflows/{.github,.forgejo}/workflows/.
Out of scope (B4 / Batch D):
- Re-interpolation detection (env.VAR after env: from blacklisted source)
- github.actor authorization-bypass category
- WFL prefix in severity.mjs OWASP maps + scan-orchestrator
registration (B4)
- Composite-action input tracing, GITHUB_ENV poisoning (Batch D)
Test count: 1685 → 1732 (+47). Pre-compact-scan flake unchanged
(passes in isolation).
330 lines
12 KiB
JavaScript
330 lines
12 KiB
JavaScript
// workflow-scanner.mjs — E11 GitHub/Forgejo Actions injection scanner
// Detects `${{ <dangerous-field> }}` interpolations inside `run:` step
// blocks under privileged triggers. Sink-restricted (only `run:` is a
// shell sink — `if:`/`with:`/`env:` are evaluated by the runner's
// expression engine, not the shell, so they are NOT injection sinks).
//
// Discovery: explicitly probes `<target>/.github/workflows/` and
// `<target>/.forgejo/workflows/`. discoverFiles() (file-discovery.mjs)
// does not support glob include patterns, so we walk the two
// directories directly via node:fs/promises.
//
// Knowledge: knowledge/workflow-injection-patterns.md (23-field
// blacklist + severity matrix + Forgejo divergences).
//
// Out of scope (deferred):
//   - Composite-action input tracing
//   - Reusable-workflow call analysis
//   - GITHUB_ENV poisoning detection
//   - Zombie-workflow scanning across non-default branches
//
// Zero external dependencies.
|
|
|
|
import { readdir, readFile, stat } from 'node:fs/promises';
|
|
import { join, relative, basename } from 'node:path';
|
|
import { existsSync } from 'node:fs';
|
|
import { fileURLToPath } from 'node:url';
|
|
import { dirname } from 'node:path';
|
|
import { finding, scannerResult } from './lib/output.mjs';
|
|
import { SEVERITY } from './lib/severity.mjs';
|
|
import { parseWorkflow } from './lib/workflow-yaml-state.mjs';
|
|
|
|
// Directory that contains this module file (ES modules have no built-in
// __dirname, so it is derived from import.meta.url).
const __dirname = dirname(fileURLToPath(import.meta.url));

// Cap on the number of workflow files collected by discoverWorkflows().
const MAX_FILES = 100;

// Workflow files larger than this many bytes (256 KiB) are skipped by scanFile().
const MAX_FILE_SIZE = 256 * 1024;

// Scanner name passed to scannerResult().
const SCANNER_NAME = 'workflow';

// Value placed in the `scanner` field of every finding this module emits.
const SCANNER_PREFIX = 'WFL';
|
|
|
|
// ---------------------------------------------------------------------------
// 23-field canonical blacklist (GHSL Security Lab 17 + 6 GlueStack-class
// additions per research/01-github-forgejo-actions-injection.md). Stored
// as patterns matching the inner expression after `${{ ` and before ` }}`.
// All context-qualified patterns match BOTH `github.*` and `forgejo.*`
// prefixes; the bare `inputs.*` pattern has no prefix.
// ---------------------------------------------------------------------------

const PREFIX = '(?:github|forgejo)';

// Each entry is a regex source fragment. A literal `\.\*` segment is a
// placeholder for "any element of a collection" and is expanded when
// DANGEROUS_RE is compiled below.
const DANGEROUS_FIELDS = [
  // GHSL 17
  String.raw`${PREFIX}\.event\.issue\.title`,
  String.raw`${PREFIX}\.event\.issue\.body`,
  String.raw`${PREFIX}\.event\.pull_request\.title`,
  String.raw`${PREFIX}\.event\.pull_request\.body`,
  String.raw`${PREFIX}\.event\.pull_request\.head\.ref`,
  String.raw`${PREFIX}\.event\.pull_request\.head\.label`,
  String.raw`${PREFIX}\.event\.pull_request\.head\.repo\.default_branch`,
  String.raw`${PREFIX}\.event\.comment\.body`,
  String.raw`${PREFIX}\.event\.review\.body`,
  String.raw`${PREFIX}\.event\.commits\.\*\.message`,
  String.raw`${PREFIX}\.event\.commits\.\*\.author\.email`,
  String.raw`${PREFIX}\.event\.commits\.\*\.author\.name`,
  String.raw`${PREFIX}\.event\.head_commit\.message`,
  String.raw`${PREFIX}\.event\.head_commit\.author\.email`,
  String.raw`${PREFIX}\.event\.head_commit\.author\.name`,
  String.raw`${PREFIX}\.event\.pages\.\*\.page_name`,
  String.raw`${PREFIX}\.head_ref`,
  // GlueStack-class additions
  String.raw`${PREFIX}\.event\.discussion\.title`,
  String.raw`${PREFIX}\.event\.discussion\.body`,
  String.raw`${PREFIX}\.event\.discussion\.user\.login`,
  String.raw`${PREFIX}\.event\.inputs\.[\w-]+`,
  String.raw`${PREFIX}\.event\.client_payload\.[\w-]+`,
  String.raw`inputs\.[\w-]+`,
];

// Expansion of the `\.\*` placeholder. An element of a collection can be
// written as a dotted segment (`commits.*.message`) or with index syntax
// (`commits[0].message`); both forms read the same attacker-controlled value,
// so both must match.
const WILDCARD_SEGMENT = String.raw`(?:\.[^.]+|\[[^\]]+\])`;

// Single unanchored alternation over every blacklisted field. Unanchored so
// that a field nested inside a larger expression is still detected. No `g`
// flag, so `.test()` carries no state between calls.
const DANGEROUS_RE = new RegExp(
  '(?:' +
    DANGEROUS_FIELDS
      .map((pattern) => pattern.replace(/\\\.\\\*/g, () => WILDCARD_SEGMENT))
      .join('|') +
    ')',
);
|
|
|
|
// Numeric / hex / fixed-string fields. An expression that consists of
// exactly one of these (the pattern is anchored at both ends) is reported at
// INFO level rather than as an injection sink.
const SAFE_FIELDS_RE = new RegExp(
  '^(?:' +
    [
      String.raw`event\.pull_request\.number`,
      String.raw`event\.pull_request\.head\.sha`,
      String.raw`run_id`,
      String.raw`run_number`,
      String.raw`sha`,
      String.raw`event\.action`,
      String.raw`event\.repository\.full_name`,
    ]
      .map((tail) => `${PREFIX}\\.${tail}`)
      .join('|') +
    ')$',
);
|
|
|
|
// Workflow triggers that hand attacker-controlled context to a job running
// with elevated privileges (read/write tokens). severityFor() maps a
// dangerous field under any of these to HIGH.
const PRIVILEGED_TRIGGERS = new Set([
  'pull_request_target',
  'issue_comment',
  'discussion',
  'discussion_comment',
  'workflow_run',
]);

// Workflow triggers where an attacker can still supply input but the token
// is read-only or scoped: still an injection sink, just lower severity
// (MEDIUM in severityFor()).
const SEMI_PRIVILEGED_TRIGGERS = new Set([
  'pull_request',
  'workflow_dispatch',
  'repository_dispatch',
]);
|
|
|
|
// Parent keys whose value is handed to a shell. Only events under one of
// these are considered by scanFile().
const SINK_PARENTS = new Set(['run']);

// Parent keys where `${{ ... }}` is evaluated by the runner expression
// engine, NOT the shell. An expression here is a sink mismatch, not an
// injection, and scanFile() skips it.
const NON_SINK_PARENTS = new Set([
  'if',
  'with',
  'env',
  'name',
  'runs-on',
  'timeout-minutes',
  'continue-on-error',
]);
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Discovery
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Collect workflow files from `<targetPath>/.github/workflows/` and
 * `<targetPath>/.forgejo/workflows/` (in that order, one level deep).
 *
 * Only regular files whose name ends in `.yml` or `.yaml` (case-insensitive)
 * are returned. Within each directory the names are sorted, so the result —
 * and which files survive the cap — does not depend on the order in which
 * the filesystem happens to list entries.
 *
 * A directory that is missing or cannot be read is skipped silently; having
 * no workflows directory is the normal case for most targets.
 *
 * @param {string} targetPath Root of the tree to probe.
 * @param {number} [maxFiles=MAX_FILES] Maximum number of paths to return,
 *   counted across both directories.
 * @returns {Promise<string[]>} Paths formed by joining `targetPath`, the
 *   workflows directory and the file name (absolute only if `targetPath` is).
 */
export async function discoverWorkflows(targetPath, maxFiles = MAX_FILES) {
  const found = [];
  const workflowDirs = [
    join(targetPath, '.github', 'workflows'),
    join(targetPath, '.forgejo', 'workflows'),
  ];

  for (const dir of workflowDirs) {
    if (found.length >= maxFiles) break;

    let entries;
    try {
      entries = await readdir(dir, { withFileTypes: true });
    } catch {
      // Missing or unreadable directory: nothing to scan here.
      continue;
    }

    // Default sort compares UTF-16 code units, which is locale-independent.
    const names = entries
      .filter((entry) => entry.isFile() && /\.ya?ml$/i.test(entry.name))
      .map((entry) => entry.name)
      .sort();

    for (const name of names) {
      if (found.length >= maxFiles) return found;
      found.push(join(dir, name));
    }
  }

  return found;
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Severity matrix
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Severity matrix: map the workflow's trigger set and the class of the
 * interpolated field to a severity.
 *
 * - `'safe'` fields are always INFO.
 * - Any class other than `'safe'` / `'dangerous'` is suppressed (null).
 * - `'dangerous'` fields are HIGH if any trigger is privileged, otherwise
 *   MEDIUM — both for semi-privileged triggers and when no listed trigger is
 *   present (e.g. push events can still be reachable from forks via PRs).
 *
 * @param {Set<string>} triggers
 * @param {'dangerous'|'safe'|'other'} fieldClass
 * @returns {string|null} SEVERITY constant, or null = suppress
 */
function severityFor(triggers, fieldClass) {
  switch (fieldClass) {
    case 'safe':
      return SEVERITY.INFO;
    case 'dangerous':
      break;
    default:
      return null;
  }

  const active = [...triggers];
  if (active.some((name) => PRIVILEGED_TRIGGERS.has(name))) {
    return SEVERITY.HIGH;
  }
  if (active.some((name) => SEMI_PRIVILEGED_TRIGGERS.has(name))) {
    return SEVERITY.MEDIUM;
  }
  // No relevant trigger: still flagged at MEDIUM.
  return SEVERITY.MEDIUM;
}
|
|
|
|
/**
 * Classify the inner text of a `${{ ... }}` expression.
 *
 * The safe list is checked first; it is anchored, so it only wins when the
 * whole expression is a single safe field.
 *
 * @param {string} expr Expression text without the `${{` / `}}` delimiters.
 * @returns {'safe'|'dangerous'|'other'}
 */
function classifyField(expr) {
  return SAFE_FIELDS_RE.test(expr)
    ? 'safe'
    : DANGEROUS_RE.test(expr)
      ? 'dangerous'
      : 'other';
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Platform detection (filename-based; keeps schema unchanged)
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Infer the CI platform from the workflow file's path.
 *
 * The workflows directory is recognised at the start of the path as well as
 * after a separator, and with either `/` or `\` separators. discoverWorkflows()
 * builds paths with path.join(), so a relative target such as `.` produces
 * `.forgejo/workflows/ci.yml` (no leading slash) and a Windows target produces
 * backslash-separated paths; both must still be classified.
 *
 * @param {string} absPath Path of the workflow file.
 * @returns {'forgejo'|'github'|'unknown'}
 */
function detectPlatform(absPath) {
  if (/(?:^|[\\/])\.forgejo[\\/]workflows[\\/]/.test(absPath)) return 'forgejo';
  if (/(?:^|[\\/])\.github[\\/]workflows[\\/]/.test(absPath)) return 'github';
  return 'unknown';
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Recommendation text
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Build the remediation text attached to a finding.
 *
 * @param {string} platform Platform id; `'forgejo'` appends a Forgejo-specific note.
 * @param {string} parent Parent key of the expression; `'run'` selects the
 *   shell-sink advice, anything else the generic review advice.
 * @returns {string}
 */
function buildRecommendation(platform, parent) {
  const parts = [];

  if (parent === 'run') {
    parts.push('Bind the expression to an env var first, then consume it via $VAR in the run script: `env: { TITLE: ${{ ... }} }; run: echo "$TITLE"`. Re-interpolating ${{ env.TITLE }} inside run: cancels the mitigation.');
  } else {
    parts.push('This expression is not a shell injection sink, but the underlying field is attacker-controlled. Review its downstream use.');
  }

  if (platform === 'forgejo') {
    parts.push(' Forgejo note: job-level `permissions:` is ignored on Forgejo (admin-guide); rely on token scoping at server level instead.');
  }

  return parts.join('');
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Scan one workflow file
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Scan a single workflow file and return its findings.
 *
 * Returns an empty list when the file cannot be stat'ed or read, is larger
 * than MAX_FILE_SIZE, or fails to parse (a parse failure is also reported
 * through `stderrLog`).
 *
 * @param {string} absPath Path of the workflow file.
 * @param {string} targetPath Scan root; findings carry the path relative to it.
 * @param {(msg: string) => void} stderrLog Sink for diagnostics that are not findings.
 * @returns {Promise<object[]>} Objects produced by finding().
 */
async function scanFile(absPath, targetPath, stderrLog) {
  const info = await stat(absPath).catch(() => null);
  if (!info || info.size > MAX_FILE_SIZE) return [];

  const text = await readFile(absPath, 'utf8').catch(() => null);
  if (text === null) return [];

  // Fall back to the bare file name if the relative path comes out empty.
  const relPath = relative(targetPath, absPath) || basename(absPath);
  const platform = detectPlatform(absPath);

  let parsed;
  try {
    parsed = parseWorkflow(text);
  } catch (err) {
    stderrLog(`[workflow-scanner] parse error in ${relPath}: ${err.message}\n`);
    return [];
  }

  const { triggers } = parsed;

  // Forgejo divergence advisory: `workflow_run` is not documented for
  // Forgejo. Emit to stderr (not as a finding) so the user knows the
  // severity-matrix logic applied as if it were privileged.
  if (platform === 'forgejo' && triggers.has('workflow_run')) {
    stderrLog(
      `[workflow-scanner] ${relPath}: 'workflow_run' trigger is not documented for Forgejo Actions; ` +
      `severity logic still treats it as privileged. See knowledge/workflow-injection-patterns.md §Forgejo.\n`
    );
  }

  const findings = [];
  for (const ev of parsed.events) {
    // Only expressions that sit under a shell sink are candidates.
    const isShellSink = !NON_SINK_PARENTS.has(ev.parent) && SINK_PARENTS.has(ev.parent);
    if (!isShellSink) continue;

    const fieldClass = classifyField(ev.expr);
    if (fieldClass === 'other') continue;

    const severity = severityFor(triggers, fieldClass);
    if (!severity) continue;

    // Anything that is not Forgejo (including 'unknown') is labelled GitHub.
    const platformLabel = platform === 'forgejo' ? 'Forgejo' : 'GitHub';
    const triggerList = [...triggers].join(', ') || 'unknown';

    let title;
    if (severity === SEVERITY.INFO) {
      title = `Safe expression in ${platformLabel} workflow run:`;
    } else {
      title = `Workflow injection: ${platformLabel} ${ev.expr} in run: under ${triggerList}`;
    }

    const description =
      `${platformLabel} workflow at ${relPath} interpolates \${{ ${ev.expr} }} ` +
      `inside a run: step. Triggers: ${triggerList}. ` +
      `Field class: ${fieldClass}. Block scalar: ${ev.blockScalar}.`;

    findings.push(finding({
      scanner: SCANNER_PREFIX,
      severity,
      title,
      description,
      file: relPath,
      line: ev.line,
      evidence: `\${{ ${ev.expr} }}`,
      owasp: 'LLM02',
      recommendation: buildRecommendation(platform, ev.parent),
    }));
  }

  return findings;
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Public entry — orchestrator-compatible
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Scan a target path for workflow injection.
 *
 * Workflow files are discovered and scanned one after another. Any error
 * thrown along the way is converted into an `'error'` envelope that still
 * carries the findings and file count gathered up to that point.
 *
 * @param {string} targetPath
 * @param {object} [_discovery] Ignored — workflow-scanner does its own
 *                              directory probe.
 * @returns {Promise<object>} scannerResult envelope
 */
export async function scan(targetPath, _discovery) {
  const startedAt = Date.now();
  const elapsedMs = () => Date.now() - startedAt;
  const collected = [];
  let scannedCount = 0;
  const stderrLog = (msg) => process.stderr.write(msg);

  try {
    const workflowPaths = await discoverWorkflows(targetPath);
    for (const workflowPath of workflowPaths) {
      // Counted before scanning, so files that scanFile() skips are included.
      scannedCount += 1;
      collected.push(...(await scanFile(workflowPath, targetPath, stderrLog)));
    }
    return scannerResult(SCANNER_NAME, 'ok', collected, scannedCount, elapsedMs());
  } catch (err) {
    return scannerResult(SCANNER_NAME, 'error', collected, scannedCount, elapsedMs(), err.message);
  }
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// CLI entry
|
|
// ---------------------------------------------------------------------------
|
|
|
|
// Run the scanner only when this file is the script Node was started with
// (argv[1] equals this module's own path), not when it is imported.
const isDirectRun = process.argv[1] === fileURLToPath(import.meta.url);
if (isDirectRun) {
  const target = process.argv[2];
  if (!target) {
    console.error('Usage: node workflow-scanner.mjs <target-path>');
    process.exit(1);
  }
  scan(target)
    .then((result) => {
      // The JSON envelope is the only thing written to stdout.
      process.stdout.write(JSON.stringify(result, null, 2) + '\n');
    })
    .catch((err) => {
      // Covers failures while serialising or writing the result, so the
      // promise chain never ends in an unhandled rejection.
      const reason = err instanceof Error ? err.message : String(err);
      console.error(`[workflow-scanner] fatal: ${reason}`);
      process.exitCode = 1;
    });
}
|