// workflow-scanner.mjs — E11 GitHub/Forgejo Actions injection scanner // Detects `${{ }}` interpolations inside `run:` step // blocks under privileged triggers. Sink-restricted (only `run:` is a // shell sink — `if:`/`with:`/`env:` are evaluated by the runner's // expression engine, not the shell, so they are NOT injection sinks). // // Discovery: explicitly probes `/.github/workflows/` and // `/.forgejo/workflows/`. discoverFiles() (file-discovery.mjs) // does not support glob include patterns, so we walk the two // directories directly via node:fs/promises. // // Knowledge: knowledge/workflow-injection-patterns.md (23-field // blacklist + severity matrix + Forgejo divergences). // // Out of scope (deferred): // - Composite-action input tracing // - Reusable-workflow call analysis // - GITHUB_ENV poisoning detection // - Zombie-workflow scanning across non-default branches // // Zero external dependencies. import { readdir, readFile, stat } from 'node:fs/promises'; import { join, relative, basename } from 'node:path'; import { existsSync } from 'node:fs'; import { fileURLToPath } from 'node:url'; import { dirname } from 'node:path'; import { finding, scannerResult } from './lib/output.mjs'; import { SEVERITY } from './lib/severity.mjs'; import { parseWorkflow } from './lib/workflow-yaml-state.mjs'; const __dirname = dirname(fileURLToPath(import.meta.url)); const MAX_FILES = 100; const MAX_FILE_SIZE = 256 * 1024; const SCANNER_NAME = 'workflow'; const SCANNER_PREFIX = 'WFL'; // --------------------------------------------------------------------------- // 23-field canonical blacklist (GHSL Security Lab 17 + 6 GlueStack-class // additions per research/01-github-forgejo-actions-injection.md). Stored // as patterns matching the inner expression after `${{ ` and before ` }}`. // All patterns match BOTH `github.*` and `forgejo.*` prefixes. 
// ---------------------------------------------------------------------------

// Context-name prefix shared by every pattern: GitHub and Forgejo expose the
// same fields under `github.*` and `forgejo.*` respectively.
const PREFIX = '(?:github|forgejo)';

// Regex-source fragments, one per blacklisted field. A `\.\*` segment is the
// object-filter wildcard (`commits.*.message`); it is expanded when
// DANGEROUS_RE is compiled below.
const DANGEROUS_FIELDS = [
  // GHSL 17
  `${PREFIX}\\.event\\.issue\\.title`,
  `${PREFIX}\\.event\\.issue\\.body`,
  `${PREFIX}\\.event\\.pull_request\\.title`,
  `${PREFIX}\\.event\\.pull_request\\.body`,
  `${PREFIX}\\.event\\.pull_request\\.head\\.ref`,
  `${PREFIX}\\.event\\.pull_request\\.head\\.label`,
  `${PREFIX}\\.event\\.pull_request\\.head\\.repo\\.default_branch`,
  `${PREFIX}\\.event\\.comment\\.body`,
  `${PREFIX}\\.event\\.review\\.body`,
  `${PREFIX}\\.event\\.commits\\.\\*\\.message`,
  `${PREFIX}\\.event\\.commits\\.\\*\\.author\\.email`,
  `${PREFIX}\\.event\\.commits\\.\\*\\.author\\.name`,
  `${PREFIX}\\.event\\.head_commit\\.message`,
  `${PREFIX}\\.event\\.head_commit\\.author\\.email`,
  `${PREFIX}\\.event\\.head_commit\\.author\\.name`,
  `${PREFIX}\\.event\\.pages\\.\\*\\.page_name`,
  `${PREFIX}\\.head_ref`,
  // GlueStack-class additions
  `${PREFIX}\\.event\\.discussion\\.title`,
  `${PREFIX}\\.event\\.discussion\\.body`,
  `${PREFIX}\\.event\\.discussion\\.user\\.login`,
  `${PREFIX}\\.event\\.inputs\\.[\\w-]+`,
  `${PREFIX}\\.event\\.client_payload\\.[\\w-]+`,
  `inputs\\.[\\w-]+`,
];

// Replacement for the `\.\*` wildcard segment. Accepts either a dotted
// segment (`commits.*.message`, the form used in the blacklist) or a
// bracket index (`commits[0].message`), which is how workflows usually
// address a single array element. Without the bracket alternative the
// indexed form would not be classified as dangerous.
const WILDCARD_SEGMENT = '(?:\\.[^.]+|\\[[^\\]]+\\])';

// Unanchored alternation of all blacklisted fields: an expression is
// dangerous if any blacklisted field appears anywhere inside it.
const DANGEROUS_RE = new RegExp(
  '(?:' +
    DANGEROUS_FIELDS.map((p) => p.replace(/\\\.\\\*/g, WILDCARD_SEGMENT)).join('|') +
  ')',
);

// Numeric/hex/fixed-string fields — INFO-level, never injection sinks.
// Anchored: the whole expression must be exactly one of these fields.
const SAFE_FIELDS_RE = new RegExp(
  '^(?:' +
    `${PREFIX}\\.event\\.pull_request\\.number|` +
    `${PREFIX}\\.event\\.pull_request\\.head\\.sha|` +
    `${PREFIX}\\.run_id|` +
    `${PREFIX}\\.run_number|` +
    `${PREFIX}\\.sha|` +
    `${PREFIX}\\.event\\.action|` +
    `${PREFIX}\\.event\\.repository\\.full_name` +
  ')$',
);

// Triggers that grant attacker-controlled context with elevated
// privileges (read/write tokens).
const PRIVILEGED_TRIGGERS = new Set([
  'pull_request_target',
  'issue_comment',
  'discussion',
  'discussion_comment',
  'workflow_run',
]);

// Triggers where attacker can supply input but token is read-only or
// scoped (still an injection sink, just lower severity).
const SEMI_PRIVILEGED_TRIGGERS = new Set([
  'pull_request',
  'workflow_dispatch',
  'repository_dispatch',
]);

// Sink contexts that ARE shell:
const SINK_PARENTS = new Set(['run']);

// Contexts where ${{ ... }} is evaluated by the runner expression
// engine, NOT the shell. These are sink mismatches, not injection.
const NON_SINK_PARENTS = new Set([
  'if',
  'with',
  'env',
  'name',
  'runs-on',
  'timeout-minutes',
  'continue-on-error',
]);

// B4: auth-bypass — github.actor or forgejo.actor compared against a
// bot identity in if: contexts (Synacktiv 2023 Dependabot spoofing).
const AUTH_BYPASS_RE = /\b(?:github|forgejo)\.actor\s*(?:==|!=)\s*['"][\w-]+\[bot\]['"]/;

// ---------------------------------------------------------------------------
// Discovery
// ---------------------------------------------------------------------------

/**
 * Walk `<target>/.github/workflows/` and `<target>/.forgejo/workflows/`
 * one level deep. Return absolute paths of `.yml` and `.yaml` files,
 * combined and capped at MAX_FILES total.
 *
 * Missing or unreadable directories are skipped silently (best-effort
 * probe); only regular files are returned, so sub-directories and
 * symlinks are ignored.
 *
 * @param {string} targetPath  Root of the repository/checkout to probe.
 * @returns {Promise<string[]>} Paths built with `join(targetPath, ...)`.
 */
export async function discoverWorkflows(targetPath) {
  const out = [];
  const dirs = [
    join(targetPath, '.github', 'workflows'),
    join(targetPath, '.forgejo', 'workflows'),
  ];
  for (const dir of dirs) {
    if (!existsSync(dir)) continue;
    let entries;
    try {
      entries = await readdir(dir, { withFileTypes: true });
    } catch {
      // Unreadable directory: deliberately skipped, discovery is best-effort.
      continue;
    }
    for (const entry of entries) {
      if (!entry.isFile()) continue;
      if (!/\.ya?ml$/i.test(entry.name)) continue;
      out.push(join(dir, entry.name));
      if (out.length >= MAX_FILES) return out;
    }
  }
  return out;
}

// ---------------------------------------------------------------------------
// Severity matrix
// ---------------------------------------------------------------------------

/**
 * Map (triggerSet, fieldClass) → severity.
 *
 * `safe` fields are always INFO; anything that is not `dangerous` is
 * suppressed. For dangerous fields a privileged trigger wins over a
 * semi-privileged one regardless of iteration order.
 *
 * @param {Set<string>} triggers
 * @param {'dangerous'|'safe'|'other'} fieldClass
 * @returns {string|null} SEVERITY constant, or null = suppress
 */
function severityFor(triggers, fieldClass) {
  if (fieldClass === 'safe') return SEVERITY.INFO;
  if (fieldClass !== 'dangerous') return null;

  for (const t of triggers) {
    if (PRIVILEGED_TRIGGERS.has(t)) return SEVERITY.HIGH;
  }
  for (const t of triggers) {
    if (SEMI_PRIVILEGED_TRIGGERS.has(t)) return SEVERITY.MEDIUM;
  }
  // No relevant trigger → still flag at MEDIUM (e.g. push events
  // can still be reachable from forks via PRs).
  return SEVERITY.MEDIUM;
}

/**
 * Classify the inner text of a `${{ ... }}` expression.
 *
 * The safe list is checked first and is anchored (whole expression must
 * be a safe field); the dangerous list matches anywhere in the expression.
 *
 * @param {string} expr
 * @returns {'dangerous'|'safe'|'other'}
 */
function classifyField(expr) {
  if (SAFE_FIELDS_RE.test(expr)) return 'safe';
  if (DANGEROUS_RE.test(expr)) return 'dangerous';
  return 'other';
}

// ---------------------------------------------------------------------------
// Platform detection (filename-based; keeps schema unchanged)
// ---------------------------------------------------------------------------

/**
 * Derive the CI platform from the directory a workflow file lives in.
 *
 * @param {string} absPath  Path of the workflow file.
 * @returns {'forgejo'|'github'|'unknown'}
 */
function detectPlatform(absPath) {
  // Paths are built with node:path `join`, which uses `\` on Windows.
  // Normalise separators so the substring probes work on every host OS;
  // otherwise every Windows path would be reported as 'unknown'.
  const normalized = absPath.replace(/\\/g, '/');
  if (normalized.includes('/.forgejo/workflows/')) return 'forgejo';
  if (normalized.includes('/.github/workflows/')) return 'github';
  return 'unknown';
}

// ---------------------------------------------------------------------------
// Recommendation text
// ---------------------------------------------------------------------------

/**
 * Build the remediation text for an injection finding.
 *
 * @param {string} platform  Result of detectPlatform(); 'forgejo' appends
 *   a platform-specific note.
 * @param {string} parent    YAML key the expression was found under.
 * @returns {string}
 */
function buildRecommendation(platform, parent) {
  const base = parent === 'run'
    ? 'Bind the expression to an env var first, then consume it via $VAR in the run script: `env: { TITLE: ${{ ... }} }; run: echo "$TITLE"`. Re-interpolating ${{ env.TITLE }} inside run: cancels the mitigation.'
    : 'This expression is not a shell injection sink, but the underlying field is attacker-controlled. Review its downstream use.';
  if (platform === 'forgejo') {
    return base + ' Forgejo note: job-level `permissions:` is ignored on Forgejo (admin-guide); rely on token scoping at server level instead.';
  }
  return base;
}

// ---------------------------------------------------------------------------
// Scan one workflow file
// ---------------------------------------------------------------------------

/**
 * Scan a single workflow file and return its findings.
 *
 * Unreadable, oversized (> MAX_FILE_SIZE) and unparsable files yield an
 * empty list; parse errors are additionally reported through `stderrLog`.
 *
 * NOTE(review): relies on parseWorkflow() returning `{ triggers, events }`
 * where `triggers` is a Set of trigger names and each event carries
 * `parent`, `parentChain`, `expr`, `line` and `blockScalar` — confirm
 * against lib/workflow-yaml-state.mjs.
 *
 * @param {string} absPath     Path of the workflow file.
 * @param {string} targetPath  Scan root, used to compute the reported path.
 * @param {(msg: string) => void} stderrLog  Sink for diagnostics.
 * @returns {Promise<object[]>} Findings created via finding().
 */
async function scanFile(absPath, targetPath, stderrLog) {
  const findings = [];

  const stat_ = await stat(absPath).catch(() => null);
  if (!stat_ || stat_.size > MAX_FILE_SIZE) return findings;

  const text = await readFile(absPath, 'utf8').catch(() => null);
  if (text === null) return findings;

  const relPath = relative(targetPath, absPath) || basename(absPath);
  const platform = detectPlatform(absPath);

  let parsed;
  try {
    parsed = parseWorkflow(text);
  } catch (err) {
    stderrLog(`[workflow-scanner] parse error in ${relPath}: ${err.message}\n`);
    return findings;
  }

  const triggers = parsed.triggers;

  // Forgejo divergence advisory: `workflow_run` is not documented for
  // Forgejo. Emit to stderr (not as a finding) so the user knows the
  // severity-matrix logic applied as if it were privileged.
  if (platform === 'forgejo' && triggers.has('workflow_run')) {
    stderrLog(
      `[workflow-scanner] ${relPath}: 'workflow_run' trigger is not documented for Forgejo Actions; ` +
      `severity logic still treats it as privileged. See knowledge/workflow-injection-patterns.md §Forgejo.\n`
    );
  }

  // Anything that is not Forgejo (including 'unknown') is labelled GitHub.
  const platformLabel = platform === 'forgejo' ? 'Forgejo' : 'GitHub';
  const triggerList = [...triggers].join(', ') || 'unknown';

  // B4: collect env: bindings (key -> source-expression). Used for
  // re-interpolation detection. A binding is an event whose parent is
  // a key under an `env:` block — i.e. parentChain includes 'env'
  // and the parent is not 'env' itself. A later binding with the same
  // key replaces an earlier one.
  const envBindings = new Map();
  for (const ev of parsed.events) {
    if (!ev.parentChain.includes('env')) continue;
    if (ev.parent === 'env') continue;
    if (SINK_PARENTS.has(ev.parent)) continue;
    if (NON_SINK_PARENTS.has(ev.parent)) continue;
    envBindings.set(ev.parent, ev.expr);
  }

  for (const ev of parsed.events) {
    // B4: auth-bypass first — fires only on if: events
    if (ev.parent === 'if' && AUTH_BYPASS_RE.test(ev.expr)) {
      findings.push(finding({
        scanner: SCANNER_PREFIX,
        severity: SEVERITY.MEDIUM,
        title: `Actor auth-bypass: ${platformLabel} workflow trusts bot identity`,
        description:
          `Actor auth-bypass: if-condition trusts bot identity that can be ` +
          `spoofed via pull_request_target. ${platformLabel} workflow at ` +
          `${relPath}: ${ev.expr}.`,
        file: relPath,
        line: ev.line,
        evidence: `\${{ ${ev.expr} }}`,
        owasp: 'LLM06',
        recommendation:
          'Use `github.event.pull_request.user.login` (immutable per PR) ' +
          'instead of `github.actor` for authorization decisions. The actor ' +
          'name can be spoofed via Synacktiv-2023 Dependabot path. If the ' +
          'check must remain, gate it on an `id-token` OIDC claim.',
      }));
      continue;
    }

    // Only shell sinks are analysed from here on.
    if (NON_SINK_PARENTS.has(ev.parent)) continue;
    if (!SINK_PARENTS.has(ev.parent)) continue;

    // B4: re-interpolation pattern — `${{ env.<KEY> }}` inside run:
    // where <KEY> was bound from a blacklisted field via top-level
    // or job-level env:. Cancels the env-indirection mitigation.
    const reinterpMatch = ev.expr.match(/^env\.([\w-]+)$/);
    if (reinterpMatch) {
      const key = reinterpMatch[1];
      const source = envBindings.get(key);
      if (source && DANGEROUS_RE.test(source)) {
        findings.push(finding({
          scanner: SCANNER_PREFIX,
          severity: SEVERITY.MEDIUM,
          title: `Re-interpolation: env.${key} re-injects ${source.split(/\s+/)[0]} at ${platformLabel} run:`,
          description:
            `Re-interpolation: env.${key} was set from \${{ ${source} }}; reading via ` +
            `\${{ env.${key} }} in run: re-injects the unsafe value (Appsmith ` +
            `GHSL-2024-277 stealth pattern). Workflow: ${relPath}.`,
          file: relPath,
          line: ev.line,
          evidence: `\${{ env.${key} }}`,
          owasp: 'LLM02',
          recommendation:
            `Consume the env var via \$${key} (shell variable) inside run:, ` +
            `not via \${{ env.${key} }}. Template substitution happens before ` +
            `shell parsing — re-interpolating cancels the env-indirection ` +
            'mitigation and re-introduces the original injection.',
        }));
        continue;
      }
      // No dangerous binding known for this key: fall through to the
      // generic classification below.
    }

    const fieldClass = classifyField(ev.expr);
    if (fieldClass === 'other') continue;

    const severity = severityFor(triggers, fieldClass);
    if (!severity) continue;

    findings.push(finding({
      scanner: SCANNER_PREFIX,
      severity,
      title: severity === SEVERITY.INFO
        ? `Safe expression in ${platformLabel} workflow run:`
        : `Workflow injection: ${platformLabel} ${ev.expr} in run: under ${triggerList}`,
      description:
        `${platformLabel} workflow at ${relPath} interpolates \${{ ${ev.expr} }} ` +
        `inside a run: step. Triggers: ${triggerList}. ` +
        `Field class: ${fieldClass}. Block scalar: ${ev.blockScalar}.`,
      file: relPath,
      line: ev.line,
      evidence: `\${{ ${ev.expr} }}`,
      owasp: 'LLM02',
      recommendation: buildRecommendation(platform, ev.parent),
    }));
  }

  return findings;
}

// ---------------------------------------------------------------------------
// Public entry — orchestrator-compatible
// ---------------------------------------------------------------------------

/**
 * Scan a target path for workflow injection.
 *
 * Never rejects: any error raised during discovery or scanning is
 * reported through an 'error' envelope that still carries the findings
 * collected so far.
 *
 * @param {string} targetPath
 * @param {object} [_discovery] Ignored — workflow-scanner does its own
 *   directory probe.
 * @returns {Promise<object>} scannerResult envelope
 */
export async function scan(targetPath, _discovery) {
  const startMs = Date.now();
  const allFindings = [];
  let filesScanned = 0;

  const stderrLog = (msg) => process.stderr.write(msg);

  try {
    const files = await discoverWorkflows(targetPath);
    for (const f of files) {
      // Counted before scanning, so skipped/unparsable files are included.
      filesScanned++;
      const fileFindings = await scanFile(f, targetPath, stderrLog);
      allFindings.push(...fileFindings);
    }
    return scannerResult(SCANNER_NAME, 'ok', allFindings, filesScanned, Date.now() - startMs);
  } catch (err) {
    return scannerResult(
      SCANNER_NAME,
      'error',
      allFindings,
      filesScanned,
      Date.now() - startMs,
      err.message,
    );
  }
}

// ---------------------------------------------------------------------------
// CLI entry
// ---------------------------------------------------------------------------

// True only when this module is the script passed to `node`, not when it
// is imported by the orchestrator.
const isDirectRun = process.argv[1] === fileURLToPath(import.meta.url);
if (isDirectRun) {
  const target = process.argv[2];
  if (!target) {
    console.error('Usage: node workflow-scanner.mjs <target-path>');
    process.exit(1);
  }
  scan(target).then(result => {
    process.stdout.write(JSON.stringify(result, null, 2) + '\n');
  });
}