Closes E11. Three new pieces, plus integration:
1. Re-interpolation detector (Appsmith GHSL-2024-277 stealth pattern).
The scanner now collects env: bindings (key -> source-expression
text) by walking parsed events whose parentChain includes 'env',
then for each `${{ env.<KEY> }}` inside run:, re-injects MEDIUM
if the binding source matches the 23-field blacklist. This
catches the pattern where developers apply env-indirection but
then re-interpolate the env var in run:, which cancels the
mitigation (template substitution happens before shell parsing).
2. Auth-bypass category (Synacktiv 2023 Dependabot spoofing).
Detects `if: ${{ github.actor == 'dependabot[bot]' }}` and
variants. MEDIUM, owasp: 'LLM06' (Excessive Agency). Distinct
from injection — same expression syntax, different threat class.
Recommendation steers users to `github.event.pull_request.user.login`.
3. severity.mjs OWASP map registration. WFL prefix added to all
four maps:
- OWASP_MAP['WFL'] = ['LLM02', 'LLM06']
- OWASP_AGENTIC_MAP['WFL'] = ['ASI04']
- OWASP_SKILLS_MAP['WFL'] = []
- OWASP_MCP_MAP['WFL'] = []
Empty arrays for skills/MCP are explicit, not omitted — keeps
`Object.keys(OWASP_MAP)` symmetric across maps.
4. scan-orchestrator.mjs registration. workflowScan added between
supply-chain and toxic-flow (toxic-flow correlates after primaries).
Verified via integration: orchestrator emits 9 WFL findings on
tests/fixtures/workflows/.
Bug fix: extractTriggers in workflow-yaml-state.mjs was collecting
sub-properties (`branches:`, `types:`) as triggers. Now tracks the
first nested indent level and ignores anything deeper.
Tests:
- 6 new cases in tests/scanners/workflow-scanner.test.mjs:
re-interp TP, no-double-count, auth-bypass TP, auth-bypass FP
(startsWith head_ref is not auth-bypass), OWASP map shape,
orchestrator import + SCANNERS array entry.
- 2 new fixtures: tp-reinterpolation.yml, auth-bypass-dependabot.yml.
- Existing 14 scanner tests + 15 state-machine tests unchanged.
Test count: 1732 -> 1738 (+6). Wave B total: +53 over baseline 1685.
Pre-compact-scan flake unchanged (passes in isolation).
233 lines
7.4 KiB
JavaScript
233 lines
7.4 KiB
JavaScript
// workflow-yaml-state.mjs — Line-based YAML state machine for E11
// (workflow-scanner). Zero dependencies. Tracks indentation, parent
// context, and `run:` block-scalar entry/exit so the scanner can
// distinguish injection sinks (`run:`) from sink-mismatch contexts
// (`if:`, `env:`, `with:`).
//
// Why hand-roll instead of importing a YAML library:
//   - Zero-dependency invariant (CLAUDE.md)
//   - Workflows live in `.github/workflows/` and `.forgejo/workflows/`,
//     have a constrained shape (top-level `on:`, `jobs:`, with each
//     job a mapping of {steps, env, …}). A line-based state machine
//     captures everything we need without a full YAML parser.
//
// Out of scope:
//   - Anchors / aliases (treated as no-op; rarely used in workflows)
//   - Multi-line flow scalars spanning lines via `... \n ...`
//   - Full `${{ <expr> }}` AST (we extract substring text only)
|
|
|
|
// Matches one `${{ <expr> }}` template expression; group 1 is the inner
// text. Global flag: callers must reset `lastIndex` before reusing it.
const EXPR_RE = /\$\{\{\s*([\s\S]+?)\s*\}\}/g;

// `<key>: <value>` on an already-trimmed line. Group 1 = key, group 2 = value.
const KV_RE = /^([A-Za-z_][\w-]*)\s*:\s*(.*)$/;

// `- <key>: <value>` (first key of a list item) on an already-trimmed line.
const LIST_KV_RE = /^-\s+([A-Za-z_][\w-]*)\s*:\s*(.*)$/;

// A trigger name used as a mapping key (`push:`) or standing alone (`push`).
const TRIGGER_RE = /^([a-z_]+)(?::|$)/;

// Every valid YAML block-scalar header: a style indicator (`|` literal or
// `>` folded) optionally followed by a chomping indicator (`-` / `+`) and
// an indentation indicator (`1`-`9`), in either order. Enumerated into a
// Set so callers keep using `.has(value)`; the six plain forms
// (`|`, `>`, `|-`, `>-`, `|+`, `>+`) are included.
const BLOCK_SCALAR_VALUES = (() => {
  const chompingIndicators = ['', '-', '+'];
  const indentIndicators = ['', '1', '2', '3', '4', '5', '6', '7', '8', '9'];
  const headers = new Set();
  for (const style of ['|', '>']) {
    for (const chomp of chompingIndicators) {
      for (const width of indentIndicators) {
        headers.add(`${style}${chomp}${width}`);
        headers.add(`${style}${width}${chomp}`);
      }
    }
  }
  return headers;
})();
|
|
|
|
/**
 * Remove a trailing YAML comment from a single line.
 *
 * A comment starts at the first `#` that is at the start of the line or
 * preceded by whitespace AND lies outside a quoted string and outside a
 * `${{ ... }}` expression. The whitespace character immediately before
 * the `#` is removed together with the comment.
 *
 * Quote tracking is heuristic, not a YAML tokenizer: a quote character
 * only opens a string when it does not directly follow a word character,
 * so apostrophes in plain text (`don't`) do not hide a later comment. An
 * unterminated quote or expression leaves the rest of the line untouched,
 * which is safe because the stripped text is never written back.
 *
 * @param {string} line - one raw line of workflow text
 * @returns {string} the line without its trailing comment
 */
function stripComments(line) {
  let quote = ''; // the quote character currently open, or '' when none
  let inExpr = false; // true between `${{` and the closing `}}`

  for (let i = 0; i < line.length; i++) {
    const ch = line[i];

    if (quote) {
      if (ch === '\\' && quote === '"') {
        i++; // backslash escapes the next character in double quotes
      } else if (ch === quote) {
        if (quote === "'" && line[i + 1] === "'") {
          i++; // `''` is an escaped quote inside a single-quoted string
        } else {
          quote = '';
        }
      }
      continue;
    }

    if (ch === '"' || ch === "'") {
      if (i === 0 || !/\w/.test(line[i - 1])) quote = ch;
      continue;
    }

    if (inExpr) {
      if (ch === '}' && line[i + 1] === '}') {
        inExpr = false;
        i++;
      }
      continue;
    }

    if (ch === '$' && line.startsWith('${{', i)) {
      inExpr = true;
      i += 2;
      continue;
    }

    if (ch === '#' && (i === 0 || /\s/.test(line[i - 1]))) {
      // Drop the comment plus the single whitespace char that introduces it.
      return line.slice(0, Math.max(i - 1, 0));
    }
  }

  return line;
}
|
|
|
|
/**
 * Measure a line's indentation as the number of leading space or tab
 * characters. Tabs are not legal YAML indentation, so each one simply
 * counts as a single column.
 *
 * @param {string} line
 * @returns {number} count of leading space/tab characters
 */
function getIndent(line) {
  // The pattern can match the empty string, so exec() never returns null.
  const leading = /^[ \t]*/.exec(line);
  return leading[0].length;
}
|
|
|
|
/**
 * Collect every `${{ <expr> }}` occurrence on one line.
 *
 * @param {string} rawLine - the untouched source line (comments included)
 * @param {number} lineNum - 1-based line number stamped onto each result
 * @returns {{line: number, column: number, expr: string}[]} one entry per
 *   expression in left-to-right order; `column` is the 1-based position of
 *   the opening `$`, `expr` is the trimmed inner text
 */
function findExpressions(rawLine, lineNum) {
  // EXPR_RE is a shared global regex: start scanning from the beginning.
  EXPR_RE.lastIndex = 0;
  return Array.from(rawLine.matchAll(EXPR_RE), (hit) => ({
    line: lineNum,
    column: hit.index + 1,
    expr: hit[1].trim(),
  }));
}
|
|
|
|
/**
|
|
* Extract the set of triggers declared by top-level `on:`. Handles all
|
|
* four common forms (string, inline-list, block-list, block-mapping).
|
|
*
|
|
* @param {string[]} lines
|
|
* @returns {Set<string>}
|
|
*/
|
|
export function extractTriggers(lines) {
|
|
const triggers = new Set();
|
|
for (let i = 0; i < lines.length; i++) {
|
|
const stripped = stripComments(lines[i]);
|
|
const trimmed = stripped.trim();
|
|
if (!trimmed) continue;
|
|
// Top-level keys are at indent 0
|
|
if (getIndent(stripped) !== 0) continue;
|
|
const m = stripped.match(/^on\s*:\s*(.*)$/);
|
|
if (!m) continue;
|
|
const tail = m[1].trim();
|
|
|
|
// Form 1: `on: push` or `on: [push, pull_request]`
|
|
if (tail) {
|
|
if (tail.startsWith('[')) {
|
|
const inner = tail.replace(/^\[|\]$/g, '');
|
|
for (const part of inner.split(',')) {
|
|
const name = part.trim().replace(/^["']|["']$/g, '');
|
|
if (name) triggers.add(name);
|
|
}
|
|
} else {
|
|
const name = tail.replace(/^["']|["']$/g, '');
|
|
if (name) triggers.add(name);
|
|
}
|
|
return triggers;
|
|
}
|
|
|
|
// Form 2/3: block list or block mapping. Only collect entries at
|
|
// the FIRST nested indent — anything deeper is a sub-property of
|
|
// the trigger (e.g. `branches:`, `types:`), not a new trigger.
|
|
let triggerIndent = null;
|
|
for (let j = i + 1; j < lines.length; j++) {
|
|
const sj = stripComments(lines[j]);
|
|
const tj = sj.trim();
|
|
if (!tj) continue;
|
|
const indent = getIndent(sj);
|
|
if (indent === 0) break; // back to top-level key
|
|
if (triggerIndent === null) triggerIndent = indent;
|
|
if (indent > triggerIndent) continue; // sub-property of a trigger
|
|
// List item: `- push`
|
|
if (tj.startsWith('- ')) {
|
|
const name = tj.slice(2).trim().replace(/^["']|["']$/g, '');
|
|
if (name) triggers.add(name);
|
|
continue;
|
|
}
|
|
// Mapping key: `push:` or `pull_request_target:`
|
|
const tm = tj.match(TRIGGER_RE);
|
|
if (tm) triggers.add(tm[1]);
|
|
}
|
|
return triggers;
|
|
}
|
|
return triggers;
|
|
}
|
|
|
|
/**
|
|
* Walk the workflow text line-by-line and emit `${{ <expr> }}` events
|
|
* tagged with the parent context (`run`, `if`, `with`, `env`, …) and
|
|
* a flag indicating whether the expression appeared inside a `run:`
|
|
* block-scalar body.
|
|
*
|
|
* @param {string} text
|
|
* @returns {{
|
|
* triggers: Set<string>,
|
|
* events: {
|
|
* line: number,
|
|
* column: number,
|
|
* expr: string,
|
|
* parent: string,
|
|
* parentChain: string[],
|
|
* blockScalar: boolean,
|
|
* }[],
|
|
* }}
|
|
*/
|
|
export function parseWorkflow(text) {
|
|
const lines = text.split('\n');
|
|
const triggers = extractTriggers(lines);
|
|
const events = [];
|
|
/** @type {{indent: number, key: string, isBlockScalar: boolean}[]} */
|
|
const stack = [];
|
|
|
|
for (let i = 0; i < lines.length; i++) {
|
|
const raw = lines[i];
|
|
const stripped = stripComments(raw);
|
|
const trimmed = stripped.trim();
|
|
if (!trimmed) continue;
|
|
|
|
const indent = getIndent(stripped);
|
|
|
|
// Pop frames whose indent >= current indent. Block-scalar frames
|
|
// are popped when we leave the scalar body (indent shallower).
|
|
while (stack.length && stack[stack.length - 1].indent >= indent) {
|
|
stack.pop();
|
|
}
|
|
|
|
const top = stack.length ? stack[stack.length - 1] : null;
|
|
|
|
// Inside a block-scalar body? Body lines have indent strictly
|
|
// greater than the opener; the opener frame is on top of stack.
|
|
if (top && top.isBlockScalar) {
|
|
const exprs = findExpressions(raw, i + 1);
|
|
for (const e of exprs) {
|
|
events.push({
|
|
...e,
|
|
parent: top.key,
|
|
parentChain: stack.map(s => s.key),
|
|
blockScalar: true,
|
|
});
|
|
}
|
|
continue;
|
|
}
|
|
|
|
// Try `<key>: <value>` first
|
|
const kv = trimmed.match(KV_RE);
|
|
if (kv) {
|
|
const key = kv[1];
|
|
const value = kv[2];
|
|
const isBlock = BLOCK_SCALAR_VALUES.has(value);
|
|
const exprs = !isBlock && value ? findExpressions(raw, i + 1) : [];
|
|
for (const e of exprs) {
|
|
events.push({
|
|
...e,
|
|
parent: key,
|
|
parentChain: [...stack.map(s => s.key), key],
|
|
blockScalar: false,
|
|
});
|
|
}
|
|
stack.push({ indent, key, isBlockScalar: isBlock });
|
|
continue;
|
|
}
|
|
|
|
// List item: `- <key>: <value>` or just `- <value>`
|
|
const lkv = trimmed.match(LIST_KV_RE);
|
|
if (lkv) {
|
|
const key = lkv[1];
|
|
const value = lkv[2];
|
|
const isBlock = BLOCK_SCALAR_VALUES.has(value);
|
|
const exprs = !isBlock && value ? findExpressions(raw, i + 1) : [];
|
|
for (const e of exprs) {
|
|
events.push({
|
|
...e,
|
|
parent: key,
|
|
parentChain: [...stack.map(s => s.key), key],
|
|
blockScalar: false,
|
|
});
|
|
}
|
|
// List items create a deeper synthetic indent so subsequent
|
|
// sibling keys at the same column still resolve to this item.
|
|
stack.push({ indent: indent + 2, key, isBlockScalar: isBlock });
|
|
continue;
|
|
}
|
|
|
|
// Plain list item `- something` — no key. Still scan for ${{ ... }}
|
|
// (rare but possible) and tag with the enclosing parent.
|
|
if (trimmed.startsWith('- ')) {
|
|
const exprs = findExpressions(raw, i + 1);
|
|
const enclosing = top ? top.key : '';
|
|
for (const e of exprs) {
|
|
events.push({
|
|
...e,
|
|
parent: enclosing,
|
|
parentChain: stack.map(s => s.key),
|
|
blockScalar: false,
|
|
});
|
|
}
|
|
continue;
|
|
}
|
|
}
|
|
|
|
return { triggers, events };
|
|
}
|