From ede37219a366463b164f439c490d2039054b4787 Mon Sep 17 00:00:00 2001 From: Kjell Tore Guttormsen Date: Thu, 30 Apr 2026 15:57:10 +0200 Subject: [PATCH] =?UTF-8?q?feat(workflow-scanner):=20E11=20part=202=20?= =?UTF-8?q?=E2=80=94=20re-interpolation=20+=20auth-bypass=20+=20WFL=20pref?= =?UTF-8?q?ix=20+=20orchestrator?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes E11. Three new pieces, plus integration: 1. Re-interpolation detector (Appsmith GHSL-2024-277 stealth pattern). The scanner now collects env: bindings (key -> source-expression text) by walking parsed events whose parentChain includes 'env', then for each `${{ env. }}` inside run:, re-injects MEDIUM if the binding source matches the 23-field blacklist. This catches the pattern where developers apply env-indirection but then re-interpolate the env var in run:, which cancels the mitigation (template substitution happens before shell parsing). 2. Auth-bypass category (Synacktiv 2023 Dependabot spoofing). Detects `if: ${{ github.actor == 'dependabot[bot]' }}` and variants. MEDIUM, owasp: 'LLM06' (Excessive Agency). Distinct from injection — same expression syntax, different threat class. Recommendation steers users to `github.event.pull_request.user.login`. 3. severity.mjs OWASP map registration. WFL prefix added to all four maps: - OWASP_MAP['WFL'] = ['LLM02', 'LLM06'] - OWASP_AGENTIC_MAP['WFL'] = ['ASI04'] - OWASP_SKILLS_MAP['WFL'] = [] - OWASP_MCP_MAP['WFL'] = [] Empty arrays for skills/MCP are explicit, not omitted — keeps `Object.keys(OWASP_MAP)` symmetric across maps. 4. scan-orchestrator.mjs registration. workflowScan added between supply-chain and toxic-flow (toxic-flow correlates after primaries). Verified via integration: orchestrator emits 9 WFL findings on tests/fixtures/workflows/. Bug fix: extractTriggers in workflow-yaml-state.mjs was collecting sub-properties (`branches:`, `types:`) as triggers. Now tracks the first nested indent level and ignores anything deeper. Tests: - 6 new cases in tests/scanners/workflow-scanner.test.mjs: re-interp TP, no-double-count, auth-bypass TP, auth-bypass FP (startsWith head_ref is not auth-bypass), OWASP map shape, orchestrator import + SCANNERS array entry. - 2 new fixtures: tp-reinterpolation.yml, auth-bypass-dependabot.yml. - Existing 14 scanner tests + 15 state-machine tests unchanged. Test count: 1732 -> 1738 (+6). Wave B total: +53 over baseline 1685. Pre-compact-scan flake unchanged (passes in isolation). --- .../llm-security/scanners/lib/severity.mjs | 4 + .../scanners/lib/workflow-yaml-state.mjs | 7 +- .../scanners/scan-orchestrator.mjs | 2 + .../scanners/workflow-scanner.mjs | 76 ++++++++++++++++++- .../workflows/auth-bypass-dependabot.yml | 14 ++++ .../.github/workflows/tp-reinterpolation.yml | 13 ++++ .../tests/scanners/workflow-scanner.test.mjs | 68 +++++++++++++++++ 7 files changed, 180 insertions(+), 4 deletions(-) create mode 100644 plugins/llm-security/tests/fixtures/workflows/.github/workflows/auth-bypass-dependabot.yml create mode 100644 plugins/llm-security/tests/fixtures/workflows/.github/workflows/tp-reinterpolation.yml diff --git a/plugins/llm-security/scanners/lib/severity.mjs b/plugins/llm-security/scanners/lib/severity.mjs index 70cbe08..d87972b 100644 --- a/plugins/llm-security/scanners/lib/severity.mjs +++ b/plugins/llm-security/scanners/lib/severity.mjs @@ -141,6 +141,7 @@ export const OWASP_MAP = Object.freeze({ MEM: ['LLM01'], SCR: ['LLM03'], PST: ['LLM01', 'LLM06'], + WFL: ['LLM02', 'LLM06'], }); /** @@ -159,6 +160,7 @@ export const OWASP_AGENTIC_MAP = Object.freeze({ MEM: ['ASI01', 'ASI02'], SCR: ['ASI04'], PST: ['ASI02', 'ASI03', 'ASI04', 'ASI05'], + WFL: ['ASI04'], }); /** @@ -177,6 +179,7 @@ export const OWASP_SKILLS_MAP = Object.freeze({ MEM: ['AST01', 'AST05'], SCR: ['AST06'], PST: ['AST01', 'AST03'], + WFL: [], }); /** @@ -195,6 +198,7 @@ export const OWASP_MCP_MAP = Object.freeze({ MEM: ['MCP05', 'MCP06'], SCR: ['MCP04'], PST: ['MCP02', 'MCP07'], + WFL: [], }); /** diff --git a/plugins/llm-security/scanners/lib/workflow-yaml-state.mjs b/plugins/llm-security/scanners/lib/workflow-yaml-state.mjs index 0a23b32..9c7b549 100644 --- a/plugins/llm-security/scanners/lib/workflow-yaml-state.mjs +++ b/plugins/llm-security/scanners/lib/workflow-yaml-state.mjs @@ -88,13 +88,18 @@ export function extractTriggers(lines) { return triggers; } - // Form 2/3: block list or block mapping + // Form 2/3: block list or block mapping. Only collect entries at + // the FIRST nested indent — anything deeper is a sub-property of + // the trigger (e.g. `branches:`, `types:`), not a new trigger. + let triggerIndent = null; for (let j = i + 1; j < lines.length; j++) { const sj = stripComments(lines[j]); const tj = sj.trim(); if (!tj) continue; const indent = getIndent(sj); if (indent === 0) break; // back to top-level key + if (triggerIndent === null) triggerIndent = indent; + if (indent > triggerIndent) continue; // sub-property of a trigger // List item: `- push` if (tj.startsWith('- ')) { const name = tj.slice(2).trim().replace(/^["']|["']$/g, ''); diff --git a/plugins/llm-security/scanners/scan-orchestrator.mjs b/plugins/llm-security/scanners/scan-orchestrator.mjs index 8a4109a..fce1176 100644 --- a/plugins/llm-security/scanners/scan-orchestrator.mjs +++ b/plugins/llm-security/scanners/scan-orchestrator.mjs @@ -107,6 +107,7 @@ import { scan as gitScan } from './git-forensics.mjs'; import { scan as networkScan } from './network-mapper.mjs'; import { scan as memoryScan } from './memory-poisoning-scanner.mjs'; import { scan as supplyChainScan } from './supply-chain-recheck.mjs'; +import { scan as workflowScan } from './workflow-scanner.mjs'; import { scan as tfaScan } from './toxic-flow-analyzer.mjs'; const SCANNERS = [ @@ -119,6 +120,7 @@ const SCANNERS = [ { name: 'network', fn: networkScan }, { name: 'memory', fn: memoryScan }, { name: 'supply-chain', fn: supplyChainScan }, + { name: 'workflow', fn: workflowScan }, { name: 'toxic-flow', fn: tfaScan, requiresPriorResults: true }, ]; diff --git a/plugins/llm-security/scanners/workflow-scanner.mjs b/plugins/llm-security/scanners/workflow-scanner.mjs index e8059e8..f81fc4f 100644 --- a/plugins/llm-security/scanners/workflow-scanner.mjs +++ b/plugins/llm-security/scanners/workflow-scanner.mjs @@ -116,6 +116,10 @@ const SINK_PARENTS = new Set(['run']); // engine, NOT the shell. These are sink mismatches, not injection. const NON_SINK_PARENTS = new Set(['if', 'with', 'env', 'name', 'runs-on', 'timeout-minutes', 'continue-on-error']); +// B4: auth-bypass — github.actor or forgejo.actor compared against a +// bot identity in if: contexts (Synacktiv 2023 Dependabot spoofing). +const AUTH_BYPASS_RE = /\b(?:github|forgejo)\.actor\s*(?:==|!=)\s*['"][\w-]+\[bot\]['"]/; + // --------------------------------------------------------------------------- // Discovery // --------------------------------------------------------------------------- @@ -241,19 +245,85 @@ async function scanFile(absPath, targetPath, stderrLog) { ); } + const platformLabel = platform === 'forgejo' ? 'Forgejo' : 'GitHub'; + const triggerList = [...triggers].join(', ') || 'unknown'; + + // B4: collect env: bindings (key -> source-expression). Used for + // re-interpolation detection. A binding is an event whose parent is + // a key under an `env:` block — i.e. parentChain includes 'env' + // and the parent is not 'env' itself. + const envBindings = new Map(); for (const ev of parsed.events) { + if (!ev.parentChain.includes('env')) continue; + if (ev.parent === 'env') continue; + if (SINK_PARENTS.has(ev.parent)) continue; + if (NON_SINK_PARENTS.has(ev.parent)) continue; + envBindings.set(ev.parent, ev.expr); + } + + for (const ev of parsed.events) { + // B4: auth-bypass first — fires only on if: events + if (ev.parent === 'if' && AUTH_BYPASS_RE.test(ev.expr)) { + findings.push(finding({ + scanner: SCANNER_PREFIX, + severity: SEVERITY.MEDIUM, + title: `Actor auth-bypass: ${platformLabel} workflow trusts bot identity`, + description: + `Actor auth-bypass: if-condition trusts bot identity that can be ` + + `spoofed via pull_request_target. ${platformLabel} workflow at ` + + `${relPath}: ${ev.expr}.`, + file: relPath, + line: ev.line, + evidence: `\${{ ${ev.expr} }}`, + owasp: 'LLM06', + recommendation: + 'Use `github.event.pull_request.user.login` (immutable per PR) ' + + 'instead of `github.actor` for authorization decisions. The actor ' + + 'name can be spoofed via Synacktiv-2023 Dependabot path. If the ' + + 'check must remain, gate it on an `id-token` OIDC claim.', + })); + continue; + } + if (NON_SINK_PARENTS.has(ev.parent)) continue; if (!SINK_PARENTS.has(ev.parent)) continue; + // B4: re-interpolation pattern — `${{ env. }}` inside run: + // where was bound from a blacklisted field via top-level + // or job-level env:. Cancels the env-indirection mitigation. + const reinterpMatch = ev.expr.match(/^env\.([\w-]+)$/); + if (reinterpMatch) { + const key = reinterpMatch[1]; + const source = envBindings.get(key); + if (source && DANGEROUS_RE.test(source)) { + findings.push(finding({ + scanner: SCANNER_PREFIX, + severity: SEVERITY.MEDIUM, + title: `Re-interpolation: env.${key} re-injects ${source.split(/\s+/)[0]} at ${platformLabel} run:`, + description: + `Re-interpolation: env.${key} was set from \${{ ${source} }}; reading via ` + + `\${{ env.${key} }} in run: re-injects the unsafe value (Appsmith ` + + `GHSL-2024-277 stealth pattern). Workflow: ${relPath}.`, + file: relPath, + line: ev.line, + evidence: `\${{ env.${key} }}`, + owasp: 'LLM02', + recommendation: + `Consume the env var via \$${key} (shell variable) inside run:, ` + + `not via \${{ env.${key} }}. Template substitution happens before ` + + `shell parsing — re-interpolating cancels the env-indirection ` + + 'mitigation and re-introduces the original injection.', + })); + continue; + } + } + const fieldClass = classifyField(ev.expr); if (fieldClass === 'other') continue; const severity = severityFor(triggers, fieldClass); if (!severity) continue; - const platformLabel = platform === 'forgejo' ? 'Forgejo' : 'GitHub'; - const triggerList = [...triggers].join(', ') || 'unknown'; - findings.push(finding({ scanner: SCANNER_PREFIX, severity, diff --git a/plugins/llm-security/tests/fixtures/workflows/.github/workflows/auth-bypass-dependabot.yml b/plugins/llm-security/tests/fixtures/workflows/.github/workflows/auth-bypass-dependabot.yml new file mode 100644 index 0000000..012203f --- /dev/null +++ b/plugins/llm-security/tests/fixtures/workflows/.github/workflows/auth-bypass-dependabot.yml @@ -0,0 +1,14 @@ +name: dependabot trust check (auth-bypass — Synacktiv 2023) +on: + pull_request_target: + branches: [main] + +jobs: + auto-merge: + runs-on: ubuntu-latest + if: ${{ github.actor == 'dependabot[bot]' }} + steps: + - name: Checkout PR + uses: actions/checkout@v4 + - name: Approve + run: gh pr review --approve diff --git a/plugins/llm-security/tests/fixtures/workflows/.github/workflows/tp-reinterpolation.yml b/plugins/llm-security/tests/fixtures/workflows/.github/workflows/tp-reinterpolation.yml new file mode 100644 index 0000000..ad27b5f --- /dev/null +++ b/plugins/llm-security/tests/fixtures/workflows/.github/workflows/tp-reinterpolation.yml @@ -0,0 +1,13 @@ +name: re-interpolation stealth (TP — Appsmith GHSL-2024-277) +on: + pull_request_target: + branches: [main] + +jobs: + build: + runs-on: ubuntu-latest + env: + PR_TITLE: ${{ github.event.pull_request.title }} + steps: + - name: Looks-safe but is not + run: echo "${{ env.PR_TITLE }}" diff --git a/plugins/llm-security/tests/scanners/workflow-scanner.test.mjs b/plugins/llm-security/tests/scanners/workflow-scanner.test.mjs index 5172a05..614d984 100644 --- a/plugins/llm-security/tests/scanners/workflow-scanner.test.mjs +++ b/plugins/llm-security/tests/scanners/workflow-scanner.test.mjs @@ -146,3 +146,71 @@ describe('workflow-scanner — output envelope', () => { assert.equal(r.findings.length, 0); }); }); + +// --------------------------------------------------------------------------- +// B4 — re-interpolation + auth-bypass categories +// --------------------------------------------------------------------------- + +describe('workflow-scanner — B4 re-interpolation', () => { + it('flags ${{ env.PR_TITLE }} in run: when PR_TITLE was set from blacklisted source', async () => { + resetCounter(); + const r = await scan(FIXTURE_DIR); + const fs = findingsByFile(r.findings, 'tp-reinterpolation.yml'); + const reinterp = fs.find(f => /Re-interpolation/i.test(f.title)); + assert.ok(reinterp, `expected a Re-interpolation finding in ${JSON.stringify(fs)}`); + assert.equal(reinterp.severity, 'medium'); + assert.match(reinterp.evidence, /env\.PR_TITLE/); + assert.match(reinterp.description, /Appsmith|GHSL-2024-277/); + }); + + it('does not double-count: re-interpolation is emitted instead of a regular run-context finding for the env. expression', async () => { + resetCounter(); + const r = await scan(FIXTURE_DIR); + const fs = findingsByFile(r.findings, 'tp-reinterpolation.yml'); + const runForEnvVar = fs.filter(f => f.evidence === '${{ env.PR_TITLE }}' && !/Re-interpolation/i.test(f.title)); + assert.equal(runForEnvVar.length, 0); + }); +}); + +describe('workflow-scanner — B4 auth-bypass', () => { + it('flags github.actor == "dependabot[bot]" in if: as MEDIUM auth-bypass', async () => { + resetCounter(); + const r = await scan(FIXTURE_DIR); + const fs = findingsByFile(r.findings, 'auth-bypass-dependabot.yml'); + const auth = fs.find(f => /Actor auth-bypass/i.test(f.title)); + assert.ok(auth, `expected an Actor auth-bypass finding in ${JSON.stringify(fs)}`); + assert.equal(auth.severity, 'medium'); + assert.equal(auth.owasp, 'LLM06'); + assert.match(auth.recommendation, /pull_request\.user\.login/); + }); + + it('does NOT flag plain `if: ${{ startsWith(github.head_ref, …) }}` as auth-bypass', async () => { + resetCounter(); + const r = await scan(FIXTURE_DIR); + const fs = findingsByFile(r.findings, 'fp-if-context.yml'); + const auth = fs.filter(f => /Actor auth-bypass/i.test(f.title)); + assert.equal(auth.length, 0); + }); +}); + +describe('workflow-scanner — severity.mjs OWASP map registration', () => { + it('OWASP_MAP includes WFL with LLM02 and LLM06', async () => { + const { OWASP_MAP, OWASP_AGENTIC_MAP, OWASP_SKILLS_MAP, OWASP_MCP_MAP } = + await import('../../scanners/lib/severity.mjs'); + assert.deepEqual(OWASP_MAP.WFL, ['LLM02', 'LLM06']); + assert.deepEqual(OWASP_AGENTIC_MAP.WFL, ['ASI04']); + assert.deepEqual(OWASP_SKILLS_MAP.WFL, []); + assert.deepEqual(OWASP_MCP_MAP.WFL, []); + }); +}); + +describe('workflow-scanner — orchestrator registration', () => { + it('scan-orchestrator imports and lists the workflow scanner', async () => { + const { readFileSync } = await import('node:fs'); + const { resolve: resolvePath } = await import('node:path'); + const orchPath = resolvePath(import.meta.dirname, '../../scanners/scan-orchestrator.mjs'); + const text = readFileSync(orchPath, 'utf8'); + assert.match(text, /import\s+\{\s*scan as workflowScan\s*\}\s+from\s+['"]\.\/workflow-scanner\.mjs['"]/); + assert.match(text, /\{\s*name:\s*'workflow',\s*fn:\s*workflowScan\s*\}/); + }); +});