From ede37219a366463b164f439c490d2039054b4787 Mon Sep 17 00:00:00 2001
From: Kjell Tore Guttormsen <ktg@humanize.no>
Date: Thu, 30 Apr 2026 15:57:10 +0200
Subject: [PATCH] =?UTF-8?q?feat(workflow-scanner):=20E11=20part=202=20?=
 =?UTF-8?q?=E2=80=94=20re-interpolation=20+=20auth-bypass=20+=20WFL=20pref?=
 =?UTF-8?q?ix=20+=20orchestrator?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes E11. Three new pieces, plus integration:

1. Re-interpolation detector (Appsmith GHSL-2024-277 stealth pattern).
   The scanner now collects env: bindings (key -> source-expression
   text) by walking parsed events whose parentChain includes 'env',
   then for each `${{ env.<KEY> }}` inside run:, re-injects MEDIUM
   if the binding source matches the 23-field blacklist. This
   catches the pattern where developers apply env-indirection but
   then re-interpolate the env var in run:, which cancels the
   mitigation (template substitution happens before shell parsing).

2. Auth-bypass category (Synacktiv 2023 Dependabot spoofing).
   Detects `if: ${{ github.actor == 'dependabot[bot]' }}` and
   variants. MEDIUM, owasp: 'LLM06' (Excessive Agency). Distinct
   from injection — same expression syntax, different threat class.
   Recommendation steers users to `github.event.pull_request.user.login`.

3. severity.mjs OWASP map registration. WFL prefix added to all
   four maps:
   - OWASP_MAP['WFL'] = ['LLM02', 'LLM06']
   - OWASP_AGENTIC_MAP['WFL'] = ['ASI04']
   - OWASP_SKILLS_MAP['WFL'] = []
   - OWASP_MCP_MAP['WFL'] = []
   Empty arrays for skills/MCP are explicit, not omitted — keeps
   `Object.keys(OWASP_MAP)` symmetric across maps.

4. scan-orchestrator.mjs registration. workflowScan added between
   supply-chain and toxic-flow (toxic-flow correlates after primaries).
   Verified via integration: orchestrator emits 9 WFL findings on
   tests/fixtures/workflows/.

Bug fix: extractTriggers in workflow-yaml-state.mjs was collecting
sub-properties (`branches:`, `types:`) as triggers. Now tracks the
first nested indent level and ignores anything deeper.

Tests:
- 6 new cases in tests/scanners/workflow-scanner.test.mjs:
  re-interp TP, no-double-count, auth-bypass TP, auth-bypass FP
  (startsWith head_ref is not auth-bypass), OWASP map shape,
  orchestrator import + SCANNERS array entry.
- 2 new fixtures: tp-reinterpolation.yml, auth-bypass-dependabot.yml.
- Existing 14 scanner tests + 15 state-machine tests unchanged.

Test count: 1732 -> 1738 (+6). Wave B total: +53 over baseline 1685.
Pre-compact-scan flake unchanged (passes in isolation).
---
 .../llm-security/scanners/lib/severity.mjs    |  4 +
 .../scanners/lib/workflow-yaml-state.mjs      |  7 +-
 .../scanners/scan-orchestrator.mjs            |  2 +
 .../scanners/workflow-scanner.mjs             | 76 ++++++++++++++++++-
 .../workflows/auth-bypass-dependabot.yml      | 14 ++++
 .../.github/workflows/tp-reinterpolation.yml  | 13 ++++
 .../tests/scanners/workflow-scanner.test.mjs  | 68 +++++++++++++++++
 7 files changed, 180 insertions(+), 4 deletions(-)
 create mode 100644 plugins/llm-security/tests/fixtures/workflows/.github/workflows/auth-bypass-dependabot.yml
 create mode 100644 plugins/llm-security/tests/fixtures/workflows/.github/workflows/tp-reinterpolation.yml
diff --git a/plugins/llm-security/scanners/lib/severity.mjs b/plugins/llm-security/scanners/lib/severity.mjs
index 70cbe08..d87972b 100644
--- a/plugins/llm-security/scanners/lib/severity.mjs
+++ b/plugins/llm-security/scanners/lib/severity.mjs
@@ -141,6 +141,7 @@ export const OWASP_MAP = Object.freeze({
   MEM: ['LLM01'],
   SCR: ['LLM03'],
   PST: ['LLM01', 'LLM06'],
+  WFL: ['LLM02', 'LLM06'],
 });
 
 /**
@@ -159,6 +160,7 @@ export const OWASP_AGENTIC_MAP = Object.freeze({
   MEM: ['ASI01', 'ASI02'],
   SCR: ['ASI04'],
   PST: ['ASI02', 'ASI03', 'ASI04', 'ASI05'],
+  WFL: ['ASI04'],
 });
 
 /**
@@ -177,6 +179,7 @@ export const OWASP_SKILLS_MAP = Object.freeze({
   MEM: ['AST01', 'AST05'],
   SCR: ['AST06'],
   PST: ['AST01', 'AST03'],
+  WFL: [],
 });
 
 /**
@@ -195,6 +198,7 @@ export const OWASP_MCP_MAP = Object.freeze({
   MEM: ['MCP05', 'MCP06'],
   SCR: ['MCP04'],
   PST: ['MCP02', 'MCP07'],
+  WFL: [],
 });
 
 /**
diff --git a/plugins/llm-security/scanners/lib/workflow-yaml-state.mjs b/plugins/llm-security/scanners/lib/workflow-yaml-state.mjs
index 0a23b32..9c7b549 100644
--- a/plugins/llm-security/scanners/lib/workflow-yaml-state.mjs
+++ b/plugins/llm-security/scanners/lib/workflow-yaml-state.mjs
@@ -88,13 +88,18 @@ export function extractTriggers(lines) {
       return triggers;
     }
 
-    // Form 2/3: block list or block mapping
+    // Form 2/3: block list or block mapping. Only collect entries at
+    // the FIRST nested indent — anything deeper is a sub-property of
+    // the trigger (e.g. `branches:`, `types:`), not a new trigger.
+    let triggerIndent = null;
     for (let j = i + 1; j < lines.length; j++) {
       const sj = stripComments(lines[j]);
       const tj = sj.trim();
       if (!tj) continue;
       const indent = getIndent(sj);
       if (indent === 0) break; // back to top-level key
+      if (triggerIndent === null) triggerIndent = indent;
+      if (indent > triggerIndent) continue; // sub-property of a trigger
       // List item: `- push`
       if (tj.startsWith('- ')) {
         const name = tj.slice(2).trim().replace(/^["']|["']$/g, '');
diff --git a/plugins/llm-security/scanners/scan-orchestrator.mjs b/plugins/llm-security/scanners/scan-orchestrator.mjs
index 8a4109a..fce1176 100644
--- a/plugins/llm-security/scanners/scan-orchestrator.mjs
+++ b/plugins/llm-security/scanners/scan-orchestrator.mjs
@@ -107,6 +107,7 @@ import { scan as gitScan } from './git-forensics.mjs';
 import { scan as networkScan } from './network-mapper.mjs';
 import { scan as memoryScan } from './memory-poisoning-scanner.mjs';
 import { scan as supplyChainScan } from './supply-chain-recheck.mjs';
+import { scan as workflowScan } from './workflow-scanner.mjs';
 import { scan as tfaScan } from './toxic-flow-analyzer.mjs';
 
 const SCANNERS = [
@@ -119,6 +120,7 @@ const SCANNERS = [
   { name: 'network',       fn: networkScan },
   { name: 'memory',        fn: memoryScan },
   { name: 'supply-chain',  fn: supplyChainScan },
+  { name: 'workflow',      fn: workflowScan },
   { name: 'toxic-flow',    fn: tfaScan, requiresPriorResults: true },
 ];
 
diff --git a/plugins/llm-security/scanners/workflow-scanner.mjs b/plugins/llm-security/scanners/workflow-scanner.mjs
index e8059e8..f81fc4f 100644
--- a/plugins/llm-security/scanners/workflow-scanner.mjs
+++ b/plugins/llm-security/scanners/workflow-scanner.mjs
@@ -116,6 +116,10 @@ const SINK_PARENTS = new Set(['run']);
 // engine, NOT the shell. These are sink mismatches, not injection.
 const NON_SINK_PARENTS = new Set(['if', 'with', 'env', 'name', 'runs-on', 'timeout-minutes', 'continue-on-error']);
 
+// B4: auth-bypass — github.actor or forgejo.actor compared against a
+// bot identity in if: contexts (Synacktiv 2023 Dependabot spoofing).
+const AUTH_BYPASS_RE = /\b(?:github|forgejo)\.actor\s*(?:==|!=)\s*['"][\w-]+\[bot\]['"]/;
+
 // ---------------------------------------------------------------------------
 // Discovery
 // ---------------------------------------------------------------------------
@@ -241,19 +245,85 @@ async function scanFile(absPath, targetPath, stderrLog) {
     );
   }
 
+  const platformLabel = platform === 'forgejo' ? 'Forgejo' : 'GitHub';
+  const triggerList = [...triggers].join(', ') || 'unknown';
+
+  // B4: collect env: bindings (key -> source-expression). Used for
+  // re-interpolation detection. A binding is an event whose parent is
+  // a key under an `env:` block — i.e. parentChain includes 'env'
+  // and the parent is not 'env' itself.
+  const envBindings = new Map();
   for (const ev of parsed.events) {
+    if (!ev.parentChain.includes('env')) continue;
+    if (ev.parent === 'env') continue;
+    if (SINK_PARENTS.has(ev.parent)) continue;
+    if (NON_SINK_PARENTS.has(ev.parent)) continue;
+    envBindings.set(ev.parent, ev.expr);
+  }
+
+  for (const ev of parsed.events) {
+    // B4: auth-bypass first — fires only on if: events
+    if (ev.parent === 'if' && AUTH_BYPASS_RE.test(ev.expr)) {
+      findings.push(finding({
+        scanner: SCANNER_PREFIX,
+        severity: SEVERITY.MEDIUM,
+        title: `Actor auth-bypass: ${platformLabel} workflow trusts bot identity`,
+        description:
+          `Actor auth-bypass: if-condition trusts bot identity that can be ` +
+          `spoofed via pull_request_target. ${platformLabel} workflow at ` +
+          `${relPath}: ${ev.expr}.`,
+        file: relPath,
+        line: ev.line,
+        evidence: `\${{ ${ev.expr} }}`,
+        owasp: 'LLM06',
+        recommendation:
+          'Use `github.event.pull_request.user.login` (immutable per PR) ' +
+          'instead of `github.actor` for authorization decisions. The actor ' +
+          'name can be spoofed via Synacktiv-2023 Dependabot path. If the ' +
+          'check must remain, gate it on an `id-token` OIDC claim.',
+      }));
+      continue;
+    }
+
     if (NON_SINK_PARENTS.has(ev.parent)) continue;
     if (!SINK_PARENTS.has(ev.parent)) continue;
 
+    // B4: re-interpolation pattern — `${{ env.<KEY> }}` inside run:
+    // where <KEY> was bound from a blacklisted field via top-level
+    // or job-level env:. Cancels the env-indirection mitigation.
+    const reinterpMatch = ev.expr.match(/^env\.([\w-]+)$/);
+    if (reinterpMatch) {
+      const key = reinterpMatch[1];
+      const source = envBindings.get(key);
+      if (source && DANGEROUS_RE.test(source)) {
+        findings.push(finding({
+          scanner: SCANNER_PREFIX,
+          severity: SEVERITY.MEDIUM,
+          title: `Re-interpolation: env.${key} re-injects ${source.split(/\s+/)[0]} at ${platformLabel} run:`,
+          description:
+            `Re-interpolation: env.${key} was set from \${{ ${source} }}; reading via ` +
+            `\${{ env.${key} }} in run: re-injects the unsafe value (Appsmith ` +
+            `GHSL-2024-277 stealth pattern). Workflow: ${relPath}.`,
+          file: relPath,
+          line: ev.line,
+          evidence: `\${{ env.${key} }}`,
+          owasp: 'LLM02',
+          recommendation:
+            `Consume the env var via \$${key} (shell variable) inside run:, ` +
+            `not via \${{ env.${key} }}. Template substitution happens before ` +
+            `shell parsing — re-interpolating cancels the env-indirection ` +
+            'mitigation and re-introduces the original injection.',
+        }));
+        continue;
+      }
+    }
+
     const fieldClass = classifyField(ev.expr);
     if (fieldClass === 'other') continue;
 
     const severity = severityFor(triggers, fieldClass);
     if (!severity) continue;
 
-    const platformLabel = platform === 'forgejo' ? 'Forgejo' : 'GitHub';
-    const triggerList = [...triggers].join(', ') || 'unknown';
-
     findings.push(finding({
       scanner: SCANNER_PREFIX,
       severity,
diff --git a/plugins/llm-security/tests/fixtures/workflows/.github/workflows/auth-bypass-dependabot.yml b/plugins/llm-security/tests/fixtures/workflows/.github/workflows/auth-bypass-dependabot.yml
new file mode 100644
index 0000000..012203f
--- /dev/null
+++ b/plugins/llm-security/tests/fixtures/workflows/.github/workflows/auth-bypass-dependabot.yml
@@ -0,0 +1,14 @@
+name: dependabot trust check (auth-bypass — Synacktiv 2023)
+on:
+  pull_request_target:
+    branches: [main]
+
+jobs:
+  auto-merge:
+    runs-on: ubuntu-latest
+    if: ${{ github.actor == 'dependabot[bot]' }}
+    steps:
+      - name: Checkout PR
+        uses: actions/checkout@v4
+      - name: Approve
+        run: gh pr review --approve
diff --git a/plugins/llm-security/tests/fixtures/workflows/.github/workflows/tp-reinterpolation.yml b/plugins/llm-security/tests/fixtures/workflows/.github/workflows/tp-reinterpolation.yml
new file mode 100644
index 0000000..ad27b5f
--- /dev/null
+++ b/plugins/llm-security/tests/fixtures/workflows/.github/workflows/tp-reinterpolation.yml
@@ -0,0 +1,13 @@
+name: re-interpolation stealth (TP — Appsmith GHSL-2024-277)
+on:
+  pull_request_target:
+    branches: [main]
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    env:
+      PR_TITLE: ${{ github.event.pull_request.title }}
+    steps:
+      - name: Looks-safe but is not
+        run: echo "${{ env.PR_TITLE }}"
diff --git a/plugins/llm-security/tests/scanners/workflow-scanner.test.mjs b/plugins/llm-security/tests/scanners/workflow-scanner.test.mjs
index 5172a05..614d984 100644
--- a/plugins/llm-security/tests/scanners/workflow-scanner.test.mjs
+++ b/plugins/llm-security/tests/scanners/workflow-scanner.test.mjs
@@ -146,3 +146,71 @@ describe('workflow-scanner — output envelope', () => {
     assert.equal(r.findings.length, 0);
   });
 });
+
+// ---------------------------------------------------------------------------
+// B4 — re-interpolation + auth-bypass categories
+// ---------------------------------------------------------------------------
+
+describe('workflow-scanner — B4 re-interpolation', () => {
+  it('flags ${{ env.PR_TITLE }} in run: when PR_TITLE was set from blacklisted source', async () => {
+    resetCounter();
+    const r = await scan(FIXTURE_DIR);
+    const fs = findingsByFile(r.findings, 'tp-reinterpolation.yml');
+    const reinterp = fs.find(f => /Re-interpolation/i.test(f.title));
+    assert.ok(reinterp, `expected a Re-interpolation finding in ${JSON.stringify(fs)}`);
+    assert.equal(reinterp.severity, 'medium');
+    assert.match(reinterp.evidence, /env\.PR_TITLE/);
+    assert.match(reinterp.description, /Appsmith|GHSL-2024-277/);
+  });
+
+  it('does not double-count: re-interpolation is emitted instead of a regular run-context finding for the env.<KEY> expression', async () => {
+    resetCounter();
+    const r = await scan(FIXTURE_DIR);
+    const fs = findingsByFile(r.findings, 'tp-reinterpolation.yml');
+    const runForEnvVar = fs.filter(f => f.evidence === '${{ env.PR_TITLE }}' && !/Re-interpolation/i.test(f.title));
+    assert.equal(runForEnvVar.length, 0);
+  });
+});
+
+describe('workflow-scanner — B4 auth-bypass', () => {
+  it('flags github.actor == "dependabot[bot]" in if: as MEDIUM auth-bypass', async () => {
+    resetCounter();
+    const r = await scan(FIXTURE_DIR);
+    const fs = findingsByFile(r.findings, 'auth-bypass-dependabot.yml');
+    const auth = fs.find(f => /Actor auth-bypass/i.test(f.title));
+    assert.ok(auth, `expected an Actor auth-bypass finding in ${JSON.stringify(fs)}`);
+    assert.equal(auth.severity, 'medium');
+    assert.equal(auth.owasp, 'LLM06');
+    assert.match(auth.recommendation, /pull_request\.user\.login/);
+  });
+
+  it('does NOT flag plain `if: ${{ startsWith(github.head_ref, …) }}` as auth-bypass', async () => {
+    resetCounter();
+    const r = await scan(FIXTURE_DIR);
+    const fs = findingsByFile(r.findings, 'fp-if-context.yml');
+    const auth = fs.filter(f => /Actor auth-bypass/i.test(f.title));
+    assert.equal(auth.length, 0);
+  });
+});
+
+describe('workflow-scanner — severity.mjs OWASP map registration', () => {
+  it('OWASP_MAP includes WFL with LLM02 and LLM06', async () => {
+    const { OWASP_MAP, OWASP_AGENTIC_MAP, OWASP_SKILLS_MAP, OWASP_MCP_MAP } =
+      await import('../../scanners/lib/severity.mjs');
+    assert.deepEqual(OWASP_MAP.WFL, ['LLM02', 'LLM06']);
+    assert.deepEqual(OWASP_AGENTIC_MAP.WFL, ['ASI04']);
+    assert.deepEqual(OWASP_SKILLS_MAP.WFL, []);
+    assert.deepEqual(OWASP_MCP_MAP.WFL, []);
+  });
+});
+
+describe('workflow-scanner — orchestrator registration', () => {
+  it('scan-orchestrator imports and lists the workflow scanner', async () => {
+    const { readFileSync } = await import('node:fs');
+    const { resolve: resolvePath } = await import('node:path');
+    const orchPath = resolvePath(import.meta.dirname, '../../scanners/scan-orchestrator.mjs');
+    const text = readFileSync(orchPath, 'utf8');
+    assert.match(text, /import\s+\{\s*scan as workflowScan\s*\}\s+from\s+['"]\.\/workflow-scanner\.mjs['"]/);
+    assert.match(text, /\{\s*name:\s*'workflow',\s*fn:\s*workflowScan\s*\}/);
+  });
+});