feat(workflow-scanner): E11 part 1 — core file-walk + 23-field blacklist + sink-restriction

Adds a deterministic GitHub Actions / Forgejo Actions injection scanner. Detects ${{ <dangerous-field> }} interpolations inside `run:` step blocks under privileged or semi-privileged triggers. Sink-restricted: `if:` / `with:` / `env:` (block-level) are evaluated by the runner expression engine, not the shell, so they are NOT injection sinks and are suppressed at parser level. Why: workflow expression injection is the most prevalent SAST class on GitHub (CodeQL preview: 800K+ findings across 158K repos). The graduated severity matrix (HIGH for pull_request_target / discussion / workflow_run; MEDIUM for pull_request / workflow_dispatch) is the community-converged calibration target — uniform HIGH causes alert fatigue. Components: - scanners/lib/workflow-yaml-state.mjs — line-based YAML state machine. Tracks indentation, parent-context stack, and `run: |` / `run: >` block-scalar entry/exit. Zero deps. - scanners/workflow-scanner.mjs — discoverWorkflows() probes .github/workflows/ and .forgejo/workflows/ directly (file-discovery has no glob include). 23-field blacklist (GHSL 17 + 6 GlueStack-class additions). Platform encoded via file path; no schema extension to finding(). Forgejo-specific: workflow_run advisory emitted to stderr; recommendation text mentions Forgejo's server-level token scoping (job-level `permissions:` is ignored). - knowledge/workflow-injection-patterns.md — 23-field blacklist, trigger taxonomy, severity matrix, Forgejo divergences, NVD CVE corpus. Tests (47 new): - tests/lib/workflow-yaml-state.test.mjs (15): trigger forms (string / inline-list / block-list / block-mapping), single-line run, block-scalar | and > tracking, env/with sink-mismatch, multi-line, comment stripping, line-number accuracy.
- tests/scanners/workflow-scanner.test.mjs (14): TP head_ref pull_request_target, TP discussion.title gluestack pattern, TP comment.body pull_request, TP issue.body block-scalar, FP if-context, FP env-block, INFO numeric, Forgejo TP, Forgejo workflow_run advisory, envelope shape, WFL prefix. - 9 fixtures in tests/fixtures/workflows/{.github,.forgejo}/workflows/. Out of scope (B4 / Batch D): - Re-interpolation detection (env.VAR after env: from blacklisted source) - github.actor authorization-bypass category - WFL prefix in severity.mjs OWASP maps + scan-orchestrator registration (B4) - Composite-action input tracing, GITHUB_ENV poisoning (Batch D) Test count: 1685 → 1732 (+47). Pre-compact-scan flake unchanged (passes in isolation).
2026-04-30 15:48:48 +02:00 · 2026-04-30 15:48:48 +02:00 · c31d4b1718
commit c31d4b1718
parent ad86f5031a
14 changed files with 1167 additions and 0 deletions
--- a/plugins/llm-security/tests/fixtures/workflows/.forgejo/workflows/forgejo-tp.yml
+++ b/plugins/llm-security/tests/fixtures/workflows/.forgejo/workflows/forgejo-tp.yml
@ -0,0 +1,11 @@
+name: forgejo head_ref echo (TP — Forgejo + pull_request)
+on:
+  pull_request:
+    branches: [main]
+
+jobs:
+  echo-ref:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Echo head ref
+        run: echo "Forgejo head_ref ${{ forgejo.head_ref }}"
--- a/plugins/llm-security/tests/fixtures/workflows/.forgejo/workflows/forgejo-workflow-run.yml
+++ b/plugins/llm-security/tests/fixtures/workflows/.forgejo/workflows/forgejo-workflow-run.yml
@ -0,0 +1,12 @@
+name: forgejo workflow_run divergence (advisory)
+on:
+  workflow_run:
+    workflows: ["build"]
+    types: [completed]
+
+jobs:
+  echo:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Echo title
+        run: echo "Title was ${{ forgejo.event.pull_request.title }}"
--- a/plugins/llm-security/tests/fixtures/workflows/.github/workflows/fp-env-block.yml
+++ b/plugins/llm-security/tests/fixtures/workflows/.github/workflows/fp-env-block.yml
@ -0,0 +1,14 @@
+name: env block-level mapping (FP — bind, not exec)
+on:
+  pull_request_target:
+    branches: [main]
+
+env:
+  PR_TITLE: ${{ github.event.pull_request.title }}
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Use env safely
+        run: echo "$PR_TITLE"
--- a/plugins/llm-security/tests/fixtures/workflows/.github/workflows/fp-if-context.yml
+++ b/plugins/llm-security/tests/fixtures/workflows/.github/workflows/fp-if-context.yml
@ -0,0 +1,12 @@
+name: if-context evaluation (FP — engine, not shell)
+on:
+  pull_request_target:
+    branches: [main]
+
+jobs:
+  conditional:
+    runs-on: ubuntu-latest
+    if: ${{ startsWith(github.head_ref, 'release/') }}
+    steps:
+      - name: Run only on release branches
+        run: echo "release branch detected"
--- a/plugins/llm-security/tests/fixtures/workflows/.github/workflows/fp-numeric-field.yml
+++ b/plugins/llm-security/tests/fixtures/workflows/.github/workflows/fp-numeric-field.yml
@ -0,0 +1,11 @@
+name: "numeric-field run: (FP/INFO — character-set guarantees no shell metas)"
+on:
+  pull_request:
+    branches: [main]
+
+jobs:
+  log-pr-number:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Echo PR number
+        run: echo "PR ${{ github.event.pull_request.number }}"
--- a/plugins/llm-security/tests/fixtures/workflows/.github/workflows/tp-block-scalar-run.yml
+++ b/plugins/llm-security/tests/fixtures/workflows/.github/workflows/tp-block-scalar-run.yml
@ -0,0 +1,14 @@
+name: multi-line run scalar (TP — block-scalar tracking)
+on:
+  pull_request_target:
+    branches: [main]
+
+jobs:
+  log:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Multi-line script
+        run: |
+          echo "Issue title:"
+          echo "${{ github.event.issue.body }}"
+          echo "----"
--- a/plugins/llm-security/tests/fixtures/workflows/.github/workflows/tp-discussion-title.yml
+++ b/plugins/llm-security/tests/fixtures/workflows/.github/workflows/tp-discussion-title.yml
@ -0,0 +1,11 @@
+name: discussion welcome (TP — gluestack CVE-2025-53104 pattern)
+on:
+  discussion:
+    types: [created]
+
+jobs:
+  greet:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Welcome
+        run: echo "New discussion ${{ github.event.discussion.title }}"
--- a/plugins/llm-security/tests/fixtures/workflows/.github/workflows/tp-prtarget-head-ref.yml
+++ b/plugins/llm-security/tests/fixtures/workflows/.github/workflows/tp-prtarget-head-ref.yml
@ -0,0 +1,11 @@
+name: pwn-request demo (TP)
+on:
+  pull_request_target:
+    branches: [main]
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Echo head ref
+        run: echo "Building branch ${{ github.head_ref }}"
--- a/plugins/llm-security/tests/fixtures/workflows/.github/workflows/tp-pull-request-comment.yml
+++ b/plugins/llm-security/tests/fixtures/workflows/.github/workflows/tp-pull-request-comment.yml
@ -0,0 +1,11 @@
+name: comment echo (TP — pull_request, MEDIUM)
+on:
+  pull_request:
+    types: [opened, edited]
+
+jobs:
+  echo-comment:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Echo body
+        run: echo "Comment said ${{ github.event.comment.body }}"
--- a/plugins/llm-security/tests/lib/workflow-yaml-state.test.mjs
+++ b/plugins/llm-security/tests/lib/workflow-yaml-state.test.mjs
@ -0,0 +1,193 @@
+// workflow-yaml-state.test.mjs — unit tests for E11 line-based state machine.
+
+import { describe, it } from 'node:test';
+import assert from 'node:assert/strict';
+
+const { parseWorkflow, extractTriggers } = await import('../../scanners/lib/workflow-yaml-state.mjs');
+
+// extractTriggers is fed the workflow as an array of lines; its result is read via spread, has() and size.
+describe('extractTriggers', () => {
+  it('handles `on: push` (string form)', () => {
+    const t = extractTriggers(['on: push']);
+    assert.deepEqual([...t], ['push']);
+  });
+
+  it('handles `on: [push, pull_request_target]` (inline list)', () => {
+    const t = extractTriggers(['on: [push, pull_request_target]']);
+    assert.deepEqual([...t].sort(), ['pull_request_target', 'push']);
+  });
+
+  it('handles block list', () => {
+    const t = extractTriggers(['on:', '  - push', '  - pull_request']);
+    assert.deepEqual([...t].sort(), ['pull_request', 'push']);
+  });
+
+  it('handles block mapping', () => {
+    const text = ['on:', '  pull_request_target:', '    branches: [main]', '  discussion:', 'jobs:'];
+    const t = extractTriggers(text);
+    assert.ok(t.has('pull_request_target'));
+    assert.ok(t.has('discussion'));
+  });
+
+  it('returns empty set when no `on:` block found', () => {
+    const t = extractTriggers(['name: hello', 'jobs:', '  build:', '    runs-on: ubuntu-latest']);
+    assert.equal(t.size, 0);
+  });
+});
+
+describe('parseWorkflow — single-line run:', () => {
+  it('emits a run-context event for ${{ ... }} in inline run:', () => {
+    const workflowLines = [
+      'on: pull_request_target',
+      'jobs:',
+      '  j:',
+      '    steps:',
+      '      - name: echo',
+      '        run: echo "${{ github.head_ref }}"',
+    ];
+    // Keep only the events whose parent key is `run`; exactly one inline expression is expected.
+    const runEvents = parseWorkflow(workflowLines.join('\n')).events.filter((evt) => evt.parent === 'run');
+    assert.equal(runEvents.length, 1);
+    assert.equal(runEvents[0].expr, 'github.head_ref');
+    assert.equal(runEvents[0].blockScalar, false);
+  });
+
+  it('emits an if-context event (parent === "if") for if: expression', () => {
+    const workflowLines = [
+      'on: pull_request_target',
+      'jobs:',
+      '  j:',
+      '    if: ${{ startsWith(github.head_ref, "release/") }}',
+      '    runs-on: ubuntu-latest',
+    ];
+    // The job-level `if:` expression must surface as an event tagged with parent "if".
+    const ifEvents = parseWorkflow(workflowLines.join('\n')).events.filter((evt) => evt.parent === 'if');
+    assert.ok(ifEvents.length > 0);
+    assert.ok(ifEvents[0].expr.startsWith('startsWith'));
+  });
+});
+
+describe('parseWorkflow — block scalars', () => {
+  it('tracks `run: |` body lines as run-context with blockScalar=true', () => {
+    const workflowLines = [
+      'on: pull_request_target',
+      'jobs:',
+      '  j:',
+      '    steps:',
+      '      - name: multi',
+      '        run: |',
+      '          echo "Issue title:"',
+      '          echo "${{ github.event.issue.body }}"',
+      '          echo done',
+    ];
+    // The only expression sits on the 8th line of the joined text, inside the `run: |` body.
+    const runEvents = parseWorkflow(workflowLines.join('\n')).events.filter((evt) => evt.parent === 'run');
+    assert.equal(runEvents.length, 1);
+    assert.equal(runEvents[0].expr, 'github.event.issue.body');
+    assert.equal(runEvents[0].blockScalar, true);
+    assert.equal(runEvents[0].line, 8);
+  });
+
+  it('tracks `run: >` (folded scalar) the same way', () => {
+    const workflowLines = [
+      'on: pull_request',
+      'jobs:',
+      '  j:',
+      '    steps:',
+      '      - name: folded',
+      '        run: >',
+      '          echo ${{ github.event.pull_request.title }}',
+    ];
+    // At least one event must be attributed to `run` and flagged as coming from a block scalar.
+    assert.ok(parseWorkflow(workflowLines.join('\n')).events.some((evt) => evt.parent === 'run' && evt.blockScalar));
+  });
+});
+
+describe('parseWorkflow — sink-mismatch contexts', () => {
+  it('parent is the env key, with "env" in parentChain, for top-level env: mapping', () => {
+    const yml = [
+      'on: pull_request_target',
+      'env:',
+      '  PR_TITLE: ${{ github.event.pull_request.title }}',
+      'jobs:',
+      '  j:',
+      '    runs-on: ubuntu-latest',
+    ].join('\n');
+    // `parent` is the innermost key (PR_TITLE); the enclosing `env` block is only visible via parentChain.
+    const envEvts = parseWorkflow(yml).events.filter(e => e.parent === 'PR_TITLE');
+    assert.equal(envEvts.length, 1);
+    assert.ok(envEvts[0].parentChain.includes('env'));
+  });
+
+  it('parent is the input key, with "with" in parentChain, for action input', () => {
+    const yml = [
+      'on: pull_request',
+      'jobs:',
+      '  j:',
+      '    steps:',
+      '      - uses: actions/checkout@v4',
+      '        with:',
+      '          ref: ${{ github.head_ref }}',
+    ].join('\n');
+    // Same shape as above: the event hangs off the input name (ref), nested under `with`.
+    const withEvts = parseWorkflow(yml).events.filter(e => e.parent === 'ref');
+    assert.equal(withEvts.length, 1);
+    assert.ok(withEvts[0].parentChain.includes('with'));
+  });
+});
+
+describe('parseWorkflow — no-op cases', () => {
+  it('returns empty events for workflow with no expressions', () => {
+    const workflowLines = [
+      'on: push',
+      'jobs:',
+      '  j:',
+      '    runs-on: ubuntu-latest',
+      '    steps:',
+      '      - run: echo hello',
+    ];
+    // No ${{ ... }} appears anywhere in the input, so no event may be produced.
+    assert.equal(parseWorkflow(workflowLines.join('\n')).events.length, 0);
+  });
+
+  it('strips comments before parsing', () => {
+    const workflowLines = [
+      'on: push',
+      '# comment ${{ github.head_ref }} should be ignored',
+      'jobs:',
+      '  j:',
+      '    runs-on: ubuntu-latest',
+    ];
+    // The only expression lives on a full-line comment and must not be reported.
+    assert.equal(parseWorkflow(workflowLines.join('\n')).events.length, 0);
+  });
+
+  it('handles multiple ${{ ... }} on a single line', () => {
+    const workflowLines = [
+      'on: pull_request_target',
+      'jobs:',
+      '  j:',
+      '    steps:',
+      '      - run: echo "${{ github.head_ref }} and ${{ github.event.pull_request.title }}"',
+    ];
+    // Two expressions share one run: line; each must yield its own run-context event.
+    const runEvents = parseWorkflow(workflowLines.join('\n')).events.filter((evt) => evt.parent === 'run');
+    assert.equal(runEvents.length, 2);
+  });
+});
+
+describe('parseWorkflow — line-number accuracy', () => {
+  it('reports correct line for inline run:', () => {
+    const workflowLines = [
+      'name: x',
+      'on: push',
+      '',
+      'jobs:',
+      '  j:',
+      '    steps:',
+      '      - run: echo "${{ github.head_ref }}"',
+    ];
+    // The expression is on the 7th line of the joined text (the blank third line counts).
+    assert.equal(parseWorkflow(workflowLines.join('\n')).events[0].line, 7);
+  });
+});
--- a/plugins/llm-security/tests/scanners/workflow-scanner.test.mjs
+++ b/plugins/llm-security/tests/scanners/workflow-scanner.test.mjs
@ -0,0 +1,148 @@
+// workflow-scanner.test.mjs — E11 integration tests against fixtures
+// in tests/fixtures/workflows/.{github,forgejo}/workflows/.
+
+import { describe, it, before } from 'node:test';
+import assert from 'node:assert/strict';
+import { resolve } from 'node:path';
+import { resetCounter } from '../../scanners/lib/output.mjs';
+
+const { scan, discoverWorkflows } = await import('../../scanners/workflow-scanner.mjs');
+
+const FIXTURE_DIR = resolve(import.meta.dirname, '../fixtures/workflows');
+
+/** Returns the findings whose `file` contains fileSubstr; a falsy `file` is treated as ''. */
+const findingsByFile = (findings, fileSubstr) =>
+  findings.filter((entry) => (entry.file || '').includes(fileSubstr));
+
+describe('workflow-scanner — discoverWorkflows', () => {
+  it('finds .yml files in .github/workflows/ and .forgejo/workflows/', async () => {
+    const files = await discoverWorkflows(FIXTURE_DIR);
+    const githubCount = files.filter(f => f.includes('/.github/workflows/')).length;
+    const forgejoCount = files.filter(f => f.includes('/.forgejo/workflows/')).length;
+    assert.ok(githubCount >= 5, `expected ≥5 GitHub fixtures, got ${githubCount}`);
+    assert.ok(forgejoCount >= 2, `expected ≥2 Forgejo fixtures, got ${forgejoCount}`);
+  });
+
+  it('returns empty array for path with no workflow dirs', async () => {
+    // Probe this test directory, not the machine-global '/tmp' (assumes no .github/.forgejo dir lives beside this file).
+    assert.deepEqual(await discoverWorkflows(import.meta.dirname), []);
+  });
+});
+
+describe('workflow-scanner — true-positive cases', () => {
+  // Row layout: [test title, fixture file name, expected severity, pattern the evidence must match].
+  const expectations = [
+    [
+      'flags github.head_ref under pull_request_target as HIGH',
+      'tp-prtarget-head-ref.yml', 'high', /github\.head_ref/,
+    ],
+    [
+      'flags discussion.title under discussion as HIGH (gluestack CVE pattern)',
+      'tp-discussion-title.yml', 'high', /discussion\.title/,
+    ],
+    [
+      'flags comment.body under pull_request as MEDIUM',
+      'tp-pull-request-comment.yml', 'medium', /comment\.body/,
+    ],
+    [
+      'flags issue.body inside `run: |` block-scalar as HIGH',
+      'tp-block-scalar-run.yml', 'high', /issue\.body/,
+    ],
+  ];
+  // A single scan of the fixture tree, taken once up front, is shared by every row.
+  let scanResult;
+  before(async () => {
+    resetCounter();
+    scanResult = await scan(FIXTURE_DIR);
+  });
+
+  for (const [title, fixtureName, severity, evidencePattern] of expectations) {
+    it(title, () => {
+      const hits = findingsByFile(scanResult.findings, fixtureName);
+      assert.equal(hits.length, 1);
+      assert.equal(hits[0].severity, severity);
+      assert.match(hits[0].evidence, evidencePattern);
+    });
+  }
+});
+
+describe('workflow-scanner — false-positive suppression', () => {
+  let scanResult;
+  before(async () => {
+    resetCounter();
+    scanResult = await scan(FIXTURE_DIR);
+  });
+
+  it('does NOT flag head_ref inside `if:` (sink mismatch)', () => {
+    const hits = findingsByFile(scanResult.findings, 'fp-if-context.yml');
+    assert.equal(hits.length, 0, `expected no findings, got: ${JSON.stringify(hits)}`);
+  });
+
+  it('does NOT flag pull_request.title inside top-level `env:` mapping', () => {
+    // The fixture binds the title in a workflow-level env: block and only echoes "$PR_TITLE" in run:.
+    assert.equal(findingsByFile(scanResult.findings, 'fp-env-block.yml').length, 0);
+  });
+});
+
+describe('workflow-scanner — INFO classification', () => {
+  let scanResult;
+  before(async () => {
+    resetCounter();
+    scanResult = await scan(FIXTURE_DIR);
+  });
+
+  it('reports github.event.pull_request.number as INFO (numeric/safe)', () => {
+    // Exactly one finding for the fixture, and that finding carries severity "info".
+    const severities = findingsByFile(scanResult.findings, 'fp-numeric-field.yml').map((hit) => hit.severity);
+    assert.deepEqual(severities, ['info']);
+  });
+});
+
+describe('workflow-scanner — Forgejo platform', () => {
+  let scanResult;
+  before(async () => {
+    resetCounter();
+    scanResult = await scan(FIXTURE_DIR);
+  });
+
+  it('flags forgejo.head_ref under pull_request as MEDIUM', () => {
+    const hits = findingsByFile(scanResult.findings, 'forgejo-tp.yml');
+    assert.equal(hits.length, 1);
+    assert.equal(hits[0].severity, 'medium');
+    assert.match(hits[0].file, /\.forgejo\/workflows\//);
+    assert.match(hits[0].recommendation, /Forgejo/);
+  });
+
+  it('treats workflow_run as privileged on Forgejo (HIGH severity preserved)', () => {
+    // Exactly one finding for the workflow_run fixture, rated "high".
+    const severities = findingsByFile(scanResult.findings, 'forgejo-workflow-run.yml').map((hit) => hit.severity);
+    assert.deepEqual(severities, ['high']);
+  });
+});
+
+describe('workflow-scanner — output envelope', () => {
+  it('returns scannerResult with status=ok and counts', async () => {
+    resetCounter();
+    const r = await scan(FIXTURE_DIR);
+    assert.equal(r.status, 'ok');
+    assert.equal(r.scanner, 'workflow');
+    assert.ok(r.files_scanned >= 7); // lower bound only
+    assert.equal(typeof r.duration_ms, 'number');
+    assert.ok(r.counts.high + r.counts.medium + r.counts.info >= 7); // lower bound only
+  });
+
+  it('emits findings with WFL scanner prefix in id', async () => {
+    resetCounter();
+    const r = await scan(FIXTURE_DIR);
+    for (const f of r.findings) {
+      assert.match(f.id, /^DS-WFL-\d{3}$/);
+      assert.equal(f.scanner, 'WFL');
+    }
+  });
+
+  it('returns ok with no findings when target has no workflow dirs', async () => {
+    resetCounter();
+    const r = await scan(import.meta.dirname); // this test dir, not machine-global '/tmp' (assumed to hold no workflow dirs)
+    assert.equal(r.status, 'ok');
+    assert.equal(r.findings.length, 0);
+  });
+});