feat(workflow-scanner): E11 part 1 — core file-walk + 23-field blacklist + sink-restriction

Adds a deterministic GitHub Actions / Forgejo Actions injection
scanner. Detects ${{ <dangerous-field> }} interpolations inside
`run:` step blocks under privileged or semi-privileged triggers.
Sink-restricted: `if:` / `with:` / `env:` (block-level) are
evaluated by the runner expression engine, not the shell, so they
are NOT injection sinks and are suppressed at parser level.

Why: workflow expression injection is the most prevalent SAST class
on GitHub (CodeQL preview: 800K+ findings across 158K repos). The
graduated severity matrix (HIGH for pull_request_target / discussion
/ workflow_run; MEDIUM for pull_request / workflow_dispatch) is the
community-converged calibration target — uniform HIGH causes alert
fatigue.

Components:
- scanners/lib/workflow-yaml-state.mjs — line-based YAML state
  machine. Tracks indentation, parent-context stack, and
  `run: |` / `run: >` block-scalar entry/exit. Zero deps.
- scanners/workflow-scanner.mjs — discoverWorkflows() probes
  .github/workflows/ and .forgejo/workflows/ directly (file-discovery
  has no glob include). 23-field blacklist (GHSL 17 + 6 GlueStack-
  class additions). Platform encoded via file path; no schema
  extension to finding(). Forgejo-specific: workflow_run advisory
  emitted to stderr; recommendation text mentions Forgejo's
  server-level token scoping (job-level permissions: is ignored).
- knowledge/workflow-injection-patterns.md — 23-field blacklist,
  trigger taxonomy, severity matrix, Forgejo divergences, NVD CVE
  corpus.

Tests (47 new):
- tests/lib/workflow-yaml-state.test.mjs (15): trigger forms
  (string / inline-list / block-list / block-mapping), single-line
  run, block-scalar | and > tracking, env/with sink-mismatch,
  multi-line, comment stripping, line-number accuracy.
- tests/scanners/workflow-scanner.test.mjs (14): TP head_ref
  pull_request_target, TP discussion.title gluestack pattern,
  TP comment.body pull_request, TP issue.body block-scalar,
  FP if-context, FP env-block, INFO numeric, Forgejo TP, Forgejo
  workflow_run advisory, envelope shape, WFL prefix.
- 9 fixtures in tests/fixtures/workflows/{.github,.forgejo}/workflows/.

Out of scope (B4 / Batch D):
- Re-interpolation detection (env.VAR after env: from blacklisted source)
- github.actor authorization-bypass category
- WFL prefix in severity.mjs OWASP maps + scan-orchestrator
  registration (B4)
- Composite-action input tracing, GITHUB_ENV poisoning (Batch D)

Test count: 1685 → 1732 (+47). Pre-compact-scan flake unchanged
(passes in isolation).
This commit is contained in:
Kjell Tore Guttormsen 2026-04-30 15:48:48 +02:00
commit c31d4b1718
14 changed files with 1167 additions and 0 deletions

View file

@ -0,0 +1,11 @@
# Forgejo true-positive fixture: a PR-controlled ref is interpolated directly
# into a shell step under the pull_request trigger. The scanner tests expect
# forgejo.head_ref under pull_request to be reported as MEDIUM.
name: forgejo head_ref echo (TP — Forgejo + pull_request)
on:
  pull_request:
    branches: [main]
jobs:
  echo-ref:
    runs-on: ubuntu-latest
    steps:
      - name: Echo head ref
        run: echo "Forgejo head_ref ${{ forgejo.head_ref }}"

View file

@ -0,0 +1,12 @@
# Forgejo fixture for the workflow_run trigger: a PR title is interpolated
# directly into a shell step. The scanner tests expect workflow_run to be
# treated as privileged here (a single HIGH finding).
name: forgejo workflow_run divergence (advisory)
on:
  workflow_run:
    workflows: ["build"]
    types: [completed]
jobs:
  echo:
    runs-on: ubuntu-latest
    steps:
      - name: Echo title
        run: echo "Title was ${{ forgejo.event.pull_request.title }}"

View file

@ -0,0 +1,14 @@
# False-positive fixture: the untrusted PR title is bound in a workflow-level
# env: mapping, and the shell step only reads the resulting variable.
# The scanner tests expect no finding for this file.
name: env block-level mapping (FP — bind, not exec)
on:
  pull_request_target:
    branches: [main]
env:
  PR_TITLE: ${{ github.event.pull_request.title }}
jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - name: Use env safely
        run: echo "$PR_TITLE"

View file

@ -0,0 +1,12 @@
# False-positive fixture: the PR-controlled ref appears only inside a
# job-level if: condition, never in a shell step.
# The scanner tests expect no finding for this file.
name: if-context evaluation (FP — engine, not shell)
on:
  pull_request_target:
    branches: [main]
jobs:
  conditional:
    runs-on: ubuntu-latest
    if: ${{ startsWith(github.head_ref, 'release/') }}
    steps:
      - name: Run only on release branches
        run: echo "release branch detected"

View file

@ -0,0 +1,11 @@
# INFO fixture: a numeric event field (the PR number) is interpolated into a
# shell step. The scanner tests expect exactly one finding with severity info.
name: numeric-field run: (FP/INFO — character-set guarantees no shell metas)
on:
  pull_request:
    branches: [main]
jobs:
  log-pr-number:
    runs-on: ubuntu-latest
    steps:
      - name: Echo PR number
        run: echo "PR ${{ github.event.pull_request.number }}"

View file

@ -0,0 +1,14 @@
# True-positive fixture: the dangerous interpolation sits on the second line
# of a literal block scalar, so the body lines must be indented deeper than
# the run: key. The scanner tests expect one HIGH finding for issue.body.
name: multi-line run scalar (TP — block-scalar tracking)
on:
  pull_request_target:
    branches: [main]
jobs:
  log:
    runs-on: ubuntu-latest
    steps:
      - name: Multi-line script
        run: |
          echo "Issue title:"
          echo "${{ github.event.issue.body }}"
          echo "----"

View file

@ -0,0 +1,11 @@
# True-positive fixture: a discussion title is interpolated directly into a
# shell step under the discussion trigger.
# The scanner tests expect one HIGH finding for discussion.title.
name: discussion welcome (TP — gluestack CVE-2025-53104 pattern)
on:
  discussion:
    types: [created]
jobs:
  greet:
    runs-on: ubuntu-latest
    steps:
      - name: Welcome
        run: echo "New discussion: ${{ github.event.discussion.title }}"

View file

@ -0,0 +1,11 @@
# True-positive fixture: the PR head ref is interpolated directly into a
# shell step under pull_request_target.
# The scanner tests expect one HIGH finding for github.head_ref.
name: pwn-request demo (TP)
on:
  pull_request_target:
    branches: [main]
jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - name: Echo head ref
        run: echo "Building branch ${{ github.head_ref }}"

View file

@ -0,0 +1,11 @@
# True-positive fixture: a comment body is interpolated directly into a shell
# step under the pull_request trigger.
# The scanner tests expect one MEDIUM finding for comment.body.
name: comment echo (TP — pull_request, MEDIUM)
on:
  pull_request:
    types: [opened, edited]
jobs:
  echo-comment:
    runs-on: ubuntu-latest
    steps:
      - name: Echo body
        run: echo "Comment said ${{ github.event.comment.body }}"

View file

@ -0,0 +1,193 @@
// workflow-yaml-state.test.mjs — unit tests for E11 line-based state machine.
import { describe, it } from 'node:test';
import assert from 'node:assert/strict';
const { parseWorkflow, extractTriggers } = await import('../../scanners/lib/workflow-yaml-state.mjs');
// extractTriggers is fed an array of workflow lines and must return a
// Set-like collection of trigger names for every spelling of the `on:` key.
describe('extractTriggers', () => {
  it('handles `on: push` (string form)', () => {
    const t = extractTriggers(['on: push']);
    assert.deepEqual([...t], ['push']);
  });

  it('handles `on: [push, pull_request_target]` (inline list)', () => {
    const t = extractTriggers(['on: [push, pull_request_target]']);
    assert.deepEqual([...t].sort(), ['pull_request_target', 'push']);
  });

  it('handles block list', () => {
    const text = ['on:', '  - push', '  - pull_request'];
    const t = extractTriggers(text);
    assert.deepEqual([...t].sort(), ['pull_request', 'push']);
  });

  it('handles block mapping', () => {
    // `branches:` is nested one level below its trigger so that only the two
    // trigger keys are direct children of `on:`.
    const text = [
      'on:',
      '  pull_request_target:',
      '    branches: [main]',
      '  discussion:',
      'jobs:',
    ];
    const t = extractTriggers(text);
    assert.ok(t.has('pull_request_target'));
    assert.ok(t.has('discussion'));
  });

  it('returns empty set when no `on:` block found', () => {
    const t = extractTriggers([
      'name: hello',
      'jobs:',
      '  build:',
      '    runs-on: ubuntu-latest',
    ]);
    assert.equal(t.size, 0);
  });
});
// parseWorkflow takes the workflow text and returns `{ events }`; each event
// carries the expression text plus the key it was found under (`parent`).
describe('parseWorkflow — single-line run:', () => {
  it('emits a run-context event for ${{ ... }} in inline run:', () => {
    const yml = [
      'on: pull_request_target',
      'jobs:',
      '  j:',
      '    steps:',
      '      - name: echo',
      '        run: echo "${{ github.head_ref }}"',
    ].join('\n');
    const { events } = parseWorkflow(yml);
    const runs = events.filter(e => e.parent === 'run');
    assert.equal(runs.length, 1);
    assert.equal(runs[0].expr, 'github.head_ref');
    assert.equal(runs[0].blockScalar, false);
  });

  it('emits an if-context event (parent === "if") for if: expression', () => {
    const yml = [
      'on: pull_request_target',
      'jobs:',
      '  j:',
      '    if: ${{ startsWith(github.head_ref, "release/") }}',
      '    runs-on: ubuntu-latest',
    ].join('\n');
    const { events } = parseWorkflow(yml);
    const ifs = events.filter(e => e.parent === 'if');
    assert.ok(ifs.length >= 1);
    assert.ok(ifs[0].expr.startsWith('startsWith'));
  });
});
// Block-scalar tracking: expressions in the body of `run: |` / `run: >` must
// be attributed to the run key and flagged with blockScalar=true.
describe('parseWorkflow — block scalars', () => {
  it('tracks `run: |` body lines as run-context with blockScalar=true', () => {
    // Body lines are indented deeper than `run: |`; that extra indentation is
    // what makes them part of the scalar rather than sibling keys.
    const yml = [
      'on: pull_request_target',
      'jobs:',
      '  j:',
      '    steps:',
      '      - name: multi',
      '        run: |',
      '          echo "Issue title:"',
      '          echo "${{ github.event.issue.body }}"',
      '          echo done',
    ].join('\n');
    const { events } = parseWorkflow(yml);
    const runs = events.filter(e => e.parent === 'run');
    assert.equal(runs.length, 1);
    assert.equal(runs[0].expr, 'github.event.issue.body');
    assert.equal(runs[0].blockScalar, true);
    // 1-based line of the interpolation within `yml`.
    assert.equal(runs[0].line, 8);
  });

  it('tracks `run: >` (folded scalar) the same way', () => {
    const yml = [
      'on: pull_request',
      'jobs:',
      '  j:',
      '    steps:',
      '      - name: folded',
      '        run: >',
      '          echo ${{ github.event.pull_request.title }}',
    ].join('\n');
    const { events } = parseWorkflow(yml);
    assert.ok(events.some(e => e.parent === 'run' && e.blockScalar));
  });
});
// Non-shell contexts: for a value under env:/with:, the event's `parent` is
// the immediate mapping key, and the enclosing env/with key is visible in
// `parentChain`.
describe('parseWorkflow — sink-mismatch contexts', () => {
  it('records "env" in parentChain for top-level env: mapping with ${{ ... }}', () => {
    const yml = [
      'on: pull_request_target',
      'env:',
      '  PR_TITLE: ${{ github.event.pull_request.title }}',
      'jobs:',
      '  j:',
      '    runs-on: ubuntu-latest',
    ].join('\n');
    const { events } = parseWorkflow(yml);
    const envEvts = events.filter(e => e.parent === 'PR_TITLE');
    assert.equal(envEvts.length, 1);
    assert.ok(envEvts[0].parentChain.includes('env'));
  });

  it('records "with" in parentChain for action input', () => {
    const yml = [
      'on: pull_request',
      'jobs:',
      '  j:',
      '    steps:',
      '      - uses: actions/checkout@v4',
      '        with:',
      '          ref: ${{ github.head_ref }}',
    ].join('\n');
    const { events } = parseWorkflow(yml);
    const withEvts = events.filter(e => e.parent === 'ref');
    assert.equal(withEvts.length, 1);
    assert.ok(withEvts[0].parentChain.includes('with'));
  });
});
// Edge cases around event emission: nothing to report, expressions hidden in
// comments, and several expressions sharing one line.
describe('parseWorkflow — no-op cases', () => {
  it('returns empty events for workflow with no expressions', () => {
    const yml = [
      'on: push',
      'jobs:',
      '  j:',
      '    runs-on: ubuntu-latest',
      '    steps:',
      '      - run: echo hello',
    ].join('\n');
    const { events } = parseWorkflow(yml);
    assert.equal(events.length, 0);
  });

  it('strips comments before parsing', () => {
    const yml = [
      'on: push',
      '# comment ${{ github.head_ref }} should be ignored',
      'jobs:',
      '  j:',
      '    runs-on: ubuntu-latest',
    ].join('\n');
    const { events } = parseWorkflow(yml);
    assert.equal(events.length, 0);
  });

  it('handles multiple ${{ ... }} on a single line', () => {
    const yml = [
      'on: pull_request_target',
      'jobs:',
      '  j:',
      '    steps:',
      '      - run: echo "${{ github.head_ref }} and ${{ github.event.pull_request.title }}"',
    ].join('\n');
    const { events } = parseWorkflow(yml);
    const runs = events.filter(e => e.parent === 'run');
    assert.equal(runs.length, 2);
  });
});
// Reported line numbers are 1-based and must count blank lines too.
describe('parseWorkflow — line-number accuracy', () => {
  it('reports correct line for inline run:', () => {
    const yml = [
      'name: x',
      'on: push',
      '',
      'jobs:',
      '  j:',
      '    steps:',
      '      - run: echo "${{ github.head_ref }}"',
    ].join('\n');
    const { events } = parseWorkflow(yml);
    // Guard first so a missing event fails as an assertion, not a TypeError.
    assert.ok(events.length >= 1, 'expected at least one event');
    assert.equal(events[0].line, 7);
  });
});

View file

@ -0,0 +1,148 @@
// workflow-scanner.test.mjs — E11 integration tests against fixtures
// in tests/fixtures/workflows/.{github,forgejo}/workflows/.
import { describe, it, before } from 'node:test';
import assert from 'node:assert/strict';
import { resolve } from 'node:path';
import { resetCounter } from '../../scanners/lib/output.mjs';
const { scan, discoverWorkflows } = await import('../../scanners/workflow-scanner.mjs');
// Absolute path of the fixture root, resolved relative to this test file so
// the suite does not depend on the current working directory.
const FIXTURE_DIR = resolve(import.meta.dirname, '../fixtures/workflows');
/**
 * Pick out the findings whose `file` value contains a given fragment.
 *
 * @param {Array<object>} findings - Finding objects; a missing or falsy
 *   `file` is treated as the empty string.
 * @param {string} fileSubstr - Fragment to look for inside each `file`.
 * @returns {Array<object>} New array holding the matching findings, in their
 *   original order.
 */
function findingsByFile(findings, fileSubstr) {
  return findings.reduce((kept, finding) => {
    const path = finding.file || '';
    if (path.includes(fileSubstr)) {
      kept.push(finding);
    }
    return kept;
  }, []);
}
// discoverWorkflows(root) resolves to a list of workflow file paths found
// under root's .github/workflows/ and .forgejo/workflows/ directories.
describe('workflow-scanner — discoverWorkflows', () => {
  it('finds .yml files in .github/workflows/ and .forgejo/workflows/', async () => {
    const files = await discoverWorkflows(FIXTURE_DIR);
    const githubCount = files.filter(f => f.includes('/.github/workflows/')).length;
    const forgejoCount = files.filter(f => f.includes('/.forgejo/workflows/')).length;
    assert.ok(githubCount >= 5, `expected ≥5 GitHub fixtures, got ${githubCount}`);
    assert.ok(forgejoCount >= 2, `expected ≥2 Forgejo fixtures, got ${forgejoCount}`);
  });

  it('returns empty array for path with no workflow dirs', async () => {
    // Use a freshly created directory instead of the shared system temp
    // root, whose contents this test does not control.
    const { mkdtemp, rm } = await import('node:fs/promises');
    const { tmpdir } = await import('node:os');
    const emptyDir = await mkdtemp(resolve(tmpdir(), 'wfl-empty-'));
    try {
      const files = await discoverWorkflows(emptyDir);
      assert.deepEqual(files, []);
    } finally {
      await rm(emptyDir, { recursive: true, force: true });
    }
  });
});
// True positives: each fixture named below must yield exactly one finding
// with the expected severity and the offending expression in its evidence.
describe('workflow-scanner — true-positive cases', () => {
  let report;

  before(async () => {
    resetCounter();
    report = await scan(FIXTURE_DIR);
  });

  // Asserts that the fixture produced a single finding and hands it back.
  const soleFinding = (fixtureName) => {
    const hits = findingsByFile(report.findings, fixtureName);
    assert.equal(hits.length, 1);
    return hits[0];
  };

  it('flags github.head_ref under pull_request_target as HIGH', () => {
    const hit = soleFinding('tp-prtarget-head-ref.yml');
    assert.equal(hit.severity, 'high');
    assert.match(hit.evidence, /github\.head_ref/);
  });

  it('flags discussion.title under discussion as HIGH (gluestack CVE pattern)', () => {
    const hit = soleFinding('tp-discussion-title.yml');
    assert.equal(hit.severity, 'high');
    assert.match(hit.evidence, /discussion\.title/);
  });

  it('flags comment.body under pull_request as MEDIUM', () => {
    const hit = soleFinding('tp-pull-request-comment.yml');
    assert.equal(hit.severity, 'medium');
    assert.match(hit.evidence, /comment\.body/);
  });

  it('flags issue.body inside `run: |` block-scalar as HIGH', () => {
    const hit = soleFinding('tp-block-scalar-run.yml');
    assert.equal(hit.severity, 'high');
    assert.match(hit.evidence, /issue\.body/);
  });
});
// Sink-mismatch fixtures: expressions outside shell steps must produce no
// findings at all.
describe('workflow-scanner — false-positive suppression', () => {
  let report;

  before(async () => {
    resetCounter();
    report = await scan(FIXTURE_DIR);
  });

  it('does NOT flag head_ref inside `if:` (sink mismatch)', () => {
    const hits = findingsByFile(report.findings, 'fp-if-context.yml');
    const detail = `expected no findings, got: ${JSON.stringify(hits)}`;
    assert.equal(hits.length, 0, detail);
  });

  it('does NOT flag pull_request.title inside top-level `env:` mapping', () => {
    const hits = findingsByFile(report.findings, 'fp-env-block.yml');
    assert.equal(hits.length, 0);
  });
});
// Numeric event fields are still reported, but only at info severity.
describe('workflow-scanner — INFO classification', () => {
  let report;

  before(async () => {
    resetCounter();
    report = await scan(FIXTURE_DIR);
  });

  it('reports github.event.pull_request.number as INFO (numeric/safe)', () => {
    const hits = findingsByFile(report.findings, 'fp-numeric-field.yml');
    assert.equal(hits.length, 1);
    const [hit] = hits;
    assert.equal(hit.severity, 'info');
  });
});
// Forgejo fixtures: findings are located under .forgejo/workflows/ and the
// recommendation text must mention Forgejo.
describe('workflow-scanner — Forgejo platform', () => {
  let report;

  before(async () => {
    resetCounter();
    report = await scan(FIXTURE_DIR);
  });

  it('flags forgejo.head_ref under pull_request as MEDIUM', () => {
    const hits = findingsByFile(report.findings, 'forgejo-tp.yml');
    assert.equal(hits.length, 1);
    const [hit] = hits;
    assert.equal(hit.severity, 'medium');
    assert.match(hit.file, /\.forgejo\/workflows\//);
    assert.match(hit.recommendation, /Forgejo/);
  });

  it('treats workflow_run as privileged on Forgejo (HIGH severity preserved)', () => {
    const hits = findingsByFile(report.findings, 'forgejo-workflow-run.yml');
    assert.equal(hits.length, 1);
    const [hit] = hits;
    assert.equal(hit.severity, 'high');
  });
});
// Shape of the object scan() resolves to: status, scanner name, file count,
// duration, per-severity counts, and the id/scanner fields on each finding.
describe('workflow-scanner — output envelope', () => {
  it('returns scannerResult with status=ok and counts', async () => {
    resetCounter();
    const r = await scan(FIXTURE_DIR);
    assert.equal(r.status, 'ok');
    assert.equal(r.scanner, 'workflow');
    assert.ok(r.files_scanned >= 7);
    assert.ok(typeof r.duration_ms === 'number');
    assert.ok(r.counts.high + r.counts.medium + r.counts.info >= 7);
  });

  it('emits findings with WFL scanner prefix in id', async () => {
    resetCounter();
    const r = await scan(FIXTURE_DIR);
    // Without this guard the loop below would pass vacuously on an empty list.
    assert.ok(r.findings.length > 0, 'expected at least one finding to inspect');
    for (const f of r.findings) {
      assert.match(f.id, /^DS-WFL-\d{3}$/);
      assert.equal(f.scanner, 'WFL');
    }
  });

  it('returns ok with no findings on empty target', async () => {
    resetCounter();
    // Scan a freshly created directory instead of the shared system temp
    // root, whose contents this test does not control.
    const { mkdtemp, rm } = await import('node:fs/promises');
    const { tmpdir } = await import('node:os');
    const emptyDir = await mkdtemp(resolve(tmpdir(), 'wfl-empty-'));
    try {
      const r = await scan(emptyDir);
      assert.equal(r.status, 'ok');
      assert.equal(r.findings.length, 0);
    } finally {
      await rm(emptyDir, { recursive: true, force: true });
    }
  });
});