ktg-plugin-marketplace/plugins/llm-security/tests/lib/workflow-yaml-state.test.mjs
Kjell Tore Guttormsen c31d4b1718 feat(workflow-scanner): E11 part 1 — core file-walk + 23-field blacklist + sink-restriction
Adds a deterministic GitHub Actions / Forgejo Actions injection
scanner. Detects ${{ <dangerous-field> }} interpolations inside
`run:` step blocks under privileged or semi-privileged triggers.
Sink-restricted: `if:` / `with:` / `env:` (block-level) are
evaluated by the runner expression engine, not the shell, so they
are NOT injection sinks and are suppressed at parser level.

Why: workflow expression injection is the most prevalent SAST class
on GitHub (CodeQL preview: 800K+ findings across 158K repos). The
graduated severity matrix (HIGH for pull_request_target / discussion
/ workflow_run; MEDIUM for pull_request / workflow_dispatch) is the
community-converged calibration target — uniform HIGH causes alert
fatigue.

Components:
- scanners/lib/workflow-yaml-state.mjs — line-based YAML state
  machine. Tracks indentation, parent-context stack, and
  `run: |` / `run: >` block-scalar entry/exit. Zero deps.
- scanners/workflow-scanner.mjs — discoverWorkflows() probes
  .github/workflows/ and .forgejo/workflows/ directly (file-discovery
  has no glob include). 23-field blacklist (GHSL 17 + 6 GlueStack-
  class additions). Platform encoded via file path; no schema
  extension to finding(). Forgejo-specific: workflow_run advisory
  emitted to stderr; recommendation text mentions Forgejo's
  server-level token scoping (job-level permissions: is ignored).
- knowledge/workflow-injection-patterns.md — 23-field blacklist,
  trigger taxonomy, severity matrix, Forgejo divergences, NVD CVE
  corpus.

Tests (47 new):
- tests/lib/workflow-yaml-state.test.mjs (15): trigger forms
  (string / inline-list / block-list / block-mapping), single-line
  run, block-scalar | and > tracking, env/with sink-mismatch,
  multi-line, comment stripping, line-number accuracy.
- tests/scanners/workflow-scanner.test.mjs (14): TP head_ref
  pull_request_target, TP discussion.title gluestack pattern,
  TP comment.body pull_request, TP issue.body block-scalar,
  FP if-context, FP env-block, INFO numeric, Forgejo TP, Forgejo
  workflow_run advisory, envelope shape, WFL prefix.
- 9 fixtures in tests/fixtures/workflows/{.github,.forgejo}/workflows/.

Out of scope (B4 / Batch D):
- Re-interpolation detection (env.VAR after env: from blacklisted source)
- github.actor authorization-bypass category
- WFL prefix in severity.mjs OWASP maps + scan-orchestrator
  registration (B4)
- Composite-action input tracing, GITHUB_ENV poisoning (Batch D)

Test count: 1685 → 1732 (+47). Pre-compact-scan flake unchanged
(passes in isolation).
2026-04-30 15:48:48 +02:00

193 lines
5.9 KiB
JavaScript

// workflow-yaml-state.test.mjs — unit tests for E11 line-based state machine.
import { describe, it } from 'node:test';
import assert from 'node:assert/strict';
const { parseWorkflow, extractTriggers } = await import('../../scanners/lib/workflow-yaml-state.mjs');
// extractTriggers is given the workflow as an array of lines. The tests below
// treat its return value as a Set: it is spread, and queried via .has()/.size.
describe('extractTriggers', () => {
  it('handles `on: push` (string form)', () => {
    const t = extractTriggers(['on: push']);
    assert.deepEqual([...t], ['push']);
  });

  it('handles `on: [push, pull_request_target]` (inline list)', () => {
    const t = extractTriggers(['on: [push, pull_request_target]']);
    // Sort before comparing so the assertion does not depend on insertion order.
    assert.deepEqual([...t].sort(), ['pull_request_target', 'push']);
  });

  it('handles block list', () => {
    const text = ['on:', '  - push', '  - pull_request'];
    const t = extractTriggers(text);
    assert.deepEqual([...t].sort(), ['pull_request', 'push']);
  });

  it('handles block mapping', () => {
    // `branches:` is nested one level below its trigger so that only
    // `pull_request_target` and `discussion` sit at trigger depth; the
    // following top-level `jobs:` key closes the `on:` block.
    const text = [
      'on:',
      '  pull_request_target:',
      '    branches: [main]',
      '  discussion:',
      'jobs:',
    ];
    const t = extractTriggers(text);
    assert.ok(t.has('pull_request_target'));
    assert.ok(t.has('discussion'));
  });

  it('returns empty set when no `on:` block found', () => {
    const t = extractTriggers([
      'name: hello',
      'jobs:',
      '  build:',
      '    runs-on: ubuntu-latest',
    ]);
    assert.equal(t.size, 0);
  });
});
// parseWorkflow receives the whole workflow as one newline-joined string and
// returns an object with an `events` array. Each event is inspected here via
// its `parent`, `expr` and `blockScalar` fields.
// NOTE: fixtures use single-quoted strings on purpose so `${{ ... }}` stays
// literal text instead of becoming a template-literal substitution.
describe('parseWorkflow — single-line run:', () => {
  it('emits a run-context event for ${{ ... }} in inline run:', () => {
    const yml = [
      'on: pull_request_target',
      'jobs:',
      '  j:',
      '    steps:',
      '      - name: echo',
      '        run: echo "${{ github.head_ref }}"',
    ].join('\n');
    const { events } = parseWorkflow(yml);
    const runs = events.filter((e) => e.parent === 'run');
    assert.equal(runs.length, 1);
    assert.equal(runs[0].expr, 'github.head_ref');
    assert.equal(runs[0].blockScalar, false);
  });

  it('emits an if-context event (parent === "if") for if: expression', () => {
    const yml = [
      'on: pull_request_target',
      'jobs:',
      '  j:',
      '    if: ${{ startsWith(github.head_ref, "release/") }}',
      '    runs-on: ubuntu-latest',
    ].join('\n');
    const { events } = parseWorkflow(yml);
    const ifs = events.filter((e) => e.parent === 'if');
    assert.ok(ifs.length >= 1);
    assert.ok(ifs[0].expr.startsWith('startsWith'));
  });
});
// Block-scalar coverage: expressions inside the body of `run: |` (literal) and
// `run: >` (folded) must be reported with parent 'run' and blockScalar=true.
// The body lines are indented deeper than the `run:` key, as YAML requires
// for block-scalar content.
describe('parseWorkflow — block scalars', () => {
  it('tracks `run: |` body lines as run-context with blockScalar=true', () => {
    const yml = [
      'on: pull_request_target',
      'jobs:',
      '  j:',
      '    steps:',
      '      - name: multi',
      '        run: |',
      '          echo "Issue title:"',
      '          echo "${{ github.event.issue.body }}"',
      '          echo done',
    ].join('\n');
    const { events } = parseWorkflow(yml);
    const runs = events.filter((e) => e.parent === 'run');
    assert.equal(runs.length, 1);
    assert.equal(runs[0].expr, 'github.event.issue.body');
    assert.equal(runs[0].blockScalar, true);
    // The expression sits on the 8th line of the fixture (1-indexed).
    assert.equal(runs[0].line, 8);
  });

  it('tracks `run: >` (folded scalar) the same way', () => {
    const yml = [
      'on: pull_request',
      'jobs:',
      '  j:',
      '    steps:',
      '      - name: folded',
      '        run: >',
      '          echo ${{ github.event.pull_request.title }}',
    ].join('\n');
    const { events } = parseWorkflow(yml);
    assert.ok(events.find((e) => e.parent === 'run' && e.blockScalar));
  });
});
// Sink-mismatch coverage: expressions under `env:` / `with:` mappings are
// reported with the immediate key (e.g. PR_TITLE, ref) as `parent`, while the
// enclosing `env` / `with` context is exposed through `parentChain`.
// Test titles describe exactly what is asserted.
describe('parseWorkflow — sink-mismatch contexts', () => {
  it('parentChain includes "env" for top-level env: mapping with ${{ ... }}', () => {
    const yml = [
      'on: pull_request_target',
      'env:',
      '  PR_TITLE: ${{ github.event.pull_request.title }}',
      'jobs:',
      '  j:',
      '    runs-on: ubuntu-latest',
    ].join('\n');
    const { events } = parseWorkflow(yml);
    const envEvts = events.filter((e) => e.parent === 'PR_TITLE');
    assert.equal(envEvts.length, 1);
    assert.ok(envEvts[0].parentChain.includes('env'));
  });

  it('parentChain includes "with" for action input', () => {
    const yml = [
      'on: pull_request',
      'jobs:',
      '  j:',
      '    steps:',
      '      - uses: actions/checkout@v4',
      '        with:',
      '          ref: ${{ github.head_ref }}',
    ].join('\n');
    const { events } = parseWorkflow(yml);
    const withEvts = events.filter((e) => e.parent === 'ref');
    assert.equal(withEvts.length, 1);
    assert.ok(withEvts[0].parentChain.includes('with'));
  });
});
// Edge cases around event emission: workflows with no expressions, expressions
// that only appear inside a YAML comment, and several expressions on one line.
describe('parseWorkflow — no-op cases', () => {
  it('returns empty events for workflow with no expressions', () => {
    const yml = [
      'on: push',
      'jobs:',
      '  j:',
      '    runs-on: ubuntu-latest',
      '    steps:',
      '      - run: echo hello',
    ].join('\n');
    const { events } = parseWorkflow(yml);
    assert.equal(events.length, 0);
  });

  it('strips comments before parsing', () => {
    const yml = [
      'on: push',
      '# comment ${{ github.head_ref }} should be ignored',
      'jobs:',
      '  j:',
      '    runs-on: ubuntu-latest',
    ].join('\n');
    const { events } = parseWorkflow(yml);
    assert.equal(events.length, 0);
  });

  it('handles multiple ${{ ... }} on a single line', () => {
    const yml = [
      'on: pull_request_target',
      'jobs:',
      '  j:',
      '    steps:',
      '      - run: echo "${{ github.head_ref }} and ${{ github.event.pull_request.title }}"',
    ].join('\n');
    const { events } = parseWorkflow(yml);
    const runs = events.filter((e) => e.parent === 'run');
    // One event per interpolation, not one per line.
    assert.equal(runs.length, 2);
  });
});
// Line numbers reported on events are 1-indexed positions within the input
// text; blank lines count toward the total.
describe('parseWorkflow — line-number accuracy', () => {
  it('reports correct line for inline run:', () => {
    const yml = [
      'name: x',
      'on: push',
      '',
      'jobs:',
      '  j:',
      '    steps:',
      '      - run: echo "${{ github.head_ref }}"',
    ].join('\n');
    const { events } = parseWorkflow(yml);
    // Guard first so an empty result fails as an assertion with a clear
    // message instead of a TypeError on `events[0].line`.
    assert.ok(events.length >= 1, 'expected at least one event');
    assert.equal(events[0].line, 7);
  });
});