From e2c892407483dbc9bdb503e29bfc4500f5c777f0 Mon Sep 17 00:00:00 2001 From: Kjell Tore Guttormsen Date: Fri, 10 Apr 2026 12:49:10 +0200 Subject: [PATCH] feat(knowledge): add MITRE ATLAS IDs to OWASP files + Norwegian regulatory context --- .../knowledge/deepmind-agent-traps.md | 12 ++ .../knowledge/mcp-threat-patterns.md | 18 +++ .../knowledge/mitigation-matrix.md | 20 +-- .../knowledge/norwegian-context.md | 139 ++++++++++++++++++ .../knowledge/owasp-agentic-top10.md | 20 +-- .../llm-security/knowledge/owasp-llm-top10.md | 20 +++ .../knowledge/owasp-skills-top10.md | 20 +-- .../tests/scanners/knowledge-atlas.test.mjs | 82 +++++++++++ 8 files changed, 301 insertions(+), 30 deletions(-) create mode 100644 plugins/llm-security/knowledge/norwegian-context.md create mode 100644 plugins/llm-security/tests/scanners/knowledge-atlas.test.mjs diff --git a/plugins/llm-security/knowledge/deepmind-agent-traps.md b/plugins/llm-security/knowledge/deepmind-agent-traps.md index 0702510..340551b 100644 --- a/plugins/llm-security/knowledge/deepmind-agent-traps.md +++ b/plugins/llm-security/knowledge/deepmind-agent-traps.md @@ -10,6 +10,8 @@ Full taxonomy of AI agent traps from Google DeepMind's "AI Agent Traps" paper (A ## Category 1: Content Injection +**MITRE ATLAS:** AML.T0051 (LLM Prompt Injection), AML.T0043 (Craft Adversarial Data) + Attacks that embed malicious instructions in content the agent reads or processes. ### 1a. Steganography @@ -41,6 +43,8 @@ Instructions hidden in structural elements of content formats. ## Category 2: Semantic Manipulation +**MITRE ATLAS:** AML.T0051 (LLM Prompt Injection) + Attacks that manipulate the agent's reasoning without direct injection. ### 2a. Oversight Evasion @@ -72,6 +76,8 @@ Subtle reframing that overrides rules without explicit override language. ## Category 3: Context Manipulation +**MITRE ATLAS:** AML.T0058 (AI Agent Context Poisoning), AML.T0020 (Poison Training Data) + Attacks that poison the agent's memory or persistent state. 
| Technique | Description | Plugin Coverage | @@ -89,6 +95,8 @@ Attacks that poison the agent's memory or persistent state. ## Category 4: Multi-Agent Exploitation +**MITRE ATLAS:** AML.T0062 (Exfiltration via AI Agent Tool Invocation), AML.T0061 (AI Agent Tools) + Attacks that exploit trust relationships between agents in multi-agent systems. | Technique | Description | Plugin Coverage | @@ -103,6 +111,8 @@ Attacks that exploit trust relationships between agents in multi-agent systems. ## Category 5: Capability Manipulation +**MITRE ATLAS:** AML.T0061 (AI Agent Tools), AML.T0010 (ML Supply Chain Compromise) + Attacks that cause the agent to misuse its own tools or escalate privileges. | Technique | Description | Plugin Coverage | @@ -117,6 +127,8 @@ Attacks that cause the agent to misuse its own tools or escalate privileges. ## Category 6: Human-in-the-Loop Exploitation +**MITRE ATLAS:** AML.T0043 (Craft Adversarial Data) + Attacks that exploit the human review step in agent workflows. | Technique | Description | Plugin Coverage | diff --git a/plugins/llm-security/knowledge/mcp-threat-patterns.md b/plugins/llm-security/knowledge/mcp-threat-patterns.md index a12d417..6e3f474 100644 --- a/plugins/llm-security/knowledge/mcp-threat-patterns.md +++ b/plugins/llm-security/knowledge/mcp-threat-patterns.md @@ -12,6 +12,8 @@ MCP08 Lack of Audit · MCP09 Shadow MCP Servers · MCP10 Context Over-Sharing ## 1. Tool Poisoning +**MITRE ATLAS:** AML.T0043 (Craft Adversarial Data), AML.T0051 (LLM Prompt Injection) + ### Description Malicious instructions embedded in tool `description`, `name`, or parameter `description` fields that @@ -61,6 +63,8 @@ MCP03:2025 Tool Poisoning · LLM02:2025 Sensitive Information Disclosure · OWAS ## 2. 
Path Traversal +**MITRE ATLAS:** AML.T0037 (Data from Local System) + ### Description MCP file-system tools that accept path parameters without canonicalization allow reading or writing @@ -121,6 +125,8 @@ MCP05:2025 Command Injection & Execution · CWE-22 Path Traversal · OWASP A01 B ## 3. Rug Pull Attacks +**MITRE ATLAS:** AML.T0010 (ML Supply Chain Compromise) + ### Description A malicious MCP server first presents a benign tool description to gain user trust and approval, @@ -179,6 +185,8 @@ MCP03:2025 Tool Poisoning · MCP09:2025 Shadow MCP Servers · LLM07:2025 System ## 4. Data Exfiltration via Tool Descriptions +**MITRE ATLAS:** AML.T0024 (Exfiltration via ML Inference API), AML.T0062 (Exfiltration via AI Agent Tool Invocation) + ### Description Tool descriptions instruct the LLM to collect sensitive data from the conversation context, @@ -242,6 +250,8 @@ OWASP A02 Cryptographic Failures (data in transit unprotected) ## 5. Cross-Server Attacks +**MITRE ATLAS:** AML.T0061 (AI Agent Tools) + ### Description When multiple MCP servers are connected to the same Claude instance, a malicious server can @@ -305,6 +315,8 @@ MCP07:2025 Insufficient Authentication & Authorization ## 6. Dependency Vulnerabilities +**MITRE ATLAS:** AML.T0010 (ML Supply Chain Compromise) + ### Description MCP servers are npm or pip packages with their own dependency trees. Malicious actors target @@ -368,6 +380,8 @@ CWE-494 Download of Code Without Integrity Check ## 7. Network Exposure +**MITRE ATLAS:** AML.T0025 (Exfiltration via Cyber Means) + ### Description MCP servers that use HTTP/SSE transport (rather than stdio) create network attack surfaces. @@ -434,6 +448,8 @@ OWASP A05 Security Misconfiguration · CWE-918 SSRF ## 8. 
Credential Harvesting +**MITRE ATLAS:** AML.T0035 (ML Artifact Collection) + ### Description MCP servers can access environment variables passed by the host application, configuration files @@ -514,6 +530,8 @@ CWE-312 Cleartext Storage of Sensitive Information · CWE-732 Incorrect Permissi ## 9. Shadow Escape (Operant AI, October 2025) +**MITRE ATLAS:** AML.T0011 (User Execution) + ### Description Shadow Escape is a zero-click agentic attack class disclosed by Operant AI in October 2025 diff --git a/plugins/llm-security/knowledge/mitigation-matrix.md b/plugins/llm-security/knowledge/mitigation-matrix.md index 7f65da0..753d934 100644 --- a/plugins/llm-security/knowledge/mitigation-matrix.md +++ b/plugins/llm-security/knowledge/mitigation-matrix.md @@ -17,7 +17,7 @@ Used by `posture-assessor-agent` to evaluate which controls are in place and whi ## Matrix -### LLM01 — Prompt Injection +### LLM01 — Prompt Injection (MITRE ATLAS: AML.T0051) Attacker injects instructions via external content (files, web pages, tool outputs) that override intended behavior. @@ -39,7 +39,7 @@ Attacker injects instructions via external content (files, web pages, tool outpu --- -### LLM02 — Sensitive Information Disclosure +### LLM02 — Sensitive Information Disclosure (MITRE ATLAS: AML.T0024) Model reveals sensitive data from training, context, or external sources in its outputs. @@ -54,7 +54,7 @@ Model reveals sensitive data from training, context, or external sources in its --- -### LLM03 — Supply Chain Vulnerabilities +### LLM03 — Supply Chain Vulnerabilities (MITRE ATLAS: AML.T0010) Compromised models, plugins, or MCP servers introduce malicious behavior. @@ -68,7 +68,7 @@ Compromised models, plugins, or MCP servers introduce malicious behavior. --- -### LLM04 — Data and Model Poisoning +### LLM04 — Data and Model Poisoning (MITRE ATLAS: AML.T0020) Malicious training data or fine-tuning corrupts model behavior. 
@@ -82,7 +82,7 @@ Malicious training data or fine-tuning corrupts model behavior. --- -### LLM05 — Improper Output Handling +### LLM05 — Improper Output Handling (MITRE ATLAS: AML.T0043) Model output treated as trusted without sanitization, leading to injection in downstream systems. @@ -96,7 +96,7 @@ Model output treated as trusted without sanitization, leading to injection in do --- -### LLM06 — Excessive Agency +### LLM06 — Excessive Agency (MITRE ATLAS: AML.T0061) Model granted too many permissions or capabilities, enabling unintended high-impact actions. @@ -111,7 +111,7 @@ Model granted too many permissions or capabilities, enabling unintended high-imp --- -### LLM07 — System Prompt Leakage +### LLM07 — System Prompt Leakage (MITRE ATLAS: AML.T0024) System prompt or CLAUDE.md exposed through adversarial extraction, revealing security controls. @@ -125,7 +125,7 @@ System prompt or CLAUDE.md exposed through adversarial extraction, revealing sec --- -### LLM08 — Vector and Embedding Weaknesses +### LLM08 — Vector and Embedding Weaknesses (MITRE ATLAS: AML.T0020) Manipulated embeddings or vector store content used to inject malicious context into RAG pipelines. @@ -139,7 +139,7 @@ Manipulated embeddings or vector store content used to inject malicious context --- -### LLM09 — Misinformation +### LLM09 — Misinformation (MITRE ATLAS: AML.T0031) Model generates plausible but false information, leading to incorrect decisions. @@ -154,7 +154,7 @@ Model generates plausible but false information, leading to incorrect decisions. --- -### LLM10 — Unbounded Consumption +### LLM10 — Unbounded Consumption (MITRE ATLAS: AML.T0029) Model or agents consume excessive compute, tokens, or API calls, causing denial of service or cost overruns. 
diff --git a/plugins/llm-security/knowledge/norwegian-context.md b/plugins/llm-security/knowledge/norwegian-context.md new file mode 100644 index 0000000..01e1846 --- /dev/null +++ b/plugins/llm-security/knowledge/norwegian-context.md @@ -0,0 +1,139 @@ +# Norwegian Regulatory Context for AI Security + +Reference material for compliance-aware scanning and CAISS presentations. Maps Norwegian regulatory +bodies, frameworks, and guidance to plugin capabilities. + +**Last verified:** 2026-04-10 + +--- + +## Datatilsynet (Norwegian Data Protection Authority) + +### AI Regulatory Sandbox + +Datatilsynet has operated a regulatory sandbox for AI since 2020, now in its fifth round (2025). +Focus: GDPR compliance in AI systems, generative AI projects. + +**Relevance to plugin:** +- Sandbox projects produce public reports with privacy-by-design requirements +- Plugin's posture scanner evaluates credential protection and secrets management — directly relevant + to GDPR data protection obligations +- Plugin's audit trail capability (v6.0) provides the record-keeping evidence sandbox evaluations require + +**Participation criteria:** AI-based project, specific privacy question, Norwegian-based organization, +societal purpose beyond the developer. + +**Source:** https://www.datatilsynet.no/en/regulations-and-tools/sandbox-for-artificial-intelligence/ + +### GDPR + AI Act Intersection + +The AI Act supplements GDPR — both apply simultaneously to AI systems processing personal data. +Datatilsynet is designated as the market surveillance authority for certain AI uses (e.g., law enforcement). + +--- + +## NSM (Nasjonal Sikkerhetsmyndighet) + +### Grunnprinsipper for IKT-sikkerhet (ICT Security Principles) + +NSM's ICT security principles (v2.1) provide a comprehensive framework for securing information systems. +Applicable to all public and private organizations. Four main principle areas: +1. Identify and map +2. Protect and maintain +3. Detect +4. 
Respond and recover + +**Relevance to plugin:** +- **Identify and map:** Plugin's posture scanner identifies AI-specific security gaps; AI-BOM generator + maps AI components (models, MCP servers, plugins, knowledge bases) +- **Protect and maintain:** 8 runtime hooks provide automated protection; policy-as-code enables + distributable security configuration +- **Detect:** Prompt injection scanning, trifecta detection, behavioral drift monitoring, supply chain + checks — all contribute to NSM's detect principle +- **Respond and recover:** Clean command provides remediation; baseline diff tracks security drift over time + +**Source:** https://nsm.no/regelverk-og-hjelp/rad-og-anbefalinger/grunnprinsipper-for-ikt-sikkerhet/ + +### AI-Specific Guidance + +NSM has not yet published dedicated AI security guidelines (as of April 2026). The ICT security +principles are technology-neutral and apply to AI systems through their general security requirements. +NSM's annual threat assessment (Risiko) covers emerging technology threats including AI. 
+ +--- + +## Digdir (Digitaliseringsdirektoratet) + +### AI Guidance for Public Sector + +Digdir provides guidance on responsible development and use of AI in public sector: +- Principles: transparency, explainability, accountability, human oversight, privacy, equal treatment +- Aligned with EU AI Act requirements +- Government target: 80% of public entities adopt AI by 2026 + +**Relevance to plugin:** +- **Transparency:** Posture reports, scan results, and AI-BOM provide transparency tooling +- **Human oversight:** Human Review Requirements category (posture scanner ID 7) directly measures + human oversight controls; Rule of Two enforces human-in-the-loop for dangerous patterns +- **Accountability:** Audit trail provides event-level accountability; SARIF output enables CI/CD + integration for automated compliance checking + +**Source:** https://www.digdir.no/kunstig-intelligens/veiledning-ki-i-offentlig-sektor/4132 +**Source:** https://www.digdir.no/kunstig-intelligens/rad-ansvarlig-utvikling-og-bruk-av-kunstig-intelligens-i-offentlig-sektor/4272 + +### KI Norge (AI Norway) + +Expanded expert environment within Digdir. Serves as driving force, advisory service, and connector +between AI players in public sector, industry, research, and academia. Will host a national +regulatory sandbox for controlled testing under the AI Act. 
+ +--- + +## Norwegian AI Act Implementation + +### Timeline + +- **June 2025:** Ministry of Digitalisation published draft Artificial Intelligence Act +- **September 2025:** Public consultation deadline +- **August 2026 (expected):** Norwegian AI Act enters into force + +### Supervisory Structure + +- **Nkom (Nasjonal kommunikasjonsmyndighet):** National coordinating market surveillance authority, + EU contact point +- **Sectoral authorities:** Domain-specific market surveillance for high-risk AI +- **Datatilsynet:** Market surveillance for certain uses (law enforcement) +- **Digdir/KI Norge:** Guidance, capacity building, regulatory sandbox + +**Source:** https://regulations.ai/regulations/norway-ai-act-2026 +**Source:** https://www.regjeringen.no/en/whats-new/gjor-norge-klar-for-trygg-og-innovativ-ki-bruk/id3093081/ + +--- + +## Plugin Capability Mapping to Norwegian Requirements + +| Norwegian Requirement | Regulatory Source | Plugin Capability | Coverage | +|----------------------|-------------------|-------------------|----------| +| Risk management for AI systems | AI Act Art. 9, NSM grunnprinsipper | Posture scanner (13+3 categories), threat-model command | Partial | +| Data protection in AI | GDPR, Datatilsynet sandbox | Secrets protection hooks, path guarding, credential scanning | Full | +| Transparency and explainability | Digdir principles, AI Act Art. 13 | Scan reports, posture reports, AI-BOM | Partial | +| Human oversight | Digdir principles, AI Act Art. 14 | Human Review Requirements (PST-07), Rule of Two, deny-first config | Full | +| Cybersecurity | AI Act Art. 15, NSM grunnprinsipper | All 8 hooks, 10 scanners, prompt injection hardening | Full | +| Record-keeping | AI Act Art. 12, NSM detect principle | Audit trail (JSONL), session logging, baseline diffs | Full (v6.0) | +| Quality management | AI Act Art. 17 | Test suite (1147+ tests), posture scanner, scan-orchestrator | Partial | +| Supply chain integrity | AI Act Art. 
15, NSM identify principle | Supply chain hooks, dep audit scanner, AI-BOM | Full | +| Incident response | NSM respond principle | Clean command, baseline diff, watch/cron monitoring | Partial | + +--- + +## Verification Log + +| Claim | Source | URL | +|-------|--------|-----| +| Datatilsynet sandbox since 2020, fifth round 2025 | Datatilsynet website | https://www.datatilsynet.no/en/regulations-and-tools/sandbox-for-artificial-intelligence/ | +| NSM Grunnprinsipper v2.1 | NSM website | https://nsm.no/regelverk-og-hjelp/rad-og-anbefalinger/grunnprinsipper-for-ikt-sikkerhet/ | +| Digdir AI guidance for public sector | Digdir website | https://www.digdir.no/kunstig-intelligens/veiledning-ki-i-offentlig-sektor/4132 | +| 80% public sector AI adoption target by 2026 | Shifter (citing government plan) | https://www.shifter.no/nyheter/regjeringen-80-prosent-av-offentlige-virksomheter-skal-bruke-ai/443164 | +| Norwegian AI Act draft June 2025, expected August 2026 | Regulations.AI | https://regulations.ai/regulations/norway-ai-act-2026 | +| Nkom as coordinating authority | Government press release | https://www.regjeringen.no/en/whats-new/gjor-norge-klar-for-trygg-og-innovativ-ki-bruk/id3093081/ | +| NSM has no dedicated AI security guidelines (April 2026) | NSM website review — no AI-specific publication found | https://nsm.no/ | diff --git a/plugins/llm-security/knowledge/owasp-agentic-top10.md b/plugins/llm-security/knowledge/owasp-agentic-top10.md index 945cf47..5ad164f 100644 --- a/plugins/llm-security/knowledge/owasp-agentic-top10.md +++ b/plugins/llm-security/knowledge/owasp-agentic-top10.md @@ -12,7 +12,7 @@ human supervision. 
Claude Code is an agentic system and maps directly to these r ## ASI01 — Agent Goal Hijack -**Category:** Goal and instruction integrity +**Category:** Goal and instruction integrity | **MITRE ATLAS:** AML.T0051 (LLM Prompt Injection), AML.T0058 (AI Agent Context Poisoning) ### Description Attackers alter agent objectives by embedding hidden instructions in external content that the agent @@ -54,7 +54,7 @@ Real incident: EchoLeak — copilots turned into silent exfiltration engines via ## ASI02 — Tool Misuse and Exploitation -**Category:** Tool integrity and authorization +**Category:** Tool integrity and authorization | **MITRE ATLAS:** AML.T0061 (AI Agent Tools) ### Description Agents misuse legitimate tools due to ambiguous prompts, manipulated input, or over-provisioned @@ -97,7 +97,7 @@ Real incident: Amazon Q and GitHub Actions compromised via repository content tr ## ASI03 — Identity and Privilege Abuse -**Category:** Identity, credentials, and delegation +**Category:** Identity, credentials, and delegation | **MITRE ATLAS:** AML.T0012 (Valid Accounts) ### Description Agents often inherit user or system identities including high-privilege credentials, session tokens, @@ -140,7 +140,7 @@ exercise. 
## ASI04 — Agentic Supply Chain Vulnerabilities -**Category:** Component integrity and provenance +**Category:** Component integrity and provenance | **MITRE ATLAS:** AML.T0010 (ML Supply Chain Compromise) ### Description Tools, plugins, prompt templates, MCP servers, and agent definitions fetched or loaded dynamically @@ -183,7 +183,7 @@ Real incident: Malicious MCP servers impersonating legitimate ones, altering too ## ASI05 — Unexpected Code Execution -**Category:** Code generation and execution safety +**Category:** Code generation and execution safety | **MITRE ATLAS:** AML.T0011 (User Execution) ### Description Agents generate or execute code unsafely through shell commands, eval-like constructs, script @@ -225,7 +225,7 @@ Coding agents like Claude Code are high-risk because code generation and executi ## ASI06 — Memory and Context Poisoning -**Category:** State integrity and persistence +**Category:** State integrity and persistence | **MITRE ATLAS:** AML.T0058 (AI Agent Context Poisoning), AML.T0020 (Poison Training Data) ### Description Agents rely on memory systems, embeddings, RAG databases, context windows, and summaries to maintain @@ -266,7 +266,7 @@ Unlike one-shot injection, memory poisoning executes on every future session wit ## ASI07 — Insecure Inter-Agent Communication -**Category:** Multi-agent protocol integrity +**Category:** Multi-agent protocol integrity | **MITRE ATLAS:** AML.T0062 (Exfiltration via AI Agent Tool Invocation) ### Description In multi-agent architectures, agents coordinate through message passing over MCP, RPC, shared files, @@ -309,7 +309,7 @@ downstream agents through compromised peers. 
## ASI08 — Cascading Failures -**Category:** System resilience and blast radius +**Category:** System resilience and blast radius | **MITRE ATLAS:** AML.T0029 (Denial of ML Service) ### Description In interconnected multi-agent architectures, a single compromised or hallucinating agent can @@ -353,7 +353,7 @@ execute without verification, multiplying the blast radius. ## ASI09 — Human-Agent Trust Exploitation -**Category:** Human oversight and social engineering +**Category:** Human oversight and social engineering | **MITRE ATLAS:** AML.T0043 (Craft Adversarial Data) ### Description Users and operators over-trust agent recommendations due to their confident, authoritative @@ -400,7 +400,7 @@ copilots approving fraudulent transactions; support agents soliciting credential ## ASI10 — Rogue Agents -**Category:** Agent alignment and containment +**Category:** Agent alignment and containment | **MITRE ATLAS:** AML.T0018 (Backdoor ML Model) ### Description Rogue agents are compromised or misaligned agents that act harmfully while appearing legitimate. diff --git a/plugins/llm-security/knowledge/owasp-llm-top10.md b/plugins/llm-security/knowledge/owasp-llm-top10.md index eaaff0e..9e60cbd 100644 --- a/plugins/llm-security/knowledge/owasp-llm-top10.md +++ b/plugins/llm-security/knowledge/owasp-llm-top10.md @@ -10,6 +10,8 @@ Source: https://genai.owasp.org/llm-top-10/ — OWASP GenAI Security Project v20 ## LLM01 — Prompt Injection +**MITRE ATLAS:** AML.T0051 (LLM Prompt Injection) + **Risk:** Attackers manipulate LLM behavior by crafting inputs that override system instructions, bypass guardrails, or cause the model to execute unintended actions. 
@@ -63,6 +65,8 @@ instructions, bypass guardrails, or cause the model to execute unintended action ## LLM02 — Sensitive Information Disclosure +**MITRE ATLAS:** AML.T0024 (Exfiltration via ML Inference API) + **Risk:** LLMs unintentionally expose private, proprietary, or credential data through outputs, memorized training content, or cross-session leakage. @@ -113,6 +117,8 @@ outputs, memorized training content, or cross-session leakage. ## LLM03 — Supply Chain Vulnerabilities +**MITRE ATLAS:** AML.T0010 (ML Supply Chain Compromise) + **Risk:** Compromised third-party models, datasets, plugins, MCP servers, or dependencies introduce backdoors, malicious behavior, or known vulnerabilities. @@ -161,6 +167,8 @@ dependencies introduce backdoors, malicious behavior, or known vulnerabilities. ## LLM04 — Data and Model Poisoning +**MITRE ATLAS:** AML.T0020 (Poison Training Data), AML.T0018 (Backdoor ML Model) + **Risk:** Malicious or accidental contamination of training data, fine-tuning datasets, RAG knowledge bases, or embeddings degrades model behavior or introduces backdoors. @@ -208,6 +216,8 @@ RAG knowledge bases, or embeddings degrades model behavior or introduces backdoo ## LLM05 — Improper Output Handling +**MITRE ATLAS:** AML.T0043 (Craft Adversarial Data) + **Risk:** LLM-generated output is passed to downstream systems without adequate validation or sanitization, enabling injection attacks, privilege escalation, or unintended side effects. @@ -262,6 +272,8 @@ unintended side effects. ## LLM06 — Excessive Agency +**MITRE ATLAS:** AML.T0061 (AI Agent Tools) + **Risk:** LLMs granted excessive functionality, permissions, or autonomy take unintended high-impact actions with real-world consequences. @@ -317,6 +329,8 @@ unintended high-impact actions with real-world consequences. 
## LLM07 — System Prompt Leakage +**MITRE ATLAS:** AML.T0024 (Exfiltration via ML Inference API) + **Risk:** Internal system prompts containing sensitive instructions, credentials, or behavioral guardrails are exposed to users or attackers, enabling bypass or credential theft. @@ -368,6 +382,8 @@ credential theft. ## LLM08 — Vector and Embedding Weaknesses +**MITRE ATLAS:** AML.T0020 (Poison Training Data), AML.T0019 (Publish Poisoned Datasets) + **Risk:** Vulnerabilities in how embeddings are generated, stored, or retrieved allow unauthorized data access, information leakage, or manipulation of RAG-based agent behavior. @@ -421,6 +437,8 @@ behavior. ## LLM09 — Misinformation +**MITRE ATLAS:** AML.T0031 (Erode ML Model Integrity) + **Risk:** LLMs generate plausible but factually incorrect outputs (hallucinations) that are acted upon without verification, leading to incorrect decisions, security bypasses, or dependency on non-existent resources. @@ -475,6 +493,8 @@ or dependency on non-existent resources. ## LLM10 — Unbounded Consumption +**MITRE ATLAS:** AML.T0029 (Denial of ML Service), AML.T0034 (Cost Harvesting) + **Risk:** Uncontrolled resource usage by LLM applications enables denial of service, financial exploitation via excessive API costs, or unauthorized model capability extraction through systematic querying. 
diff --git a/plugins/llm-security/knowledge/owasp-skills-top10.md b/plugins/llm-security/knowledge/owasp-skills-top10.md index defc0c6..0338f94 100644 --- a/plugins/llm-security/knowledge/owasp-skills-top10.md +++ b/plugins/llm-security/knowledge/owasp-skills-top10.md @@ -13,7 +13,7 @@ skill-scanner-agent threat model, and cross-mapped to OWASP LLM Top 10 and Agent ## AST01 — Prompt Injection via Skill Content -**Category:** Instruction integrity | **Maps to:** LLM01, ASI01 | **Severity:** CRITICAL in frontmatter; HIGH in body +**Category:** Instruction integrity | **Maps to:** LLM01, ASI01 | **Severity:** CRITICAL in frontmatter; HIGH in body | **MITRE ATLAS:** AML.T0051 (LLM Prompt Injection) Instructions embedded in skill/command/agent files that override model operating rules. Frontmatter `name`/`description` fields load directly into the system prompt — injections here bypass all hooks. @@ -32,7 +32,7 @@ in any frontmatter field; spoofed headers or identity phrases anywhere in skill ## AST02 — Data Exfiltration from Skills -**Category:** Data protection | **Maps to:** LLM02, ASI02 | **Severity:** CRITICAL (credential+network); HIGH (file reads alone) +**Category:** Data protection | **Maps to:** LLM02, ASI02 | **Severity:** CRITICAL (credential+network); HIGH (file reads alone) | **MITRE ATLAS:** AML.T0024 (Exfiltration via ML Inference API), AML.T0062 (Exfiltration via AI Agent Tool Invocation) Skills instructing the agent to read sensitive local files and transmit their contents externally. ToxicSkills found 17.7% of scanned skills fetch from or post to untrusted URLs. 
@@ -52,7 +52,7 @@ reads to `~/.ssh/`, `~/.env`, `~/.aws/credentials`, `~/.npmrc`; `| base64` on en ## AST03 — Privilege Escalation via Skill Tools -**Category:** Authorization | **Maps to:** LLM06, ASI03 | **Severity:** CRITICAL (hook/settings writes); HIGH (unjustified Bash) +**Category:** Authorization | **Maps to:** LLM06, ASI03 | **Severity:** CRITICAL (hook/settings writes); HIGH (unjustified Bash) | **MITRE ATLAS:** AML.T0012 (Valid Accounts) Skills requesting tool permissions beyond their stated function, or instructing the agent to modify the plugin/hook infrastructure. Excess tools expand blast radius and enable chained attacks. @@ -72,7 +72,7 @@ in non-execution skills as HIGH. `pre-write-pathguard.mjs` blocks writes to hook ## AST04 — Scope Creep and Credential Access -**Category:** Credential protection | **Maps to:** LLM02, LLM06, ASI03 | **Severity:** CRITICAL (wallet/SSH/cloud); HIGH (dev tokens) +**Category:** Credential protection | **Maps to:** LLM02, LLM06, ASI03 | **Severity:** CRITICAL (wallet/SSH/cloud); HIGH (dev tokens) | **MITRE ATLAS:** AML.T0035 (ML Artifact Collection) Skills that exceed their documented purpose by reading sensitive credential files. The "rug-pull" attack: skill gains adoption legitimately, then an update introduces harvesting framed as diagnostics. @@ -94,7 +94,7 @@ is an escalating severity signal. Update `pre-bash-destructive.mjs` pattern list ## AST05 — Hidden Instructions in Skills -**Category:** Instruction integrity | **Maps to:** LLM01, ASI01 | **Severity:** CRITICAL for any confirmed instance +**Category:** Instruction integrity | **Maps to:** LLM01, ASI01 | **Severity:** CRITICAL for any confirmed instance | **MITRE ATLAS:** AML.T0051 (LLM Prompt Injection) Malicious content concealed from human review but interpreted by LLMs. Unicode steganography, base64-encoded payloads, and HTML comment injection are documented ClawHavoc techniques. 
Effective @@ -117,7 +117,7 @@ high density of U+200B-U+200D in plain-English files; base64 strings >40 chars a ## AST06 — Toolchain Manipulation via Skills -**Category:** Supply chain | **Maps to:** LLM03, ASI04 | **Severity:** CRITICAL (registry redirection); HIGH (package install) +**Category:** Supply chain | **Maps to:** LLM03, ASI04 | **Severity:** CRITICAL (registry redirection); HIGH (package install) | **MITRE ATLAS:** AML.T0010 (ML Supply Chain Compromise) Skills that modify the dependency graph or package manager configuration to introduce malicious packages. Registry redirection poisons all subsequent installs, not just the immediate one. @@ -138,7 +138,7 @@ any package a skill recommends installing. Flag any registry URL change as CRITI ## AST07 — Persistence Mechanisms via Skills -**Category:** System integrity | **Maps to:** LLM01, LLM03, ASI10 | **Severity:** CRITICAL for all variants +**Category:** System integrity | **Maps to:** LLM01, LLM03, ASI10 | **Severity:** CRITICAL for all variants | **MITRE ATLAS:** AML.T0018 (Backdoor ML Model) Skills that attempt to survive session termination via system startup modification, scheduled tasks, or hook registration. AMOS (ClawHavoc) used macOS LaunchAgents; Claude Code hooks are an additional @@ -161,7 +161,7 @@ persistence commands. `pre-write-pathguard.mjs` blocks plugin/hook path writes. ## AST08 — Skill Description Mismatch -**Category:** Trust boundary | **Maps to:** LLM06, ASI09 | **Severity:** HIGH; CRITICAL if mismatch enables privilege escalation +**Category:** Trust boundary | **Maps to:** LLM06, ASI09 | **Severity:** HIGH; CRITICAL if mismatch enables privilege escalation | **MITRE ATLAS:** AML.T0043 (Craft Adversarial Data) Frontmatter description claims read-only or safe analysis, but `allowed-tools`/`tools` grant write/execution capabilities. Users approve installation based on stated description, not actual @@ -183,7 +183,7 @@ security agents. 
Re-scan all frontmatter after plugin updates — description dr ## AST09 — Over-Privileged Knowledge Access -**Category:** Data trust | **Maps to:** LLM04, ASI06 | **Severity:** HIGH (bulk loads); MEDIUM (missing attribution) +**Category:** Data trust | **Maps to:** LLM04, ASI06 | **Severity:** HIGH (bulk loads); MEDIUM (missing attribution) | **MITRE ATLAS:** AML.T0035 (ML Artifact Collection), AML.T0036 (Data from Information Repositories) Knowledge files treated as trusted instructions rather than reference data. Skills loading entire `knowledge/` directories without selection violate the context budget rule (max 3 files per @@ -207,7 +207,7 @@ with explicit data framing before passing to subagents. ## AST10 — Uncontrolled Skill Execution -**Category:** Resource control | **Maps to:** LLM10, ASI08 | **Severity:** HIGH; CRITICAL if combined with AST01 trigger +**Category:** Resource control | **Maps to:** LLM10, ASI08 | **Severity:** HIGH; CRITICAL if combined with AST01 trigger | **MITRE ATLAS:** AML.T0011 (User Execution) Skills or commands without iteration limits, file count caps, or circuit breakers in loop contexts. Enables Denial of Wallet attacks and runaway autonomous pipelines. 
Especially dangerous in harness
diff --git a/plugins/llm-security/tests/scanners/knowledge-atlas.test.mjs b/plugins/llm-security/tests/scanners/knowledge-atlas.test.mjs
new file mode 100644
index 0000000..ecb925a
--- /dev/null
+++ b/plugins/llm-security/tests/scanners/knowledge-atlas.test.mjs
@@ -0,0 +1,116 @@
+// knowledge-atlas.test.mjs — Tests for MITRE ATLAS IDs in knowledge files + Norwegian context
+// Verifies: each knowledge file contains AML.T references; norwegian-context.md exists and has content
+
+import { describe, it } from 'node:test';
+import assert from 'node:assert/strict';
+import { readFileSync } from 'node:fs';
+import { resolve } from 'node:path';
+import { fileURLToPath } from 'node:url';
+
+// Knowledge files live two directories above this test file (tests/scanners/ -> plugin root).
+const __dirname = fileURLToPath(new URL('.', import.meta.url));
+const ROOT = resolve(__dirname, '../..');
+const KNOWLEDGE = resolve(ROOT, 'knowledge');
+
+/**
+ * Read a file from the knowledge directory as UTF-8 text.
+ *
+ * @param {string} filename - File name relative to the knowledge directory.
+ * @returns {string|null} File contents, or null when the file does not exist.
+ * @throws {Error} Any read failure other than ENOENT, so that e.g. a permission
+ *   problem is not misreported as a missing file.
+ */
+function readKnowledge(filename) {
+  try {
+    return readFileSync(resolve(KNOWLEDGE, filename), 'utf-8');
+  } catch (err) {
+    if (err.code === 'ENOENT') return null;
+    throw err;
+  }
+}
+
+/**
+ * Assert that a knowledge file was loaded.
+ *
+ * Content checks call this first so a missing file fails with a readable
+ * message instead of a TypeError from dereferencing null.
+ *
+ * @param {string|null} content - Value returned by readKnowledge.
+ * @param {string} file - File name used in the failure message.
+ */
+function assertLoaded(content, file) {
+  assert.ok(content !== null, `${file} should exist`);
+}
+
+// ---------------------------------------------------------------------------
+// ATLAS IDs in OWASP and threat pattern files
+// ---------------------------------------------------------------------------
+
+// minRefs is the minimum number of AML.Txxxx occurrences expected in each file.
+const atlasFiles = [
+  { file: 'owasp-llm-top10.md', minRefs: 10 },
+  { file: 'owasp-agentic-top10.md', minRefs: 10 },
+  { file: 'owasp-skills-top10.md', minRefs: 10 },
+  { file: 'mcp-threat-patterns.md', minRefs: 5 },
+  { file: 'deepmind-agent-traps.md', minRefs: 5 },
+  { file: 'mitigation-matrix.md', minRefs: 5 },
+];
+
+describe('MITRE ATLAS IDs in knowledge files', () => {
+  for (const { file, minRefs } of atlasFiles) {
+    describe(file, () => {
+      const content = readKnowledge(file);
+
+      it('file exists', () => {
+        assertLoaded(content, file);
+      });
+
+      it(`contains at least ${minRefs} AML.T references`, () => {
+        assertLoaded(content, file);
+        const matches = content.match(/AML\.T\d{4}/g) ?? [];
+        assert.ok(
+          matches.length >= minRefs,
+          `Expected >= ${minRefs} AML.T refs, found ${matches.length}`
+        );
+      });
+
+      it('contains AML.T0051 (LLM Prompt Injection)', () => {
+        assertLoaded(content, file);
+        assert.ok(content.includes('AML.T0051'), `${file} should reference AML.T0051`);
+      });
+    });
+  }
+});
+
+// ---------------------------------------------------------------------------
+// Norwegian regulatory context
+// ---------------------------------------------------------------------------
+
+describe('knowledge/norwegian-context.md', () => {
+  const file = 'norwegian-context.md';
+  const content = readKnowledge(file);
+
+  it('file exists', () => {
+    assertLoaded(content, file);
+  });
+
+  it('references Datatilsynet', () => {
+    assertLoaded(content, file);
+    assert.ok(content.includes('Datatilsynet'), 'Should reference Datatilsynet');
+  });
+
+  it('references NSM', () => {
+    assertLoaded(content, file);
+    assert.ok(content.includes('NSM'), 'Should reference NSM');
+  });
+
+  it('references Digdir', () => {
+    assertLoaded(content, file);
+    assert.ok(content.includes('Digdir'), 'Should reference Digdir');
+  });
+
+  it('contains verification URLs', () => {
+    assertLoaded(content, file);
+    assert.ok(/https?:\/\//.test(content), 'Should contain verification URLs');
+  });
+});