feat(knowledge): add MITRE ATLAS IDs to OWASP files + Norwegian regulatory context
This commit is contained in:
parent
5bb9d5bd11
commit
e2c8924074
8 changed files with 301 additions and 30 deletions
|
|
@ -10,6 +10,8 @@ Full taxonomy of AI agent traps from Google DeepMind's "AI Agent Traps" paper (A
|
|||
|
||||
## Category 1: Content Injection
|
||||
|
||||
**MITRE ATLAS:** AML.T0051 (LLM Prompt Injection), AML.T0043 (Craft Adversarial Data)
|
||||
|
||||
Attacks that embed malicious instructions in content the agent reads or processes.
|
||||
|
||||
### 1a. Steganography
|
||||
|
|
@ -41,6 +43,8 @@ Instructions hidden in structural elements of content formats.
|
|||
|
||||
## Category 2: Semantic Manipulation
|
||||
|
||||
**MITRE ATLAS:** AML.T0051 (LLM Prompt Injection)
|
||||
|
||||
Attacks that manipulate the agent's reasoning without direct injection.
|
||||
|
||||
### 2a. Oversight Evasion
|
||||
|
|
@ -72,6 +76,8 @@ Subtle reframing that overrides rules without explicit override language.
|
|||
|
||||
## Category 3: Context Manipulation
|
||||
|
||||
**MITRE ATLAS:** AML.T0058 (AI Agent Context Poisoning), AML.T0020 (Poison Training Data)
|
||||
|
||||
Attacks that poison the agent's memory or persistent state.
|
||||
|
||||
| Technique | Description | Plugin Coverage |
|
||||
|
|
@ -89,6 +95,8 @@ Attacks that poison the agent's memory or persistent state.
|
|||
|
||||
## Category 4: Multi-Agent Exploitation
|
||||
|
||||
**MITRE ATLAS:** AML.T0062 (Exfiltration via AI Agent Tool Invocation), AML.T0061 (AI Agent Tools)
|
||||
|
||||
Attacks that exploit trust relationships between agents in multi-agent systems.
|
||||
|
||||
| Technique | Description | Plugin Coverage |
|
||||
|
|
@ -103,6 +111,8 @@ Attacks that exploit trust relationships between agents in multi-agent systems.
|
|||
|
||||
## Category 5: Capability Manipulation
|
||||
|
||||
**MITRE ATLAS:** AML.T0061 (AI Agent Tools), AML.T0010 (ML Supply Chain Compromise)
|
||||
|
||||
Attacks that cause the agent to misuse its own tools or escalate privileges.
|
||||
|
||||
| Technique | Description | Plugin Coverage |
|
||||
|
|
@ -117,6 +127,8 @@ Attacks that cause the agent to misuse its own tools or escalate privileges.
|
|||
|
||||
## Category 6: Human-in-the-Loop Exploitation
|
||||
|
||||
**MITRE ATLAS:** AML.T0043 (Craft Adversarial Data)
|
||||
|
||||
Attacks that exploit the human review step in agent workflows.
|
||||
|
||||
| Technique | Description | Plugin Coverage |
|
||||
|
|
|
|||
|
|
@ -12,6 +12,8 @@ MCP08 Lack of Audit · MCP09 Shadow MCP Servers · MCP10 Context Over-Sharing
|
|||
|
||||
## 1. Tool Poisoning
|
||||
|
||||
**MITRE ATLAS:** AML.T0043 (Craft Adversarial Data), AML.T0051 (LLM Prompt Injection)
|
||||
|
||||
### Description
|
||||
|
||||
Malicious instructions embedded in tool `description`, `name`, or parameter `description` fields that
|
||||
|
|
@ -61,6 +63,8 @@ MCP03:2025 Tool Poisoning · LLM02:2025 Sensitive Information Disclosure · OWAS
|
|||
|
||||
## 2. Path Traversal
|
||||
|
||||
**MITRE ATLAS:** AML.T0037 (Data from Local System)
|
||||
|
||||
### Description
|
||||
|
||||
MCP file-system tools that accept path parameters without canonicalization allow reading or writing
|
||||
|
|
@ -121,6 +125,8 @@ MCP05:2025 Command Injection & Execution · CWE-22 Path Traversal · OWASP A01 B
|
|||
|
||||
## 3. Rug Pull Attacks
|
||||
|
||||
**MITRE ATLAS:** AML.T0010 (ML Supply Chain Compromise)
|
||||
|
||||
### Description
|
||||
|
||||
A malicious MCP server first presents a benign tool description to gain user trust and approval,
|
||||
|
|
@ -179,6 +185,8 @@ MCP03:2025 Tool Poisoning · MCP09:2025 Shadow MCP Servers · LLM07:2025 System
|
|||
|
||||
## 4. Data Exfiltration via Tool Descriptions
|
||||
|
||||
**MITRE ATLAS:** AML.T0024 (Exfiltration via ML Inference API), AML.T0062 (Exfiltration via AI Agent Tool Invocation)
|
||||
|
||||
### Description
|
||||
|
||||
Tool descriptions instruct the LLM to collect sensitive data from the conversation context,
|
||||
|
|
@ -242,6 +250,8 @@ OWASP A02 Cryptographic Failures (data in transit unprotected)
|
|||
|
||||
## 5. Cross-Server Attacks
|
||||
|
||||
**MITRE ATLAS:** AML.T0061 (AI Agent Tools)
|
||||
|
||||
### Description
|
||||
|
||||
When multiple MCP servers are connected to the same Claude instance, a malicious server can
|
||||
|
|
@ -305,6 +315,8 @@ MCP07:2025 Insufficient Authentication & Authorization
|
|||
|
||||
## 6. Dependency Vulnerabilities
|
||||
|
||||
**MITRE ATLAS:** AML.T0010 (ML Supply Chain Compromise)
|
||||
|
||||
### Description
|
||||
|
||||
MCP servers are npm or pip packages with their own dependency trees. Malicious actors target
|
||||
|
|
@ -368,6 +380,8 @@ CWE-494 Download of Code Without Integrity Check
|
|||
|
||||
## 7. Network Exposure
|
||||
|
||||
**MITRE ATLAS:** AML.T0025 (Exfiltration via Cyber Means)
|
||||
|
||||
### Description
|
||||
|
||||
MCP servers that use HTTP/SSE transport (rather than stdio) create network attack surfaces.
|
||||
|
|
@ -434,6 +448,8 @@ OWASP A05 Security Misconfiguration · CWE-918 SSRF
|
|||
|
||||
## 8. Credential Harvesting
|
||||
|
||||
**MITRE ATLAS:** AML.T0035 (ML Artifact Collection)
|
||||
|
||||
### Description
|
||||
|
||||
MCP servers can access environment variables passed by the host application, configuration files
|
||||
|
|
@ -514,6 +530,8 @@ CWE-312 Cleartext Storage of Sensitive Information · CWE-732 Incorrect Permissi
|
|||
|
||||
## 9. Shadow Escape (Operant AI, October 2025)
|
||||
|
||||
**MITRE ATLAS:** AML.T0011 (User Execution)
|
||||
|
||||
### Description
|
||||
|
||||
Shadow Escape is a zero-click agentic attack class disclosed by Operant AI in October 2025
|
||||
|
|
|
|||
|
|
@ -17,7 +17,7 @@ Used by `posture-assessor-agent` to evaluate which controls are in place and whi
|
|||
|
||||
## Matrix
|
||||
|
||||
### LLM01 — Prompt Injection
|
||||
### LLM01 — Prompt Injection (MITRE ATLAS: AML.T0051)
|
||||
|
||||
Attacker injects instructions via external content (files, web pages, tool outputs) that override intended behavior.
|
||||
|
||||
|
|
@ -39,7 +39,7 @@ Attacker injects instructions via external content (files, web pages, tool outpu
|
|||
|
||||
---
|
||||
|
||||
### LLM02 — Sensitive Information Disclosure
|
||||
### LLM02 — Sensitive Information Disclosure (MITRE ATLAS: AML.T0024)
|
||||
|
||||
Model reveals sensitive data from training, context, or external sources in its outputs.
|
||||
|
||||
|
|
@ -54,7 +54,7 @@ Model reveals sensitive data from training, context, or external sources in its
|
|||
|
||||
---
|
||||
|
||||
### LLM03 — Supply Chain Vulnerabilities
|
||||
### LLM03 — Supply Chain Vulnerabilities (MITRE ATLAS: AML.T0010)
|
||||
|
||||
Compromised models, plugins, or MCP servers introduce malicious behavior.
|
||||
|
||||
|
|
@ -68,7 +68,7 @@ Compromised models, plugins, or MCP servers introduce malicious behavior.
|
|||
|
||||
---
|
||||
|
||||
### LLM04 — Data and Model Poisoning
|
||||
### LLM04 — Data and Model Poisoning (MITRE ATLAS: AML.T0020)
|
||||
|
||||
Malicious training data or fine-tuning corrupts model behavior.
|
||||
|
||||
|
|
@ -82,7 +82,7 @@ Malicious training data or fine-tuning corrupts model behavior.
|
|||
|
||||
---
|
||||
|
||||
### LLM05 — Improper Output Handling
|
||||
### LLM05 — Improper Output Handling (MITRE ATLAS: AML.T0043)
|
||||
|
||||
Model output treated as trusted without sanitization, leading to injection in downstream systems.
|
||||
|
||||
|
|
@ -96,7 +96,7 @@ Model output treated as trusted without sanitization, leading to injection in do
|
|||
|
||||
---
|
||||
|
||||
### LLM06 — Excessive Agency
|
||||
### LLM06 — Excessive Agency (MITRE ATLAS: AML.T0061)
|
||||
|
||||
Model granted too many permissions or capabilities, enabling unintended high-impact actions.
|
||||
|
||||
|
|
@ -111,7 +111,7 @@ Model granted too many permissions or capabilities, enabling unintended high-imp
|
|||
|
||||
---
|
||||
|
||||
### LLM07 — System Prompt Leakage
|
||||
### LLM07 — System Prompt Leakage (MITRE ATLAS: AML.T0024)
|
||||
|
||||
System prompt or CLAUDE.md exposed through adversarial extraction, revealing security controls.
|
||||
|
||||
|
|
@ -125,7 +125,7 @@ System prompt or CLAUDE.md exposed through adversarial extraction, revealing sec
|
|||
|
||||
---
|
||||
|
||||
### LLM08 — Vector and Embedding Weaknesses
|
||||
### LLM08 — Vector and Embedding Weaknesses (MITRE ATLAS: AML.T0020)
|
||||
|
||||
Manipulated embeddings or vector store content used to inject malicious context into RAG pipelines.
|
||||
|
||||
|
|
@ -139,7 +139,7 @@ Manipulated embeddings or vector store content used to inject malicious context
|
|||
|
||||
---
|
||||
|
||||
### LLM09 — Misinformation
|
||||
### LLM09 — Misinformation (MITRE ATLAS: AML.T0031)
|
||||
|
||||
Model generates plausible but false information, leading to incorrect decisions.
|
||||
|
||||
|
|
@ -154,7 +154,7 @@ Model generates plausible but false information, leading to incorrect decisions.
|
|||
|
||||
---
|
||||
|
||||
### LLM10 — Unbounded Consumption
|
||||
### LLM10 — Unbounded Consumption (MITRE ATLAS: AML.T0029)
|
||||
|
||||
Model or agents consume excessive compute, tokens, or API calls, causing denial of service or cost overruns.
|
||||
|
||||
|
|
|
|||
139
plugins/llm-security/knowledge/norwegian-context.md
Normal file
139
plugins/llm-security/knowledge/norwegian-context.md
Normal file
|
|
@ -0,0 +1,139 @@
|
|||
# Norwegian Regulatory Context for AI Security
|
||||
|
||||
Reference material for compliance-aware scanning and CAISS presentations. Maps Norwegian regulatory
|
||||
bodies, frameworks, and guidance to plugin capabilities.
|
||||
|
||||
**Last verified:** 2026-04-10
|
||||
|
||||
---
|
||||
|
||||
## Datatilsynet (Norwegian Data Protection Authority)
|
||||
|
||||
### AI Regulatory Sandbox
|
||||
|
||||
Datatilsynet has operated a regulatory sandbox for AI since 2020, now in its fifth round (2025).
|
||||
Focus: GDPR compliance in AI systems, generative AI projects.
|
||||
|
||||
**Relevance to plugin:**
|
||||
- Sandbox projects produce public reports with privacy-by-design requirements
|
||||
- Plugin's posture scanner evaluates credential protection and secrets management — directly relevant
|
||||
to GDPR data protection obligations
|
||||
- Plugin's audit trail capability (v6.0) provides the record-keeping evidence sandbox evaluations require
|
||||
|
||||
**Participation criteria:** AI-based project, specific privacy question, Norwegian-based organization,
|
||||
societal purpose beyond the developer.
|
||||
|
||||
**Source:** https://www.datatilsynet.no/en/regulations-and-tools/sandbox-for-artificial-intelligence/
|
||||
|
||||
### GDPR + AI Act Intersection
|
||||
|
||||
The AI Act supplements GDPR — both apply simultaneously to AI systems processing personal data.
|
||||
Datatilsynet is designated as the market surveillance authority for certain AI uses (e.g., law enforcement).
|
||||
|
||||
---
|
||||
|
||||
## NSM (Nasjonal Sikkerhetsmyndighet)
|
||||
|
||||
### Grunnprinsipper for IKT-sikkerhet (ICT Security Principles)
|
||||
|
||||
NSM's ICT security principles (v2.1) provide a comprehensive framework for securing information systems.
|
||||
Applicable to all public and private organizations. Four main principle areas:
|
||||
1. Identify and map
|
||||
2. Protect and maintain
|
||||
3. Detect
|
||||
4. Respond and recover
|
||||
|
||||
**Relevance to plugin:**
|
||||
- **Identify and map:** Plugin's posture scanner identifies AI-specific security gaps; AI-BOM generator
|
||||
maps AI components (models, MCP servers, plugins, knowledge bases)
|
||||
- **Protect and maintain:** 8 runtime hooks provide automated protection; policy-as-code enables
|
||||
distributable security configuration
|
||||
- **Detect:** Prompt injection scanning, trifecta detection, behavioral drift monitoring, supply chain
|
||||
checks — all contribute to NSM's detect principle
|
||||
- **Respond and recover:** Clean command provides remediation; baseline diff tracks security drift over time
|
||||
|
||||
**Source:** https://nsm.no/regelverk-og-hjelp/rad-og-anbefalinger/grunnprinsipper-for-ikt-sikkerhet/
|
||||
|
||||
### AI-Specific Guidance
|
||||
|
||||
NSM has not yet published dedicated AI security guidelines (as of April 2026). The ICT security
|
||||
principles are technology-neutral and apply to AI systems through their general security requirements.
|
||||
NSM's annual threat assessment (Risiko) covers emerging technology threats including AI.
|
||||
|
||||
---
|
||||
|
||||
## Digdir (Digitaliseringsdirektoratet)
|
||||
|
||||
### AI Guidance for Public Sector
|
||||
|
||||
Digdir provides guidance on responsible development and use of AI in the public sector:
|
||||
- Principles: transparency, explainability, accountability, human oversight, privacy, equal treatment
|
||||
- Aligned with EU AI Act requirements
|
||||
- Government target: 80% of public entities adopt AI by 2026
|
||||
|
||||
**Relevance to plugin:**
|
||||
- **Transparency:** Posture reports, scan results, and AI-BOM provide transparency tooling
|
||||
- **Human oversight:** Human Review Requirements category (posture scanner ID 7) directly measures
|
||||
human oversight controls; Rule of Two enforces human-in-the-loop for dangerous patterns
|
||||
- **Accountability:** Audit trail provides event-level accountability; SARIF output enables CI/CD
|
||||
integration for automated compliance checking
|
||||
|
||||
**Source:** https://www.digdir.no/kunstig-intelligens/veiledning-ki-i-offentlig-sektor/4132
|
||||
**Source:** https://www.digdir.no/kunstig-intelligens/rad-ansvarlig-utvikling-og-bruk-av-kunstig-intelligens-i-offentlig-sektor/4272
|
||||
|
||||
### KI Norge (AI Norway)
|
||||
|
||||
KI Norge is an expanded expert environment within Digdir. It serves as a driving force, advisory service, and connector
|
||||
between AI players in the public sector, industry, research, and academia. It will host a national
|
||||
regulatory sandbox for controlled testing under the AI Act.
|
||||
|
||||
---
|
||||
|
||||
## Norwegian AI Act Implementation
|
||||
|
||||
### Timeline
|
||||
|
||||
- **June 2025:** Ministry of Digitalisation published a draft Artificial Intelligence Act
|
||||
- **September 2025:** Public consultation deadline
|
||||
- **August 2026 (expected):** Norwegian AI Act enters into force
|
||||
|
||||
### Supervisory Structure
|
||||
|
||||
- **Nkom (Nasjonal kommunikasjonsmyndighet):** National coordinating market surveillance authority,
|
||||
EU contact point
|
||||
- **Sectoral authorities:** Domain-specific market surveillance for high-risk AI
|
||||
- **Datatilsynet:** Market surveillance for certain uses (law enforcement)
|
||||
- **Digdir/KI Norge:** Guidance, capacity building, regulatory sandbox
|
||||
|
||||
**Source:** https://regulations.ai/regulations/norway-ai-act-2026
|
||||
**Source:** https://www.regjeringen.no/en/whats-new/gjor-norge-klar-for-trygg-og-innovativ-ki-bruk/id3093081/
|
||||
|
||||
---
|
||||
|
||||
## Plugin Capability Mapping to Norwegian Requirements
|
||||
|
||||
| Norwegian Requirement | Regulatory Source | Plugin Capability | Coverage |
|
||||
|----------------------|-------------------|-------------------|----------|
|
||||
| Risk management for AI systems | AI Act Art. 9, NSM grunnprinsipper | Posture scanner (13+3 categories), threat-model command | Partial |
|
||||
| Data protection in AI | GDPR, Datatilsynet sandbox | Secrets protection hooks, path guarding, credential scanning | Full |
|
||||
| Transparency and explainability | Digdir principles, AI Act Art. 13 | Scan reports, posture reports, AI-BOM | Partial |
|
||||
| Human oversight | Digdir principles, AI Act Art. 14 | Human Review Requirements (PST-07), Rule of Two, deny-first config | Full |
|
||||
| Cybersecurity | AI Act Art. 15, NSM grunnprinsipper | All 8 hooks, 10 scanners, prompt injection hardening | Full |
|
||||
| Record-keeping | AI Act Art. 12, NSM detect principle | Audit trail (JSONL), session logging, baseline diffs | Full (v6.0) |
|
||||
| Quality management | AI Act Art. 17 | Test suite (1147+ tests), posture scanner, scan-orchestrator | Partial |
|
||||
| Supply chain integrity | AI Act Art. 15, NSM identify principle | Supply chain hooks, dep audit scanner, AI-BOM | Full |
|
||||
| Incident response | NSM respond principle | Clean command, baseline diff, watch/cron monitoring | Partial |
|
||||
|
||||
---
|
||||
|
||||
## Verification Log
|
||||
|
||||
| Claim | Source | URL |
|
||||
|-------|--------|-----|
|
||||
| Datatilsynet sandbox since 2020, fifth round 2025 | Datatilsynet website | https://www.datatilsynet.no/en/regulations-and-tools/sandbox-for-artificial-intelligence/ |
|
||||
| NSM Grunnprinsipper v2.1 | NSM website | https://nsm.no/regelverk-og-hjelp/rad-og-anbefalinger/grunnprinsipper-for-ikt-sikkerhet/ |
|
||||
| Digdir AI guidance for public sector | Digdir website | https://www.digdir.no/kunstig-intelligens/veiledning-ki-i-offentlig-sektor/4132 |
|
||||
| 80% public sector AI adoption target by 2026 | Shifter (citing government plan) | https://www.shifter.no/nyheter/regjeringen-80-prosent-av-offentlige-virksomheter-skal-bruke-ai/443164 |
|
||||
| Norwegian AI Act draft June 2025, expected August 2026 | Regulations.AI | https://regulations.ai/regulations/norway-ai-act-2026 |
|
||||
| Nkom as coordinating authority | Government press release | https://www.regjeringen.no/en/whats-new/gjor-norge-klar-for-trygg-og-innovativ-ki-bruk/id3093081/ |
|
||||
| NSM has no dedicated AI security guidelines (April 2026) | NSM website review — no AI-specific publication found | https://nsm.no/ |
|
||||
|
|
@ -12,7 +12,7 @@ human supervision. Claude Code is an agentic system and maps directly to these r
|
|||
|
||||
## ASI01 — Agent Goal Hijack
|
||||
|
||||
**Category:** Goal and instruction integrity
|
||||
**Category:** Goal and instruction integrity | **MITRE ATLAS:** AML.T0051 (LLM Prompt Injection), AML.T0058 (AI Agent Context Poisoning)
|
||||
|
||||
### Description
|
||||
Attackers alter agent objectives by embedding hidden instructions in external content that the agent
|
||||
|
|
@ -54,7 +54,7 @@ Real incident: EchoLeak — copilots turned into silent exfiltration engines via
|
|||
|
||||
## ASI02 — Tool Misuse and Exploitation
|
||||
|
||||
**Category:** Tool integrity and authorization
|
||||
**Category:** Tool integrity and authorization | **MITRE ATLAS:** AML.T0061 (AI Agent Tools)
|
||||
|
||||
### Description
|
||||
Agents misuse legitimate tools due to ambiguous prompts, manipulated input, or over-provisioned
|
||||
|
|
@ -97,7 +97,7 @@ Real incident: Amazon Q and GitHub Actions compromised via repository content tr
|
|||
|
||||
## ASI03 — Identity and Privilege Abuse
|
||||
|
||||
**Category:** Identity, credentials, and delegation
|
||||
**Category:** Identity, credentials, and delegation | **MITRE ATLAS:** AML.T0012 (Valid Accounts)
|
||||
|
||||
### Description
|
||||
Agents often inherit user or system identities including high-privilege credentials, session tokens,
|
||||
|
|
@ -140,7 +140,7 @@ exercise.
|
|||
|
||||
## ASI04 — Agentic Supply Chain Vulnerabilities
|
||||
|
||||
**Category:** Component integrity and provenance
|
||||
**Category:** Component integrity and provenance | **MITRE ATLAS:** AML.T0010 (ML Supply Chain Compromise)
|
||||
|
||||
### Description
|
||||
Tools, plugins, prompt templates, MCP servers, and agent definitions fetched or loaded dynamically
|
||||
|
|
@ -183,7 +183,7 @@ Real incident: Malicious MCP servers impersonating legitimate ones, altering too
|
|||
|
||||
## ASI05 — Unexpected Code Execution
|
||||
|
||||
**Category:** Code generation and execution safety
|
||||
**Category:** Code generation and execution safety | **MITRE ATLAS:** AML.T0011 (User Execution)
|
||||
|
||||
### Description
|
||||
Agents generate or execute code unsafely through shell commands, eval-like constructs, script
|
||||
|
|
@ -225,7 +225,7 @@ Coding agents like Claude Code are high-risk because code generation and executi
|
|||
|
||||
## ASI06 — Memory and Context Poisoning
|
||||
|
||||
**Category:** State integrity and persistence
|
||||
**Category:** State integrity and persistence | **MITRE ATLAS:** AML.T0058 (AI Agent Context Poisoning), AML.T0020 (Poison Training Data)
|
||||
|
||||
### Description
|
||||
Agents rely on memory systems, embeddings, RAG databases, context windows, and summaries to maintain
|
||||
|
|
@ -266,7 +266,7 @@ Unlike one-shot injection, memory poisoning executes on every future session wit
|
|||
|
||||
## ASI07 — Insecure Inter-Agent Communication
|
||||
|
||||
**Category:** Multi-agent protocol integrity
|
||||
**Category:** Multi-agent protocol integrity | **MITRE ATLAS:** AML.T0062 (Exfiltration via AI Agent Tool Invocation)
|
||||
|
||||
### Description
|
||||
In multi-agent architectures, agents coordinate through message passing over MCP, RPC, shared files,
|
||||
|
|
@ -309,7 +309,7 @@ downstream agents through compromised peers.
|
|||
|
||||
## ASI08 — Cascading Failures
|
||||
|
||||
**Category:** System resilience and blast radius
|
||||
**Category:** System resilience and blast radius | **MITRE ATLAS:** AML.T0029 (Denial of ML Service)
|
||||
|
||||
### Description
|
||||
In interconnected multi-agent architectures, a single compromised or hallucinating agent can
|
||||
|
|
@ -353,7 +353,7 @@ execute without verification, multiplying the blast radius.
|
|||
|
||||
## ASI09 — Human-Agent Trust Exploitation
|
||||
|
||||
**Category:** Human oversight and social engineering
|
||||
**Category:** Human oversight and social engineering | **MITRE ATLAS:** AML.T0043 (Craft Adversarial Data)
|
||||
|
||||
### Description
|
||||
Users and operators over-trust agent recommendations due to their confident, authoritative
|
||||
|
|
@ -400,7 +400,7 @@ copilots approving fraudulent transactions; support agents soliciting credential
|
|||
|
||||
## ASI10 — Rogue Agents
|
||||
|
||||
**Category:** Agent alignment and containment
|
||||
**Category:** Agent alignment and containment | **MITRE ATLAS:** AML.T0018 (Backdoor ML Model)
|
||||
|
||||
### Description
|
||||
Rogue agents are compromised or misaligned agents that act harmfully while appearing legitimate.
|
||||
|
|
|
|||
|
|
@ -10,6 +10,8 @@ Source: https://genai.owasp.org/llm-top-10/ — OWASP GenAI Security Project v20
|
|||
|
||||
## LLM01 — Prompt Injection
|
||||
|
||||
**MITRE ATLAS:** AML.T0051 (LLM Prompt Injection)
|
||||
|
||||
**Risk:** Attackers manipulate LLM behavior by crafting inputs that override system
|
||||
instructions, bypass guardrails, or cause the model to execute unintended actions.
|
||||
|
||||
|
|
@ -63,6 +65,8 @@ instructions, bypass guardrails, or cause the model to execute unintended action
|
|||
|
||||
## LLM02 — Sensitive Information Disclosure
|
||||
|
||||
**MITRE ATLAS:** AML.T0024 (Exfiltration via ML Inference API)
|
||||
|
||||
**Risk:** LLMs unintentionally expose private, proprietary, or credential data through
|
||||
outputs, memorized training content, or cross-session leakage.
|
||||
|
||||
|
|
@ -113,6 +117,8 @@ outputs, memorized training content, or cross-session leakage.
|
|||
|
||||
## LLM03 — Supply Chain Vulnerabilities
|
||||
|
||||
**MITRE ATLAS:** AML.T0010 (ML Supply Chain Compromise)
|
||||
|
||||
**Risk:** Compromised third-party models, datasets, plugins, MCP servers, or
|
||||
dependencies introduce backdoors, malicious behavior, or known vulnerabilities.
|
||||
|
||||
|
|
@ -161,6 +167,8 @@ dependencies introduce backdoors, malicious behavior, or known vulnerabilities.
|
|||
|
||||
## LLM04 — Data and Model Poisoning
|
||||
|
||||
**MITRE ATLAS:** AML.T0020 (Poison Training Data), AML.T0018 (Backdoor ML Model)
|
||||
|
||||
**Risk:** Malicious or accidental contamination of training data, fine-tuning datasets,
|
||||
RAG knowledge bases, or embeddings degrades model behavior or introduces backdoors.
|
||||
|
||||
|
|
@ -208,6 +216,8 @@ RAG knowledge bases, or embeddings degrades model behavior or introduces backdoo
|
|||
|
||||
## LLM05 — Improper Output Handling
|
||||
|
||||
**MITRE ATLAS:** AML.T0043 (Craft Adversarial Data)
|
||||
|
||||
**Risk:** LLM-generated output is passed to downstream systems without adequate
|
||||
validation or sanitization, enabling injection attacks, privilege escalation, or
|
||||
unintended side effects.
|
||||
|
|
@ -262,6 +272,8 @@ unintended side effects.
|
|||
|
||||
## LLM06 — Excessive Agency
|
||||
|
||||
**MITRE ATLAS:** AML.T0061 (AI Agent Tools)
|
||||
|
||||
**Risk:** LLMs granted excessive functionality, permissions, or autonomy take
|
||||
unintended high-impact actions with real-world consequences.
|
||||
|
||||
|
|
@ -317,6 +329,8 @@ unintended high-impact actions with real-world consequences.
|
|||
|
||||
## LLM07 — System Prompt Leakage
|
||||
|
||||
**MITRE ATLAS:** AML.T0024 (Exfiltration via ML Inference API)
|
||||
|
||||
**Risk:** Internal system prompts containing sensitive instructions, credentials, or
|
||||
behavioral guardrails are exposed to users or attackers, enabling bypass or
|
||||
credential theft.
|
||||
|
|
@ -368,6 +382,8 @@ credential theft.
|
|||
|
||||
## LLM08 — Vector and Embedding Weaknesses
|
||||
|
||||
**MITRE ATLAS:** AML.T0020 (Poison Training Data), AML.T0019 (Publish Poisoned Datasets)
|
||||
|
||||
**Risk:** Vulnerabilities in how embeddings are generated, stored, or retrieved allow
|
||||
unauthorized data access, information leakage, or manipulation of RAG-based agent
|
||||
behavior.
|
||||
|
|
@ -421,6 +437,8 @@ behavior.
|
|||
|
||||
## LLM09 — Misinformation
|
||||
|
||||
**MITRE ATLAS:** AML.T0031 (Erode ML Model Integrity)
|
||||
|
||||
**Risk:** LLMs generate plausible but factually incorrect outputs (hallucinations) that
|
||||
are acted upon without verification, leading to incorrect decisions, security bypasses,
|
||||
or dependency on non-existent resources.
|
||||
|
|
@ -475,6 +493,8 @@ or dependency on non-existent resources.
|
|||
|
||||
## LLM10 — Unbounded Consumption
|
||||
|
||||
**MITRE ATLAS:** AML.T0029 (Denial of ML Service), AML.T0034 (Cost Harvesting)
|
||||
|
||||
**Risk:** Uncontrolled resource usage by LLM applications enables denial of service,
|
||||
financial exploitation via excessive API costs, or unauthorized model capability
|
||||
extraction through systematic querying.
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@ skill-scanner-agent threat model, and cross-mapped to OWASP LLM Top 10 and Agent
|
|||
|
||||
## AST01 — Prompt Injection via Skill Content
|
||||
|
||||
**Category:** Instruction integrity | **Maps to:** LLM01, ASI01 | **Severity:** CRITICAL in frontmatter; HIGH in body
|
||||
**Category:** Instruction integrity | **Maps to:** LLM01, ASI01 | **Severity:** CRITICAL in frontmatter; HIGH in body | **MITRE ATLAS:** AML.T0051 (LLM Prompt Injection)
|
||||
|
||||
Instructions embedded in skill/command/agent files that override model operating rules. Frontmatter
|
||||
`name`/`description` fields load directly into the system prompt — injections here bypass all hooks.
|
||||
|
|
@ -32,7 +32,7 @@ in any frontmatter field; spoofed headers or identity phrases anywhere in skill
|
|||
|
||||
## AST02 — Data Exfiltration from Skills
|
||||
|
||||
**Category:** Data protection | **Maps to:** LLM02, ASI02 | **Severity:** CRITICAL (credential+network); HIGH (file reads alone)
|
||||
**Category:** Data protection | **Maps to:** LLM02, ASI02 | **Severity:** CRITICAL (credential+network); HIGH (file reads alone) | **MITRE ATLAS:** AML.T0024 (Exfiltration via ML Inference API), AML.T0062 (Exfiltration via AI Agent Tool Invocation)
|
||||
|
||||
Skills instructing the agent to read sensitive local files and transmit their contents externally.
|
||||
ToxicSkills found 17.7% of scanned skills fetch from or post to untrusted URLs.
|
||||
|
|
@ -52,7 +52,7 @@ reads to `~/.ssh/`, `~/.env`, `~/.aws/credentials`, `~/.npmrc`; `| base64` on en
|
|||
|
||||
## AST03 — Privilege Escalation via Skill Tools
|
||||
|
||||
**Category:** Authorization | **Maps to:** LLM06, ASI03 | **Severity:** CRITICAL (hook/settings writes); HIGH (unjustified Bash)
|
||||
**Category:** Authorization | **Maps to:** LLM06, ASI03 | **Severity:** CRITICAL (hook/settings writes); HIGH (unjustified Bash) | **MITRE ATLAS:** AML.T0012 (Valid Accounts)
|
||||
|
||||
Skills requesting tool permissions beyond their stated function, or instructing the agent to modify
|
||||
the plugin/hook infrastructure. Excess tools expand blast radius and enable chained attacks.
|
||||
|
|
@ -72,7 +72,7 @@ in non-execution skills as HIGH. `pre-write-pathguard.mjs` blocks writes to hook
|
|||
|
||||
## AST04 — Scope Creep and Credential Access
|
||||
|
||||
**Category:** Credential protection | **Maps to:** LLM02, LLM06, ASI03 | **Severity:** CRITICAL (wallet/SSH/cloud); HIGH (dev tokens)
|
||||
**Category:** Credential protection | **Maps to:** LLM02, LLM06, ASI03 | **Severity:** CRITICAL (wallet/SSH/cloud); HIGH (dev tokens) | **MITRE ATLAS:** AML.T0035 (ML Artifact Collection)
|
||||
|
||||
Skills that exceed their documented purpose by reading sensitive credential files. The "rug-pull"
|
||||
attack: skill gains adoption legitimately, then an update introduces harvesting framed as diagnostics.
|
||||
|
|
@ -94,7 +94,7 @@ is an escalating severity signal. Update `pre-bash-destructive.mjs` pattern list
|
|||
|
||||
## AST05 — Hidden Instructions in Skills
|
||||
|
||||
**Category:** Instruction integrity | **Maps to:** LLM01, ASI01 | **Severity:** CRITICAL for any confirmed instance
|
||||
**Category:** Instruction integrity | **Maps to:** LLM01, ASI01 | **Severity:** CRITICAL for any confirmed instance | **MITRE ATLAS:** AML.T0051 (LLM Prompt Injection)
|
||||
|
||||
Malicious content concealed from human review but interpreted by LLMs. Unicode steganography,
|
||||
base64-encoded payloads, and HTML comment injection are documented ClawHavoc techniques. Effective
|
||||
|
|
@ -117,7 +117,7 @@ high density of U+200B-U+200D in plain-English files; base64 strings >40 chars a
|
|||
|
||||
## AST06 — Toolchain Manipulation via Skills
|
||||
|
||||
**Category:** Supply chain | **Maps to:** LLM03, ASI04 | **Severity:** CRITICAL (registry redirection); HIGH (package install)
|
||||
**Category:** Supply chain | **Maps to:** LLM03, ASI04 | **Severity:** CRITICAL (registry redirection); HIGH (package install) | **MITRE ATLAS:** AML.T0010 (ML Supply Chain Compromise)
|
||||
|
||||
Skills that modify the dependency graph or package manager configuration to introduce malicious
|
||||
packages. Registry redirection poisons all subsequent installs, not just the immediate one.
|
||||
|
|
@ -138,7 +138,7 @@ any package a skill recommends installing. Flag any registry URL change as CRITI
|
|||
|
||||
## AST07 — Persistence Mechanisms via Skills
|
||||
|
||||
**Category:** System integrity | **Maps to:** LLM01, LLM03, ASI10 | **Severity:** CRITICAL for all variants
|
||||
**Category:** System integrity | **Maps to:** LLM01, LLM03, ASI10 | **Severity:** CRITICAL for all variants | **MITRE ATLAS:** AML.T0018 (Backdoor ML Model)
|
||||
|
||||
Skills that attempt to survive session termination via system startup modification, scheduled tasks,
|
||||
or hook registration. AMOS (ClawHavoc) used macOS LaunchAgents; Claude Code hooks are an additional
|
||||
|
|
@ -161,7 +161,7 @@ persistence commands. `pre-write-pathguard.mjs` blocks plugin/hook path writes.
|
|||
|
||||
## AST08 — Skill Description Mismatch
|
||||
|
||||
**Category:** Trust boundary | **Maps to:** LLM06, ASI09 | **Severity:** HIGH; CRITICAL if mismatch enables privilege escalation
|
||||
**Category:** Trust boundary | **Maps to:** LLM06, ASI09 | **Severity:** HIGH; CRITICAL if mismatch enables privilege escalation | **MITRE ATLAS:** AML.T0043 (Craft Adversarial Data)
|
||||
|
||||
Frontmatter description claims read-only or safe analysis, but `allowed-tools`/`tools` grant
|
||||
write/execution capabilities. Users approve installation based on stated description, not actual
|
||||
|
|
@ -183,7 +183,7 @@ security agents. Re-scan all frontmatter after plugin updates — description dr
|
|||
|
||||
## AST09 — Over-Privileged Knowledge Access
|
||||
|
||||
**Category:** Data trust | **Maps to:** LLM04, ASI06 | **Severity:** HIGH (bulk loads); MEDIUM (missing attribution)
|
||||
**Category:** Data trust | **Maps to:** LLM04, ASI06 | **Severity:** HIGH (bulk loads); MEDIUM (missing attribution) | **MITRE ATLAS:** AML.T0035 (ML Artifact Collection), AML.T0036 (Data from Information Repositories)
|
||||
|
||||
Knowledge files treated as trusted instructions rather than reference data. Skills loading entire
|
||||
`knowledge/` directories without selection violate the context budget rule (max 3 files per
|
||||
|
|
@ -207,7 +207,7 @@ with explicit data framing before passing to subagents.
|
|||
|
||||
## AST10 — Uncontrolled Skill Execution
|
||||
|
||||
**Category:** Resource control | **Maps to:** LLM10, ASI08 | **Severity:** HIGH; CRITICAL if combined with AST01 trigger
|
||||
**Category:** Resource control | **Maps to:** LLM10, ASI08 | **Severity:** HIGH; CRITICAL if combined with AST01 trigger | **MITRE ATLAS:** AML.T0011 (User Execution)
|
||||
|
||||
Skills or commands without iteration limits, file count caps, or circuit breakers in loop contexts.
|
||||
Enables Denial of Wallet attacks and runaway autonomous pipelines. Especially dangerous in harness
|
||||
|
|
|
|||
82
plugins/llm-security/tests/scanners/knowledge-atlas.test.mjs
Normal file
82
plugins/llm-security/tests/scanners/knowledge-atlas.test.mjs
Normal file
|
|
@ -0,0 +1,82 @@
|
|||
// knowledge-atlas.test.mjs — Tests for MITRE ATLAS IDs in knowledge files + Norwegian context
|
||||
// Verifies: each knowledge file contains AML.T references; norwegian-context.md exists and has content
|
||||
|
||||
import { describe, it } from 'node:test';
|
||||
import assert from 'node:assert/strict';
|
||||
import { readFileSync } from 'node:fs';
|
||||
import { resolve } from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
|
||||
const __dirname = fileURLToPath(new URL('.', import.meta.url));
|
||||
const ROOT = resolve(__dirname, '../..');
|
||||
const KNOWLEDGE = resolve(ROOT, 'knowledge');
|
||||
|
||||
/**
 * Reads a knowledge file as UTF-8 text.
 *
 * @param {string} filename - File name, resolved against the KNOWLEDGE directory.
 * @returns {string|null} The file contents, or null when the file does not exist.
 * @throws {Error} Any read failure other than a missing file (e.g. EACCES, EISDIR).
 */
function readKnowledge(filename) {
  try {
    return readFileSync(resolve(KNOWLEDGE, filename), 'utf-8');
  } catch (err) {
    // Only a missing file maps to null, which the "file exists" tests report.
    // Other failures are rethrown so they are not misreported as "should exist".
    if (err?.code === 'ENOENT') {
      return null;
    }
    throw err;
  }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// ATLAS IDs in OWASP and threat pattern files
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Knowledge files that must carry MITRE ATLAS technique IDs, each with the
// minimum number of `AML.Txxxx` references expected in that file.
const atlasFiles = [
  { file: 'owasp-llm-top10.md', minRefs: 10 },
  { file: 'owasp-agentic-top10.md', minRefs: 10 },
  { file: 'owasp-skills-top10.md', minRefs: 10 },
  { file: 'mcp-threat-patterns.md', minRefs: 5 },
  { file: 'deepmind-agent-traps.md', minRefs: 5 },
  { file: 'mitigation-matrix.md', minRefs: 5 },
];

describe('MITRE ATLAS IDs in knowledge files', () => {
  for (const { file, minRefs } of atlasFiles) {
    describe(file, () => {
      const content = readKnowledge(file);

      // Returns the file text, failing with a readable assertion (instead of a
      // TypeError on null) when the knowledge file is missing.
      const requireContent = () => {
        assert.ok(content !== null, `${file} should exist`);
        return content;
      };

      it('file exists', () => {
        requireContent();
      });

      it(`contains at least ${minRefs} AML.T references`, () => {
        // Counts every occurrence, so repeated IDs each contribute to the total.
        const matches = requireContent().match(/AML\.T\d{4}/g) ?? [];
        assert.ok(
          matches.length >= minRefs,
          `Expected >= ${minRefs} AML.T refs, found ${matches.length}`
        );
      });

      it('contains AML.T0051 (LLM Prompt Injection)', () => {
        assert.ok(
          requireContent().includes('AML.T0051'),
          `${file} should reference AML.T0051`
        );
      });
    });
  }
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Norwegian regulatory context
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('knowledge/norwegian-context.md', () => {
  const content = readKnowledge('norwegian-context.md');

  // Returns the file text, failing with a readable assertion (instead of a
  // TypeError on null) when the knowledge file is missing.
  const requireContent = () => {
    assert.ok(content !== null, 'norwegian-context.md should exist');
    return content;
  };

  it('file exists', () => {
    requireContent();
  });

  // Names the file is required to mention; one test is generated per name.
  for (const agency of ['Datatilsynet', 'NSM', 'Digdir']) {
    it(`references ${agency}`, () => {
      assert.ok(requireContent().includes(agency), `Should reference ${agency}`);
    });
  }

  it('contains verification URLs', () => {
    assert.ok(/https?:\/\//.test(requireContent()), 'Should contain verification URLs');
  });
});
|
||||
Loading…
Add table
Add a link
Reference in a new issue