ktg-plugin-marketplace/plugins/llm-security-copilot/scanners/network-mapper.mjs
Kjell Tore Guttormsen f418a8fe08 feat(llm-security-copilot): port llm-security v5.1.0 to GitHub Copilot CLI
Full port of llm-security plugin for internal use on Windows with GitHub
Copilot CLI. Protocol translation layer (copilot-hook-runner.mjs)
normalizes Copilot camelCase I/O to Claude Code snake_case format — all
original hook scripts run unmodified.

- 8 hooks with protocol translation (stdin/stdout/exit code)
- 18 SKILL.md skills (Agent Skills Open Standard)
- 6 .agent.md agent definitions
- 20 scanners + 14 scanner lib modules (unchanged)
- 14 knowledge files (unchanged)
- 39 test files including copilot-port-verify.mjs (17 tests)
- Windows-ready: node:path, os.tmpdir(), process.execPath, no bash

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-09 21:56:10 +02:00

594 lines
22 KiB
JavaScript

// network-mapper.mjs — Discovers and classifies all outbound URLs and network references
// Zero dependencies (Node.js builtins only via lib helpers + node:dns).
//
// Rationale: Malicious skills and MCP servers frequently phone home to attacker-controlled
// infrastructure — data exfiltration webhooks, tunneling services, URL shorteners that
// redirect to C2 endpoints, or hardcoded IP addresses that bypass DNS/cert validation.
// This scanner catalogs every network reference and flags anything suspicious.
//
// References:
// - OWASP LLM02 (Sensitive Information Disclosure — exfiltration endpoints)
// - OWASP LLM03 (Supply Chain — third-party network dependencies)
// - MCPTox research: rug-pull via domain reassignment after install
// - Pillar Security: MCP tool description injection with exfiltration callbacks
import { readTextFile } from './lib/file-discovery.mjs';
import { finding, scannerResult } from './lib/output.mjs';
import { SEVERITY } from './lib/severity.mjs';
import { redact } from './lib/string-utils.mjs';
import dns from 'node:dns';
import { promisify } from 'node:util';
// ---------------------------------------------------------------------------
// DNS helpers
// ---------------------------------------------------------------------------
// Promise-returning wrapper around callback-style dns.resolve4; raced against a
// manual timeout in resolveDomain() below, since resolve4 takes no AbortSignal.
const resolve4 = promisify(dns.resolve4);
// ---------------------------------------------------------------------------
// URL extraction patterns
// ---------------------------------------------------------------------------
/**
 * Standard http/https URLs including query strings and fragments. The character
 * class stops at whitespace, quotes, angle brackets, closing brackets/braces,
 * and commas so URLs embedded in code or markdown don't swallow trailing syntax.
 */
const URL_REGEX = /https?:\/\/[^\s'"<>\]\)}{,]+/g;
/** IP-based URLs — numeric host in http/https scheme. */
// NOTE(review): IP_URL_REGEX is not referenced anywhere in this file —
// scanFileContent classifies IP-hosted URLs via extractDomain + isIpAddress
// instead. Confirm whether this constant can be removed.
const IP_URL_REGEX = /https?:\/\/(\d{1,3}\.){3}\d{1,3}(?:[:/][^\s'"<>\]\)}{,]*)?/g;
/**
 * Bare IPv4 addresses in source code. The (?<!\d)/(?!\d) lookarounds prevent
 * matching inside longer digit runs; matches are only reported when the same
 * line also matches NETWORK_KEYWORDS (see scanFileContent).
 */
const BARE_IP_REGEX = /(?<!\d)(\d{1,3}\.){3}\d{1,3}(?!\d)/g;
/** Network-context keywords (case-insensitive) that make a bare IP worth reporting. */
const NETWORK_KEYWORDS = /\b(?:fetch|http|https|connect|socket|tcp|udp|url|endpoint|host|addr|address|server|client|request|xhr|axios|got|superagent|node-fetch|undici)\b/i;
// ---------------------------------------------------------------------------
// Domain classification sets
// ---------------------------------------------------------------------------
/**
 * Trusted domains — documentation sites, package registries, major cloud providers,
 * and RFC 2606 reserved example domains. No finding is generated for these.
 *
 * Matching is exact (Set.has on the full lowercased hostname) — subdomains are
 * NOT implied, which is why e.g. both 'github.com' and 'api.github.com' appear.
 */
const TRUSTED_DOMAINS = new Set([
  // Source forges and package registries
  'github.com', 'api.github.com', 'raw.githubusercontent.com', 'gist.github.com',
  'gitlab.com', 'bitbucket.org',
  'npmjs.org', 'www.npmjs.com', 'registry.npmjs.org',
  'pypi.org', 'files.pythonhosted.org',
  'crates.io', 'static.crates.io',
  'pkg.go.dev', 'proxy.golang.org',
  'rubygems.org', 'packagist.org',
  'nuget.org', 'api.nuget.org',
  // Microsoft ecosystem
  'microsoft.com', 'learn.microsoft.com', 'aka.ms', 'azure.com',
  'azurewebsites.net', 'azurestaticapps.net',
  'dev.azure.com', 'management.azure.com',
  'login.microsoftonline.com', 'graph.microsoft.com',
  'schemas.microsoft.com',
  'outlook.com', 'office.com', 'office365.com',
  // AI providers (primary)
  'anthropic.com', 'api.anthropic.com',
  'openai.com', 'api.openai.com',
  'huggingface.co', 'api-inference.huggingface.co',
  // Google / GCP
  'google.com', 'googleapis.com', 'gstatic.com', 'googleusercontent.com',
  'cloud.google.com',
  // AWS
  'amazonaws.com', 'aws.amazon.com', 'awsstatic.com',
  // Standards and documentation
  'stackoverflow.com',
  'developer.mozilla.org', 'mdn.io',
  'wikipedia.org', 'en.wikipedia.org',
  'www.w3.org', 'w3.org',
  'json-schema.org',
  'spdx.org',
  'creativecommons.org',
  'owasp.org',
  'ietf.org', 'rfc-editor.org', 'tools.ietf.org',
  'ecma-international.org',
  // CI/CD and devtools
  'travis-ci.com', 'travis-ci.org',
  'circleci.com',
  'codecov.io', 'coveralls.io',
  'snyk.io',
  'semver.org',
  'shields.io', 'img.shields.io', // badge URLs in README
  // RFC 2606 reserved (safe by design)
  'example.com', 'example.org', 'example.net',
  'test.com', 'localhost',
  // Local addresses (handled as trusted, not flagged as IP-based)
  '127.0.0.1', '0.0.0.0', '::1',
]);
/**
 * Suspicious domains known to be used for data exfiltration, webhook interception,
 * tunneling, URL shortening (which can redirect to C2), or paste-bin style exfiltration.
 *
 * Matching is exact hostname membership (Set.has). Any hit produces a
 * SEVERITY.HIGH finding in scan() Phase 4.
 */
const SUSPICIOUS_DOMAINS = new Set([
  // Webhook inspection / interception services
  'webhook.site', 'webhookinbox.com',
  'requestbin.com', 'requestbin.net', 'requestbin.org',
  'pipedream.net',
  'hookbin.com',
  'beeceptor.com',
  'requestcatcher.com',
  'smee.io',
  'hookdeck.com',
  // HTTP tunneling / ngrok-alikes
  'ngrok.io', 'ngrok.app', 'ngrok-free.app', 'ngrok.com',
  'serveo.net',
  'localtunnel.me',
  'localhost.run',
  'bore.pub',
  'telebit.cloud',
  'zrok.io',
  'pagekite.me',
  // Paste / ephemeral file sharing (exfiltration vectors)
  'pastebin.com', 'pastebin.pl',
  'paste.ee',
  'hastebin.com',
  'dpaste.org', 'dpaste.com',
  'sprunge.us',
  'ix.io',
  'clbin.com',
  // Ephemeral file hosting
  'transfer.sh',
  'file.io',
  'filedropper.com',
  'filebin.net',
  'tmpfiles.org',
  'temp.sh',
  // URL shorteners (can mask final destination)
  'bit.ly',
  'tinyurl.com',
  'is.gd',
  't.co',
  'goo.gl',
  'ow.ly',
  'buff.ly',
  'rebrand.ly',
  'shorturl.at',
  'cutt.ly',
  'tiny.cc',
  // Chat platform webhooks (legitimate, but suspicious in plugin code)
  'discord.gg',
  'discord.com', // discord.com/api/webhooks is a common exfil target
  'slack.com', // slack.com/api with a bot token in code is suspicious
  'telegram.org', 'api.telegram.org',
]);
// ---------------------------------------------------------------------------
// Local / loopback address detection
// ---------------------------------------------------------------------------
/**
 * Loopback and non-routable (private / link-local) IPv4 + IPv6-loopback
 * patterns. Addresses in these ranges are rated MEDIUM rather than HIGH.
 */
const LOOPBACK_PATTERNS = [
  /^127\.\d+\.\d+\.\d+$/, // loopback 127.0.0.0/8
  /^0\.0\.0\.0$/, // wildcard bind address
  /^10\.\d+\.\d+\.\d+$/, // RFC 1918 private 10.0.0.0/8
  /^192\.168\.\d+\.\d+$/, // RFC 1918 private 192.168.0.0/16
  /^172\.(1[6-9]|2\d|3[01])\.\d+\.\d+$/, // RFC 1918 private 172.16.0.0/12
  /^169\.254\.\d+\.\d+$/, // link-local 169.254.0.0/16
  /^::1$/, // IPv6 loopback
];

/** True when `ip` falls in any loopback/private/link-local range above. */
function isPrivateOrLoopback(ip) {
  for (const pattern of LOOPBACK_PATTERNS) {
    if (pattern.test(ip)) return true;
  }
  return false;
}
// ---------------------------------------------------------------------------
// URL normalization helpers
// ---------------------------------------------------------------------------
/**
 * Extract the effective domain (lowercased hostname, no port/path) from a URL.
 * Falls back to a scheme-prefix regex when the URL constructor rejects the
 * input. Returns null when neither approach yields a host.
 *
 * @param {string} rawUrl - Candidate URL text
 * @returns {string|null} Lowercased hostname, or null if unparseable
 */
function extractDomain(rawUrl) {
  try {
    const { hostname } = new URL(rawUrl);
    // A trailing dot (fully-qualified form) is stripped so 'a.com.' === 'a.com'.
    return hostname.replace(/\.$/, '').toLowerCase();
  } catch {
    // Fallback: take everything between the scheme and the first / : ? #
    const schemeHost = /^https?:\/\/([^/:?#]+)/i.exec(rawUrl);
    return schemeHost === null ? null : schemeHost[1].toLowerCase();
  }
}
/**
 * Shape-only test for an IPv4-looking hostname: four dot-separated 1-3 digit
 * groups. Octet range validation (0-255) is isValidIpv4's job.
 */
function isIpAddress(host) {
  const ipv4Shape = /^\d{1,3}(?:\.\d{1,3}){3}$/;
  return ipv4Shape.test(host);
}
/**
 * Validate that a string is a well-formed IPv4 address: exactly four octets,
 * each a run of 1-3 digits with numeric value 0-255.
 *
 * Fix: the previous version coerced each part with Number(), so empty octets
 * ('1..2.3' → Number('') === 0), whitespace (' 1'), and hex ('0x4') all passed.
 * Each part must now be purely 1-3 decimal digits before the range check.
 *
 * @param {string} host - Candidate IPv4 string
 * @returns {boolean} True when every octet is a decimal number in 0-255
 */
function isValidIpv4(host) {
  const parts = host.split('.');
  if (parts.length !== 4) return false;
  return parts.every((p) => /^\d{1,3}$/.test(p) && Number(p) <= 255);
}
// ---------------------------------------------------------------------------
// DNS resolution with timeout
// ---------------------------------------------------------------------------
const DNS_TIMEOUT_MS = 3000; // hard cap per lookup
const DNS_MAX_LOOKUPS = 50; // global cap enforced by scan()
/**
 * Attempt to resolve a domain to IPv4, with a hard timeout.
 *
 * Fixes over the previous version:
 *  - One lookup instead of two: resolve4 with { ttl: true } (Node >= 18)
 *    returns { address, ttl } records, giving both the address list and the
 *    low-TTL signal from a single query.
 *  - The race-timeout timer is cleared in `finally`; previously both timeout
 *    setTimeout calls were never cleared, keeping the event loop busy for up
 *    to DNS_TIMEOUT_MS per lookup.
 *  - Removed a dead AbortController that aborted nothing (resolve4 takes no
 *    AbortSignal — the timeout race is the actual cancellation mechanism).
 *
 * @param {string} domain - Hostname to resolve
 * @returns {Promise<{ resolved: boolean, addresses: string[], lowTtl: boolean }>}
 *   `lowTtl` is true when any A record has TTL < 60s — an infrastructure-churn
 *   indicator common in rug-pull attacks. Never rejects; failures (NXDOMAIN,
 *   SERVFAIL, no network, timeout) yield { resolved: false, ... }.
 */
async function resolveDomain(domain) {
  let timer;
  const timeout = new Promise((_, reject) => {
    timer = setTimeout(() => reject(new Error('dns_timeout')), DNS_TIMEOUT_MS);
  });
  try {
    // promisified resolve4 forwards the options argument to dns.resolve4.
    const records = await Promise.race([resolve4(domain, { ttl: true }), timeout]);
    const addresses = [];
    let lowTtl = false;
    for (const record of records) {
      if (record !== null && typeof record === 'object') {
        addresses.push(record.address);
        if (record.ttl < 60) lowTtl = true;
      } else {
        // Defensive: fall back gracefully if a runtime returns plain strings.
        addresses.push(String(record));
      }
    }
    return { resolved: true, addresses, lowTtl };
  } catch {
    // Resolution failure is an expected outcome, not an error — report it.
    return { resolved: false, addresses: [], lowTtl: false };
  } finally {
    clearTimeout(timer);
  }
}
// ---------------------------------------------------------------------------
// Per-file scanning
// ---------------------------------------------------------------------------
/**
 * Scan a single file's text for URLs and bare IP references.
 *
 * @param {string} content - File text content
 * @param {string} relPath - Relative file path for finding output
 * @returns {{ urlOccurrences: Map<string, { relPath: string, line: number }[]>,
 *             ipUrlOccurrences: Map<string, { relPath: string, line: number }[]>,
 *             bareIpOccurrences: Map<string, { relPath: string, line: number }[]> }}
 */
function scanFileContent(content, relPath) {
  const urlOccurrences = new Map(); // normalized URL → [{relPath, line}]
  const ipUrlOccurrences = new Map(); // ip-based URL → [{relPath, line}]
  const bareIpOccurrences = new Map(); // bare IP → [{relPath, line}]

  // Append one occurrence to the given map, creating the bucket on first use.
  const record = (map, key, lineNo) => {
    const bucket = map.get(key);
    if (bucket === undefined) {
      map.set(key, [{ relPath, line: lineNo }]);
    } else {
      bucket.push({ relPath, line: lineNo });
    }
  };

  content.split('\n').forEach((lineText, idx) => {
    const lineNo = idx + 1;

    // Standard http/https URLs, routed by whether the host is an IP address.
    const urlHits = [...lineText.matchAll(URL_REGEX)];
    for (const hit of urlHits) {
      const cleaned = hit[0].replace(/[.,;:!?]+$/, ''); // strip trailing punctuation
      const domain = extractDomain(cleaned);
      if (!domain) continue;
      if (isIpAddress(domain)) {
        record(ipUrlOccurrences, cleaned, lineNo);
      } else {
        record(urlOccurrences, cleaned, lineNo);
      }
    }

    // Bare IPs only count when the line also mentions network-context keywords.
    if (NETWORK_KEYWORDS.test(lineText)) {
      for (const hit of lineText.matchAll(BARE_IP_REGEX)) {
        const ip = hit[0];
        if (!isValidIpv4(ip)) continue;
        // Skip IPs already captured as part of a URL on this line.
        if (urlHits.some((u) => u[0].includes(ip))) continue;
        record(bareIpOccurrences, ip, lineNo);
      }
    }
  });

  return { urlOccurrences, ipUrlOccurrences, bareIpOccurrences };
}
// ---------------------------------------------------------------------------
// Merge occurrence maps across files
// ---------------------------------------------------------------------------
/**
 * Fold every (key → locations) entry of `source` into `target`.
 * New keys get a defensive copy of the source array; existing keys are
 * appended to in place.
 */
function mergeOccurrences(target, source) {
  for (const [key, locations] of source) {
    const existing = target.get(key);
    if (existing === undefined) {
      target.set(key, locations.slice());
    } else {
      for (const loc of locations) existing.push(loc);
    }
  }
}
// ---------------------------------------------------------------------------
// Evidence formatter
// ---------------------------------------------------------------------------
/**
 * Build a compact evidence string from an occurrence list: deduplicated
 * "path:line" references, at most 3 shown, with a "+N more" suffix for the rest.
 */
function formatLocations(occurrences) {
  const seen = new Set();
  const deduped = occurrences.filter((loc) => {
    const tag = `${loc.relPath}:${loc.line}`;
    if (seen.has(tag)) return false;
    seen.add(tag);
    return true;
  });
  const labels = deduped.slice(0, 3).map((loc) => `${loc.relPath}:${loc.line}`);
  const hidden = deduped.length - labels.length;
  if (hidden > 0) labels.push(`+${hidden} more`);
  return labels.join(', ');
}
// ---------------------------------------------------------------------------
// Public scanner entry point
// ---------------------------------------------------------------------------
/**
 * Scan a target path for outbound URLs and network references.
 *
 * Pipeline:
 *   1. Extract URLs and bare IPs from every discovered file.
 *   2. Group URL occurrences by effective domain.
 *   3. DNS-resolve non-trusted, non-IP domains (bounded, in parallel).
 *   4. Emit findings for suspicious (HIGH) and unknown (INFO) domains.
 *   5. Emit findings for IP-based URLs (private → MEDIUM, public → HIGH).
 *   6. Emit findings for bare public IPs near network code (MEDIUM).
 *
 * Fix: Phase 3 previously awaited each DNS lookup sequentially, which could
 * stall for up to DNS_MAX_LOOKUPS * DNS_TIMEOUT_MS (150s) in the worst case.
 * Lookups are independent, so they now run in parallel via Promise.all.
 *
 * @param {string} targetPath - Absolute path to scan (file or directory root)
 * @param {{ files: Array<{ absPath: string, relPath: string, ext: string, size: number }> }} discovery
 *        Pre-computed file discovery result from the orchestrator.
 * @returns {Promise<object>} Scanner result envelope; on unexpected failure the
 *          status is 'error' and any findings gathered so far are included.
 */
export async function scan(targetPath, discovery) {
  const startMs = Date.now();
  const allFindings = [];
  let filesScanned = 0;
  // Aggregate occurrence maps across all files
  const allUrlOccurrences = new Map(); // rawUrl → [{relPath, line}]
  const allIpUrlOccurrences = new Map(); // rawUrl → [{relPath, line}]
  const allBareIpOccurrences = new Map(); // ip → [{relPath, line}]
  try {
    // --- Phase 1: File scanning ---
    for (const fileInfo of discovery.files) {
      const content = await readTextFile(fileInfo.absPath);
      if (content === null) continue; // unreadable/binary — skip
      filesScanned++;
      const { urlOccurrences, ipUrlOccurrences, bareIpOccurrences } =
        scanFileContent(content, fileInfo.relPath);
      mergeOccurrences(allUrlOccurrences, urlOccurrences);
      mergeOccurrences(allIpUrlOccurrences, ipUrlOccurrences);
      mergeOccurrences(allBareIpOccurrences, bareIpOccurrences);
    }
    // --- Phase 2: Domain deduplication and classification ---
    // Collect unique domains from standard URLs, keyed by domain → [rawUrls]
    const domainToUrls = new Map();
    for (const rawUrl of allUrlOccurrences.keys()) {
      const domain = extractDomain(rawUrl);
      if (!domain) continue;
      if (!domainToUrls.has(domain)) domainToUrls.set(domain, []);
      domainToUrls.get(domain).push(rawUrl);
    }
    // --- Phase 3: DNS resolution for suspicious + unknown domains (optional) ---
    // Lookups are independent, so issue them in parallel (capped at
    // DNS_MAX_LOOKUPS). resolveDomain never rejects — it catches internally —
    // which makes Promise.all safe here.
    const dnsResults = new Map(); // domain → { resolved, addresses, lowTtl }
    const suspiciousAndUnknown = [...domainToUrls.keys()].filter(
      (d) => !TRUSTED_DOMAINS.has(d) && !isIpAddress(d)
    );
    const lookupTargets = suspiciousAndUnknown.slice(0, DNS_MAX_LOOKUPS);
    const lookupResults = await Promise.all(
      lookupTargets.map((domain) => resolveDomain(domain))
    );
    lookupTargets.forEach((domain, i) => dnsResults.set(domain, lookupResults[i]));
    // --- Phase 4: Generate findings for domain-based URLs ---
    for (const [domain, rawUrls] of domainToUrls) {
      // Skip trusted domains entirely
      if (TRUSTED_DOMAINS.has(domain)) continue;
      // Gather all occurrence locations for this domain
      const allLocs = rawUrls.flatMap((u) => allUrlOccurrences.get(u) || []);
      const locationStr = formatLocations(allLocs);
      // Representative URL for evidence: the shortest one. Sort a copy so the
      // map's own arrays are not reordered as a side effect.
      const repUrl = [...rawUrls].sort((a, b) => a.length - b.length)[0];
      const repUrlRedacted = redact(repUrl, 60, 0);
      const dnsInfo = dnsResults.get(domain);
      const dnsNote = dnsInfo
        ? dnsInfo.resolved
          ? dnsInfo.lowTtl
            ? ` DNS resolved (LOW TTL <60s — suspicious infrastructure churn).`
            : ` DNS resolved to: ${dnsInfo.addresses.slice(0, 3).join(', ')}.`
          : ` DNS: NXDOMAIN or unreachable.`
        : '';
      if (SUSPICIOUS_DOMAINS.has(domain)) {
        // HIGH: known exfiltration/tunneling/shortener domain
        allFindings.push(
          finding({
            scanner: 'NET',
            severity: SEVERITY.HIGH,
            title: `Suspicious network endpoint: ${domain}`,
            description:
              `Domain "${domain}" is known to be used for data exfiltration, webhook interception, ` +
              `tunneling (bypasses corporate egress filtering), URL shortening (masks final destination), ` +
              `or ephemeral file sharing. Its presence in plugin/skill code is a strong indicator of ` +
              `malicious intent or accidental exfiltration risk.${dnsNote}`,
            file: allLocs[0]?.relPath || null,
            line: allLocs[0]?.line || null,
            evidence: `${repUrlRedacted} | found at: ${locationStr}`,
            owasp: 'LLM02',
            recommendation:
              'This domain is commonly used for data exfiltration or tunneling. ' +
              'Verify this URL is necessary and intended. If this is test code, move it to ' +
              'a properly isolated test fixture. If it is production code, remove it.',
          })
        );
      } else {
        // INFO: unknown domain — catalog for review, no automatic blocking
        const lowTtlNote =
          dnsInfo?.resolved && dnsInfo?.lowTtl
            ? ' Low DNS TTL detected — possible domain reassignment risk (rug-pull vector).'
            : '';
        allFindings.push(
          finding({
            scanner: 'NET',
            severity: SEVERITY.INFO,
            title: `Unknown external domain: ${domain}`,
            description:
              `Domain "${domain}" is referenced in the codebase but is not on the trusted allowlist. ` +
              `This may be a legitimate third-party dependency, or it may be an unexpected outbound call. ` +
              `Review all network references to verify they are necessary and intentional.${dnsNote}${lowTtlNote}`,
            file: allLocs[0]?.relPath || null,
            line: allLocs[0]?.line || null,
            evidence: `${repUrlRedacted} | found at: ${locationStr}`,
            owasp: 'LLM03',
            recommendation:
              'Verify this external domain is a known, trusted dependency. ' +
              'Document its purpose if it is legitimate.',
          })
        );
      }
    }
    // --- Phase 5: IP-based URL findings ---
    for (const [rawUrl, locs] of allIpUrlOccurrences) {
      const domain = extractDomain(rawUrl);
      if (!domain) continue;
      if (!isValidIpv4(domain)) continue;
      // Loopback/private addresses rate MEDIUM; public IPs rate HIGH.
      const isPrivate = isPrivateOrLoopback(domain);
      const severity = isPrivate ? SEVERITY.MEDIUM : SEVERITY.HIGH;
      const locationStr = formatLocations(locs);
      const urlRedacted = redact(rawUrl, 60, 0);
      allFindings.push(
        finding({
          scanner: 'NET',
          severity,
          title: `IP-based URL: ${domain}`,
          description:
            isPrivate
              ? `URL "${urlRedacted}" uses a private/loopback IP address instead of a domain name. ` +
                `While likely targeting a local service, hardcoded private IPs reduce portability ` +
                `and can indicate development-time infrastructure left in production code.`
              : `URL "${urlRedacted}" uses a public IP address instead of a domain name. ` +
                `IP-based URLs bypass DNS-based security controls, certificate transparency, ` +
                `and many proxy/firewall filtering mechanisms. This is a common technique used ` +
                `by malware to connect to C2 infrastructure that avoids domain reputation checks.`,
          file: locs[0]?.relPath || null,
          line: locs[0]?.line || null,
          evidence: `${urlRedacted} | found at: ${locationStr}`,
          owasp: isPrivate ? 'LLM03' : 'LLM02',
          recommendation: isPrivate
            ? 'Replace hardcoded private IP with a configurable hostname or environment variable.'
            : 'IP-based URLs bypass DNS and certificate validation. Use a domain name instead.',
        })
      );
    }
    // --- Phase 6: Bare IP findings ---
    for (const [ip, locs] of allBareIpOccurrences) {
      if (!isValidIpv4(ip)) continue;
      if (isPrivateOrLoopback(ip)) continue; // Low signal for bare private IPs — skip
      const locationStr = formatLocations(locs);
      allFindings.push(
        finding({
          scanner: 'NET',
          severity: SEVERITY.MEDIUM,
          title: `Bare public IP address in network context: ${ip}`,
          description:
            `A public IP address "${ip}" appears near network-related code (fetch, http, connect, etc.) ` +
            `without being part of a URL. This may indicate a hardcoded server address that bypasses ` +
            `DNS resolution and certificate validation controls.`,
          file: locs[0]?.relPath || null,
          line: locs[0]?.line || null,
          evidence: `IP: ${ip} | found at: ${locationStr}`,
          owasp: 'LLM02',
          recommendation:
            'IP-based URLs bypass DNS and certificate validation. Use a domain name instead.',
        })
      );
    }
    const durationMs = Date.now() - startMs;
    return scannerResult('network-mapper', 'ok', allFindings, filesScanned, durationMs);
  } catch (err) {
    // Unexpected failure: report status 'error' with whatever findings were
    // gathered before the failure, so the orchestrator degrades gracefully.
    const durationMs = Date.now() - startMs;
    return scannerResult(
      'network-mapper',
      'error',
      allFindings,
      filesScanned,
      durationMs,
      String(err?.message || err)
    );
  }
}