import { parse } from "csv-parse/sync";
import { readFileSync } from "node:fs";
import type { PostAnalytics, AnalyticsBatch, PostMetrics } from "../models/types.js";

/**
 * Detects the field delimiter (comma vs semicolon) from the first line.
 *
 * Quoted segments are removed before counting so that separators inside a
 * quoted header cell do not skew the result.
 *
 * @param content - Full CSV text.
 * @returns ";" when the first line has more semicolons than commas, otherwise ",".
 */
function detectDelimiter(content: string): string {
  const firstLine = content.split("\n")[0] ?? "";
  // A quoted cell may itself contain "," or ";" (and doubled quotes as escapes).
  const unquoted = firstLine.replace(/"(?:[^"]|"")*"/g, "");
  const commaCount = (unquoted.match(/,/g) ?? []).length;
  const semicolonCount = (unquoted.match(/;/g) ?? []).length;
  return semicolonCount > commaCount ? ";" : ",";
}

/**
 * Finds a column value using case-insensitive substring matching on the
 * column names.
 *
 * Patterns are tried in order; the first pattern that matches any column
 * name wins, so earlier patterns take priority over later ones.
 *
 * @param record - One parsed CSV row keyed by column name.
 * @param patterns - Substrings to look for in the column names.
 * @returns The value of the first matching column, or "" when nothing matches.
 */
function findColumn(record: Record<string, string>, patterns: string[]): string {
  const keys = Object.keys(record);
  for (const pattern of patterns) {
    const needle = pattern.toLowerCase();
    const key = keys.find((k) => k.toLowerCase().includes(needle));
    if (key !== undefined) {
      return record[key] ?? "";
    }
  }
  return "";
}

/**
 * Parses a metric value, handling both US (4,523 / 1,234.56) and
 * EU (4.523 / 1.234,56) number formats.
 *
 * Rules:
 * - Both "," and "." present: the one that occurs last is the decimal
 *   separator, the other one is a thousand separator.
 * - Only one kind present: it is a thousand separator when it occurs more
 *   than once or is followed by exactly three digits ("4,523", "4.523");
 *   otherwise it is a decimal separator ("12.5", "12,5").
 *
 * @param value - Raw cell text; may be quoted or empty.
 * @returns The parsed number; 0 for empty, unparsable, non-finite or negative input.
 */
function parseMetric(value: string): number {
  if (!value) return 0;

  // Remove quotes and surrounding whitespace.
  const cleaned = value.replace(/"/g, "").trim();

  const lastComma = cleaned.lastIndexOf(",");
  const lastDot = cleaned.lastIndexOf(".");

  let normalized = cleaned;
  if (lastComma !== -1 && lastDot !== -1) {
    // Mixed separators: the last one is the decimal point.
    const decimalSep = lastComma > lastDot ? "," : ".";
    const groupSep = decimalSep === "," ? "." : ",";
    normalized = cleaned.split(groupSep).join("").replace(decimalSep, ".");
  } else if (lastComma !== -1 || lastDot !== -1) {
    const sep = lastComma !== -1 ? "," : ".";
    const lastSep = Math.max(lastComma, lastDot);
    const occursOnce = cleaned.indexOf(sep) === lastSep;
    // Exactly three digits after the separator reads as a thousands group.
    const looksLikeGroup = /^\d{3}(?!\d)/.test(cleaned.slice(lastSep + 1));
    normalized =
      occursOnce && !looksLikeGroup
        ? cleaned.replace(sep, ".")
        : cleaned.split(sep).join("");
  }

  const parsed = parseFloat(normalized);
  // Clamp negative values to 0; treat NaN / Infinity as 0.
  return Number.isFinite(parsed) ? Math.max(0, parsed) : 0;
}

/**
 * Builds a zero-padded YYYY-MM-DD string after checking that the
 * components form a real calendar date.
 *
 * @returns The ISO date string, or null when the date does not exist
 *          (e.g. month 13 or 31 February).
 */
function toIsoDate(year: string, month: string, day: string): string | null {
  const y = Number(year);
  const m = Number(month);
  const d = Number(day);

  // Date.UTC rolls out-of-range values over (Feb 30 -> Mar 1), so a
  // round-trip mismatch means the input was not a valid date.
  const probe = new Date(Date.UTC(y, m - 1, d));
  if (
    probe.getUTCFullYear() !== y ||
    probe.getUTCMonth() !== m - 1 ||
    probe.getUTCDate() !== d
  ) {
    return null;
  }

  return `${year}-${month.padStart(2, "0")}-${day.padStart(2, "0")}`;
}

/**
 * Normalizes a date to YYYY-MM-DD format.
 *
 * Handles: YYYY-MM-DD, YYYY/MM/DD, DD.MM.YYYY, MM/DD/YYYY. Day and month
 * may be written with one or two digits.
 *
 * @param dateStr - Raw cell text; may be quoted or empty.
 * @returns The normalized date, or null if the format is not recognized or
 *          the date does not exist in the calendar.
 */
function normalizeDate(dateStr: string): string | null {
  if (!dateStr) return null;

  const cleaned = dateStr.replace(/"/g, "").trim();

  // YYYY-MM-DD or YYYY/MM/DD
  const yearFirst =
    /^(\d{4})-(\d{1,2})-(\d{1,2})$/.exec(cleaned) ??
    /^(\d{4})\/(\d{1,2})\/(\d{1,2})$/.exec(cleaned);
  if (yearFirst) {
    const [, year = "", month = "", day = ""] = yearFirst;
    return toIsoDate(year, month, day);
  }

  // DD.MM.YYYY
  const dotted = /^(\d{1,2})\.(\d{1,2})\.(\d{4})$/.exec(cleaned);
  if (dotted) {
    const [, day = "", month = "", year = ""] = dotted;
    return toIsoDate(year, month, day);
  }

  // MM/DD/YYYY
  const slashed = /^(\d{1,2})\/(\d{1,2})\/(\d{4})$/.exec(cleaned);
  if (slashed) {
    const [, month = "", day = "", year = ""] = slashed;
    return toIsoDate(year, month, day);
  }

  // Unrecognized date format
  return null;
}

/**
 * Simple string hash function for generating deterministic post IDs.
 *
 * @param str - Text to hash.
 * @returns The absolute value of a 32-bit rolling hash, encoded in base 36.
 */
function simpleHash(str: string): string {
  let hash = 0;
  for (let i = 0; i < str.length; i++) {
    // hash * 31 + charCode, truncated to a signed 32-bit integer.
    hash = ((hash << 5) - hash + str.charCodeAt(i)) | 0;
  }
  return Math.abs(hash).toString(36);
}

/**
 * Generates a deterministic post ID from title and date.
 *
 * The same title/date pair always yields the same ID.
 */
function generatePostId(title: string, date: string): string {
  return simpleHash(`${title}:${date}`);
}

/**
 * Generates a batch ID from the current timestamp.
 *
 * @returns A string of the form `batch-<epoch ms>-<hash of epoch ms>`.
 */
function generateBatchId(): string {
  const timestamp = Date.now();
  return `batch-${timestamp}-${simpleHash(timestamp.toString())}`;
}

/**
 * Converts one parsed CSV row into a PostAnalytics entry.
 *
 * @param record - Parsed CSV row keyed by column name.
 * @param index - Zero-based record index, used only for warning messages.
 * @param filename - Export filename recorded on the post.
 * @param importedAt - ISO timestamp recorded on the post.
 * @returns The post, or null (after logging a warning) when the row has an
 *          empty title or an invalid date.
 */
function toPostAnalytics(
  record: Record<string, string>,
  index: number,
  filename: string,
  importedAt: string
): PostAnalytics | null {
  const title = findColumn(record, ["content", "title", "post"]);
  const dateStr = findColumn(record, ["date", "published", "posted"]);
  const date = normalizeDate(dateStr);

  // Skip records with empty titles
  if (!title || title.trim() === "") {
    console.warn(`Warning: Skipping record at line ${index + 2}: empty title`);
    return null;
  }

  // Skip records with invalid dates
  if (!date) {
    console.warn(`Warning: Skipping record at line ${index + 2}: invalid date "${dateStr}"`);
    return null;
  }

  const impressions = parseMetric(findColumn(record, ["impression", "view"]));
  const reactions = parseMetric(findColumn(record, ["reaction", "like"]));
  const comments = parseMetric(findColumn(record, ["comment"]));
  const shares = parseMetric(findColumn(record, ["share", "repost"]));
  const clicks = parseMetric(findColumn(record, ["click"]));

  // Engagement rate is a percentage of impressions; 0 when there are none.
  const totalEngagement = reactions + comments + shares + clicks;
  const engagementRate =
    impressions > 0 ? (totalEngagement / impressions) * 100 : 0;

  const metrics: PostMetrics = {
    impressions,
    reactions,
    comments,
    shares,
    clicks,
    engagementRate,
  };

  return {
    id: generatePostId(title, date),
    title,
    publishedDate: date,
    metrics,
    importedAt,
    exportSource: filename,
  };
}

/**
 * Parses a LinkedIn CSV export into a structured AnalyticsBatch.
 *
 * The file is read synchronously as UTF-8, a leading BOM is stripped, the
 * delimiter is auto-detected from the first line, and every row is
 * normalized. Rows with an empty title or an invalid date are skipped with
 * a console warning. All posts and the batch share one import timestamp.
 *
 * @param filePath - Path of the CSV file to read.
 * @param filename - Export filename stored on the batch and on each post.
 * @returns The batch with its posts, post count and covered date range
 *          (empty strings when no valid posts were found).
 * @throws Propagates errors from `readFileSync` and from the CSV parser.
 */
export function parseLinkedInCSV(
  filePath: string,
  filename: string
): AnalyticsBatch {
  let content = readFileSync(filePath, "utf-8");

  // Strip BOM if present
  if (content.charCodeAt(0) === 0xfeff) {
    content = content.slice(1);
  }

  const delimiter = detectDelimiter(content);

  const records = parse(content, {
    columns: true,
    skip_empty_lines: true,
    delimiter,
    quote: '"',
    trim: true,
  }) as Record<string, string>[];

  // One timestamp for the whole import keeps posts and batch consistent.
  const importedAt = new Date().toISOString();

  // Normalize records into PostAnalytics, skipping invalid records
  const posts: PostAnalytics[] = [];
  for (const [index, record] of records.entries()) {
    const post = toPostAnalytics(record, index, filename, importedAt);
    if (post !== null) {
      posts.push(post);
    }
  }

  // YYYY-MM-DD strings sort chronologically with a plain string sort.
  const sortedDates = posts
    .map((p) => p.publishedDate)
    .filter((d) => d)
    .sort();
  const dateRange = {
    from: sortedDates[0] ?? "",
    to: sortedDates[sortedDates.length - 1] ?? "",
  };

  const batch: AnalyticsBatch = {
    batchId: generateBatchId(),
    importedAt,
    exportFilename: filename,
    dateRange,
    postCount: posts.length,
    posts,
  };

  return batch;
}