ktg-plugin-marketplace/plugins/linkedin-thought-leadership/scripts/analytics/src/parsers/csv-parser.ts
Kjell Tore Guttormsen 39f8b275a6 feat(linkedin-thought-leadership): v1.0.0 — initial open-source import
Build LinkedIn thought leadership with algorithmic understanding,
strategic consistency, and AI-assisted content creation. Updated for
the January 2026 360Brew algorithm change.

16 agents, 25 commands, 6 skills, 9 hooks, 24 reference docs.

Personal data sanitized: voice samples generalized to template,
high-engagement posts cleared, region-specific references replaced
with placeholders.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-07 22:09:03 +02:00

221 lines
6.1 KiB
TypeScript

import { parse } from "csv-parse/sync";
import { readFileSync } from "node:fs";
import type { PostAnalytics, AnalyticsBatch, PostMetrics } from "../models/types.js";
/**
 * Detects the field delimiter (comma vs semicolon) from the first line.
 *
 * Only separators that sit outside double-quoted fields are counted, so a
 * quoted header such as `"Title, short";Views` is still recognised as
 * semicolon-delimited. Ties (including empty input) resolve to a comma.
 *
 * @param content - Raw CSV text; only the text before the first `\n` is inspected.
 * @returns `";"` when unquoted semicolons outnumber unquoted commas, else `","`.
 */
function detectDelimiter(content: string): string {
  const newlineAt = content.indexOf("\n");
  const firstLine = newlineAt === -1 ? content : content.slice(0, newlineAt);

  let commaCount = 0;
  let semicolonCount = 0;
  let insideQuotes = false;
  for (const ch of firstLine) {
    if (ch === '"') {
      // An escaped quote ("") toggles twice, leaving the state unchanged.
      insideQuotes = !insideQuotes;
    } else if (!insideQuotes) {
      if (ch === ",") {
        commaCount += 1;
      } else if (ch === ";") {
        semicolonCount += 1;
      }
    }
  }

  return semicolonCount > commaCount ? ";" : ",";
}
/**
 * Looks up a column value by case-insensitive substring match on the header.
 *
 * Patterns are tried in the order given; for each pattern the first header
 * (in the record's key order) containing it wins. Returns an empty string
 * when no pattern matches any header.
 */
function findColumn(record: Record<string, string>, patterns: string[]): string {
  const headers = Object.keys(record);
  const loweredHeaders = headers.map((header) => header.toLowerCase());

  for (const pattern of patterns) {
    const needle = pattern.toLowerCase();
    const hit = loweredHeaders.findIndex((header) => header.includes(needle));
    const matchedHeader = hit === -1 ? undefined : headers[hit];
    // An empty-string header is treated as "no match" for this pattern.
    if (matchedHeader) {
      return record[matchedHeader];
    }
  }

  return "";
}
/**
 * Parses a metric cell into a non-negative number.
 *
 * Separator handling:
 * - Both `,` and `.` present: the one that appears last is the decimal mark
 *   and the other is a thousands separator (`1,234.56` and `1.234,56` both
 *   yield 1234.56).
 * - Only one kind present: it is treated as a thousands separator, so
 *   `4,523` and `4.523` both yield 4523.
 *
 * Double quotes and whitespace (including non-breaking spaces used as digit
 * grouping) are stripped before parsing.
 *
 * @param value - Raw cell text.
 * @returns The parsed value, or 0 for empty, unparseable, non-finite or
 *   negative input.
 */
function parseMetric(value: string): number {
  if (!value) return 0;

  const cleaned = value.replace(/"/g, "").replace(/\s/g, "");
  const lastComma = cleaned.lastIndexOf(",");
  const lastDot = cleaned.lastIndexOf(".");

  let normalized: string;
  if (lastComma !== -1 && lastDot !== -1) {
    normalized =
      lastDot > lastComma
        ? // US style: commas group thousands, dot is the decimal mark.
          cleaned.replace(/,/g, "")
        : // EU style: dots group thousands, comma is the decimal mark.
          cleaned.replace(/\./g, "").replace(/,/g, ".");
  } else {
    // A lone separator kind is treated as digit grouping.
    normalized = cleaned.replace(/[.,]/g, "");
  }

  const parsed = parseFloat(normalized);
  // NaN / Infinity fall back to 0; negatives are clamped to 0.
  return Number.isFinite(parsed) ? Math.max(0, parsed) : 0;
}
/**
 * Normalizes a date string to zero-padded `YYYY-MM-DD`.
 *
 * Accepted input shapes (day and month may be one or two digits):
 * - `YYYY-MM-DD` and `YYYY/MM/DD`
 * - `DD.MM.YYYY`
 * - `MM/DD/YYYY`
 *
 * Surrounding double quotes and whitespace are ignored. The components are
 * checked against the calendar, so values such as `2024-13-45` or
 * `31.02.2024` are rejected instead of being passed through.
 *
 * @param dateStr - Raw cell text.
 * @returns The normalized date, or `null` when the input is empty, has an
 *   unrecognised shape, or does not name a real calendar date.
 */
function normalizeDate(dateStr: string): string | null {
  if (!dateStr) return null;
  const cleaned = dateStr.replace(/"/g, "").trim();

  // The back-reference forces the same separator in both positions.
  const yearFirst = /^(\d{4})([-/])(\d{1,2})\2(\d{1,2})$/.exec(cleaned);
  const dayFirst = /^(\d{1,2})\.(\d{1,2})\.(\d{4})$/.exec(cleaned);
  const monthFirst = /^(\d{1,2})\/(\d{1,2})\/(\d{4})$/.exec(cleaned);

  let year: number;
  let month: number;
  let day: number;
  if (yearFirst) {
    year = Number(yearFirst[1]);
    month = Number(yearFirst[3]);
    day = Number(yearFirst[4]);
  } else if (dayFirst) {
    day = Number(dayFirst[1]);
    month = Number(dayFirst[2]);
    year = Number(dayFirst[3]);
  } else if (monthFirst) {
    month = Number(monthFirst[1]);
    day = Number(monthFirst[2]);
    year = Number(monthFirst[3]);
  } else {
    return null;
  }

  // Out-of-range components roll over when set on a Date; a round-trip
  // mismatch therefore means the input was not a real calendar date.
  const probe = new Date(0);
  probe.setUTCFullYear(year, month - 1, day);
  const isRealDate =
    probe.getUTCFullYear() === year &&
    probe.getUTCMonth() === month - 1 &&
    probe.getUTCDate() === day;
  if (!isRealDate) return null;

  const pad = (n: number, width: number): string => String(n).padStart(width, "0");
  return `${pad(year, 4)}-${pad(month, 2)}-${pad(day, 2)}`;
}
/**
 * Deterministic, non-cryptographic string hash used to derive IDs.
 *
 * Folds each UTF-16 code unit into a 32-bit signed accumulator using the
 * `acc * 31 + unit` recurrence, then returns the absolute value in base 36.
 */
function simpleHash(str: string): string {
  let acc = 0;
  let pos = 0;
  while (pos < str.length) {
    // Math.imul and `| 0` keep the accumulator in 32-bit signed range.
    acc = (Math.imul(acc, 31) + str.charCodeAt(pos)) | 0;
    pos += 1;
  }
  return Math.abs(acc).toString(36);
}
/**
 * Derives a deterministic post ID by hashing the title and date joined
 * with a colon, so the same title/date pair always yields the same ID.
 */
function generatePostId(title: string, date: string): string {
  const identity = title + ":" + date;
  return simpleHash(identity);
}
/**
 * Builds a batch ID of the form `batch-<epoch millis>-<hash of millis>`
 * from the current time.
 */
function generateBatchId(): string {
  const millis = Date.now();
  const suffix = simpleHash(String(millis));
  return ["batch", millis, suffix].join("-");
}
/**
 * Reads a LinkedIn CSV export from disk and converts it into an AnalyticsBatch.
 *
 * Steps: read the file as UTF-8, drop a leading BOM, detect the delimiter
 * from the first line, parse rows keyed by header, then map each row to a
 * PostAnalytics entry. Rows with an empty title or an unrecognised date are
 * skipped with a console warning.
 *
 * @param filePath - Path of the CSV file to read.
 * @param filename - Name recorded as the export source on the batch and on
 *   every post.
 * @returns The batch with its posts, post count and published-date range
 *   (empty strings when no post was kept).
 */
export function parseLinkedInCSV(
  filePath: string,
  filename: string
): AnalyticsBatch {
  const raw = readFileSync(filePath, "utf-8");
  // A UTF-8 BOM decodes to U+FEFF; drop it before parsing.
  const content = raw.charCodeAt(0) === 0xfeff ? raw.slice(1) : raw;

  const rows = parse(content, {
    columns: true,
    skip_empty_lines: true,
    delimiter: detectDelimiter(content),
    quote: '"',
    trim: true,
  }) as Record<string, string>[];

  const posts: PostAnalytics[] = [];
  for (const [rowIndex, row] of rows.entries()) {
    // +2 in the warnings: one for the header row, one for 1-based numbering.
    const title = findColumn(row, ["content", "title", "post"]);
    const rawDate = findColumn(row, ["date", "published", "posted"]);

    if (!title || title.trim() === "") {
      console.warn(`Warning: Skipping record at line ${rowIndex + 2}: empty title`);
      continue;
    }

    const publishedDate = normalizeDate(rawDate);
    if (!publishedDate) {
      console.warn(`Warning: Skipping record at line ${rowIndex + 2}: invalid date "${rawDate}"`);
      continue;
    }

    const impressions = parseMetric(findColumn(row, ["impression", "view"]));
    const reactions = parseMetric(findColumn(row, ["reaction", "like"]));
    const comments = parseMetric(findColumn(row, ["comment"]));
    const shares = parseMetric(findColumn(row, ["share", "repost"]));
    const clicks = parseMetric(findColumn(row, ["click"]));

    // Engagement rate is the interaction total as a percentage of impressions.
    let engagementRate = 0;
    if (impressions > 0) {
      const totalEngagement = reactions + comments + shares + clicks;
      engagementRate = (totalEngagement / impressions) * 100;
    }

    const metrics: PostMetrics = {
      impressions,
      reactions,
      comments,
      shares,
      clicks,
      engagementRate,
    };

    posts.push({
      id: generatePostId(title, publishedDate),
      title,
      publishedDate,
      metrics,
      importedAt: new Date().toISOString(),
      exportSource: filename,
    });
  }

  // YYYY-MM-DD strings order chronologically under plain string comparison.
  let earliest = "";
  let latest = "";
  for (const { publishedDate } of posts) {
    if (!publishedDate) continue;
    if (earliest === "" || publishedDate < earliest) earliest = publishedDate;
    if (latest === "" || publishedDate > latest) latest = publishedDate;
  }

  return {
    batchId: generateBatchId(),
    importedAt: new Date().toISOString(),
    exportFilename: filename,
    dateRange: { from: earliest, to: latest },
    postCount: posts.length,
    posts,
  };
}