Neah/lib/server/email-parser.ts
2025-04-25 17:04:18 +02:00

71 lines
2.5 KiB
TypeScript

import { simpleParser } from 'mailparser';
/**
 * Strips document-level markup from an HTML email body so that only the
 * content fragment remains.
 *
 * Removes `<script>` and `<style>` elements, `<meta>` tags, the whole
 * `<head>` and `<title>` elements, and the `<html>` / `<body>` wrapper tags
 * (their content is kept), then collapses every whitespace run to one space
 * and trims the result.
 *
 * Tag names are matched exactly: an opening tag only counts when the name is
 * followed by whitespace, `/` or `>`. Without that anchor `<head...>` would
 * also match `<header>` and could delete real body content up to the next
 * `</head>`.
 *
 * NOTE(review): this is regex-based tidying, not a security sanitizer. Inline
 * event handlers, `javascript:` URLs and similar vectors are left untouched,
 * so the output must still be sanitized before being rendered as trusted HTML.
 *
 * @param html Raw HTML; may be null, undefined or empty.
 * @returns The cleaned fragment, or `''` for falsy input. If cleaning throws,
 *   the error is logged and the input is returned unchanged.
 */
function cleanHtml(html: string | null | undefined): string {
  if (!html) return '';
  try {
    // Basic HTML cleaning without DOMPurify
    return html
      .replace(/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi, '') // Remove script elements
      .replace(/<style\b[^<]*(?:(?!<\/style>)<[^<]*)*<\/style>/gi, '') // Remove style elements
      .replace(/<meta(?:[\s/][^>]*)?>/gi, '') // Remove meta tags (but not e.g. <metadata>)
      .replace(/<head(?:[\s/][^>]*)?>[\s\S]*?<\/head>/gi, '') // Remove head (but not <header>)
      .replace(/<title(?:[\s/][^>]*)?>[\s\S]*?<\/title>/gi, '') // Remove title
      .replace(/<body(?:[\s/][^>]*)?>/gi, '') // Remove body opening tag
      .replace(/<\/body>/gi, '') // Remove body closing tag
      .replace(/<html(?:[\s/][^>]*)?>/gi, '') // Remove html opening tag
      .replace(/<\/html>/gi, '') // Remove html closing tag
      .replace(/\s+/g, ' ') // Collapse whitespace runs
      .trim();
  } catch (error) {
    console.error('Error cleaning HTML:', error);
    // Best effort: hand back the untouched input rather than failing the parse.
    return html;
  }
}
/**
 * Flattens a parsed address header into a comma-separated list of email
 * addresses.
 *
 * Accepts either a single address object or an array of them. Each object is
 * expected to carry a `value` array whose entries have an `address` string
 * and, for group syntax, a `group` array of further entries (assumed to
 * follow mailparser's address-object shape — confirm against the installed
 * version).
 *
 * Every address in every `value` array is included, so a header with several
 * recipients yields all of them rather than only the first.
 *
 * @param address Parsed address header(s), or a falsy value when absent.
 * @returns Addresses joined with `', '`, or `null` when none are present.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- callers pass mailparser objects; kept loose for compatibility
function getAddressText(address: any): string | null {
  if (!address) return null;

  // Normalise to a list so single and repeated headers share one code path.
  const headers: any[] = Array.isArray(address) ? address : [address];
  const found: string[] = [];

  for (const header of headers) {
    const entries: any[] = Array.isArray(header?.value) ? header.value : [];
    for (const entry of entries) {
      if (entry?.address) found.push(entry.address);
      // Group entries carry their members in a nested list.
      if (Array.isArray(entry?.group)) {
        for (const member of entry.group) {
          if (member?.address) found.push(member.address);
        }
      }
    }
  }

  return found.length > 0 ? found.join(', ') : null;
}
/**
 * Parses raw email source with mailparser's `simpleParser` and returns a
 * flat, explicitly shaped summary of the message.
 *
 * Missing subject, date, text and HTML become `null`; HTML is passed through
 * `cleanHtml`; address headers are reduced to strings via `getAddressText`;
 * headers are converted to a plain object with `Object.fromEntries`.
 *
 * @param emailContent Raw email source to parse.
 * @returns The parsed fields (subject, from, to, cc, bcc, date, html, text,
 *   attachments, headers).
 * @throws Rethrows any error raised while parsing, after logging it.
 */
export async function parseEmail(emailContent: string) {
  try {
    // Debug: size of the raw input
    const rawLength = emailContent ? emailContent.length : 0;
    console.log(`Starting to parse email content (length: ${rawLength})`);

    const mail = await simpleParser(emailContent);
    const { subject, html, text, attachments } = mail;

    // Debug: which parts the parser produced
    console.log('Parsed email fields:', {
      hasSubject: Boolean(subject),
      hasHtml: Boolean(html),
      htmlLength: html?.length || 0,
      hasText: Boolean(text),
      textLength: text?.length || 0,
      attachmentsCount: attachments?.length || 0,
    });

    // Only run the cleaner when the message actually has an HTML part
    const sanitizedHtml = html ? cleanHtml(html) : null;

    const sender = getAddressText(mail.from);
    const recipients = getAddressText(mail.to);
    const carbonCopy = getAddressText(mail.cc);
    const blindCopy = getAddressText(mail.bcc);

    // Every field is spelled out so the result shape is explicit
    return {
      subject: subject || null,
      from: sender,
      to: recipients,
      cc: carbonCopy,
      bcc: blindCopy,
      date: mail.date || null,
      html: sanitizedHtml,
      text: text || null,
      attachments: attachments || [],
      headers: Object.fromEntries(mail.headers)
    };
  } catch (err) {
    console.error('Error parsing email:', err);
    throw err;
  }
}