Neah/lib/email-parser.ts
2025-04-17 11:39:15 +02:00

93 lines
2.5 KiB
TypeScript

/**
 * The subset of message header fields returned by `parseEmailHeaders`.
 */
interface EmailHeaders {
// Value of the "From" header; `parseEmailHeaders` returns '' when it is absent.
from: string;
// Value of the "Subject" header; `parseEmailHeaders` returns '' when it is absent.
subject: string;
// Raw "Date" header text; `parseEmailHeaders` substitutes the current time
// as an ISO-8601 string when the header is absent.
date: string;
// Value of the "To" header; left undefined when the header is absent.
to?: string;
}
/**
 * Parses the header section of a raw email message and extracts the
 * From, Subject, Date and To fields.
 *
 * Header names are matched case-insensitively. Folded (continuation) lines,
 * i.e. lines starting with whitespace, are joined onto the preceding header
 * with a single space. Parsing stops at the first empty line that follows a
 * header, because that line separates the header section from the body;
 * anything after it is ignored so body text cannot override real headers.
 * Empty lines before the first header are skipped.
 *
 * @param headerContent Raw header text (CRLF or LF line endings). It may also
 *   contain the message body after a blank line.
 * @returns The extracted fields. `from` and `subject` default to `''`,
 *   `date` defaults to the current time as an ISO-8601 string, and `to` is
 *   `undefined` when no "To" header is present.
 */
export function parseEmailHeaders(headerContent: string): EmailHeaders {
  // A Map keeps untrusted header names from colliding with Object.prototype keys.
  const headers = new Map<string, string>();
  let currentHeader = '';
  let currentValue = '';
  let sawHeader = false;

  // Stores the header accumulated so far (if it has both a name and a value).
  const commit = (): void => {
    if (currentHeader && currentValue) {
      headers.set(currentHeader.toLowerCase(), currentValue.trim());
    }
  };

  for (const line of headerContent.split(/\r?\n/)) {
    if (line === '') {
      // An empty line after at least one header ends the header section.
      if (sawHeader) {
        break;
      }
      continue;
    }

    // A line starting with whitespace continues the previous header.
    if (/^\s/.test(line)) {
      if (currentHeader) {
        currentValue += ' ' + line.trim();
      }
      continue;
    }

    commit();

    const match = /^([^:]+):\s*(.*)$/.exec(line);
    if (match) {
      const [, name = '', value = ''] = match;
      // Trim so "From : x" (whitespace before the colon) is still recognised.
      currentHeader = name.trim();
      currentValue = value;
      sawHeader = true;
    } else {
      // Not a header line: reset so that following indented lines are not
      // appended to the header that was just committed.
      currentHeader = '';
      currentValue = '';
    }
  }

  // Save the last header.
  commit();

  return {
    from: headers.get('from') || '',
    subject: headers.get('subject') || '',
    date: headers.get('date') || new Date().toISOString(),
    to: headers.get('to'),
  };
}
/**
 * Normalises an email body and, for HTML parts, reduces it to plain text.
 *
 * Steps:
 *  1. CRLF line endings are converted to LF.
 *  2. Quoted-printable soft line breaks ("=" at end of line) are removed.
 *  3. The quoted-printable escapes `=3D` ("=") and `=09` (tab) are decoded in
 *     a single pass, so an encoded literal such as `=3D09` yields `=09`
 *     rather than being decoded a second time into a tab.
 *  4. If the content type denotes HTML (matched case-insensitively, since
 *     MIME type names are case-insensitive), the markup is converted to text
 *     via `extractTextFromHtml`.
 *
 * Only the two escapes above are decoded; other `=XX` sequences are left as is.
 *
 * @param content Raw body text.
 * @param contentType Value of the part's Content-Type header, e.g.
 *   `text/html; charset=utf-8`.
 * @returns The decoded body. On an unexpected error the error is logged and
 *   the most recently computed text is returned (best effort).
 */
export function decodeEmailBody(content: string, contentType: string): string {
  const escapes: Record<string, string> = { '3D': '=', '09': '\t' };
  let decoded = content;
  try {
    decoded = content
      .replace(/\r\n/g, '\n')
      // Soft line breaks only exist to wrap long encoded lines.
      .replace(/=\n/g, '')
      // Single pass: text produced by one escape is never re-scanned.
      .replace(/=(3D|09)/g, (sequence: string, code: string) => escapes[code] ?? sequence);

    if (contentType.toLowerCase().includes('text/html')) {
      return extractTextFromHtml(decoded);
    }
    return decoded;
  } catch (error) {
    console.error('Error decoding email body:', error);
    return decoded;
  }
}
/**
 * Converts an HTML fragment to plain text.
 *
 * - `<script>` and `<style>` elements are dropped together with their content.
 * - `<br>`, `<p>` and `</p>` become newlines; every other tag is removed.
 * - The entities `&nbsp;`, `&amp;`, `&lt;`, `&gt;` and `&quot;` are decoded in
 *   a single pass, so already-escaped text such as `&amp;lt;` becomes `&lt;`
 *   instead of being decoded twice into `<`.
 * - Runs of blank (whitespace-only) lines collapse to one empty line and the
 *   result is trimmed.
 *
 * @param html HTML markup.
 * @returns The text content of the markup.
 */
function extractTextFromHtml(html: string): string {
  const entityText: Record<string, string> = {
    nbsp: ' ',
    amp: '&',
    lt: '<',
    gt: '>',
    quot: '"',
  };

  return html
    // Remove scripts and style blocks, including their content.
    .replace(/<script[^>]*>[\s\S]*?<\/script>/gi, '')
    .replace(/<style[^>]*>[\s\S]*?<\/style>/gi, '')
    // Line-breaking tags become newlines.
    .replace(/<br[^>]*>/gi, '\n')
    .replace(/<p[^>]*>/gi, '\n')
    .replace(/<\/p>/gi, '\n')
    // Remove all remaining tags.
    .replace(/<[^>]+>/g, '')
    // Decode entities after tag removal (so decoded "<" is never treated as
    // markup) and in one pass (so output is never decoded again).
    .replace(/&(nbsp|amp|lt|gt|quot);/g, (entity: string, name: string) => entityText[name] ?? entity)
    // Clean up whitespace.
    .replace(/\n\s*\n/g, '\n\n')
    .trim();
}