Neah/lib/utils/email-mime-decoder.ts
2025-04-27 11:03:34 +02:00

275 lines
8.0 KiB
TypeScript

/**
* Email MIME Decoder
*
* This module provides functions to decode MIME-encoded email content
* for proper display in a frontend application.
*/
/**
 * Decode a MIME body according to its Content-Transfer-Encoding.
 *
 * Falsy input yields ''. The 'quoted-printable' and 'base64' encodings are
 * delegated to decodeQuotedPrintable / decodeBase64; every other value
 * (including a missing one) returns the text unchanged. If a decoder throws,
 * the error is logged and the original text is returned.
 *
 * @param text - The encoded text
 * @param encoding - Transfer encoding name, compared case-insensitively
 * @param charset - Character set label, lower-cased before use (defaults to 'utf-8')
 * @returns The decoded text
 */
export function decodeMIME(text: string, encoding?: string, charset = 'utf-8'): string {
  if (!text) {
    return '';
  }

  // Work on normalized copies instead of reassigning the parameters.
  const transferEncoding = (encoding || '').toLowerCase();
  const charsetLabel = (charset || 'utf-8').toLowerCase();

  try {
    switch (transferEncoding) {
      case 'quoted-printable':
        return decodeQuotedPrintable(text, charsetLabel);
      case 'base64':
        return decodeBase64(text, charsetLabel);
      default:
        // Plain text or an encoding that needs no transformation here.
        return text;
    }
  } catch (error) {
    console.error('Error decoding MIME:', error);
    // Best effort: hand back the input untouched.
    return text;
  }
}
/**
 * Decode a quoted-printable encoded string.
 *
 * Soft line breaks ("=" at end of line, optionally followed by trailing
 * spaces/tabs) are removed, "=XX" escapes become bytes, and the resulting
 * byte runs are decoded with the given charset, so multi-byte sequences
 * such as "=C3=A9" in UTF-8 become a single character.
 *
 * Literal characters that cannot be a raw byte of the charset (above 0x7F
 * for UTF-8, above 0xFF otherwise) are passed through unchanged. When
 * TextDecoder is unavailable, the charset label is unknown, or the bytes
 * are not valid UTF-8, bytes are mapped one-to-one to code points (Latin-1).
 *
 * @param text - The quoted-printable encoded text
 * @param charset - The character set label (case-insensitive; empty means 'utf-8')
 * @returns The decoded text
 */
export function decodeQuotedPrintable(text: string, charset: string): string {
  const label = (charset || 'utf-8').toLowerCase();
  const isUtf8 = label === 'utf-8' || label === 'utf8';
  // Highest literal code unit that is treated as a raw byte of the charset.
  const maxLiteralByte = isUtf8 ? 0x7f : 0xff;
  const hexPair = /^[0-9A-Fa-f]{2}$/;

  let decoder: TextDecoder | null = null;
  if (typeof TextDecoder !== 'undefined') {
    try {
      // Strict mode for UTF-8 so mislabeled content is detected and can
      // fall back to the byte-per-character mapping instead of U+FFFD.
      decoder = new TextDecoder(label, { fatal: isUtf8 });
    } catch (e) {
      console.warn('TextDecoder error:', e);
    }
  }

  const bytesToText = (bytes: number[]): string => {
    if (decoder) {
      try {
        return decoder.decode(Uint8Array.from(bytes));
      } catch (e) {
        // Bytes are not valid for the declared charset; use the fallback below.
      }
    }
    let latin1 = '';
    for (const byte of bytes) {
      latin1 += String.fromCharCode(byte);
    }
    return latin1;
  };

  // Remove soft line breaks (=\r\n or =\n, tolerating trailing whitespace).
  const unfolded = text.replace(/=[ \t]*\r?\n/g, '');

  let output = '';
  let pending: number[] = [];
  const flush = (): void => {
    if (pending.length > 0) {
      output += bytesToText(pending);
      pending = [];
    }
  };

  for (let i = 0; i < unfolded.length; i++) {
    const code = unfolded.charCodeAt(i);
    if (code === 0x3d /* '=' */) {
      const hex = unfolded.slice(i + 1, i + 3);
      if (hexPair.test(hex)) {
        pending.push(parseInt(hex, 16));
        i += 2;
        continue;
      }
    }
    if (code <= maxLiteralByte) {
      pending.push(code);
    } else {
      // Already-decoded text: keep it verbatim and decode around it.
      flush();
      output += unfolded.charAt(i);
    }
  }
  flush();

  return output;
}
/**
 * Decode a base64 encoded string.
 *
 * Whitespace is stripped, the payload is decoded to bytes with atob, and the
 * bytes are then interpreted in the given charset (including UTF-8, so
 * multi-byte characters are reassembled).
 *
 * Fallbacks:
 * - invalid base64: the error is logged and the original text is returned;
 * - TextDecoder unavailable, unknown charset label, or bytes that are not
 *   valid UTF-8 (e.g. binary attachments): the binary string produced by
 *   atob is returned, one character per byte.
 *
 * @param text - The base64 encoded text
 * @param charset - The character set label (case-insensitive; empty means 'utf-8')
 * @returns The decoded text
 */
export function decodeBase64(text: string, charset: string): string {
  // Line breaks and padding whitespace are legal inside a base64 body.
  const cleanText = text.replace(/\s/g, '');

  let binary: string;
  try {
    binary = atob(cleanText);
  } catch (e) {
    console.error('Base64 decoding error:', e);
    return text;
  }

  if (typeof TextDecoder === 'undefined') {
    return binary;
  }

  const label = (charset || 'utf-8').toLowerCase();
  const isUtf8 = label === 'utf-8' || label === 'utf8';

  let decoder: TextDecoder;
  try {
    // Strict for UTF-8 so binary payloads are detected rather than being
    // filled with replacement characters.
    decoder = new TextDecoder(label, { fatal: isUtf8 });
  } catch (e) {
    console.warn('TextDecoder error:', e);
    return binary;
  }

  const bytes = new Uint8Array(binary.length);
  for (let i = 0; i < binary.length; i++) {
    bytes[i] = binary.charCodeAt(i);
  }

  try {
    return decoder.decode(bytes);
  } catch (e) {
    // Not valid text in the declared charset; keep the raw byte string.
    return binary;
  }
}
/**
 * Parse email headers to extract content type, transfer encoding and charset.
 *
 * Folded (continuation) header lines are unfolded first. The media type is
 * everything on the Content-Type line before the first ';'. The charset
 * parameter is found wherever it appears among the parameters, with
 * surrounding quotes removed. All returned values are lower-cased.
 *
 * Defaults when a header is absent: 'text/plain', 'quoted-printable', 'utf-8'.
 * NOTE(review): RFC 2045 specifies '7bit' as the default transfer encoding;
 * 'quoted-printable' is kept here for backward compatibility - confirm
 * whether callers rely on it.
 *
 * @param headers - The raw email headers
 * @returns Object containing content type, encoding and charset
 */
export function parseEmailHeaders(headers: string): {
  contentType: string;
  encoding: string;
  charset: string;
} {
  const result = {
    contentType: 'text/plain',
    encoding: 'quoted-printable',
    charset: 'utf-8'
  };

  // A line break followed by whitespace continues the previous header.
  const unfolded = headers.replace(/\r?\n[ \t]+/g, ' ');

  // Capture only the Content-Type line itself, never the headers after it.
  const contentTypeMatch = unfolded.match(/^[ \t]*Content-Type:[ \t]*([^\r\n]*)/im);
  if (contentTypeMatch) {
    const value = contentTypeMatch[1] || '';

    const mediaType = (value.split(';')[0] || '').trim().toLowerCase();
    if (mediaType) {
      result.contentType = mediaType;
    }

    // charset may follow other parameters (e.g. format=flowed).
    const charsetMatch = value.match(/;\s*charset\s*=\s*["']?([^"';\s]+)/i);
    if (charsetMatch && charsetMatch[1]) {
      result.charset = charsetMatch[1].toLowerCase();
    }
  }

  const encodingMatch = unfolded.match(/^[ \t]*Content-Transfer-Encoding:[ \t]*([^\s;]+)/im);
  if (encodingMatch && encodingMatch[1]) {
    result.encoding = encodingMatch[1].toLowerCase();
  }

  return result;
}
/**
 * Decode a single-part email (or one MIME part) based on its headers.
 *
 * The input is cut at the first blank line: everything before it is the
 * header block, everything after it is the body. The body is taken verbatim
 * (its own blank lines and line endings are not rewritten) and then decoded
 * via decodeMIME using the encoding and charset reported by
 * parseEmailHeaders. If there is no blank line, the whole input is treated
 * as headers and the body is empty.
 *
 * @param emailRaw - The raw email content (headers + body)
 * @returns The parsed content type, charset and encoding, the decoded body,
 *          and the raw header block
 */
export function decodeEmail(emailRaw: string): {
  contentType: string;
  charset: string;
  encoding: string;
  decodedBody: string;
  headers: string;
} {
  // Locate the first blank line (LF or CRLF) separating headers from body.
  const separator = /\r?\n\r?\n/.exec(emailRaw);
  const headers = separator ? emailRaw.slice(0, separator.index) : emailRaw;
  const body = separator ? emailRaw.slice(separator.index + separator[0].length) : '';

  const { contentType, encoding, charset } = parseEmailHeaders(headers);
  const decodedBody = decodeMIME(body, encoding, charset);

  return {
    contentType,
    charset,
    encoding,
    decodedBody,
    headers
  };
}
/** One attachment extracted from a multipart message. */
interface EmailAttachment {
  /** Media type of the part. */
  contentType: string;
  /** Decoded body of the part. */
  content: string;
  /** File name, when one is known. */
  filename?: string;
}

/** Result of parsing an email: its text body, HTML body and attachments. */
interface EmailContent {
  text: string;
  html: string;
  attachments: EmailAttachment[];
}
/**
 * Process a multipart email to extract text, HTML and attachment parts.
 *
 * Delimiter handling:
 * - the boundary is escaped before being used in a regular expression, so
 *   boundaries containing characters such as '+', '?', '(' or '.' work;
 * - a delimiter must occupy its own line; the line break before it belongs
 *   to the delimiter and is not left at the end of the preceding part;
 * - everything after the closing delimiter (epilogue) and everything before
 *   the first delimiter (top-level headers / preamble) is ignored. If no
 *   delimiter is found at all, the whole input is treated as a single part.
 *
 * Part handling (each part is parsed with decodeEmail):
 * - nested multipart/* parts are processed recursively and merged in;
 * - parts marked "Content-Disposition: attachment", and any image/* or
 *   application/* part, are collected as attachments (file name defaults to
 *   'attachment');
 * - otherwise text/plain sets `text` and text/html sets `html`; when several
 *   such parts exist the last one wins.
 *
 * @param emailRaw - The raw email content
 * @param boundary - The multipart boundary (without the leading "--")
 * @returns Object containing text, html and attachments
 */
export function processMultipartEmail(emailRaw: string, boundary: string): EmailContent {
  const result: EmailContent = {
    text: '',
    html: '',
    attachments: []
  };

  // Boundaries may legally contain regex metacharacters; neutralize them.
  const escapedBoundary = boundary.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const lineStart = '(?:\\r?\\n|^)';
  const lineEnd = '[ \\t]*(?:\\r?\\n|$)';
  const closeDelimiter = new RegExp(`${lineStart}--${escapedBoundary}--${lineEnd}`, 'm');
  const partDelimiter = new RegExp(`${lineStart}--${escapedBoundary}${lineEnd}`, 'gm');

  // Drop the epilogue so trailing text is not mistaken for a part.
  const closeIndex = emailRaw.search(closeDelimiter);
  const multipartBody = closeIndex === -1 ? emailRaw : emailRaw.slice(0, closeIndex);

  const segments = multipartBody.split(partDelimiter);
  const hasDelimiters = segments.length > 1;
  // segments[0] precedes the first delimiter (headers/preamble), not a part.
  const parts = (hasDelimiters ? segments.slice(1) : segments).filter(part => part.trim());

  for (const part of parts) {
    const decoded = decodeEmail(part);
    const type = decoded.contentType;

    if (type.startsWith('multipart/')) {
      // Recurse only into real sub-parts (strictly shorter than the input),
      // which guarantees termination.
      if (!hasDelimiters) {
        continue;
      }
      const unfoldedHeaders = decoded.headers.replace(/\r?\n[ \t]+/g, ' ');
      const nestedMatch = unfoldedHeaders.match(/boundary=(?:"([^"]+)"|([^\s;"]+))/i);
      const nestedBoundary = nestedMatch ? nestedMatch[1] || nestedMatch[2] : undefined;
      if (nestedBoundary) {
        const nested = processMultipartEmail(part, nestedBoundary);
        if (nested.text) {
          result.text = nested.text;
        }
        if (nested.html) {
          result.html = nested.html;
        }
        result.attachments.push(...nested.attachments);
      }
      continue;
    }

    const isAttachment = /^[ \t]*Content-Disposition:\s*attachment/im.test(decoded.headers);

    if (!isAttachment && type === 'text/plain') {
      result.text = decoded.decodedBody;
    } else if (!isAttachment && type === 'text/html') {
      result.html = decoded.decodedBody;
    } else if (isAttachment || type.startsWith('image/') || type.startsWith('application/')) {
      // Extract filename if available
      const filenameMatch = decoded.headers.match(/filename=["']?([^"';\r\n]+)/i);
      const filename = filenameMatch ? filenameMatch[1] : 'attachment';
      result.attachments.push({
        contentType: type,
        content: decoded.decodedBody,
        filename
      });
    }
  }

  return result;
}
/**
 * Extract the boundary parameter from a Content-Type header value.
 *
 * A double-quoted value is returned without its quotes and may contain any
 * character except '"' (including spaces and apostrophes). A value wrapped
 * in single quotes is also unwrapped, for leniency. An unquoted value ends
 * at the first whitespace, ';' or '"', so trailing spaces, line breaks or
 * following headers are never included.
 *
 * @param contentType - The Content-Type header value
 * @returns The boundary string, or null if not found
 */
export function extractBoundary(contentType: string): string | null {
  const boundaryMatch = contentType.match(/boundary=(?:"([^"]+)"|'([^']+)'|([^\s;"]+))/i);
  if (!boundaryMatch) {
    return null;
  }
  // Exactly one of the three alternatives captured a non-empty value.
  return boundaryMatch[1] || boundaryMatch[2] || boundaryMatch[3] || null;
}
/**
 * Parse an email from its raw content.
 *
 * The header block (everything before the first blank line) is unfolded so
 * that a Content-Type header whose boundary parameter sits on a continuation
 * line is still recognized. If the Content-Type is multipart/* (matched
 * case-insensitively) and a boundary can be extracted, the whole message is
 * handed to processMultipartEmail. Otherwise the message is decoded as a
 * single part: an HTML content type fills `html`, anything else fills `text`.
 *
 * On any error the failure is logged and the raw input is returned as `text`.
 *
 * @param rawEmail - The raw email content
 * @returns The parsed email with text, html and attachments
 */
export function parseRawEmail(rawEmail: string): EmailContent {
  // Default result structure
  const result: EmailContent = {
    text: '',
    html: '',
    attachments: []
  };

  try {
    // Only the header block is needed here; the body is handled downstream.
    const separatorIndex = rawEmail.search(/\r?\n\r?\n/);
    const headers = separatorIndex === -1 ? rawEmail : rawEmail.slice(0, separatorIndex);

    // Join folded continuation lines so parameters stay with their header.
    const unfoldedHeaders = headers.replace(/\r?\n[ \t]+/g, ' ');

    const contentTypeHeader = unfoldedHeaders.match(/^[ \t]*Content-Type:[ \t]*([^\r\n]+)/im);
    if (contentTypeHeader && /multipart\//i.test(contentTypeHeader[1])) {
      const boundary = extractBoundary(contentTypeHeader[1]);
      if (boundary) {
        return processMultipartEmail(rawEmail, boundary);
      }
    }

    // Not multipart (or no usable boundary): decode as a single part.
    const { contentType, decodedBody } = decodeEmail(rawEmail);
    if (contentType.includes('text/html')) {
      result.html = decodedBody;
    } else {
      result.text = decodedBody;
    }
    return result;
  } catch (error) {
    console.error('Error parsing raw email:', error);
    // Return raw content as text on error
    result.text = rawEmail;
    return result;
  }
}