// Neah/lib/utils/email-mime-decoder.ts
// 2025-04-30 21:42:42 +02:00
//
// 412 lines
// 12 KiB
// TypeScript

/**
* Infomaniak Email MIME Decoder
*
* This module provides specialized functions to decode MIME-encoded email content
* from Infomaniak servers for proper display in a frontend application.
* It handles multipart messages, different encodings, and character set conversions.
*/
import { LegacyEmailMessage } from '@/types/email';
/**
* Result of decoding one raw MIME message with decodeInfomaniakEmail.
*/
export interface DecodedEmail {
// Subject header with RFC 2047 encoded-words decoded; '' when the header is absent.
subject: string;
// From header with encoded-words decoded; '' when absent.
from: string;
// To header with encoded-words decoded; '' when absent.
to: string;
// Cc header with encoded-words decoded; undefined when absent.
cc?: string;
// Bcc header with encoded-words decoded; undefined when absent.
bcc?: string;
// Raw Date header value (not parsed or decoded); '' when absent.
date: string;
// Body taken from a text/plain part, after transfer decoding and charset conversion.
text?: string;
// Body taken from a text/html part, after transfer decoding and charset conversion.
html?: string;
// Parts whose Content-Disposition contains 'attachment'.
attachments?: Array<{
// File name extracted from the Content-Disposition header.
filename: string;
// Content-Type header value of the part.
contentType: string;
// Content-Transfer-Encoding header value of the part.
encoding?: string;
// Part body after transfer decoding (no charset conversion is applied).
content?: string;
}>;
// Top-level headers keyed by lower-cased header name; values are not RFC 2047 decoded.
headers?: Record<string, string>;
}
/**
* Content-related header information for a message or a single MIME part,
* as produced by parseEmailHeaders.
*/
export interface EmailHeaderInfo {
// Content-Type header value as returned by getHeaderValue; 'text/plain' when the header is missing.
contentType: string;
// Content-Transfer-Encoding header value; '7bit' when the header is missing.
encoding: string;
// Value of the charset= parameter of Content-Type; 'utf-8' when not present.
charset: string;
// Value of the boundary= parameter of Content-Type; undefined when not present.
boundary?: string;
}
/**
 * Entry point: decode a raw MIME message into a {@link DecodedEmail}.
 *
 * The top-level headers are inspected first; any Content-Type containing
 * `multipart` is routed to the multipart processor, everything else is
 * treated as a single-part message.
 *
 * @param rawEmailContent - Complete raw message (headers, blank line, body).
 * @returns The decoded envelope fields and body parts.
 * @throws Error when the message is multipart but declares no boundary.
 */
export function decodeInfomaniakEmail(rawEmailContent: string): DecodedEmail {
  const headerInfo = parseEmailHeaders(extractHeaders(rawEmailContent));

  // Pick the processor once, then delegate with identical arguments.
  const processEmail = headerInfo.contentType.includes('multipart')
    ? processMultipartEmail
    : processSinglePartEmail;

  return processEmail(rawEmailContent, headerInfo);
}
/**
 * Decode a message that consists of a single body part.
 *
 * Everything before the first blank line (CRLF CRLF) is the header block,
 * everything after it is the body. The body is transfer-decoded, then
 * charset-converted, and stored as `text` or `html` depending on the
 * content type. Bodies of any other content type are not exposed.
 *
 * @param rawEmail - Complete raw message.
 * @param headerInfo - Content type / encoding / charset of the message.
 * @returns The decoded message.
 */
function processSinglePartEmail(rawEmail: string, headerInfo: EmailHeaderInfo): DecodedEmail {
  const blankLine = '\r\n\r\n';
  const blankLineAt = rawEmail.indexOf(blankLine);
  const headerBlock = blankLineAt === -1 ? rawEmail : rawEmail.slice(0, blankLineAt);
  const rawBody = blankLineAt === -1 ? '' : rawEmail.slice(blankLineAt + blankLine.length);

  const fields = parseHeadersToObject(headerBlock);
  const transferDecoded = decodeMimeContent(rawBody, headerInfo.encoding);
  const content = convertCharset(transferDecoded, headerInfo.charset);

  // Optional address headers stay undefined when missing instead of becoming ''.
  const decodeOptional = (name: string): string | undefined => {
    const raw = fields[name];
    return raw ? decodeHeaderValue(raw) : undefined;
  };

  const result: DecodedEmail = {
    subject: decodeHeaderValue(fields['subject'] || ''),
    from: decodeHeaderValue(fields['from'] || ''),
    to: decodeHeaderValue(fields['to'] || ''),
    cc: decodeOptional('cc'),
    bcc: decodeOptional('bcc'),
    date: fields['date'] || '',
    headers: fields
  };

  const { contentType } = headerInfo;
  if (contentType.includes('text/plain')) {
    result.text = content;
  } else if (contentType.includes('text/html')) {
    result.html = content;
  }

  return result;
}
/**
 * Decode a multipart message (also used recursively for nested multiparts).
 *
 * The raw text is split on `--<boundary>`. The first segment (top-level
 * headers / preamble) and the last segment (whatever follows the final
 * delimiter) are skipped; every segment in between is handled as one part:
 *
 * - a nested multipart is processed recursively and its html / text /
 *   attachments are merged into this result;
 * - a part whose Content-Disposition contains `attachment` is collected as
 *   an attachment (transfer-decoded only);
 * - otherwise a text/plain or text/html part becomes `text` / `html`
 *   (a later part of the same type overwrites an earlier one).
 *
 * @param rawEmail - Raw message, or the raw segment of a nested multipart.
 * @param headerInfo - Header info carrying the boundary to split on.
 * @returns The decoded message.
 * @throws Error when `headerInfo.boundary` is missing.
 */
function processMultipartEmail(rawEmail: string, headerInfo: EmailHeaderInfo): DecodedEmail {
  const { boundary } = headerInfo;
  if (!boundary) {
    throw new Error('Multipart email missing boundary');
  }

  const blankLine = '\r\n\r\n';
  const envelope = parseHeadersToObject(extractHeaders(rawEmail));

  const decodeOptional = (name: string): string | undefined => {
    const raw = envelope[name];
    return raw ? decodeHeaderValue(raw) : undefined;
  };

  const result: DecodedEmail = {
    subject: decodeHeaderValue(envelope['subject'] || ''),
    from: decodeHeaderValue(envelope['from'] || ''),
    to: decodeHeaderValue(envelope['to'] || ''),
    cc: decodeOptional('cc'),
    bcc: decodeOptional('bcc'),
    date: envelope['date'] || '',
    attachments: [],
    headers: envelope
  };

  // Drop the leading segment (headers) and the trailing one (after the last delimiter).
  const segments = rawEmail.split('--' + boundary).slice(1, -1);

  for (const segment of segments) {
    const segmentHeaders = extractHeaders(segment);
    const segmentInfo = parseEmailHeaders(segmentHeaders);

    // Nested multipart: recurse and merge whatever the inner parts produced.
    if (segmentInfo.contentType.includes('multipart') && segmentInfo.boundary) {
      const nested = processMultipartEmail(segment, segmentInfo);
      if (nested.html) result.html = nested.html;
      if (nested.text) result.text = nested.text;
      if (nested.attachments) {
        result.attachments = [...(result.attachments || []), ...nested.attachments];
      }
      continue;
    }

    // Body of the part = everything after its first blank line.
    const blankLineAt = segment.indexOf(blankLine);
    const rawBody = blankLineAt === -1 ? '' : segment.slice(blankLineAt + blankLine.length);
    const transferDecoded = decodeMimeContent(rawBody, segmentInfo.encoding);
    const content = convertCharset(transferDecoded, segmentInfo.charset);

    const disposition = getHeaderValue(segmentHeaders, 'Content-Disposition') || '';
    const isAttachment = disposition.includes('attachment');

    if (isAttachment) {
      const filename = extractFilename(disposition);
      if (result.attachments && filename) {
        // Attachments keep the transfer-decoded bytes; no charset conversion.
        result.attachments.push({
          filename,
          contentType: segmentInfo.contentType,
          encoding: segmentInfo.encoding,
          content: transferDecoded
        });
      }
    } else if (segmentInfo.contentType.includes('text/plain')) {
      result.text = content;
    } else if (segmentInfo.contentType.includes('text/html')) {
      result.html = content;
    }
  }

  return result;
}
/**
 * Return the header block of a message or MIME part.
 *
 * The header block is everything before the first blank line (CRLF CRLF).
 * When no blank line exists the whole input is returned.
 *
 * @param content - Raw message or raw part.
 * @returns The text preceding the first CRLF CRLF, or `content` itself.
 */
function extractHeaders(content: string): string {
  const blankLineAt = content.indexOf('\r\n\r\n');
  return blankLineAt === -1 ? content : content.slice(0, blankLineAt);
}
/**
 * Turn a CRLF-separated header block into a name → value map.
 *
 * - Header names are lower-cased; names and values are trimmed.
 * - A line starting with a space or tab continues the previous header and
 *   is appended to its value, separated by a single space.
 * - A line without a colon is ignored.
 * - When a header occurs more than once, the last occurrence wins.
 *
 * @param headers - Header block, lines separated by CRLF.
 * @returns Map from lower-cased header name to its unfolded value.
 */
function parseHeadersToObject(headers: string): Record<string, string> {
  const parsed: Record<string, string> = {};
  let pendingName = '';
  let pendingValue = '';

  // Store the header collected so far (no-op until a named header was seen).
  const commit = (): void => {
    if (pendingName) {
      parsed[pendingName.toLowerCase()] = pendingValue;
    }
  };

  headers.split('\r\n').forEach((line) => {
    const isContinuation = line.startsWith(' ') || line.startsWith('\t');
    if (isContinuation) {
      pendingValue += ' ' + line.trim();
      return;
    }

    commit();

    const colonAt = line.indexOf(':');
    if (colonAt === -1) {
      return;
    }
    pendingName = line.substring(0, colonAt).trim();
    pendingValue = line.substring(colonAt + 1).trim();
  });

  commit();
  return parsed;
}
/**
 * Derive content type, transfer encoding, charset and boundary from a
 * header block.
 *
 * Defaults when a piece of information is missing: content type
 * `text/plain`, transfer encoding `7bit`, charset `utf-8`; the boundary is
 * left undefined.
 *
 * @param headers - Header block of a message or MIME part.
 * @returns The extracted header information.
 */
function parseEmailHeaders(headers: string): EmailHeaderInfo {
  const contentType = getHeaderValue(headers, 'Content-Type') || 'text/plain';
  const encoding = getHeaderValue(headers, 'Content-Transfer-Encoding') || '7bit';

  // Both parameters live inside the Content-Type value, optionally quoted.
  const readParameter = (pattern: RegExp): string | undefined => {
    const found = contentType.match(pattern);
    return found ? found[1] : undefined;
  };

  const charset = readParameter(/charset\s*=\s*["']?([^"';\s]+)/i) ?? 'utf-8';
  const boundary = readParameter(/boundary\s*=\s*["']?([^"';\s]+)/i);

  return { contentType, encoding, charset, boundary };
}
/**
 * Look up a single header in a raw header block.
 *
 * Compared with a plain "name: rest-of-line" search this
 * - unfolds continuation lines (lines starting with space or tab), so
 *   parameters such as `boundary=` or `filename=` that a mailer wrapped onto
 *   the next line are part of the returned value;
 * - only matches at the start of a line, so `X-Content-Type:` is never
 *   mistaken for `Content-Type:`;
 * - treats `name` literally (regex metacharacters are escaped).
 *
 * @param headers - Raw header block (CRLF or LF separated).
 * @param name - Header name, matched case-insensitively.
 * @returns The unfolded, trimmed value of the first matching header
 *          (possibly ''), or `null` when the header is not present.
 */
function getHeaderValue(headers: string, name: string): string | null {
  const escapedName = name.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

  // First physical line of the header plus any folded continuation lines.
  const pattern = new RegExp(
    `^${escapedName}[ \\t]*:[ \\t]*(.*(?:\\r?\\n[ \\t]+.*)*)`,
    'im'
  );

  const match = pattern.exec(headers);
  if (!match) {
    return null;
  }

  // Unfold: a line break followed by whitespace collapses to a single space.
  return match[1].replace(/\r?\n[ \t]+/g, ' ').trim();
}
/**
 * Extract the attachment file name from a Content-Disposition value.
 *
 * Resolution order:
 * 1. the extended `filename*=charset'lang'percent-encoded` parameter
 *    (RFC 6266 says it takes precedence when both forms are present);
 * 2. the plain `filename=` parameter — double- or single-quoted values are
 *    taken in full, including spaces; unquoted values end at `;` or
 *    whitespace;
 * 3. the literal fallback `'attachment'`.
 *
 * If the extended value cannot be percent-decoded, the plain parameter is
 * used when available, otherwise the undecoded extended value is returned.
 * RFC 2047 encoded-words inside `filename=` are returned as-is.
 *
 * @param contentDisposition - Value of the Content-Disposition header.
 * @returns A non-empty file name.
 */
function extractFilename(contentDisposition: string): string {
  const extended = /filename\*\s*=\s*[^'\s;]*'[^']*'([^;\s]+)/i.exec(contentDisposition);
  const plain = /filename\s*=\s*(?:"([^"]*)"|'([^']*)'|["']?([^"';\s]+))/i.exec(
    contentDisposition
  );
  const plainName = plain ? plain[1] || plain[2] || plain[3] || '' : '';

  if (extended) {
    try {
      const decoded = decodeURIComponent(extended[1]);
      if (decoded) {
        return decoded;
      }
    } catch {
      // Malformed percent-encoding: prefer the plain parameter, else keep the raw value.
      if (!plainName) {
        return extended[1];
      }
    }
  }

  return plainName || 'attachment';
}
/**
 * Undo the Content-Transfer-Encoding of a body.
 *
 * `quoted-printable` and `base64` (matched case-insensitively) are decoded;
 * every other value — `7bit`, `8bit`, `binary` or anything unknown — means
 * the content is returned unchanged.
 *
 * @param content - Encoded body.
 * @param encoding - Content-Transfer-Encoding value.
 * @returns The transfer-decoded body.
 */
function decodeMimeContent(content: string, encoding: string): string {
  const scheme = encoding.toLowerCase();

  if (scheme === 'quoted-printable') {
    return decodeQuotedPrintable(content);
  }
  if (scheme === 'base64') {
    return decodeBase64(content);
  }

  // 7bit / 8bit / binary / unknown: identity.
  return content;
}
/**
 * Decode quoted-printable content (RFC 2045 §6.7).
 *
 * - Soft line breaks are removed. Besides the canonical `=CRLF` this also
 *   accepts a bare `=LF` and optional spaces/tabs between `=` and the line
 *   break (transport padding), which otherwise leave stray `=` characters
 *   in the decoded text.
 * - Every `=XX` hex escape becomes the character with that code.
 *
 * The result is a byte string: each character stands for one decoded byte,
 * so multi-byte charsets still need charset conversion afterwards.
 *
 * @param content - Quoted-printable encoded text.
 * @returns The decoded byte string.
 */
function decodeQuotedPrintable(content: string): string {
  return content
    .replace(/=[ \t]*\r?\n/g, '') // soft line breaks
    .replace(/=([a-fA-F0-9]{2})/g, (_match, hex: string) =>
      String.fromCharCode(parseInt(hex, 16))
    );
}
/**
 * Decode base64 content.
 *
 * All whitespace (including the line breaks MIME inserts) is stripped
 * before decoding. When decoding fails the error is logged and the
 * original, unmodified input is returned.
 *
 * @param content - Base64 text, possibly wrapped over several lines.
 * @returns The decoded byte string, or `content` on failure.
 */
function decodeBase64(content: string): string {
  try {
    return atob(content.replace(/\s+/g, ''));
  } catch (e) {
    console.error('Error decoding base64', e);
  }
  return content;
}
/**
 * Convert a decoded byte string from the given charset to a normal
 * JavaScript (Unicode) string.
 *
 * Base64 / quoted-printable decoding yields a byte string in which every
 * character code is one byte (0–255). This function interprets those bytes
 * in `charset`:
 *
 * - Input containing any character above U+00FF cannot be a byte string; it
 *   is already Unicode text and is returned unchanged (truncating it to
 *   bytes would corrupt it).
 * - UTF-8: pure ASCII is returned as-is. Otherwise the bytes are decoded
 *   strictly; if they are not valid UTF-8 (e.g. the text was already
 *   decoded) the input is returned unchanged.
 * - Other charsets: decoded with TextDecoder when available. On failure a
 *   warning is logged and the input is returned unchanged.
 *
 * @param content - Byte string (or already-decoded text).
 * @param charset - Charset label from the headers, case-insensitive.
 * @returns The text as a Unicode string.
 */
function convertCharset(content: string, charset: string): string {
  const label = charset.trim().toLowerCase();
  const isUtf8 = label === 'utf-8' || label === 'utf8';

  // Classify the input: byte string vs. text that is already Unicode.
  let hasHighByte = false;
  for (let i = 0; i < content.length; i++) {
    const code = content.charCodeAt(i);
    if (code > 0xff) {
      return content; // already Unicode, nothing to convert
    }
    if (code > 0x7f) {
      hasHighByte = true;
    }
  }

  const toBytes = (): Uint8Array => {
    const bytes = new Uint8Array(content.length);
    for (let i = 0; i < content.length; i++) {
      bytes[i] = content.charCodeAt(i);
    }
    return bytes;
  };

  if (isUtf8) {
    if (!hasHighByte || typeof TextDecoder === 'undefined') {
      return content;
    }
    try {
      // fatal: reject invalid sequences instead of inserting U+FFFD, so text
      // that was never UTF-8 bytes is left untouched.
      return new TextDecoder('utf-8', { fatal: true }).decode(toBytes());
    } catch {
      return content;
    }
  }

  try {
    // For environments that support TextDecoder
    if (typeof TextDecoder !== 'undefined') {
      return new TextDecoder(label).decode(toBytes());
    }
  } catch (e) {
    console.warn('TextDecoder not supported or failed for charset:', charset);
  }

  // Latin-1 bytes map 1:1 onto the first 256 code points, so the byte
  // string already is the correct text.
  if (label === 'iso-8859-1' || label === 'latin1') {
    return content;
  }

  console.warn('Unsupported charset:', charset);
  return content; // Return as-is if we can't convert
}
/**
 * Decode RFC 2047 encoded-words (`=?charset?B|Q?text?=`) in a header value.
 *
 * - Whitespace that separates two adjacent encoded-words is dropped, as
 *   required by RFC 2047 §6.2; mailers split long subjects into several
 *   encoded-words, and keeping that whitespace inserts spurious spaces into
 *   the decoded text.
 * - `B` words are base64-decoded; `Q` words have `_` turned into a space
 *   and `=XX` escapes into bytes. The bytes are then converted from the
 *   word's charset.
 * - A word that fails to decode is replaced by its raw encoded text.
 * - Text outside encoded-words is left untouched.
 *
 * @param value - Raw header value.
 * @returns The header value with all encoded-words decoded.
 */
function decodeHeaderValue(value: string): string {
  // Join adjacent encoded-words: remove only whitespace that sits between two of them.
  const joined = value.replace(
    /(=\?[^?\s]+\?[BQ]\?[^?\s]*\?=)\s+(?==\?[^?\s]+\?[BQ]\?[^?\s]*\?=)/gi,
    '$1'
  );

  return joined.replace(
    /=\?([^?]+)\?([BQ])\?([^?]*)\?=/gi,
    (_match, charset: string, encoding: string, text: string) => {
      try {
        if (encoding.toUpperCase() === 'B') {
          // Base64 encoding
          return convertCharset(atob(text), charset);
        }
        // Q encoding: quoted-printable variant where '_' stands for a space
        const bytes = text
          .replace(/_/g, ' ')
          .replace(/=([\da-fA-F]{2})/g, (_m: string, hex: string) =>
            String.fromCharCode(parseInt(hex, 16))
          );
        return convertCharset(bytes, charset);
      } catch {
        return text;
      }
    }
  );
}
/**
 * Best-effort removal of script content from an HTML fragment.
 *
 * Removes
 * - `<script>…</script>` elements (any case, closing tag may contain
 *   whitespace) and stray or unterminated `<script>` / `</script>` tags;
 * - inline event-handler attributes (`onclick=…`, `ONERROR=…`, …) with
 *   double-quoted, single-quoted or unquoted values and optional whitespace
 *   around `=`. The attribute must follow whitespace, a quote or `/`, so
 *   URL query parameters such as `?confirmation_token=…` are not damaged.
 *
 * The passes repeat until the output is stable, so removing one fragment
 * cannot splice a new `<script>` tag together.
 *
 * NOTE(security): regex filtering is not a complete sanitizer — it does not
 * cover `javascript:` URLs, `<iframe>`/`<object>`, CSS-based vectors or
 * entity-encoded payloads. Use a real HTML sanitizer (e.g. DOMPurify)
 * before rendering untrusted mail in a production app.
 *
 * @param html - Untrusted HTML.
 * @returns The HTML with script elements and inline handlers removed.
 */
export function cleanHtml(html: string): string {
  const stripOnce = (input: string): string =>
    input
      .replace(/<script\b[^>]*>[\s\S]*?<\/script\s*>/gi, '')
      .replace(/<\/?script\b[^>]*>?/gi, '')
      .replace(/([\s"'\/])on\w+\s*=\s*(?:"[^"]*"|'[^']*'|[^\s>]+)/gi, '$1');

  // Every pass only deletes text, so this terminates.
  let cleaned = html;
  let previous: string;
  do {
    previous = cleaned;
    cleaned = stripOnce(cleaned);
  } while (cleaned !== previous);

  return cleaned;
}
/**
 * Heuristically decide whether a string looks like a raw MIME message.
 *
 * It must contain a `Content-Type:` header, a `MIME-Version:` header and a
 * blank line (CRLF CRLF) separating headers from body. Header names are
 * matched case-insensitively because header field names are
 * case-insensitive and mailers emit variants such as `Mime-Version:` or
 * `Content-type:`.
 *
 * @param content - Candidate raw message; may be undefined or empty.
 * @returns `true` when all three markers are present.
 */
export function isMimeFormat(content: string | undefined): boolean {
  if (!content) return false;

  return (
    /content-type:/i.test(content) &&
    /mime-version:/i.test(content) &&
    content.includes('\r\n\r\n')
  );
}
/**
 * Replace the raw MIME `content` of a legacy email with its decoded form.
 *
 * The email is returned untouched when `content` is empty, not a string or
 * does not look like MIME (see isMimeFormat), and also when decoding throws
 * (the error is logged). Otherwise a copy is returned in which
 * - `html` is the decoded HTML body (possibly undefined),
 * - `text` is the decoded plain-text body or '',
 * - `subject` is the decoded subject, falling back to the existing one,
 * - `content` is overwritten with the decoded html, else text, else ''.
 *
 * @param legacyEmail - Email whose `content` may hold a raw MIME message.
 * @returns The adapted copy, or `legacyEmail` itself when nothing was decoded.
 */
export function adaptMimeEmail(legacyEmail: LegacyEmailMessage): LegacyEmailMessage {
  const { content } = legacyEmail;
  if (!content || typeof content !== 'string' || !isMimeFormat(content)) {
    return legacyEmail;
  }

  try {
    const { html, text, subject } = decodeInfomaniakEmail(content);
    return {
      ...legacyEmail,
      html,
      text: text || '',
      subject: subject || legacyEmail.subject,
      // The raw MIME source is replaced by the best decoded representation.
      content: html || text || ''
    };
  } catch (e) {
    console.error('Failed to decode MIME email:', e);
    return legacyEmail;
  }
}