mail page imap connection mime 2

This commit is contained in:
alma 2025-04-15 22:15:16 +02:00
parent bc1dab3b2b
commit b1662fce90

View File

@ -41,20 +41,170 @@ interface Email {
category: string;
}
// MIME Decoder Implementation
// Improved MIME Decoder Implementation for Infomaniak
/**
 * Entry point for decoding a raw email.
 *
 * Looks for a `boundary=` parameter anywhere in the raw text. When one is
 * found the message is handed to `processMultipartEmail` together with the
 * captured boundary value; otherwise it is handed to `processSinglePartEmail`.
 *
 * @param rawEmail Raw message text (headers followed by body).
 * @returns Whatever the selected processor returns.
 */
function decodeInfomaniakEmail(rawEmail: string) {
  // The boundary value may be quoted or bare; capture it without the quotes.
  const boundaryHit = /boundary="?([^"\r\n;]+)"?/i.exec(rawEmail);

  return boundaryHit === null
    ? processSinglePartEmail(rawEmail) // no boundary: plain single-part message
    : processMultipartEmail(rawEmail, boundaryHit[1]); // boundary found: multipart
}
/**
 * Decodes a message that has no multipart boundary.
 *
 * The text before the first blank line is treated as the header block; every
 * later blank-line-separated segment is re-joined with "\n\n" to form the body.
 * The body is decoded with the transfer encoding and charset reported by
 * `parseEmailHeaders`.
 *
 * @param rawEmail Raw message text.
 * @returns Selected header values, the content type, the decoded body exposed
 *          as either `text` or `html` (the other is `null`), and the raw
 *          header/body strings.
 */
function processSinglePartEmail(rawEmail: string) {
  // First segment is the header block, the remainder is the body.
  const [headers, ...bodySegments] = rawEmail.split(/\r?\n\r?\n/);
  const body = bodySegments.join('\n\n');

  // Content type / transfer encoding / charset drive the decoding step.
  const { contentType, encoding, charset } = parseEmailHeaders(headers);
  const decodedBody = decodeMIME(body, encoding, charset);

  // Any content type mentioning "html" is exposed through `html`, not `text`.
  const isHtml = contentType.includes('html');

  return {
    subject: extractHeader(headers, 'Subject'),
    from: extractHeader(headers, 'From'),
    to: extractHeader(headers, 'To'),
    date: extractHeader(headers, 'Date'),
    contentType,
    text: isHtml ? null : decodedBody,
    html: isHtml ? decodedBody : null,
    raw: { headers, body }
  };
}
/**
 * Decodes a multipart MIME message.
 *
 * The top-level header block (everything before the first blank line) supplies
 * Subject/From/To/Date. The remainder is split on the boundary delimiter lines
 * and each part is classified by its own Content-Type:
 *  - `text/plain` -> decoded into `text`
 *  - `text/html`  -> decoded, passed through `cleanHtml`, stored in `html`
 *  - `image/*` or `application/*` -> appended to `attachments` with the part
 *    content left undecoded
 * When several parts have the same text type, the last one wins.
 *
 * @param rawEmail Raw message text.
 * @param boundary Boundary value taken from the Content-Type header (without
 *                 the leading "--").
 * @returns The collected text, HTML, attachments and top-level header values.
 */
function processMultipartEmail(rawEmail: string, boundary: string): {
  text: string;
  html: string;
  attachments: { filename: string; contentType: string; encoding: string; content: string; }[];
  subject?: string;
  from?: string;
  to?: string;
  date?: string;
} {
  // Split headers and body
  const headerBodySplit = rawEmail.split(/\r?\n\r?\n/);
  const headers = headerBodySplit[0];
  const fullBody = headerBodySplit.slice(1).join('\n\n');

  // Create the result object
  const result = {
    subject: extractHeader(headers, 'Subject'),
    from: extractHeader(headers, 'From'),
    to: extractHeader(headers, 'To'),
    date: extractHeader(headers, 'Date'),
    text: '',
    html: '',
    attachments: [] as { filename: string; contentType: string; encoding: string; content: string; }[]
  };

  // Boundaries may legally contain regex metacharacters such as + ? ( ) and .
  // so the value must be escaped before it is embedded in a pattern; otherwise
  // the delimiter either fails to match or the RegExp constructor throws.
  const escapedBoundary = boundary.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const boundaryRegex = new RegExp(
    `--${escapedBoundary}\\r?\\n|--${escapedBoundary}--\\r?\\n?`,
    'g'
  );

  // NOTE(review): nested multipart parts (e.g. multipart/alternative inside
  // multipart/mixed) are not descended into here - confirm whether callers need that.
  for (const part of fullBody.split(boundaryRegex)) {
    // Skip the empty preamble/epilogue fragments produced by the split.
    if (!part.trim()) continue;

    // Each part is its own header block, a blank line, then the content.
    const separator = /\r?\n\r?\n/.exec(part);
    if (!separator) continue;
    const partHeaders = part.substring(0, separator.index);
    const partContent = part.substring(separator.index + separator[0].length);

    // Get content info for this part
    const partInfo = parseEmailHeaders(partHeaders);

    if (partInfo.contentType.includes('text/plain')) {
      result.text = decodeMIME(partContent, partInfo.encoding, partInfo.charset);
    } else if (partInfo.contentType.includes('text/html')) {
      result.html = cleanHtml(decodeMIME(partContent, partInfo.encoding, partInfo.charset));
    } else if (
      partInfo.contentType.startsWith('image/') ||
      partInfo.contentType.startsWith('application/')
    ) {
      // Attachment content is kept in its transfer encoding; `encoding` tells
      // the consumer how to decode it.
      result.attachments.push({
        filename: extractFilename(partHeaders),
        contentType: partInfo.contentType,
        encoding: partInfo.encoding,
        content: partContent
      });
    }
  }

  return result;
}
/**
 * Returns the value of the first header named `headerName` in a header block.
 *
 * - The name is matched case-insensitively at the start of a line.
 * - Folded values (continuation lines that start with a space or tab) are
 *   unfolded, with each fold collapsed to a single space.
 * - An empty header yields an empty string rather than spilling into the
 *   following header line.
 *
 * @param headers    Header block text.
 * @param headerName Header field name without the trailing colon.
 * @returns The trimmed header value, or '' when the header is absent or empty.
 */
function extractHeader(headers: string, headerName: string): string {
  // Escape the name so characters such as "." or "+" are matched literally.
  const escapedName = headerName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

  // Only spaces/tabs may follow the colon on the same line; "\s" would also
  // swallow the line break and capture the next header instead.
  // The trailing group picks up folded continuation lines.
  const regex = new RegExp(
    `^${escapedName}:[ \\t]*([^\\r\\n]*(?:\\r?\\n[ \\t]+[^\\r\\n]*)*)`,
    'im'
  );

  const match = headers.match(regex);
  if (!match) return '';

  // Unfold: a line break followed by whitespace is a soft wrap inside the value.
  return match[1].replace(/\r?\n[ \t]+/g, ' ').trim();
}
/**
 * Pulls the `filename` parameter out of a part's header block.
 *
 * The value may be quoted or bare; it ends at the first double quote,
 * semicolon or line break.
 *
 * @param headers Header block of a single MIME part.
 * @returns The trimmed filename, or the literal 'attachment' when no
 *          `filename=` parameter is present.
 */
function extractFilename(headers: string): string {
  const found = /filename="?([^"\r\n;]+)"?/i.exec(headers);
  if (found === null) {
    // No filename parameter at all: fall back to a generic name.
    return 'attachment';
  }
  return found[1].trim();
}
/**
 * Extracts the media type, charset and transfer encoding from a header block.
 *
 * Defaults are `text/plain`, `utf-8` and `7bit` when the corresponding
 * information is missing. All returned values are lower-cased.
 *
 * - The media type stops at the first `;` or at the end of the header line, so
 *   a parameterless Content-Type no longer absorbs the headers that follow it.
 * - `charset` is found wherever it appears among the Content-Type parameters
 *   (quoted or bare), including on folded continuation lines.
 *
 * @param headers Header block text (message-level or part-level).
 * @returns `{ contentType, encoding, charset }`.
 */
function parseEmailHeaders(headers: string): { contentType: string; encoding: string; charset: string } {
  const result = {
    contentType: 'text/plain',
    encoding: '7bit',
    charset: 'utf-8'
  };

  // Capture the Content-Type value plus any folded continuation lines
  // (lines starting with a space or tab belong to the same header).
  const contentTypeMatch = headers.match(
    /^[ \t]*Content-Type:[ \t]*([^\r\n]*(?:\r?\n[ \t]+[^\r\n]*)*)/im
  );
  if (contentTypeMatch) {
    const value = contentTypeMatch[1].replace(/\r?\n[ \t]+/g, ' ');

    // Media type is everything before the first parameter separator.
    const mediaType = value.split(';')[0].trim().toLowerCase();
    if (mediaType) {
      result.contentType = mediaType;
    }

    // charset may be any parameter, not just the first one.
    const charsetMatch = value.match(/;\s*charset\s*=\s*(?:"([^"]*)"|([^;"\s]+))/i);
    if (charsetMatch) {
      const charset = (charsetMatch[1] || charsetMatch[2] || '').trim().toLowerCase();
      if (charset) {
        result.charset = charset;
      }
    }
  }

  // Only spaces/tabs may separate the colon from the value, so an empty
  // header cannot pick up the name of the next header.
  const encodingMatch = headers.match(/^[ \t]*Content-Transfer-Encoding:[ \t]*([^\s;]+)/im);
  if (encodingMatch) {
    result.encoding = encodingMatch[1].trim().toLowerCase();
  }

  return result;
}
function decodeMIME(text: string, encoding?: string, charset: string = 'utf-8'): string {
if (!text) return '';
// Normalize encoding and charset
encoding = (encoding || '').toLowerCase();
charset = (charset || 'utf-8').toLowerCase();
try {
// Handle different encoding types
if (encoding === 'quoted-printable') {
return decodeQuotedPrintable(text, charset);
} else if (encoding === 'base64') {
return decodeBase64(text, charset);
} else if (encoding === '7bit' || encoding === '8bit' || encoding === 'binary') {
// For these encodings, we still need to handle the character set
return convertCharset(text, charset);
} else {
return text;
// Unknown encoding, return as is but still handle charset
return convertCharset(text, charset);
}
} catch (error) {
console.error('Error decoding MIME:', error);
@ -63,133 +213,128 @@ function decodeMIME(text: string, encoding?: string, charset: string = 'utf-8'):
}
/**
 * Decodes quoted-printable text and converts the resulting bytes to a string
 * using `charset`.
 *
 * Steps:
 *  1. Remove soft line breaks ("=" at end of line).
 *  2. Turn every "=XX" escape into the byte it encodes.
 *  3. Interpret the bytes in `charset` via `TextDecoder` when available. UTF-8
 *     is decoded too, so "=C3=A9" becomes "é" instead of "Ã©".
 *  4. Replace non-breaking spaces with regular spaces.
 *
 * The undecoded byte string is kept when the input already contains characters
 * above 0xFF (it is not a byte string), when UTF-8 validation fails (the text
 * was most likely decoded upstream), or when the charset label is unsupported.
 *
 * @param text    Quoted-printable encoded text.
 * @param charset Charset label from the Content-Type header.
 * @returns The decoded text.
 */
function decodeQuotedPrintable(text: string, charset: string): string {
  // Replace soft line breaks
  const joined = text.replace(/=(?:\r\n|\n)/g, '');

  // Replace quoted-printable hex sequences with the byte they stand for
  let decoded = joined.replace(/=([0-9A-F]{2})/gi, (_match, hex: string) =>
    String.fromCharCode(parseInt(hex, 16))
  );

  if (typeof TextDecoder !== 'undefined') {
    const bytes = new Uint8Array(decoded.length);
    let isByteString = true;
    for (let i = 0; i < decoded.length; i++) {
      const code = decoded.charCodeAt(i);
      if (code > 0xff) {
        // Already contains real Unicode characters; re-decoding would corrupt them.
        isByteString = false;
        break;
      }
      bytes[i] = code;
    }

    if (isByteString) {
      const label = (charset || 'utf-8').trim().toLowerCase();
      const isUtf8 = label === 'utf-8' || label === 'utf8';
      try {
        // Strict mode for UTF-8 so text that is not valid UTF-8 is left alone
        // instead of being filled with replacement characters.
        decoded = new TextDecoder(label, { fatal: isUtf8 }).decode(bytes);
      } catch (e) {
        // A UTF-8 validation failure is an expected best-effort fallback;
        // anything else (e.g. an unknown charset label) is worth reporting.
        if (!isUtf8) {
          console.error('Charset decoding error:', e);
        }
      }
    }
  }

  // Handle Infomaniak specific issues with special characters
  return decoded.replace(/\xA0/g, ' ');
}
/**
 * Decodes base64 text and converts the resulting bytes to a string using
 * `charset`.
 *
 * Whitespace (including the line breaks used to wrap base64 bodies) is
 * stripped before decoding. The bytes are then interpreted with `TextDecoder`
 * for every charset, UTF-8 included, so multi-byte characters are restored
 * correctly.
 *
 * Fallbacks:
 *  - invalid base64 input -> the original `text` is returned unchanged;
 *  - `TextDecoder` unavailable, unsupported charset label, or bytes that are
 *    not valid UTF-8 when UTF-8 was declared -> the raw byte string is returned.
 *
 * @param text    Base64 encoded text.
 * @param charset Charset label from the Content-Type header.
 * @returns The decoded text.
 */
function decodeBase64(text: string, charset: string): string {
  const cleanText = text.replace(/\s/g, '');

  let binary: string;
  try {
    binary = atob(cleanText);
  } catch (e) {
    console.error('Base64 decoding error:', e);
    return text;
  }

  if (typeof TextDecoder === 'undefined') {
    return binary;
  }

  // atob yields one character per byte (0-255); copy them into a byte array.
  const bytes = new Uint8Array(binary.length);
  for (let i = 0; i < binary.length; i++) {
    bytes[i] = binary.charCodeAt(i);
  }

  const label = (charset || 'utf-8').trim().toLowerCase();
  const isUtf8 = label === 'utf-8' || label === 'utf8';
  try {
    // Strict mode for UTF-8 so mislabelled content falls back to the byte
    // string instead of being filled with replacement characters.
    return new TextDecoder(label, { fatal: isUtf8 }).decode(bytes);
  } catch (e) {
    console.error('Charset decoding error:', e);
    return binary;
  }
}
function parseEmailHeaders(headers: string): { contentType: string; encoding: string; charset: string } {
const result = {
contentType: 'text/plain',
encoding: 'quoted-printable',
charset: 'utf-8'
};
const contentTypeMatch = headers.match(/Content-Type:\s*([^;]+)(?:;\s*charset=([^;]+))?/i);
if (contentTypeMatch) {
result.contentType = contentTypeMatch[1].trim().toLowerCase();
if (contentTypeMatch[2]) {
result.charset = contentTypeMatch[2].trim().replace(/"/g, '').toLowerCase();
function convertCharset(text: string, fromCharset: string): string {
try {
if (typeof TextDecoder !== 'undefined') {
const bytes = new Uint8Array(text.length);
for (let i = 0; i < text.length; i++) {
bytes[i] = text.charCodeAt(i) & 0xFF;
}
let normalizedCharset = fromCharset.toLowerCase();
// Normalize charset names
if (normalizedCharset === 'iso-8859-1' || normalizedCharset === 'latin1') {
normalizedCharset = 'iso-8859-1';
} else if (normalizedCharset === 'windows-1252' || normalizedCharset === 'cp1252') {
normalizedCharset = 'windows-1252';
}
const decoder = new TextDecoder(normalizedCharset);
return decoder.decode(bytes);
}
}
const encodingMatch = headers.match(/Content-Transfer-Encoding:\s*([^\s]+)/i);
if (encodingMatch) {
result.encoding = encodingMatch[1].trim().toLowerCase();
}
return result;
}
/**
 * Splits a raw message (or a single MIME part) into header block and body and
 * decodes the body.
 *
 * Everything before the first blank line is the header block; the remaining
 * blank-line-separated segments are re-joined with "\n\n". Content type,
 * transfer encoding and charset come from `parseEmailHeaders`, and the body is
 * decoded with `decodeMIME`.
 *
 * @param emailRaw Raw message or part text.
 * @returns The parsed content info, the decoded body and the header block.
 */
function decodeEmail(emailRaw: string): { contentType: string; charset: string; encoding: string; decodedBody: string; headers: string } {
  const [headers, ...bodySegments] = emailRaw.split(/\r?\n\r?\n/);
  const rawBody = bodySegments.join('\n\n');

  const info = parseEmailHeaders(headers);

  return {
    contentType: info.contentType,
    charset: info.charset,
    encoding: info.encoding,
    decodedBody: decodeMIME(rawBody, info.encoding, info.charset),
    headers
  };
}
function processMultipartEmail(emailRaw: string, boundary: string): { text: string; html: string; attachments: Array<{ contentType: string; content: string }> } {
const result = {
text: '',
html: '',
attachments: []
};
const boundaryRegex = new RegExp(`--${boundary}\\r?\\n|--${boundary}--\\r?\\n?`, 'g');
const parts = emailRaw.split(boundaryRegex).filter(part => part.trim());
parts.forEach(part => {
const decoded = decodeEmail(part);
if (decoded.contentType === 'text/plain') {
result.text = decoded.decodedBody;
} else if (decoded.contentType === 'text/html') {
result.html = decoded.decodedBody;
} else if (decoded.contentType.startsWith('image/') || decoded.contentType.startsWith('application/')) {
result.attachments.push({
contentType: decoded.contentType,
content: decoded.decodedBody
});
// Fallback for older browsers or unsupported charsets
if (fromCharset.toLowerCase() === 'iso-8859-1' || fromCharset.toLowerCase() === 'windows-1252') {
return text
.replace(/\xC3\xA0/g, 'à')
.replace(/\xC3\xA2/g, 'â')
.replace(/\xC3\xA9/g, 'é')
.replace(/\xC3\xA8/g, 'è')
.replace(/\xC3\xAA/g, 'ê')
.replace(/\xC3\xAB/g, 'ë')
.replace(/\xC3\xB4/g, 'ô')
.replace(/\xC3\xB9/g, 'ù')
.replace(/\xC3\xBB/g, 'û')
.replace(/\xC3\x80/g, 'À')
.replace(/\xC3\x89/g, 'É')
.replace(/\xC3\x87/g, 'Ç')
.replace(/\xC2\xA0/g, ' ');
}
});
return result;
return text;
} catch (e) {
console.error('Character set conversion error:', e, 'charset:', fromCharset);
return text;
}
}
// Replace the old decodeMimeContent function with a new implementation that uses the above functions
/**
 * Returns the markup between the first `<body ...>` tag and the first
 * `</body>` that follows it (case-insensitive).
 *
 * @param htmlContent HTML document or fragment.
 * @returns The inner body markup, or `htmlContent` unchanged when no
 *          body element is found.
 */
function extractHtmlBody(htmlContent: string): string {
  const found = /<body[^>]*>([\s\S]*?)<\/body>/i.exec(htmlContent);
  if (found === null) {
    // Fragment without a <body> wrapper: nothing to strip.
    return htmlContent;
  }
  return found[1];
}
// Maps the entity that follows "&Atilde;" to the character the pair stands for.
// "&Atilde;&xxx;" is what a two-byte UTF-8 character (lead byte 0xC3) looks
// like after it was misread as Latin-1 and then HTML-escaped; the second
// entity is the Latin-1 rendering of the UTF-8 continuation byte.
const MOJIBAKE_ENTITY_MAP: { [entity: string]: string } = {
  nbsp: '\u00e0', // C3 A0 -> à
  cent: '\u00e2', // C3 A2 -> â
  sect: '\u00e7', // C3 A7 -> ç
  uml: '\u00e8', // C3 A8 -> è
  copy: '\u00e9', // C3 A9 -> é
  ordf: '\u00ea', // C3 AA -> ê
  laquo: '\u00eb', // C3 AB -> ë
  acute: '\u00f4', // C3 B4 -> ô
  sup1: '\u00f9', // C3 B9 -> ù
  raquo: '\u00fb' // C3 BB -> û
};

/**
 * Repairs common double-encoded accented characters in HTML and normalises
 * non-breaking spaces.
 *
 * - `&Atilde;&xxx;` pairs listed in `MOJIBAKE_ENTITY_MAP` are replaced by the
 *   accented letter they were meant to be.
 * - Remaining `&nbsp;` entities and U+00A0 characters become regular spaces.
 *
 * @param html HTML markup; may be empty.
 * @returns The cleaned markup, or '' for empty input.
 */
function cleanHtml(html: string): string {
  if (!html) return '';

  return html
    // Must run before the generic &nbsp; replacement: "&Atilde;&nbsp;" is "à".
    .replace(
      /&Atilde;&(nbsp|cent|sect|uml|copy|ordf|laquo|acute|sup1|raquo);/g,
      (_pair, entity: string) => MOJIBAKE_ENTITY_MAP[entity]
    )
    .replace(/&nbsp;/g, ' ')
    .replace(/\xA0/g, ' ');
}
// Update the decodeMimeContent function to use the new implementation
function decodeMimeContent(content: string): string {
if (!content) return '';
try {
// Check if the content includes headers
if (content.includes('Content-Type:') || content.includes('Content-Transfer-Encoding:')) {
// If it's a complete email with headers, use the full decoding process
const decoded = decodeEmail(content);
return decoded.decodedBody;
// Try to decode as a complete email first
const decoded = decodeInfomaniakEmail(content);
// If we have HTML content, prefer that
if (decoded.html) {
return extractHtmlBody(decoded.html);
}
// If no headers are present, try to detect the encoding and decode accordingly
// Otherwise use the text content
if (decoded.text) {
return decoded.text;
}
// If neither HTML nor text was found, try simple decoding
if (content.includes('Content-Type:') || content.includes('Content-Transfer-Encoding:')) {
const simpleDecoded = processSinglePartEmail(content);
return simpleDecoded.text || simpleDecoded.html || content;
}
// Try to detect encoding and decode accordingly
if (content.includes('=?UTF-8?B?') || content.includes('=?utf-8?B?')) {
// Base64 encoded content
return decodeMIME(content, 'base64', 'utf-8');
} else if (content.includes('=?UTF-8?Q?') || content.includes('=?utf-8?Q?') || content.includes('=20')) {
// Quoted-printable content
return decodeMIME(content, 'quoted-printable', 'utf-8');
}
@ -199,7 +344,7 @@ function decodeMimeContent(content: string): string {
return qpDecoded;
}
// If quoted-printable didn't change anything, return the original content
// If nothing else worked, return the original content
return content;
} catch (error) {
console.error('Error decoding email content:', error);