// MIME decoding helpers for raw e-mail messages (Infomaniak mail).

/** Escapes every character that has a special meaning inside a regular expression. */
function escapeMimeRegExp(value: string): string {
  return value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

/**
 * Splits a message (or a MIME part) into its header block and its body at the
 * first empty line. The body is returned untouched (line endings preserved).
 * Input that starts with an empty line has no headers; input without any empty
 * line is treated as headers only.
 */
function splitMimeHeadersAndBody(raw: string): { headers: string; body: string } {
  const separator = /^\r?\n|\r?\n\r?\n/.exec(raw);
  if (!separator) {
    return { headers: raw, body: '' };
  }
  return {
    headers: raw.slice(0, separator.index),
    body: raw.slice(separator.index + separator[0].length)
  };
}

/** Returns the `boundary` parameter of the Content-Type header, or '' when absent. */
function extractMimeBoundary(headers: string): string {
  const contentType = extractHeader(headers, 'Content-Type');
  const match = /(?:^|;)\s*boundary\s*=\s*(?:"([^"]+)"|([^\s;"]+))/i.exec(contentType);
  if (!match) return '';
  return (match[1] || match[2] || '').trim();
}

/**
 * Returns the body parts enclosed by `--boundary` delimiter lines.
 * Text before the first delimiter (preamble) and after the closing
 * `--boundary--` delimiter (epilogue) is not a part and is dropped.
 */
function splitMimeParts(body: string, boundary: string): string[] {
  // A delimiter must start a line; the line break in front of it belongs to the
  // delimiter, not to the preceding part.
  const delimiter = new RegExp(
    `(?:^|\\r?\\n)--${escapeMimeRegExp(boundary)}(--)?[ \\t]*(?=\\r?\\n|$)`,
    'g'
  );
  const parts: string[] = [];
  let partStart = -1;
  let match: RegExpExecArray | null;

  while ((match = delimiter.exec(body)) !== null) {
    if (partStart >= 0) {
      parts.push(body.slice(partStart, match.index));
    }
    if (match[1]) {
      // Closing delimiter: everything after it is epilogue.
      partStart = -1;
      break;
    }
    partStart = match.index + match[0].length;
  }

  // Tolerate a missing closing delimiter (truncated message).
  if (partStart >= 0) {
    parts.push(body.slice(partStart));
  }

  // Drop the line break that terminates each delimiter line.
  return parts.map(part => part.replace(/^\r?\n/, ''));
}

/**
 * Walks the parts of a multipart body and stores text, HTML and attachments in
 * `result`. Nested `multipart/*` parts are processed recursively.
 */
function collectMimeParts(
  body: string,
  boundary: string,
  result: ReturnType<typeof processMultipartEmail>
): void {
  for (const part of splitMimeParts(body, boundary)) {
    const { headers: partHeaders, body: partContent } = splitMimeHeadersAndBody(part);
    if (!partContent.trim()) continue;

    const partInfo = parseEmailHeaders(partHeaders);

    if (partInfo.contentType.startsWith('multipart/')) {
      const nestedBoundary = extractMimeBoundary(partHeaders);
      if (nestedBoundary) {
        collectMimeParts(partContent, nestedBoundary, result);
      }
      continue;
    }

    if (partInfo.contentType.includes('text/plain')) {
      result.text = decodeMIME(partContent, partInfo.encoding, partInfo.charset);
    } else if (partInfo.contentType.includes('text/html')) {
      result.html = cleanHtml(decodeMIME(partContent, partInfo.encoding, partInfo.charset));
    } else if (
      partInfo.contentType.startsWith('image/') ||
      partInfo.contentType.startsWith('application/')
    ) {
      // Attachment content is kept in its transfer encoding (not decoded).
      result.attachments.push({
        filename: extractFilename(partHeaders),
        contentType: partInfo.contentType,
        encoding: partInfo.encoding,
        content: partContent
      });
    }
  }
}

/**
 * Returns true when `raw` begins with something that looks like a header block:
 * every line before the first empty line is either `Name: value` or a folded
 * continuation line.
 */
function hasMimeHeaderBlock(raw: string): boolean {
  const { headers } = splitMimeHeadersAndBody(raw);
  if (!headers.trim()) return false;
  return headers
    .split(/\r?\n/)
    .every((line, index) =>
      index > 0 && /^[ \t]/.test(line) ? true : /^[!-9;-~]+:(?:[ \t]|$)/.test(line)
    );
}

/**
 * Decodes a raw e-mail, dispatching to the multipart or single-part parser.
 *
 * @param rawEmail Raw message source (headers, empty line, body).
 * @returns The parsed message; both result shapes expose `text` and `html`.
 */
function decodeInfomaniakEmail(rawEmail: string) {
  const { headers } = splitMimeHeadersAndBody(rawEmail);

  let boundary = '';
  if (/^Content-Type:/im.test(headers)) {
    // The message declares its type: trust only that header, so a body that
    // merely mentions "boundary=" is not mistaken for a multipart message.
    boundary = extractMimeBoundary(headers);
  } else {
    // No declared type (e.g. header-less input): search the whole text.
    const boundaryMatch = rawEmail.match(/boundary="?([^"\r\n;]+)"?/i);
    boundary = boundaryMatch ? boundaryMatch[1] : '';
  }

  return boundary
    ? processMultipartEmail(rawEmail, boundary)
    : processSinglePartEmail(rawEmail);
}

/**
 * Parses a message that has a single body.
 *
 * @param rawEmail Raw message source.
 * @returns Common headers, the decoded body in `text` or `html` (the other one
 *          is `null`, chosen by whether the content type contains "html"), and
 *          the undecoded header block and body under `raw`.
 */
function processSinglePartEmail(rawEmail: string) {
  const { headers, body } = splitMimeHeadersAndBody(rawEmail);
  const emailInfo = parseEmailHeaders(headers);
  const decodedBody = decodeMIME(body, emailInfo.encoding, emailInfo.charset);
  const isHtml = emailInfo.contentType.includes('html');

  return {
    subject: extractHeader(headers, 'Subject'),
    from: extractHeader(headers, 'From'),
    to: extractHeader(headers, 'To'),
    date: extractHeader(headers, 'Date'),
    contentType: emailInfo.contentType,
    text: isHtml ? null : decodedBody,
    html: isHtml ? decodedBody : null,
    raw: {
      headers,
      body
    }
  };
}

/**
 * Parses a multipart message.
 *
 * @param rawEmail Raw message source.
 * @param boundary Boundary string from the Content-Type header (without the
 *                 leading "--"). It is matched literally.
 * @returns Common headers, the decoded `text/plain` and `text/html` bodies
 *          ('' when absent; the last matching part wins) and the `image/*` and
 *          `application/*` parts as attachments.
 */
function processMultipartEmail(rawEmail: string, boundary: string): {
  text: string;
  html: string;
  attachments: { filename: string; contentType: string; encoding: string; content: string; }[];
  subject?: string;
  from?: string;
  to?: string;
  date?: string;
} {
  const { headers, body } = splitMimeHeadersAndBody(rawEmail);
  const attachments: { filename: string; contentType: string; encoding: string; content: string; }[] = [];

  const result = {
    subject: extractHeader(headers, 'Subject'),
    from: extractHeader(headers, 'From'),
    to: extractHeader(headers, 'To'),
    date: extractHeader(headers, 'Date'),
    text: '',
    html: '',
    attachments
  };

  collectMimeParts(body, boundary, result);
  return result;
}

/**
 * Returns the value of the first header named `headerName` (case-insensitive),
 * with folded continuation lines joined, or '' when the header is missing or
 * empty.
 */
function extractHeader(headers: string, headerName: string): string {
  // Unfold: a line break followed by whitespace continues the previous header.
  const unfolded = headers.replace(/\r?\n[ \t]+/g, ' ');
  // [ \t]* (not \s*) so an empty header does not swallow the next header line.
  const pattern = new RegExp(`^${escapeMimeRegExp(headerName)}:[ \\t]*(.*)$`, 'im');
  const match = pattern.exec(unfolded);
  return match ? match[1].trim() : '';
}

/** Returns the `filename` parameter found in a part's headers, or 'attachment'. */
function extractFilename(headers: string): string {
  const filenameMatch = headers.match(/filename="?([^"\r\n;]+)"?/i);
  return filenameMatch ? filenameMatch[1].trim() : 'attachment';
}

/**
 * Extracts media type, transfer encoding and charset from a header block.
 * Defaults: `text/plain`, `7bit`, `utf-8`. All values are lower-cased.
 */
function parseEmailHeaders(headers: string): { contentType: string; encoding: string; charset: string } {
  const result = {
    contentType: 'text/plain',
    encoding: '7bit',
    charset: 'utf-8'
  };

  const contentTypeHeader = extractHeader(headers, 'Content-Type');
  if (contentTypeHeader) {
    const mediaType = (contentTypeHeader.split(';')[0] || '').trim().toLowerCase();
    if (mediaType) {
      result.contentType = mediaType;
    }

    // charset may be any parameter, not only the first one.
    const charsetMatch = /(?:^|;)\s*charset\s*=\s*(?:"([^"]*)"|([^\s;"]+))/i.exec(contentTypeHeader);
    const charset = charsetMatch ? (charsetMatch[1] || charsetMatch[2] || '').trim() : '';
    if (charset) {
      result.charset = charset.toLowerCase();
    }
  }

  const encodingMatch = /^[^\s;]+/.exec(extractHeader(headers, 'Content-Transfer-Encoding'));
  if (encodingMatch) {
    result.encoding = encodingMatch[0].toLowerCase();
  }

  return result;
}

/**
 * Decodes a body according to its Content-Transfer-Encoding and charset.
 *
 * @param text     Encoded body.
 * @param encoding `quoted-printable`, `base64`, or anything else (7bit, 8bit,
 *                 binary, unknown), for which only the charset is converted.
 * @param charset  Charset label of the decoded bytes; defaults to `utf-8`.
 * @returns The decoded text, or '' for empty input.
 */
function decodeMIME(text: string, encoding?: string, charset: string = 'utf-8'): string {
  if (!text) return '';

  const normalizedEncoding = (encoding || '').trim().toLowerCase();
  const normalizedCharset = (charset || 'utf-8').trim().toLowerCase();

  try {
    switch (normalizedEncoding) {
      case 'quoted-printable':
        return decodeQuotedPrintable(text, normalizedCharset);
      case 'base64':
        return decodeBase64(text, normalizedCharset);
      default:
        return convertCharset(text, normalizedCharset);
    }
  } catch (error) {
    console.error('Error decoding MIME:', error);
    // NOTE(review): the fallback after this log call was outside the reviewed
    // diff hunk; returning the input mirrors decodeBase64/convertCharset - confirm.
    return text;
  }
}

/** Decodes quoted-printable text and converts the resulting bytes from `charset`. */
function decodeQuotedPrintable(text: string, charset: string): string {
  const bytes = text
    // Soft line breaks: "=" at the end of a line joins it with the next one.
    .replace(/=[ \t]*\r?\n/g, '')
    // "=XX" hex escapes become one byte each.
    .replace(/=([0-9A-F]{2})/gi, (_match, hex: string) => String.fromCharCode(parseInt(hex, 16)));

  // Non-breaking spaces are normalised only AFTER charset conversion: 0xA0 is
  // also a continuation byte of UTF-8 sequences (e.g. "à" = C3 A0).
  return convertCharset(bytes, charset).replace(/\u00A0/g, ' ');
}

/**
 * Decodes base64 text and converts the resulting bytes from `charset`.
 * Returns the input unchanged when it is not valid base64.
 */
function decodeBase64(text: string, charset: string): string {
  const compact = text.replace(/\s/g, '');

  let binaryString: string;
  try {
    binaryString = atob(compact);
  } catch (e) {
    console.error('Base64 decoding error:', e);
    return text;
  }

  return convertCharset(binaryString, charset);
}

/**
 * Interprets `text` as a byte string (one byte per character) encoded in
 * `fromCharset` and returns the decoded Unicode string.
 *
 * The input is returned unchanged when it cannot be such a byte string (it
 * contains code units above 0xFF), when its bytes are not valid in the given
 * charset (the text was most likely decoded already), or when the charset is
 * not supported.
 */
function convertCharset(text: string, fromCharset: string): string {
  if (!text) return text;

  const bytes = new Uint8Array(text.length);
  for (let i = 0; i < text.length; i++) {
    const code = text.charCodeAt(i);
    if (code > 0xff) {
      // Not a byte string: masking with 0xFF would destroy the character.
      return text;
    }
    bytes[i] = code;
  }

  if (typeof TextDecoder === 'undefined') {
    // Without TextDecoder the bytes are left as they are, which equals a
    // Latin-1 interpretation.
    return text;
  }

  let label = (fromCharset || 'utf-8').trim().toLowerCase().replace(/^["']|["']$/g, '');
  if (label === 'latin1') {
    label = 'iso-8859-1';
  } else if (label === 'cp1252') {
    label = 'windows-1252';
  }

  let decoder: TextDecoder;
  try {
    decoder = new TextDecoder(label, { fatal: true });
  } catch (e) {
    console.error('Character set conversion error:', e, 'charset:', fromCharset);
    return text;
  }

  try {
    return decoder.decode(bytes);
  } catch {
    // Invalid byte sequence for this charset: keep the text instead of
    // replacing characters with U+FFFD.
    return text;
  }
}

/** Returns the inner content of the `<body>` element, or the input when there is none. */
function extractHtmlBody(htmlContent: string): string {
  const bodyMatch = htmlContent.match(/<body[^>]*>([\s\S]*?)<\/body>/i);
  return bodyMatch ? bodyMatch[1] : htmlContent;
}

/**
 * Normalises decoded HTML: non-breaking space characters become plain spaces.
 *
 * NOTE(review): the accented-character replacement table in the reviewed patch
 * was garbled (it mapped è→ë, ê→ª, ë→«, û→») and would corrupt correctly
 * decoded text, so it is not applied here - confirm the intended table.
 */
function cleanHtml(html: string): string {
  if (!html) return '';
  return html.replace(/\u00A0/g, ' ');
}

/**
 * Best-effort decoding of message content for display.
 *
 * Content that looks like a message (a leading header block or MIME header
 * names) is parsed, preferring the HTML body over the text body. Anything else
 * is treated as a bare body and goes through the encoding heuristics.
 *
 * @returns The decoded content; the input itself when nothing applied or
 *          decoding failed.
 */
function decodeMimeContent(content: string): string {
  if (!content) return '';

  try {
    const mentionsMimeHeaders =
      content.includes('Content-Type:') || content.includes('Content-Transfer-Encoding:');

    // A header-less body must not go through the header/body split, otherwise
    // its first paragraph is consumed as "headers".
    if (mentionsMimeHeaders || hasMimeHeaderBlock(content)) {
      const decoded = decodeInfomaniakEmail(content);

      if (decoded.html) {
        return extractHtmlBody(decoded.html);
      }
      if (decoded.text) {
        return decoded.text;
      }

      if (mentionsMimeHeaders) {
        const simpleDecoded = processSinglePartEmail(content);
        return simpleDecoded.text || simpleDecoded.html || content;
      }
    }

    // Try to detect encoding and decode accordingly
    if (content.includes('=?UTF-8?B?') || content.includes('=?utf-8?B?')) {
      return decodeMIME(content, 'base64', 'utf-8');
    } else if (content.includes('=?UTF-8?Q?') || content.includes('=?utf-8?Q?') || content.includes('=20')) {
      return decodeMIME(content, 'quoted-printable', 'utf-8');
    }

    // NOTE(review): the lines computing `qpDecoded` were outside the reviewed
    // diff hunks; this reconstruction tries quoted-printable and keeps the
    // result only when it changed something - confirm against the full file.
    const qpDecoded = decodeMIME(content, 'quoted-printable', 'utf-8');
    if (qpDecoded !== content) {
      return qpDecoded;
    }

    // If nothing else worked, return the original content
    return content;
  } catch (error) {
    console.error('Error decoding email content:', error);
    // NOTE(review): fallback not visible in the reviewed diff - confirm.
    return content;
  }
}