diff --git a/app/courrier/page.tsx b/app/courrier/page.tsx index 55bc47cb..146dca20 100644 --- a/app/courrier/page.tsx +++ b/app/courrier/page.tsx @@ -28,6 +28,16 @@ import { } from 'lucide-react'; import { ScrollArea } from '@/components/ui/scroll-area'; import { useSession } from 'next-auth/react'; +import { + decodeQuotedPrintable, + decodeBase64, + convertCharset, + cleanHtml, + parseEmailHeaders, + extractBoundary, + extractFilename, + extractHeader +} from '@/lib/infomaniak-mime-decoder'; interface Account { id: number; @@ -62,14 +72,8 @@ interface Attachment { } interface ParsedEmailContent { - text: string | null; - html: string | null; - attachments: Array<{ - filename: string; - contentType: string; - encoding: string; - content: string; - }>; + headers: string; + body: string; } interface ParsedEmailMetadata { @@ -86,156 +90,69 @@ interface ParsedEmailMetadata { }; } -// Improved MIME Decoder Implementation for Infomaniak -function extractBoundary(headers: string): string | null { - const boundaryMatch = headers.match(/boundary="?([^"\r\n;]+)"?/i) || - headers.match(/boundary=([^\r\n;]+)/i); - - return boundaryMatch ? boundaryMatch[1].trim() : null; -} - -function decodeQuotedPrintable(text: string, charset: string): string { - if (!text) return ''; - - // Replace soft line breaks (=\r\n or =\n or =\r) - let decoded = text.replace(/=(?:\r\n|\n|\r)/g, ''); - - // Replace quoted-printable encoded characters (including non-ASCII characters) - decoded = decoded.replace(/=([0-9A-F]{2})/gi, (match, p1) => { - return String.fromCharCode(parseInt(p1, 16)); - }); - - // Handle character encoding - try { - // For browsers with TextDecoder support - if (typeof TextDecoder !== 'undefined') { - // Convert string to array of byte values - const bytes = new Uint8Array(Array.from(decoded).map(c => c.charCodeAt(0))); - return new TextDecoder(charset).decode(bytes); - } - - // Fallback for older browsers or when charset handling is not critical - return decoded; - } catch (e) { - console.warn('Charset conversion error:', e); - return decoded; - } -} - -function parseFullEmail(emailRaw: string): ParsedEmailContent { - console.log('=== parseFullEmail Debug ==='); - console.log('Input email length:', emailRaw.length); - console.log('First 200 chars:', emailRaw.substring(0, 200)); +function parseFullEmail(emailContent: string): ParsedEmailContent { + if (!emailContent) return { headers: '', body: '' }; // Split headers and body - const headerBodySplit = emailRaw.split(/\r?\n\r?\n/); - const headers = headerBodySplit[0]; - const body = headerBodySplit.slice(1).join('\n\n'); + const headerEnd = emailContent.indexOf('\r\n\r\n'); + if (headerEnd === -1) return { headers: '', body: emailContent }; - // Parse content type from headers - const contentTypeMatch = headers.match(/Content-Type:\s*([^;]+)/i); - const contentType = contentTypeMatch ? contentTypeMatch[1].trim().toLowerCase() : 'text/plain'; + const headers = emailContent.substring(0, headerEnd); + const body = emailContent.substring(headerEnd + 4); - // Initialize result - const result: ParsedEmailContent = { - text: null, - html: null, - attachments: [] - }; + // Parse headers + const headerInfo = parseEmailHeaders(headers); + const boundary = extractBoundary(headers); // Handle multipart content - if (contentType.includes('multipart')) { - const boundaryMatch = emailRaw.match(/boundary="?([^"\r\n;]+)"?/i) || - emailRaw.match(/boundary=([^\r\n;]+)/i); - - if (boundaryMatch) { - const boundary = boundaryMatch[1].trim(); - const parts = emailRaw.split(new RegExp(`--${boundary}(?:--)?(\\r?\\n|$)`)); - - for (const part of parts) { - if (!part.trim()) continue; - - const partHeaderBodySplit = part.split(/\r?\n\r?\n/); - const partHeaders = partHeaderBodySplit[0]; - const partBody = partHeaderBodySplit.slice(1).join('\n\n'); - - const partContentTypeMatch = partHeaders.match(/Content-Type:\s*([^;]+)/i); - const partContentType = partContentTypeMatch ? partContentTypeMatch[1].trim().toLowerCase() : 'text/plain'; - - if (partContentType.includes('text/plain')) { - result.text = decodeEmailBody(partBody, partContentType); - } else if (partContentType.includes('text/html')) { - result.html = decodeEmailBody(partBody, partContentType); - } else if (partContentType.startsWith('image/') || partContentType.startsWith('application/')) { - const filenameMatch = partHeaders.match(/filename="?([^"\r\n;]+)"?/i); - const filename = filenameMatch ? filenameMatch[1] : 'attachment'; - - result.attachments.push({ - filename, - contentType: partContentType, - encoding: 'base64', - content: partBody - }); + if (boundary && headerInfo.contentType.startsWith('multipart/')) { + const parts = body.split(`--${boundary}`); + const processedParts = parts + .filter(part => part.trim() && !part.includes('--')) + .map(part => { + const partHeaderEnd = part.indexOf('\r\n\r\n'); + if (partHeaderEnd === -1) return part; + + const partHeaders = part.substring(0, partHeaderEnd); + const partBody = part.substring(partHeaderEnd + 4); + const partInfo = parseEmailHeaders(partHeaders); + + let decodedContent = partBody; + if (partInfo.encoding === 'quoted-printable') { + decodedContent = decodeQuotedPrintable(partBody, partInfo.charset); + } else if (partInfo.encoding === 'base64') { + decodedContent = decodeBase64(partBody, partInfo.charset); } - } - } - } else { - // Single part content - if (contentType.includes('text/html')) { - result.html = decodeEmailBody(body, contentType); - } else { - result.text = decodeEmailBody(body, contentType); - } + + if (partInfo.contentType.includes('text/html')) { + decodedContent = cleanHtml(decodedContent); + } + + return decodedContent; + }); + + return { + headers, + body: processedParts.join('\n\n') + }; } - // If no content was found, try to extract content directly - if (!result.text && !result.html) { - // Try to extract HTML content - const htmlMatch = emailRaw.match(/]*>[\s\S]*?<\/html>/i); - if (htmlMatch) { - result.html = decodeEmailBody(htmlMatch[0], 'text/html'); - } else { - // Try to extract plain text - const textContent = emailRaw - .replace(/<[^>]+>/g, '') - .replace(/ /g, ' ') - .replace(/&/g, '&') - .replace(/</g, '<') - .replace(/>/g, '>') - .replace(/"/g, '"') - .replace(/\r\n/g, '\n') - .replace(/=\n/g, '') - .replace(/=3D/g, '=') - .replace(/=09/g, '\t') - .trim(); - - if (textContent) { - result.text = textContent; - } - } + // Handle single part content + let decodedBody = body; + if (headerInfo.encoding === 'quoted-printable') { + decodedBody = decodeQuotedPrintable(body, headerInfo.charset); + } else if (headerInfo.encoding === 'base64') { + decodedBody = decodeBase64(body, headerInfo.charset); } - return result; -} - -function decodeEmailBody(content: string, contentType: string): string { - try { - // Remove email client-specific markers - content = content.replace(/\r\n/g, '\n') - .replace(/=\n/g, '') - .replace(/=3D/g, '=') - .replace(/=09/g, '\t'); - - // If it's HTML content - if (contentType.includes('text/html')) { - return extractTextFromHtml(content); - } - - return content; - } catch (error) { - console.error('Error decoding email body:', error); - return content; + if (headerInfo.contentType.includes('text/html')) { + decodedBody = cleanHtml(decodedBody); } + + return { + headers, + body: decodedBody + }; } function extractTextFromHtml(html: string): string { @@ -262,44 +179,6 @@ function extractTextFromHtml(html: string): string { return html.replace(/\n\s*\n/g, '\n\n').trim(); } -function extractHeader(headers: string, headerName: string): string { - const regex = new RegExp(`^${headerName}:\\s*(.+?)(?:\\r?\\n(?!\\s)|$)`, 'im'); - const match = headers.match(regex); - return match ? match[1].trim() : ''; -} - -function extractFilename(headers: string): string { - const filenameMatch = headers.match(/filename="?([^"\r\n;]+)"?/i); - return filenameMatch ? filenameMatch[1].trim() : 'attachment'; -} - -function parseEmailHeaders(headers: string): { contentType: string; encoding: string; charset: string } { - const result = { - contentType: 'text/plain', - encoding: '7bit', - charset: 'utf-8' - }; - - // Extract content type and charset - const contentTypeMatch = headers.match(/Content-Type:\s*([^;]+)(?:;\s*charset=([^;"\r\n]+)|(?:;\s*charset="([^"]+)"))?/i); - if (contentTypeMatch) { - result.contentType = contentTypeMatch[1].trim().toLowerCase(); - if (contentTypeMatch[2]) { - result.charset = contentTypeMatch[2].trim().toLowerCase(); - } else if (contentTypeMatch[3]) { - result.charset = contentTypeMatch[3].trim().toLowerCase(); - } - } - - // Extract content transfer encoding - const encodingMatch = headers.match(/Content-Transfer-Encoding:\s*([^\s;\r\n]+)/i); - if (encodingMatch) { - result.encoding = encodingMatch[1].trim().toLowerCase(); - } - - return result; -} - function decodeMIME(text: string, encoding?: string, charset: string = 'utf-8'): string { if (!text) return ''; @@ -326,114 +205,11 @@ function decodeMIME(text: string, encoding?: string, charset: string = 'utf-8'): } } -function decodeBase64(text: string, charset: string): string { - const cleanText = text.replace(/\s/g, ''); - - let binaryString; - try { - binaryString = atob(cleanText); - } catch (e) { - console.error('Base64 decoding error:', e); - return text; - } - - return convertCharset(binaryString, charset); -} - -function convertCharset(text: string, fromCharset: string): string { - try { - if (typeof TextDecoder !== 'undefined') { - const bytes = new Uint8Array(text.length); - for (let i = 0; i < text.length; i++) { - bytes[i] = text.charCodeAt(i) & 0xFF; - } - - let normalizedCharset = fromCharset.toLowerCase(); - - // Normalize charset names - if (normalizedCharset === 'iso-8859-1' || normalizedCharset === 'latin1') { - normalizedCharset = 'iso-8859-1'; - } else if (normalizedCharset === 'windows-1252' || normalizedCharset === 'cp1252') { - normalizedCharset = 'windows-1252'; - } - - const decoder = new TextDecoder(normalizedCharset); - return decoder.decode(bytes); - } - - // Fallback for older browsers or unsupported charsets - if (fromCharset.toLowerCase() === 'iso-8859-1' || fromCharset.toLowerCase() === 'windows-1252') { - return text - .replace(/\xC3\xA0/g, 'à') - .replace(/\xC3\xA2/g, 'â') - .replace(/\xC3\xA9/g, 'é') - .replace(/\xC3\xA8/g, 'è') - .replace(/\xC3\xAA/g, 'ê') - .replace(/\xC3\xAB/g, 'ë') - .replace(/\xC3\xB4/g, 'ô') - .replace(/\xC3\xB9/g, 'ù') - .replace(/\xC3\xBB/g, 'û') - .replace(/\xC3\x80/g, 'À') - .replace(/\xC3\x89/g, 'É') - .replace(/\xC3\x87/g, 'Ç') - // Clean up HTML entities - .replace(/ç/g, 'ç') - .replace(/é/g, 'é') - .replace(/è/g, 'ë') - .replace(/ê/g, 'ª') - .replace(/ë/g, '«') - .replace(/û/g, '»') - .replace(/ /g, ' ') - .replace(/\xA0/g, ' '); - } - - return text; - } catch (e) { - console.error('Character set conversion error:', e, 'charset:', fromCharset); - return text; - } -} - function extractHtmlBody(htmlContent: string): string { const bodyMatch = htmlContent.match(/
]*>([\s\S]*?)<\/body>/i); return bodyMatch ? bodyMatch[1] : htmlContent; } -function cleanHtml(html: string): string { - if (!html) return ''; - - return html - // Fix common Infomaniak-specific character encodings - .replace(/=C2=A0/g, ' ') // non-breaking space - .replace(/=E2=80=93/g, '\u2013') // en dash - .replace(/=E2=80=94/g, '\u2014') // em dash - .replace(/=E2=80=98/g, '\u2018') // left single quote - .replace(/=E2=80=99/g, '\u2019') // right single quote - .replace(/=E2=80=9C/g, '\u201C') // left double quote - .replace(/=E2=80=9D/g, '\u201D') // right double quote - .replace(/=C3=A0/g, 'à') - .replace(/=C3=A2/g, 'â') - .replace(/=C3=A9/g, 'é') - .replace(/=C3=A8/g, 'è') - .replace(/=C3=AA/g, 'ê') - .replace(/=C3=AB/g, 'ë') - .replace(/=C3=B4/g, 'ô') - .replace(/=C3=B9/g, 'ù') - .replace(/=C3=xBB/g, 'û') - .replace(/=C3=80/g, 'À') - .replace(/=C3=89/g, 'É') - .replace(/=C3=87/g, 'Ç') - // Clean up HTML entities - .replace(/ç/g, 'ç') - .replace(/é/g, 'é') - .replace(/è/g, 'ë') - .replace(/ê/g, 'ª') - .replace(/ë/g, '«') - .replace(/û/g, '»') - .replace(/ /g, ' ') - .replace(/\xA0/g, ' '); -} - function decodeMimeContent(content: string): string { if (!content) return ''; @@ -479,22 +255,22 @@ function renderEmailContent(email: Email) { // First try to parse the full email const parsed = parseFullEmail(email.body); console.log('Parsed content:', { - hasText: !!parsed.text, - hasHtml: !!parsed.html, - hasAttachments: parsed.attachments.length > 0 + hasText: !!parsed.body, + hasHtml: !!parsed.headers, + hasAttachments: parsed.headers.length > 0 }); // Determine content and type let content = ''; let isHtml = false; - if (parsed.html) { + if (parsed.headers) { // Use our existing MIME decoding for HTML content - content = decodeMIME(parsed.html, 'quoted-printable', 'utf-8'); + content = decodeMIME(parsed.headers, 'quoted-printable', 'utf-8'); isHtml = true; - } else if (parsed.text) { + } else if (parsed.body) { // Use our existing MIME decoding for plain text content - content = decodeMIME(parsed.text, 'quoted-printable', 'utf-8'); + content = decodeMIME(parsed.body, 'quoted-printable', 'utf-8'); isHtml = false; } else { // Try to extract content directly from body using our existing functions @@ -515,11 +291,11 @@ function renderEmailContent(email: Email) { } // Handle attachments - const attachmentElements = parsed.attachments.map((attachment, index) => ( + const attachmentElements = parsed.headers.split('\n').filter(header => header.startsWith('Content-Type:')).map((header, index) => (