diff --git a/app/courrier/page.tsx b/app/courrier/page.tsx index 55bc47cb..146dca20 100644 --- a/app/courrier/page.tsx +++ b/app/courrier/page.tsx @@ -28,6 +28,16 @@ import { } from 'lucide-react'; import { ScrollArea } from '@/components/ui/scroll-area'; import { useSession } from 'next-auth/react'; +import { + decodeQuotedPrintable, + decodeBase64, + convertCharset, + cleanHtml, + parseEmailHeaders, + extractBoundary, + extractFilename, + extractHeader +} from '@/lib/infomaniak-mime-decoder'; interface Account { id: number; @@ -62,14 +72,8 @@ interface Attachment { } interface ParsedEmailContent { - text: string | null; - html: string | null; - attachments: Array<{ - filename: string; - contentType: string; - encoding: string; - content: string; - }>; + headers: string; + body: string; } interface ParsedEmailMetadata { @@ -86,156 +90,69 @@ interface ParsedEmailMetadata { }; } -// Improved MIME Decoder Implementation for Infomaniak -function extractBoundary(headers: string): string | null { - const boundaryMatch = headers.match(/boundary="?([^"\r\n;]+)"?/i) || - headers.match(/boundary=([^\r\n;]+)/i); - - return boundaryMatch ? boundaryMatch[1].trim() : null; -} - -function decodeQuotedPrintable(text: string, charset: string): string { - if (!text) return ''; - - // Replace soft line breaks (=\r\n or =\n or =\r) - let decoded = text.replace(/=(?:\r\n|\n|\r)/g, ''); - - // Replace quoted-printable encoded characters (including non-ASCII characters) - decoded = decoded.replace(/=([0-9A-F]{2})/gi, (match, p1) => { - return String.fromCharCode(parseInt(p1, 16)); - }); - - // Handle character encoding - try { - // For browsers with TextDecoder support - if (typeof TextDecoder !== 'undefined') { - // Convert string to array of byte values - const bytes = new Uint8Array(Array.from(decoded).map(c => c.charCodeAt(0))); - return new TextDecoder(charset).decode(bytes); - } - - // Fallback for older browsers or when charset handling is not critical - return decoded; - } catch (e) { - console.warn('Charset conversion error:', e); - return decoded; - } -} - -function parseFullEmail(emailRaw: string): ParsedEmailContent { - console.log('=== parseFullEmail Debug ==='); - console.log('Input email length:', emailRaw.length); - console.log('First 200 chars:', emailRaw.substring(0, 200)); +function parseFullEmail(emailContent: string): ParsedEmailContent { + if (!emailContent) return { headers: '', body: '' }; // Split headers and body - const headerBodySplit = emailRaw.split(/\r?\n\r?\n/); - const headers = headerBodySplit[0]; - const body = headerBodySplit.slice(1).join('\n\n'); + const headerEnd = emailContent.indexOf('\r\n\r\n'); + if (headerEnd === -1) return { headers: '', body: emailContent }; - // Parse content type from headers - const contentTypeMatch = headers.match(/Content-Type:\s*([^;]+)/i); - const contentType = contentTypeMatch ? contentTypeMatch[1].trim().toLowerCase() : 'text/plain'; + const headers = emailContent.substring(0, headerEnd); + const body = emailContent.substring(headerEnd + 4); - // Initialize result - const result: ParsedEmailContent = { - text: null, - html: null, - attachments: [] - }; + // Parse headers + const headerInfo = parseEmailHeaders(headers); + const boundary = extractBoundary(headers); // Handle multipart content - if (contentType.includes('multipart')) { - const boundaryMatch = emailRaw.match(/boundary="?([^"\r\n;]+)"?/i) || - emailRaw.match(/boundary=([^\r\n;]+)/i); - - if (boundaryMatch) { - const boundary = boundaryMatch[1].trim(); - const parts = emailRaw.split(new RegExp(`--${boundary}(?:--)?(\\r?\\n|$)`)); - - for (const part of parts) { - if (!part.trim()) continue; - - const partHeaderBodySplit = part.split(/\r?\n\r?\n/); - const partHeaders = partHeaderBodySplit[0]; - const partBody = partHeaderBodySplit.slice(1).join('\n\n'); - - const partContentTypeMatch = partHeaders.match(/Content-Type:\s*([^;]+)/i); - const partContentType = partContentTypeMatch ? partContentTypeMatch[1].trim().toLowerCase() : 'text/plain'; - - if (partContentType.includes('text/plain')) { - result.text = decodeEmailBody(partBody, partContentType); - } else if (partContentType.includes('text/html')) { - result.html = decodeEmailBody(partBody, partContentType); - } else if (partContentType.startsWith('image/') || partContentType.startsWith('application/')) { - const filenameMatch = partHeaders.match(/filename="?([^"\r\n;]+)"?/i); - const filename = filenameMatch ? filenameMatch[1] : 'attachment'; - - result.attachments.push({ - filename, - contentType: partContentType, - encoding: 'base64', - content: partBody - }); + if (boundary && headerInfo.contentType.startsWith('multipart/')) { + const parts = body.split(`--${boundary}`); + const processedParts = parts + .filter(part => part.trim() && !part.includes('--')) + .map(part => { + const partHeaderEnd = part.indexOf('\r\n\r\n'); + if (partHeaderEnd === -1) return part; + + const partHeaders = part.substring(0, partHeaderEnd); + const partBody = part.substring(partHeaderEnd + 4); + const partInfo = parseEmailHeaders(partHeaders); + + let decodedContent = partBody; + if (partInfo.encoding === 'quoted-printable') { + decodedContent = decodeQuotedPrintable(partBody, partInfo.charset); + } else if (partInfo.encoding === 'base64') { + decodedContent = decodeBase64(partBody, partInfo.charset); } - } - } - } else { - // Single part content - if (contentType.includes('text/html')) { - result.html = decodeEmailBody(body, contentType); - } else { - result.text = decodeEmailBody(body, contentType); - } + + if (partInfo.contentType.includes('text/html')) { + decodedContent = cleanHtml(decodedContent); + } + + return decodedContent; + }); + + return { + headers, + body: processedParts.join('\n\n') + }; } - // If no content was found, try to extract content directly - if (!result.text && !result.html) { - // Try to extract HTML content - const htmlMatch = emailRaw.match(/]*>[\s\S]*?<\/html>/i); - if (htmlMatch) { - result.html = decodeEmailBody(htmlMatch[0], 'text/html'); - } else { - // Try to extract plain text - const textContent = emailRaw - .replace(/<[^>]+>/g, '') - .replace(/ /g, ' ') - .replace(/&/g, '&') - .replace(/</g, '<') - .replace(/>/g, '>') - .replace(/"/g, '"') - .replace(/\r\n/g, '\n') - .replace(/=\n/g, '') - .replace(/=3D/g, '=') - .replace(/=09/g, '\t') - .trim(); - - if (textContent) { - result.text = textContent; - } - } + // Handle single part content + let decodedBody = body; + if (headerInfo.encoding === 'quoted-printable') { + decodedBody = decodeQuotedPrintable(body, headerInfo.charset); + } else if (headerInfo.encoding === 'base64') { + decodedBody = decodeBase64(body, headerInfo.charset); } - return result; -} - -function decodeEmailBody(content: string, contentType: string): string { - try { - // Remove email client-specific markers - content = content.replace(/\r\n/g, '\n') - .replace(/=\n/g, '') - .replace(/=3D/g, '=') - .replace(/=09/g, '\t'); - - // If it's HTML content - if (contentType.includes('text/html')) { - return extractTextFromHtml(content); - } - - return content; - } catch (error) { - console.error('Error decoding email body:', error); - return content; + if (headerInfo.contentType.includes('text/html')) { + decodedBody = cleanHtml(decodedBody); } + + return { + headers, + body: decodedBody + }; } function extractTextFromHtml(html: string): string { @@ -262,44 +179,6 @@ function extractTextFromHtml(html: string): string { return html.replace(/\n\s*\n/g, '\n\n').trim(); } -function extractHeader(headers: string, headerName: string): string { - const regex = new RegExp(`^${headerName}:\\s*(.+?)(?:\\r?\\n(?!\\s)|$)`, 'im'); - const match = headers.match(regex); - return match ? match[1].trim() : ''; -} - -function extractFilename(headers: string): string { - const filenameMatch = headers.match(/filename="?([^"\r\n;]+)"?/i); - return filenameMatch ? filenameMatch[1].trim() : 'attachment'; -} - -function parseEmailHeaders(headers: string): { contentType: string; encoding: string; charset: string } { - const result = { - contentType: 'text/plain', - encoding: '7bit', - charset: 'utf-8' - }; - - // Extract content type and charset - const contentTypeMatch = headers.match(/Content-Type:\s*([^;]+)(?:;\s*charset=([^;"\r\n]+)|(?:;\s*charset="([^"]+)"))?/i); - if (contentTypeMatch) { - result.contentType = contentTypeMatch[1].trim().toLowerCase(); - if (contentTypeMatch[2]) { - result.charset = contentTypeMatch[2].trim().toLowerCase(); - } else if (contentTypeMatch[3]) { - result.charset = contentTypeMatch[3].trim().toLowerCase(); - } - } - - // Extract content transfer encoding - const encodingMatch = headers.match(/Content-Transfer-Encoding:\s*([^\s;\r\n]+)/i); - if (encodingMatch) { - result.encoding = encodingMatch[1].trim().toLowerCase(); - } - - return result; -} - function decodeMIME(text: string, encoding?: string, charset: string = 'utf-8'): string { if (!text) return ''; @@ -326,114 +205,11 @@ function decodeMIME(text: string, encoding?: string, charset: string = 'utf-8'): } } -function decodeBase64(text: string, charset: string): string { - const cleanText = text.replace(/\s/g, ''); - - let binaryString; - try { - binaryString = atob(cleanText); - } catch (e) { - console.error('Base64 decoding error:', e); - return text; - } - - return convertCharset(binaryString, charset); -} - -function convertCharset(text: string, fromCharset: string): string { - try { - if (typeof TextDecoder !== 'undefined') { - const bytes = new Uint8Array(text.length); - for (let i = 0; i < text.length; i++) { - bytes[i] = text.charCodeAt(i) & 0xFF; - } - - let normalizedCharset = fromCharset.toLowerCase(); - - // Normalize charset names - if (normalizedCharset === 'iso-8859-1' || normalizedCharset === 'latin1') { - normalizedCharset = 'iso-8859-1'; - } else if (normalizedCharset === 'windows-1252' || normalizedCharset === 'cp1252') { - normalizedCharset = 'windows-1252'; - } - - const decoder = new TextDecoder(normalizedCharset); - return decoder.decode(bytes); - } - - // Fallback for older browsers or unsupported charsets - if (fromCharset.toLowerCase() === 'iso-8859-1' || fromCharset.toLowerCase() === 'windows-1252') { - return text - .replace(/\xC3\xA0/g, 'à') - .replace(/\xC3\xA2/g, 'â') - .replace(/\xC3\xA9/g, 'é') - .replace(/\xC3\xA8/g, 'è') - .replace(/\xC3\xAA/g, 'ê') - .replace(/\xC3\xAB/g, 'ë') - .replace(/\xC3\xB4/g, 'ô') - .replace(/\xC3\xB9/g, 'ù') - .replace(/\xC3\xBB/g, 'û') - .replace(/\xC3\x80/g, 'À') - .replace(/\xC3\x89/g, 'É') - .replace(/\xC3\x87/g, 'Ç') - // Clean up HTML entities - .replace(/ç/g, 'ç') - .replace(/é/g, 'é') - .replace(/è/g, 'ë') - .replace(/ê/g, 'ª') - .replace(/ë/g, '«') - .replace(/û/g, '»') - .replace(/ /g, ' ') - .replace(/\xA0/g, ' '); - } - - return text; - } catch (e) { - console.error('Character set conversion error:', e, 'charset:', fromCharset); - return text; - } -} - function extractHtmlBody(htmlContent: string): string { const bodyMatch = htmlContent.match(/]*>([\s\S]*?)<\/body>/i); return bodyMatch ? bodyMatch[1] : htmlContent; } -function cleanHtml(html: string): string { - if (!html) return ''; - - return html - // Fix common Infomaniak-specific character encodings - .replace(/=C2=A0/g, ' ') // non-breaking space - .replace(/=E2=80=93/g, '\u2013') // en dash - .replace(/=E2=80=94/g, '\u2014') // em dash - .replace(/=E2=80=98/g, '\u2018') // left single quote - .replace(/=E2=80=99/g, '\u2019') // right single quote - .replace(/=E2=80=9C/g, '\u201C') // left double quote - .replace(/=E2=80=9D/g, '\u201D') // right double quote - .replace(/=C3=A0/g, 'à') - .replace(/=C3=A2/g, 'â') - .replace(/=C3=A9/g, 'é') - .replace(/=C3=A8/g, 'è') - .replace(/=C3=AA/g, 'ê') - .replace(/=C3=AB/g, 'ë') - .replace(/=C3=B4/g, 'ô') - .replace(/=C3=B9/g, 'ù') - .replace(/=C3=xBB/g, 'û') - .replace(/=C3=80/g, 'À') - .replace(/=C3=89/g, 'É') - .replace(/=C3=87/g, 'Ç') - // Clean up HTML entities - .replace(/ç/g, 'ç') - .replace(/é/g, 'é') - .replace(/è/g, 'ë') - .replace(/ê/g, 'ª') - .replace(/ë/g, '«') - .replace(/û/g, '»') - .replace(/ /g, ' ') - .replace(/\xA0/g, ' '); -} - function decodeMimeContent(content: string): string { if (!content) return ''; @@ -479,22 +255,22 @@ function renderEmailContent(email: Email) { // First try to parse the full email const parsed = parseFullEmail(email.body); console.log('Parsed content:', { - hasText: !!parsed.text, - hasHtml: !!parsed.html, - hasAttachments: parsed.attachments.length > 0 + hasText: !!parsed.body, + hasHtml: !!parsed.headers, + hasAttachments: parsed.headers.length > 0 }); // Determine content and type let content = ''; let isHtml = false; - if (parsed.html) { + if (parsed.headers) { // Use our existing MIME decoding for HTML content - content = decodeMIME(parsed.html, 'quoted-printable', 'utf-8'); + content = decodeMIME(parsed.headers, 'quoted-printable', 'utf-8'); isHtml = true; - } else if (parsed.text) { + } else if (parsed.body) { // Use our existing MIME decoding for plain text content - content = decodeMIME(parsed.text, 'quoted-printable', 'utf-8'); + content = decodeMIME(parsed.body, 'quoted-printable', 'utf-8'); isHtml = false; } else { // Try to extract content directly from body using our existing functions @@ -515,11 +291,11 @@ function renderEmailContent(email: Email) { } // Handle attachments - const attachmentElements = parsed.attachments.map((attachment, index) => ( + const attachmentElements = parsed.headers.split('\n').filter(header => header.startsWith('Content-Type:')).map((header, index) => (
- {attachment.filename} + {header.split(': ')[1]}
)); @@ -1335,18 +1111,18 @@ export default function CourrierPage() { try { const parsed = parseFullEmail(email.body); console.log('Parsed content:', { - hasText: !!parsed.text, - hasHtml: !!parsed.html, - textPreview: parsed.text?.substring(0, 100) || 'No text', - htmlPreview: parsed.html?.substring(0, 100) || 'No HTML' + hasText: !!parsed.body, + hasHtml: !!parsed.headers, + textPreview: parsed.body?.substring(0, 100) || 'No text', + htmlPreview: parsed.headers?.substring(0, 100) || 'No HTML' }); let preview = ''; - if (parsed.text) { - preview = parsed.text; + if (parsed.body) { + preview = parsed.body; console.log('Using text content for preview'); - } else if (parsed.html) { - preview = parsed.html + } else if (parsed.headers) { + preview = parsed.headers .replace(/]*>[\s\S]*?<\/style>/gi, '') .replace(/]*>[\s\S]*?<\/script>/gi, '') .replace(/<[^>]+>/g, ' ') @@ -1613,64 +1389,33 @@ export default function CourrierPage() { const getReplyBody = () => { if (!selectedEmail?.body) return ''; - try { - // Parse the full email content - const parsed = parseFullEmail(selectedEmail.body); - let originalContent = ''; - - // Get the content from either HTML or text part - if (parsed.html) { - // Use MIME decoding for HTML content - originalContent = decodeMIME(parsed.html, 'quoted-printable', 'utf-8'); - - // Convert HTML to plain text for the reply - originalContent = originalContent - .replace(/]*>[\s\S]*?<\/style>/gi, '') - .replace(/]*>[\s\S]*?<\/script>/gi, '') - .replace(//gi, '\n') - .replace(/]*>/gi, '\n') - .replace(/<\/div>/gi, '') - .replace(/]*>/gi, '\n') - .replace(/<\/p>/gi, '') - .replace(/<[^>]+>/g, '') - .replace(/ |‌|»|«|>/g, match => { - switch (match) { - case ' ': return ' '; - case '‌': return ''; - case '»': return '»'; - case '«': return '«'; - case '>': return '>'; - case '<': return '<'; - case '&': return '&'; - default: return match; - } - }) - .replace(/^\s+$/gm, '') - .replace(/\n{3,}/g, '\n\n') - .trim(); - } else if (parsed.text) { - // Use MIME decoding for plain text content - originalContent = decodeMIME(parsed.text, 'quoted-printable', 'utf-8').trim(); - } else { - // Fallback to raw body if parsing fails, but still try to decode it - originalContent = decodeMIME( - selectedEmail.body.replace(/<[^>]+>/g, ''), - 'quoted-printable', - 'utf-8' - ).trim(); - } + const parsed = parseFullEmail(selectedEmail.body); + if (!parsed) return ''; - // Format the reply with proper indentation - const formattedContent = originalContent - .split('\n') - .map(line => `> ${line}`) - .join('\n'); + const body = parsed.body; - return `\n\n${formattedContent}\n\n`; - } catch (error) { - console.error('Error preparing reply body:', error); - return ''; - } + // Convert HTML to plain text if needed + const plainText = body + .replace(//gi, '\n') + .replace(/]*>/gi, '\n') + .replace(/<\/div>/gi, '') + .replace(/]*>/gi, '\n') + .replace(/<\/p>/gi, '') + .replace(/ /g, ' ') + .replace(/>/g, '>') + .replace(/</g, '<') + .replace(/&/g, '&') + .replace(/"/g, '"') + .replace(/<[^>]+>/g, '') + .replace(/^\s+$/gm, '') + .replace(/\n{3,}/g, '\n\n') + .trim(); + + // Add reply prefix to each line + return plainText + .split('\n') + .map(line => `> ${line}`) + .join('\n'); }; // Prepare the reply email diff --git a/app/mail/page.tsx b/app/mail/page.tsx index d8b47b3b..5cfd3daa 100644 --- a/app/mail/page.tsx +++ b/app/mail/page.tsx @@ -27,6 +27,16 @@ import { AlertOctagon, Archive, RefreshCw } from 'lucide-react'; import { ScrollArea } from '@/components/ui/scroll-area'; +import { + decodeQuotedPrintable, + decodeBase64, + convertCharset, + cleanHtml, + parseEmailHeaders, + extractBoundary, + extractFilename, + extractHeader +} from '@/lib/infomaniak-mime-decoder'; interface Account { id: number; @@ -60,43 +70,36 @@ interface Attachment { encoding: string; } -// Improved MIME Decoder Implementation for Infomaniak -function extractBoundary(headers: string): string | null { - const boundaryMatch = headers.match(/boundary="?([^"\r\n;]+)"?/i) || - headers.match(/boundary=([^\r\n;]+)/i); - - return boundaryMatch ? boundaryMatch[1].trim() : null; +interface EmailAttachment { + filename: string; + contentType: string; + encoding: string; + content: string; } -function decodeQuotedPrintable(text: string, charset: string): string { - if (!text) return ''; - - // Replace soft line breaks (=\r\n or =\n or =\r) - let decoded = text.replace(/=(?:\r\n|\n|\r)/g, ''); - - // Replace quoted-printable encoded characters (including non-ASCII characters) - decoded = decoded.replace(/=([0-9A-F]{2})/gi, (match, p1) => { - return String.fromCharCode(parseInt(p1, 16)); - }); - - // Handle character encoding - try { - // For browsers with TextDecoder support - if (typeof TextDecoder !== 'undefined') { - // Convert string to array of byte values - const bytes = new Uint8Array(Array.from(decoded).map(c => c.charCodeAt(0))); - return new TextDecoder(charset).decode(bytes); - } - - // Fallback for older browsers or when charset handling is not critical - return decoded; - } catch (e) { - console.warn('Charset conversion error:', e); - return decoded; - } +interface ParsedEmail { + text: string; + html: string; + attachments: EmailAttachment[]; + headers?: string; } -function parseFullEmail(emailRaw: string) { +interface EmailMessage { + subject: string; + from: string; + to: string; + date: string; + contentType: string; + text: string | null; + html: string | null; + attachments: EmailAttachment[]; + raw: { + headers: string; + body: string; + }; +} + +function parseFullEmail(emailRaw: string): ParsedEmail | EmailMessage { // Check if this is a multipart message by looking for boundary definition const boundaryMatch = emailRaw.match(/boundary="?([^"\r\n;]+)"?/i) || emailRaw.match(/boundary=([^\r\n;]+)/i); @@ -119,127 +122,72 @@ function parseFullEmail(emailRaw: string) { return processMultipartEmail(emailRaw, boundary, mainHeaders); } else { - // This is a single part message - return processSinglePartEmail(emailRaw); + // Split headers and body + const [headers, body] = emailRaw.split(/\r?\n\r?\n/, 2); + + // If no boundary is found, treat as a single part message + const emailInfo = parseEmailHeaders(headers); + return { + subject: extractHeader(headers, 'Subject'), + from: extractHeader(headers, 'From'), + to: extractHeader(headers, 'To'), + date: extractHeader(headers, 'Date'), + contentType: emailInfo.contentType, + text: emailInfo.contentType.includes('text/plain') ? body : null, + html: emailInfo.contentType.includes('text/html') ? body : null, + attachments: [], // Add empty attachments array for single part messages + raw: { + headers, + body + } + }; } } -function processMultipartEmail(emailRaw: string, boundary: string, mainHeaders: string = ''): { - text: string; - html: string; - attachments: { filename: string; contentType: string; encoding: string; content: string; }[]; - headers?: string; -} { - const result = { +function processMultipartEmail(emailRaw: string, boundary: string, mainHeaders: string): ParsedEmail { + const parts = emailRaw.split(new RegExp(`--${boundary}(?:--)?\\s*`, 'm')); + const result: ParsedEmail = { text: '', html: '', - attachments: [] as { filename: string; contentType: string; encoding: string; content: string; }[], - headers: mainHeaders + attachments: [] }; - - // Split by boundary (more robust pattern) - const boundaryRegex = new RegExp(`--${boundary}(?:--)?(\\r?\\n|$)`, 'g'); - - // Get all boundary positions - const matches = Array.from(emailRaw.matchAll(boundaryRegex)); - const boundaryPositions = matches.map(match => match.index!); - - // Extract content between boundaries - for (let i = 0; i < boundaryPositions.length - 1; i++) { - const startPos = boundaryPositions[i] + matches[i][0].length; - const endPos = boundaryPositions[i + 1]; - - if (endPos > startPos) { - const partContent = emailRaw.substring(startPos, endPos).trim(); + + for (const part of parts) { + if (!part.trim()) continue; + + const [partHeaders, ...bodyParts] = part.split(/\r?\n\r?\n/); + const partBody = bodyParts.join('\n\n'); + const partInfo = parseEmailHeaders(partHeaders); + + if (partInfo.contentType.startsWith('text/')) { + let decodedContent = ''; - if (partContent) { - const decoded = processSinglePartEmail(partContent); - - if (decoded.contentType.includes('text/plain')) { - result.text = decoded.text || ''; - } else if (decoded.contentType.includes('text/html')) { - result.html = cleanHtml(decoded.html || ''); - } else if ( - decoded.contentType.startsWith('image/') || - decoded.contentType.startsWith('application/') - ) { - const filename = extractFilename(partContent); - result.attachments.push({ - filename, - contentType: decoded.contentType, - encoding: decoded.raw?.headers ? parseEmailHeaders(decoded.raw.headers).encoding : '7bit', - content: decoded.raw?.body || '' - }); - } + if (partInfo.encoding === 'quoted-printable') { + decodedContent = decodeQuotedPrintable(partBody, partInfo.charset); + } else if (partInfo.encoding === 'base64') { + decodedContent = decodeBase64(partBody, partInfo.charset); + } else { + decodedContent = partBody; } + + if (partInfo.contentType.includes('html')) { + decodedContent = cleanHtml(decodedContent); + result.html = decodedContent; + } else { + result.text = decodedContent; + } + } else { + // Handle attachment + const filename = extractFilename(partHeaders); + result.attachments.push({ + filename, + contentType: partInfo.contentType, + encoding: partInfo.encoding, + content: partBody + }); } } - - return result; -} -function processSinglePartEmail(rawEmail: string) { - // Split headers and body - const headerBodySplit = rawEmail.split(/\r?\n\r?\n/); - const headers = headerBodySplit[0]; - const body = headerBodySplit.slice(1).join('\n\n'); - - // Parse headers to get content type, encoding, etc. - const emailInfo = parseEmailHeaders(headers); - - // Decode the body based on its encoding - const decodedBody = decodeMIME(body, emailInfo.encoding, emailInfo.charset); - - return { - subject: extractHeader(headers, 'Subject'), - from: extractHeader(headers, 'From'), - to: extractHeader(headers, 'To'), - date: extractHeader(headers, 'Date'), - contentType: emailInfo.contentType, - text: emailInfo.contentType.includes('html') ? null : decodedBody, - html: emailInfo.contentType.includes('html') ? decodedBody : null, - raw: { - headers, - body - } - }; -} - -function extractHeader(headers: string, headerName: string): string { - const regex = new RegExp(`^${headerName}:\\s*(.+?)(?:\\r?\\n(?!\\s)|$)`, 'im'); - const match = headers.match(regex); - return match ? match[1].trim() : ''; -} - -function extractFilename(headers: string): string { - const filenameMatch = headers.match(/filename="?([^"\r\n;]+)"?/i); - return filenameMatch ? filenameMatch[1].trim() : 'attachment'; -} - -function parseEmailHeaders(headers: string): { contentType: string; encoding: string; charset: string } { - const result = { - contentType: 'text/plain', - encoding: '7bit', - charset: 'utf-8' - }; - - // Extract content type and charset - const contentTypeMatch = headers.match(/Content-Type:\s*([^;]+)(?:;\s*charset=([^;"\r\n]+)|(?:;\s*charset="([^"]+)"))?/i); - if (contentTypeMatch) { - result.contentType = contentTypeMatch[1].trim().toLowerCase(); - if (contentTypeMatch[2]) { - result.charset = contentTypeMatch[2].trim().toLowerCase(); - } else if (contentTypeMatch[3]) { - result.charset = contentTypeMatch[3].trim().toLowerCase(); - } - } - - // Extract content transfer encoding - const encodingMatch = headers.match(/Content-Transfer-Encoding:\s*([^\s;\r\n]+)/i); - if (encodingMatch) { - result.encoding = encodingMatch[1].trim().toLowerCase(); - } - return result; } @@ -269,114 +217,6 @@ function decodeMIME(text: string, encoding?: string, charset: string = 'utf-8'): } } -function decodeBase64(text: string, charset: string): string { - const cleanText = text.replace(/\s/g, ''); - - let binaryString; - try { - binaryString = atob(cleanText); - } catch (e) { - console.error('Base64 decoding error:', e); - return text; - } - - return convertCharset(binaryString, charset); -} - -function convertCharset(text: string, fromCharset: string): string { - try { - if (typeof TextDecoder !== 'undefined') { - const bytes = new Uint8Array(text.length); - for (let i = 0; i < text.length; i++) { - bytes[i] = text.charCodeAt(i) & 0xFF; - } - - let normalizedCharset = fromCharset.toLowerCase(); - - // Normalize charset names - if (normalizedCharset === 'iso-8859-1' || normalizedCharset === 'latin1') { - normalizedCharset = 'iso-8859-1'; - } else if (normalizedCharset === 'windows-1252' || normalizedCharset === 'cp1252') { - normalizedCharset = 'windows-1252'; - } - - const decoder = new TextDecoder(normalizedCharset); - return decoder.decode(bytes); - } - - // Fallback for older browsers or unsupported charsets - if (fromCharset.toLowerCase() === 'iso-8859-1' || fromCharset.toLowerCase() === 'windows-1252') { - return text - .replace(/\xC3\xA0/g, 'à') - .replace(/\xC3\xA2/g, 'â') - .replace(/\xC3\xA9/g, 'é') - .replace(/\xC3\xA8/g, 'è') - .replace(/\xC3\xAA/g, 'ê') - .replace(/\xC3\xAB/g, 'ë') - .replace(/\xC3\xB4/g, 'ô') - .replace(/\xC3\xB9/g, 'ù') - .replace(/\xC3\xBB/g, 'û') - .replace(/\xC3\x80/g, 'À') - .replace(/\xC3\x89/g, 'É') - .replace(/\xC3\x87/g, 'Ç') - // Clean up HTML entities - .replace(/ç/g, 'ç') - .replace(/é/g, 'é') - .replace(/è/g, 'ë') - .replace(/ê/g, 'ª') - .replace(/ë/g, '«') - .replace(/û/g, '»') - .replace(/ /g, ' ') - .replace(/\xA0/g, ' '); - } - - return text; - } catch (e) { - console.error('Character set conversion error:', e, 'charset:', fromCharset); - return text; - } -} - -function extractHtmlBody(htmlContent: string): string { - const bodyMatch = htmlContent.match(/]*>([\s\S]*?)<\/body>/i); - return bodyMatch ? bodyMatch[1] : htmlContent; -} - -function cleanHtml(html: string): string { - if (!html) return ''; - - return html - // Fix common Infomaniak-specific character encodings - .replace(/=C2=A0/g, ' ') // non-breaking space - .replace(/=E2=80=93/g, '\u2013') // en dash - .replace(/=E2=80=94/g, '\u2014') // em dash - .replace(/=E2=80=98/g, '\u2018') // left single quote - .replace(/=E2=80=99/g, '\u2019') // right single quote - .replace(/=E2=80=9C/g, '\u201C') // left double quote - .replace(/=E2=80=9D/g, '\u201D') // right double quote - .replace(/=C3=A0/g, 'à') - .replace(/=C3=A2/g, 'â') - .replace(/=C3=A9/g, 'é') - .replace(/=C3=A8/g, 'è') - .replace(/=C3=AA/g, 'ê') - .replace(/=C3=AB/g, 'ë') - .replace(/=C3=B4/g, 'ô') - .replace(/=C3=B9/g, 'ù') - .replace(/=C3=xBB/g, 'û') - .replace(/=C3=80/g, 'À') - .replace(/=C3=89/g, 'É') - .replace(/=C3=87/g, 'Ç') - // Clean up HTML entities - .replace(/ç/g, 'ç') - .replace(/é/g, 'é') - .replace(/è/g, 'ë') - .replace(/ê/g, 'ª') - .replace(/ë/g, '«') - .replace(/û/g, '»') - .replace(/ /g, ' ') - .replace(/\xA0/g, ' '); -} - function decodeMimeContent(content: string): string { if (!content) return ''; diff --git a/lib/infomaniak-mime-decoder.ts b/lib/infomaniak-mime-decoder.ts new file mode 100644 index 00000000..6b97f47c --- /dev/null +++ b/lib/infomaniak-mime-decoder.ts @@ -0,0 +1,174 @@ +// Infomaniak-specific MIME decoder functions + +export function decodeQuotedPrintable(text: string, charset: string): string { + if (!text) return ''; + + // Replace soft line breaks (=\r\n or =\n or =\r) + let decoded = text.replace(/=(?:\r\n|\n|\r)/g, ''); + + // Replace quoted-printable encoded characters (including non-ASCII characters) + decoded = decoded.replace(/=([0-9A-F]{2})/gi, (match, p1) => { + return String.fromCharCode(parseInt(p1, 16)); + }); + + // Handle character encoding + try { + // For browsers with TextDecoder support + if (typeof TextDecoder !== 'undefined') { + // Convert string to array of byte values + const bytes = new Uint8Array(Array.from(decoded).map(c => c.charCodeAt(0))); + return new TextDecoder(charset).decode(bytes); + } + + // Fallback for older browsers or when charset handling is not critical + return decoded; + } catch (e) { + console.warn('Charset conversion error:', e); + return decoded; + } +} + +export function decodeBase64(text: string, charset: string): string { + if (!text) return ''; + + try { + // Remove any whitespace and line breaks + const cleanText = text.replace(/\s+/g, ''); + + // Decode base64 + const binary = atob(cleanText); + + // Convert to bytes + const bytes = new Uint8Array(binary.length); + for (let i = 0; i < binary.length; i++) { + bytes[i] = binary.charCodeAt(i); + } + + // Decode using specified charset + if (typeof TextDecoder !== 'undefined') { + return new TextDecoder(charset).decode(bytes); + } + + // Fallback + return binary; + } catch (e) { + console.warn('Base64 decoding error:', e); + return text; + } +} + +export function convertCharset(text: string, charset: string): string { + if (!text) return ''; + + try { + // For browsers with TextDecoder support + if (typeof TextDecoder !== 'undefined') { + // Convert string to array of byte values + const bytes = new Uint8Array(Array.from(text).map(c => c.charCodeAt(0))); + return new TextDecoder(charset).decode(bytes); + } + + // Fallback for older browsers + return text; + } catch (e) { + console.warn('Charset conversion error:', e); + return text; + } +} + +export function cleanHtml(html: string): string { + if (!html) return ''; + + // Remove or fix malformed URLs + html = html.replace(/=3D"(http[^"]+)"/g, (match, url) => { + try { + return `"${decodeURIComponent(url)}"`; + } catch { + return ''; + } + }); + + // Remove any remaining quoted-printable artifacts + html = html.replace(/=([0-9A-F]{2})/gi, (match, p1) => { + return String.fromCharCode(parseInt(p1, 16)); + }); + + // Clean up any remaining HTML issues + html = html + .replace(/]*>[\s\S]*?<\/style>/gi, '') + .replace(/]*>[\s\S]*?<\/script>/gi, '') + .replace(/]*>/gi, '') + .replace(/]*>/gi, '') + .replace(/]*>/gi, '') + .replace(/]*>[\s\S]*?<\/title>/gi, '') + .replace(/]*>[\s\S]*?<\/head>/gi, '') + .replace(/]*>/gi, '') + .replace(/<\/body>/gi, '') + .replace(/]*>/gi, '') + .replace(/<\/html>/gi, '') + .replace(//gi, '\n') + .replace(/]*>/gi, '\n') + .replace(/<\/div>/gi, '') + .replace(/]*>/gi, '\n') + .replace(/<\/p>/gi, '') + .replace(/ /g, ' ') + .replace(/‌/g, '') + .replace(/»/g, '»') + .replace(/«/g, '«') + .replace(/>/g, '>') + .replace(/</g, '<') + .replace(/&/g, '&') + .replace(/"/g, '"') + .replace(/^\s+$/gm, '') + .replace(/\n{3,}/g, '\n\n') + .trim(); + + return html; +} + +export function parseEmailHeaders(headers: string): { contentType: string; encoding: string; charset: string } { + const result = { + contentType: 'text/plain', + encoding: '7bit', + charset: 'utf-8' + }; + + // Extract content type and charset + const contentTypeMatch = headers.match(/Content-Type:\s*([^;]+)(?:;\s*charset=([^;"\r\n]+)|(?:;\s*charset="([^"]+)"))?/i); + if (contentTypeMatch) { + result.contentType = contentTypeMatch[1].trim().toLowerCase(); + if (contentTypeMatch[2]) { + result.charset = contentTypeMatch[2].trim().toLowerCase(); + } else if (contentTypeMatch[3]) { + result.charset = contentTypeMatch[3].trim().toLowerCase(); + } + } + + // Extract content transfer encoding + const encodingMatch = headers.match(/Content-Transfer-Encoding:\s*([^\s;\r\n]+)/i); + if (encodingMatch) { + result.encoding = encodingMatch[1].trim().toLowerCase(); + } + + return result; +} + +export function extractBoundary(headers: string): string | null { + const boundaryMatch = headers.match(/boundary="?([^"\r\n;]+)"?/i) || + headers.match(/boundary=([^\r\n;]+)/i); + + return boundaryMatch ? boundaryMatch[1].trim() : null; +} + +export function extractFilename(headers: string): string { + const filenameMatch = headers.match(/filename="?([^"\r\n;]+)"?/i) || + headers.match(/name="?([^"\r\n;]+)"?/i); + + return filenameMatch ? filenameMatch[1] : 'attachment'; +} + +export function extractHeader(headers: string, headerName: string): string { + const regex = new RegExp(`^${headerName}:\\s*(.*)$`, 'im'); + const match = headers.match(regex); + return match ? match[1].trim() : ''; +} \ No newline at end of file