courrier preview

This commit is contained in:
alma 2025-05-01 16:08:22 +02:00
parent cdd5bd98bc
commit 1dbf66cdec
3 changed files with 241 additions and 159 deletions

View File

@ -718,9 +718,32 @@ export async function getEmailContent(
// Convert flags from Set to boolean checks
const flagsArray = Array.from(flags as Set<string>);
// Preserve the raw HTML exactly as it was in the original email
// Process the raw HTML with CID attachments
const rawHtml = parsedEmail.html || '';
// Import processHtmlContent if needed
const { processHtmlContent } = await import('../utils/email-content');
// Process HTML content with attachments for CID image handling
let processedHtml = rawHtml;
let direction = 'ltr';
if (rawHtml) {
const processed = processHtmlContent(rawHtml, {
sanitize: true,
blockExternalContent: false,
attachments: parsedEmail.attachments?.map(att => ({
filename: att.filename || 'attachment',
contentType: att.contentType,
content: att.content?.toString('base64'), // Convert Buffer to base64 string
contentId: att.contentId
}))
});
processedHtml = processed.sanitizedContent;
direction = processed.direction;
}
const email: EmailMessage = {
id: emailId,
messageId: envelope.messageId,
@ -753,13 +776,15 @@ export async function getEmailContent(
attachments: parsedEmail.attachments?.map(att => ({
filename: att.filename || 'attachment',
contentType: att.contentType,
contentId: att.contentId,
content: att.content?.toString('base64'),
size: att.size || 0
})),
content: {
text: parsedEmail.text || '',
html: rawHtml || '',
isHtml: !!rawHtml,
direction: 'ltr' // Default to left-to-right
html: processedHtml || '',
isHtml: !!processedHtml,
direction
},
folder: normalizedFolder,
contentFetched: true,

View File

@ -13,20 +13,23 @@
import { sanitizeHtml } from './dom-purify-config';
import { detectTextDirection } from './text-direction';
import { EmailContent } from '@/types/email';
import { processCidReferences } from './email-utils';
/**
* Extract content from various possible email formats
* Centralized implementation to reduce duplication across the codebase
*/
export function extractEmailContent(email: any): { text: string; html: string } {
export function extractEmailContent(email: any): { text: string; html: string; isHtml: boolean; direction: 'ltr' | 'rtl'; } {
// Default empty values
let textContent = '';
let htmlContent = '';
let isHtml = false;
let direction: 'ltr' | 'rtl' = 'ltr';
// Early exit if no email
if (!email) {
console.log('extractEmailContent: No email provided');
return { text: '', html: '' };
return { text: '', html: '', isHtml: false, direction: 'ltr' };
}
try {
@ -35,6 +38,8 @@ export function extractEmailContent(email: any): { text: string; html: string }
// Standard format with content object
textContent = email.content.text || '';
htmlContent = email.content.html || '';
isHtml = email.content.isHtml || !!htmlContent;
direction = email.content.direction || 'ltr';
// Handle complex email formats where content might be nested
if (!textContent && !htmlContent) {
@ -44,13 +49,17 @@ export function extractEmailContent(email: any): { text: string; html: string }
// Determine if body is HTML or text
if (isHtmlContent(email.content.body)) {
htmlContent = email.content.body;
isHtml = true;
} else {
textContent = email.content.body;
isHtml = false;
}
} else if (typeof email.content.body === 'object' && email.content.body) {
// Some email formats nest content inside body
htmlContent = email.content.body.html || '';
textContent = email.content.body.text || '';
isHtml = email.content.body.isHtml || !!htmlContent;
direction = email.content.body.direction || 'ltr';
}
}
@ -60,8 +69,10 @@ export function extractEmailContent(email: any): { text: string; html: string }
// Check if data looks like HTML
if (isHtmlContent(email.content.data)) {
htmlContent = email.content.data;
isHtml = true;
} else {
textContent = email.content.data;
isHtml = false;
}
}
}
@ -70,19 +81,25 @@ export function extractEmailContent(email: any): { text: string; html: string }
// Check if content is likely HTML
if (isHtmlContent(email.content)) {
htmlContent = email.content;
isHtml = true;
} else {
textContent = email.content;
isHtml = false;
}
} else {
// Check other common properties
htmlContent = email.html || '';
textContent = email.text || '';
isHtml = email.isHtml || !!htmlContent;
direction = email.direction || 'ltr';
// If still no content, check for less common properties
if (!htmlContent && !textContent) {
// Try additional properties that some email clients use
htmlContent = email.body?.html || email.bodyHtml || email.htmlBody || '';
textContent = email.body?.text || email.bodyText || email.plainText || '';
isHtml = email.body?.isHtml || !!htmlContent;
direction = email.body?.direction || 'ltr';
}
}
} catch (error) {
@ -99,10 +116,12 @@ export function extractEmailContent(email: any): { text: string; html: string }
hasHtml: !!htmlContent,
htmlLength: htmlContent?.length || 0,
hasText: !!textContent,
textLength: textContent?.length || 0
textLength: textContent?.length || 0,
isHtml,
direction
});
return { text: textContent, html: htmlContent };
return { text: textContent, html: htmlContent, isHtml, direction };
}
/**
@ -160,28 +179,30 @@ export function formatEmailContent(email: any): string {
try {
// Extract content from email
const { text, html } = extractEmailContent(email);
const { text, html, isHtml, direction } = extractEmailContent(email);
console.log('formatEmailContent processing:', {
hasHtml: !!html,
htmlLength: html?.length || 0,
hasText: !!text,
textLength: text?.length || 0,
emailType: typeof email === 'string' ? 'string' : 'object'
emailType: typeof email === 'string' ? 'string' : 'object',
isHtml,
direction
});
// If we have HTML content, sanitize and standardize it
if (html) {
// Process HTML content
let processedHtml = processHtmlContent(html, text);
const processed = processHtmlContent(html, { sanitize: true });
console.log('HTML content processed:', {
processedLength: processedHtml?.length || 0,
isEmpty: !processedHtml || processedHtml.trim().length === 0
processedLength: processed.sanitizedContent?.length || 0,
isEmpty: !processed.sanitizedContent || processed.sanitizedContent.trim().length === 0
});
// Apply styling
return `<div class="email-content" style="font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Helvetica, Arial, sans-serif; line-height: 1.6; color: #333; max-width: 100%; overflow-x: auto; overflow-wrap: break-word; word-wrap: break-word;" dir="${detectTextDirection(text)}">${processedHtml}</div>`;
return `<div class="email-content" style="font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Helvetica, Arial, sans-serif; line-height: 1.6; color: #333; max-width: 100%; overflow-x: auto; overflow-wrap: break-word; word-wrap: break-word;" dir="${processed.direction}">${processed.sanitizedContent}</div>`;
}
// If we only have text content, format it properly
else if (text) {
@ -198,164 +219,102 @@ export function formatEmailContent(email: any): string {
}
/**
* Process HTML content to fix common email rendering issues
* Process HTML content to ensure safe rendering and proper formatting
*/
export function processHtmlContent(htmlContent: string, textContent?: string): string {
if (!htmlContent) return '';
export function processHtmlContent(
htmlContent: string,
options?: {
sanitize?: boolean;
blockExternalContent?: boolean;
attachments?: Array<{
filename?: string;
name?: string;
contentType?: string;
content?: string;
contentId?: string;
}>;
} | string // Support for legacy textContent parameter
): {
sanitizedContent: string;
hasImages: boolean;
hasExternalContent: boolean;
direction: 'ltr' | 'rtl';
} {
// Handle legacy string parameter (textContent)
if (typeof options === 'string') {
options = { sanitize: true };
}
console.log('Processing HTML content:', {
contentLength: htmlContent?.length || 0,
startsWithHtml: htmlContent?.startsWith('<html'),
startsWithDiv: htmlContent?.startsWith('<div'),
containsForwardedMessage: htmlContent?.includes('---------- Forwarded message ----------'),
containsQuoteHeader: htmlContent?.includes('<div class="gmail_quote"'),
sanitize: options?.sanitize,
blockExternalContent: options?.blockExternalContent,
hasAttachments: options?.attachments?.length || 0
});
if (!htmlContent) {
return {
sanitizedContent: '',
hasImages: false,
hasExternalContent: false,
direction: 'ltr',
};
}
// Store the original content for comparison
const originalContent = htmlContent;
// Process CID references before sanitization
if (options?.attachments?.length) {
console.log('Processing CID references in processHtmlContent');
htmlContent = processCidReferences(htmlContent, options.attachments);
}
try {
console.log('processHtmlContent input:', {
length: htmlContent.length,
startsWithHtml: htmlContent.trim().startsWith('<html'),
startsWithDiv: htmlContent.trim().startsWith('<div'),
hasBody: htmlContent.includes('<body'),
containsForwardedMessage: htmlContent.includes('---------- Forwarded message ----------'),
containsQuoteHeader: htmlContent.includes('wrote:'),
hasBlockquote: htmlContent.includes('<blockquote'),
hasTable: htmlContent.includes('<table')
});
// Apply sanitization by default unless explicitly turned off
let sanitizedContent = (options?.sanitize !== false) ? sanitizeHtml(htmlContent) : htmlContent;
// Check for browser environment (DOMParser is browser-only)
const hasHtmlTag = htmlContent.includes('<html');
const hasBodyTag = htmlContent.includes('<body');
// Preserve original HTML for debugging
let originalHtml = htmlContent;
// Extract body content if we have a complete HTML document and in browser environment
if (hasHtmlTag && hasBodyTag && typeof window !== 'undefined' && typeof DOMParser !== 'undefined') {
try {
// Create a DOM parser to extract just the body content
const parser = new DOMParser();
const doc = parser.parseFromString(htmlContent, 'text/html');
const bodyContent = doc.body.innerHTML;
if (bodyContent) {
console.log('Extracted body content from HTML document, length:', bodyContent.length);
htmlContent = bodyContent;
}
} catch (error) {
console.error('Error extracting body content:', error);
}
}
// Use the centralized sanitizeHtml function
let sanitizedContent = sanitizeHtml(htmlContent);
console.log('After sanitizeHtml:', {
originalLength: originalHtml.length,
// Log content changes from sanitization
console.log('HTML sanitization results:', {
originalLength: originalContent.length,
sanitizedLength: sanitizedContent.length,
difference: originalHtml.length - sanitizedContent.length,
percentRemoved: ((originalHtml.length - sanitizedContent.length) / originalHtml.length * 100).toFixed(2) + '%',
containsForwardedMessage: sanitizedContent.includes('---------- Forwarded message ----------'),
hasTable: sanitizedContent.includes('<table'),
hasBlockquote: sanitizedContent.includes('<blockquote')
difference: originalContent.length - sanitizedContent.length,
percentRemoved: ((originalContent.length - sanitizedContent.length) / originalContent.length * 100).toFixed(2) + '%',
isEmpty: !sanitizedContent || sanitizedContent.trim().length === 0
});
// Fix URL encoding issues and clean up content
try {
if (typeof window !== 'undefined' && typeof document !== 'undefined') {
// Temporary element to manipulate the HTML
const tempDiv = document.createElement('div');
tempDiv.innerHTML = sanitizedContent;
// Fix all links that might have been double-encoded
const links = tempDiv.querySelectorAll('a');
links.forEach(link => {
const href = link.getAttribute('href');
if (href && href.includes('%')) {
try {
// Try to decode URLs that might have been double-encoded
const decodedHref = decodeURIComponent(href);
link.setAttribute('href', decodedHref);
} catch (e) {
// If decoding fails, keep the original
console.warn('Failed to decode href:', href);
}
}
});
// Fix image URLs - preserve cid: URLs for email attachments
const images = tempDiv.querySelectorAll('img');
images.forEach(img => {
const src = img.getAttribute('src');
if (src) {
// Don't modify cid: URLs as they are handled specially in email clients
if (src.startsWith('cid:')) {
// Keep cid: URLs as they are
console.log('Preserving CID reference:', src);
}
// Fix http:// URLs to https:// for security
else if (src.startsWith('http://')) {
img.setAttribute('src', src.replace('http://', 'https://'));
}
// Handle relative URLs that might be broken
else if (!src.startsWith('https://') && !src.startsWith('data:')) {
if (src.startsWith('/')) {
img.setAttribute('src', `https://example.com${src}`);
} else {
img.setAttribute('src', `https://example.com/${src}`);
}
}
}
});
// Clean up excessive whitespace and empty elements
// Find all text nodes and normalize whitespace
const walker = document.createTreeWalker(
tempDiv,
NodeFilter.SHOW_TEXT,
null
);
const textNodes = [];
while (walker.nextNode()) {
textNodes.push(walker.currentNode);
}
// Process text nodes to normalize whitespace
textNodes.forEach(node => {
if (node.nodeValue) {
// Replace sequences of whitespace with a single space
node.nodeValue = node.nodeValue.replace(/\s+/g, ' ').trim();
}
});
// Remove empty paragraphs and divs that contain only whitespace
const emptyElements = tempDiv.querySelectorAll('p, div, span');
emptyElements.forEach(el => {
if (el.innerHTML.trim() === '' || el.innerHTML === '&nbsp;') {
el.parentNode?.removeChild(el);
}
});
// Remove excessive consecutive <br> tags (more than 2)
let html = tempDiv.innerHTML;
html = html.replace(/(<br\s*\/?>\s*){3,}/gi, '<br><br>');
tempDiv.innerHTML = html;
// Get the fixed HTML
sanitizedContent = tempDiv.innerHTML;
}
} catch (e) {
console.error('Error fixing content:', e);
}
// Fix common email client quirks without breaking cid: URLs
return sanitizedContent
sanitizedContent = sanitizedContent
// Fix for Outlook WebVML content
.replace(/<!--\[if\s+gte\s+mso/g, '<!--[if gte mso')
// Fix for broken image paths WITHOUT replacing cid: URLs
.replace(/(src|background)="(?!(?:https?:|data:|cid:))/gi, '$1="https://')
// Fix for base64 images that might be broken across lines
.replace(/src="data:image\/[^;]+;base64,\s*([^"]+)\s*"/gi, (match, p1) => {
return `src="data:image/png;base64,${p1.replace(/\s+/g, '')}"`;
})
// Fix for broken image paths starting with // (add https:)
.replace(/src="\/\//g, 'src="https://')
// Handle mixed content issues by converting http:// to https://
.replace(/src="http:\/\//g, 'src="https://')
// Fix email signature line breaks
.replace(/--<br>/g, '<hr style="border-top: 1px solid #ccc; margin: 15px 0;">')
.replace(/-- <br>/g, '<hr style="border-top: 1px solid #ccc; margin: 15px 0;">')
// Remove excessive whitespace from the HTML string itself
.replace(/>\s+</g, '> <');
return {
sanitizedContent,
hasImages: sanitizedContent.includes('<img'),
hasExternalContent: sanitizedContent.includes('https://'),
direction: detectTextDirection(sanitizedContent)
};
} catch (error) {
console.error('Error processing HTML content:', error);
return htmlContent;
return {
sanitizedContent: htmlContent,
hasImages: false,
hasExternalContent: false,
direction: 'ltr',
};
}
}

View File

@ -381,6 +381,98 @@ export function formatReplyEmail(originalEmail: EmailMessage | LegacyEmailMessag
return result;
}
/** Matches a whole `<img ...>` tag, stepping over `>` characters that sit inside quoted attribute values. */
const CID_IMG_TAG_PATTERN = /<img\b(?:[^>"']|"[^"]*"|'[^']*')*>/gi;

/**
 * Matches a `src` attribute whose value is a `cid:` URL.
 * Group 1 is the `src=` prefix; groups 2/3/4 hold the content ID for the
 * double-quoted, single-quoted and unquoted forms respectively.
 * The leading `\s` keeps attributes such as `data-src` from matching.
 */
const CID_SRC_ATTR_PATTERN = /(\ssrc\s*=\s*)(?:"\s*cid:([^"]*)"|'\s*cid:([^']*)'|cid:([^\s"'>]+))/i;

/** Escape a value so it can be embedded inside a double-quoted HTML attribute. */
function escapeCidAttributeValue(value: string): string {
  return value
    .replace(/&/g, '&amp;')
    .replace(/"/g, '&quot;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;');
}

/**
 * Replace `cid:` image sources with inline base64 `data:` URLs built from the
 * email's attachments. This function should be called before sanitizing the content.
 *
 * The rewrite is performed on the markup string itself instead of assigning it
 * to `innerHTML` of a live element, because:
 * - it has to work where no `document` exists (non-browser callers), and
 * - the input is not sanitized yet, and parsing it into the active document
 *   would start image loads and could run inline event handlers.
 *
 * Only the matched `src` attribute is rewritten; all other markup is returned
 * byte-for-byte unchanged.
 *
 * @param htmlContent - HTML that may contain `<img src="cid:...">` references.
 * @param attachments - Attachments of the email. `contentId` may be wrapped in
 *   `<>`; `content` is embedded after `;base64,` and is therefore expected to
 *   already be base64-encoded.
 * @returns The HTML with every resolvable CID reference replaced by a data URL.
 *   The input is returned as-is when it is empty, when no attachment carries a
 *   content ID, or when processing throws.
 */
export function processCidReferences(htmlContent: string, attachments?: Array<{
  filename?: string;
  name?: string;
  contentType?: string;
  content?: string;
  contentId?: string;
}>): string {
  if (!htmlContent || !attachments || !attachments.length) {
    return htmlContent;
  }

  console.log(`Processing CID references with ${attachments.length} attachments available`);

  try {
    // Index attachments by their bare content ID (without the optional <> wrapper).
    const cidMap = new Map<string, { contentType: string; content?: string }>();
    for (const att of attachments) {
      if (!att.contentId) continue;
      const cleanCid = att.contentId.replace(/[<>]/g, '').trim();
      if (!cleanCid) continue;
      cidMap.set(cleanCid, {
        contentType: att.contentType || 'application/octet-stream',
        content: att.content
      });
      console.log(`Mapped CID: ${cleanCid} to attachment of type ${att.contentType || 'unknown'}`);
    }

    // If we have no content IDs mapped, return original content
    if (cidMap.size === 0) {
      console.log('No CID references found in attachments');
      return htmlContent;
    }

    // A cid: URL is the URL-escaped form of the Content-ID, so fall back to the
    // decoded value when the literal one is not known.
    const findAttachment = (rawCid: string) => {
      const cid = rawCid.trim();
      const direct = cidMap.get(cid);
      if (direct) return direct;
      try {
        return cidMap.get(decodeURIComponent(cid));
      } catch {
        // Malformed percent-escape: treat as "no match" rather than failing the whole email.
        return undefined;
      }
    };

    let foundCount = 0;
    let replacedCount = 0;

    const updatedHtml = htmlContent.replace(CID_IMG_TAG_PATTERN, (tag: string) =>
      tag.replace(
        CID_SRC_ATTR_PATTERN,
        (srcAttr: string, prefix: string, doubleQuoted?: string, singleQuoted?: string, bare?: string) => {
          foundCount++;
          const cid = doubleQuoted ?? singleQuoted ?? bare ?? '';
          const attachment = findAttachment(cid);

          if (!attachment?.content) {
            console.log(`No matching attachment found for CID: ${cid}`);
            return srcAttr; // leave the unresolved reference untouched
          }

          const dataUrl = `data:${attachment.contentType};base64,${attachment.content}`;
          replacedCount++;
          console.log(`Replaced CID ${cid} with data URL`);

          // Attachment metadata is untrusted: escape it so it cannot break out of the attribute.
          return `${prefix}"${escapeCidAttributeValue(dataUrl)}"`;
        }
      )
    );

    console.log(`Found ${foundCount} img elements with CID references`);
    console.log(`Replaced ${replacedCount} CID references with data URLs`);

    return updatedHtml;
  } catch (error) {
    console.error('Error processing CID references:', error);
    return htmlContent;
  }
}
/**
* Format email for forwarding
*/
@ -453,6 +545,12 @@ export function formatForwardedEmail(originalEmail: EmailMessage | LegacyEmailMe
}
}
// Process embedded images with CID references
if (htmlContent && email.attachments && email.attachments.length > 0) {
console.log('Processing CID references before sanitization');
htmlContent = processCidReferences(htmlContent, email.attachments);
}
// Create the forwarded email HTML content
if (htmlContent) {
console.log('Formatting HTML forward, original content length:', htmlContent.length);