From 1dbf66cdec297464054e4719d6b802b9ed84c2a2 Mon Sep 17 00:00:00 2001
From: alma <alma@governance-labs.org>
Date: Thu, 1 May 2025 16:08:22 +0200
Subject: [PATCH] courrier preview: inline CID attachment images and sanitize HTML when loading email content

---
 lib/services/email-service.ts |  33 ++++-
 lib/utils/email-content.ts    | 269 ++++++++++++++--------------------
 lib/utils/email-utils.ts      |  98 +++++++++++++
 3 files changed, 241 insertions(+), 159 deletions(-)
diff --git a/lib/services/email-service.ts b/lib/services/email-service.ts
index 94fd30ab..1ea56362 100644
--- a/lib/services/email-service.ts
+++ b/lib/services/email-service.ts
@@ -718,9 +718,32 @@ export async function getEmailContent(
     // Convert flags from Set to boolean checks
     const flagsArray = Array.from(flags as Set<string>);
     
-    // Preserve the raw HTML exactly as it was in the original email
+    // Process the raw HTML with CID attachments
     const rawHtml = parsedEmail.html || '';
     
+    // Import processHtmlContent if needed
+    const { processHtmlContent } = await import('../utils/email-content');
+    
+    // Process HTML content with attachments for CID image handling
+    let processedHtml = rawHtml;
+    let direction = 'ltr';
+    
+    if (rawHtml) {
+      const processed = processHtmlContent(rawHtml, {
+        sanitize: true,
+        blockExternalContent: false,
+        attachments: parsedEmail.attachments?.map(att => ({
+          filename: att.filename || 'attachment',
+          contentType: att.contentType,
+          content: att.content?.toString('base64'), // Convert Buffer to base64 string
+          contentId: att.contentId
+        }))
+      });
+      
+      processedHtml = processed.sanitizedContent;
+      direction = processed.direction;
+    }
+    
     const email: EmailMessage = {
       id: emailId,
       messageId: envelope.messageId,
@@ -753,13 +776,15 @@ export async function getEmailContent(
       attachments: parsedEmail.attachments?.map(att => ({
         filename: att.filename || 'attachment',
         contentType: att.contentType,
+        contentId: att.contentId,
+        content: att.content?.toString('base64'),
         size: att.size || 0
       })),
       content: {
         text: parsedEmail.text || '',
-        html: rawHtml || '',
-        isHtml: !!rawHtml,
-        direction: 'ltr' // Default to left-to-right
+        html: processedHtml || '',
+        isHtml: !!processedHtml,
+        direction
       },
       folder: normalizedFolder,
       contentFetched: true,
diff --git a/lib/utils/email-content.ts b/lib/utils/email-content.ts
index 1c049306..5833cb9c 100644
--- a/lib/utils/email-content.ts
+++ b/lib/utils/email-content.ts
@@ -13,20 +13,23 @@
 import { sanitizeHtml } from './dom-purify-config';
 import { detectTextDirection } from './text-direction';
 import { EmailContent } from '@/types/email';
+import { processCidReferences } from './email-utils';
 
 /**
  * Extract content from various possible email formats
  * Centralized implementation to reduce duplication across the codebase
  */
-export function extractEmailContent(email: any): { text: string; html: string } {
+export function extractEmailContent(email: any): { text: string; html: string; isHtml: boolean; direction: 'ltr' | 'rtl'; } {
   // Default empty values
   let textContent = '';
   let htmlContent = '';
+  let isHtml = false;
+  let direction: 'ltr' | 'rtl' = 'ltr';
 
   // Early exit if no email
   if (!email) {
     console.log('extractEmailContent: No email provided');
-    return { text: '', html: '' };
+    return { text: '', html: '', isHtml: false, direction: 'ltr' };
   }
 
   try {
@@ -35,6 +38,8 @@ export function extractEmailContent(email: any): { text: string; html: string }
       // Standard format with content object
       textContent = email.content.text || '';
       htmlContent = email.content.html || '';
+      isHtml = email.content.isHtml || !!htmlContent;
+      direction = email.content.direction || 'ltr';
       
       // Handle complex email formats where content might be nested
       if (!textContent && !htmlContent) {
@@ -44,13 +49,17 @@ export function extractEmailContent(email: any): { text: string; html: string }
             // Determine if body is HTML or text
             if (isHtmlContent(email.content.body)) {
               htmlContent = email.content.body;
+              isHtml = true;
             } else {
               textContent = email.content.body;
+              isHtml = false;
             }
           } else if (typeof email.content.body === 'object' && email.content.body) {
             // Some email formats nest content inside body
             htmlContent = email.content.body.html || '';
             textContent = email.content.body.text || '';
+            isHtml = email.content.body.isHtml || !!htmlContent;
+            direction = email.content.body.direction || 'ltr';
           }
         }
         
@@ -60,8 +69,10 @@ export function extractEmailContent(email: any): { text: string; html: string }
             // Check if data looks like HTML
             if (isHtmlContent(email.content.data)) {
               htmlContent = email.content.data;
+              isHtml = true;
             } else {
               textContent = email.content.data;
+              isHtml = false;
             }
           }
         }
@@ -70,19 +81,25 @@ export function extractEmailContent(email: any): { text: string; html: string }
       // Check if content is likely HTML
       if (isHtmlContent(email.content)) {
         htmlContent = email.content;
+        isHtml = true;
       } else {
         textContent = email.content;
+        isHtml = false;
       }
     } else {
       // Check other common properties
       htmlContent = email.html || '';
       textContent = email.text || '';
+      isHtml = email.isHtml || !!htmlContent;
+      direction = email.direction || 'ltr';
       
       // If still no content, check for less common properties
       if (!htmlContent && !textContent) {
         // Try additional properties that some email clients use
         htmlContent = email.body?.html || email.bodyHtml || email.htmlBody || '';
         textContent = email.body?.text || email.bodyText || email.plainText || '';
+        isHtml = email.body?.isHtml || !!htmlContent;
+        direction = email.body?.direction || 'ltr';
       }
     }
   } catch (error) {
@@ -99,10 +116,12 @@ export function extractEmailContent(email: any): { text: string; html: string }
     hasHtml: !!htmlContent, 
     htmlLength: htmlContent?.length || 0,
     hasText: !!textContent, 
-    textLength: textContent?.length || 0
+    textLength: textContent?.length || 0,
+    isHtml,
+    direction
   });
 
-  return { text: textContent, html: htmlContent };
+  return { text: textContent, html: htmlContent, isHtml, direction };
 }
 
 /**
@@ -160,28 +179,30 @@ export function formatEmailContent(email: any): string {
   
   try {
     // Extract content from email
-    const { text, html } = extractEmailContent(email);
+    const { text, html, isHtml, direction } = extractEmailContent(email);
     
     console.log('formatEmailContent processing:', { 
       hasHtml: !!html, 
       htmlLength: html?.length || 0,
       hasText: !!text, 
       textLength: text?.length || 0,
-      emailType: typeof email === 'string' ? 'string' : 'object'
+      emailType: typeof email === 'string' ? 'string' : 'object',
+      isHtml,
+      direction
     });
     
     // If we have HTML content, sanitize and standardize it
     if (html) {
       // Process HTML content
-      let processedHtml = processHtmlContent(html, text);
+      const processed = processHtmlContent(html, { sanitize: true });
       
       console.log('HTML content processed:', { 
-        processedLength: processedHtml?.length || 0,
-        isEmpty: !processedHtml || processedHtml.trim().length === 0
+        processedLength: processed.sanitizedContent?.length || 0,
+        isEmpty: !processed.sanitizedContent || processed.sanitizedContent.trim().length === 0
       });
       
       // Apply styling
-      return `<div class="email-content" style="font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Helvetica, Arial, sans-serif; line-height: 1.6; color: #333; max-width: 100%; overflow-x: auto; overflow-wrap: break-word; word-wrap: break-word;" dir="${detectTextDirection(text)}">${processedHtml}</div>`;
+      return `<div class="email-content" style="font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Helvetica, Arial, sans-serif; line-height: 1.6; color: #333; max-width: 100%; overflow-x: auto; overflow-wrap: break-word; word-wrap: break-word;" dir="${processed.direction}">${processed.sanitizedContent}</div>`;
     } 
     // If we only have text content, format it properly
     else if (text) {
@@ -198,164 +219,102 @@ export function formatEmailContent(email: any): string {
 }
 
 /**
- * Process HTML content to fix common email rendering issues
+ * Inline CID images (when attachments are given), sanitize unless options.sanitize === false, then apply small markup fixes; on error the pre-sanitization markup is returned as sanitizedContent.
  */
-export function processHtmlContent(htmlContent: string, textContent?: string): string {
-  if (!htmlContent) return '';
-  
+export function processHtmlContent(
+  htmlContent: string,
+  options?: {
+    sanitize?: boolean;
+    blockExternalContent?: boolean;
+    attachments?: Array<{
+      filename?: string;
+      name?: string;
+      contentType?: string;
+      content?: string;
+      contentId?: string;
+    }>;
+  } | string // Support for legacy textContent parameter
+): {
+  sanitizedContent: string;
+  hasImages: boolean;
+  hasExternalContent: boolean;
+  direction: 'ltr' | 'rtl';
+} {
+  // Legacy call style passed textContent as a string: the text itself is discarded and only { sanitize: true } is kept
+  if (typeof options === 'string') {
+    options = { sanitize: true };
+  }
+
+  console.log('Processing HTML content:', {
+    contentLength: htmlContent?.length || 0,
+    startsWithHtml: htmlContent?.startsWith('<html'),
+    startsWithDiv: htmlContent?.startsWith('<div'),
+    containsForwardedMessage: htmlContent?.includes('---------- Forwarded message ----------'),
+    containsQuoteHeader: htmlContent?.includes('<div class="gmail_quote"'),
+    sanitize: options?.sanitize, 
+    blockExternalContent: options?.blockExternalContent,
+    hasAttachments: options?.attachments?.length || 0
+  });
+
+  if (!htmlContent) {
+    return {
+      sanitizedContent: '',
+      hasImages: false,
+      hasExternalContent: false,
+      direction: 'ltr',
+    };
+  }
+
+  // Keep the untouched input; it is only used for the size comparison logged after sanitization
+  const originalContent = htmlContent;
+
+  // Inline cid: images first; this runs outside the try below and operates on the still-unsanitized markup
+  if (options?.attachments?.length) {
+    console.log('Processing CID references in processHtmlContent');
+    htmlContent = processCidReferences(htmlContent, options.attachments);
+  }
+
   try {
-    console.log('processHtmlContent input:', {
-      length: htmlContent.length,
-      startsWithHtml: htmlContent.trim().startsWith('<html'),
-      startsWithDiv: htmlContent.trim().startsWith('<div'),
-      hasBody: htmlContent.includes('<body'),
-      containsForwardedMessage: htmlContent.includes('---------- Forwarded message ----------'),
-      containsQuoteHeader: htmlContent.includes('wrote:'),
-      hasBlockquote: htmlContent.includes('<blockquote'),
-      hasTable: htmlContent.includes('<table')
-    });
+    // Sanitize unless explicitly disabled. NOTE(review): options.blockExternalContent is only logged above; nothing in this function blocks external content
+    let sanitizedContent = (options?.sanitize !== false) ? sanitizeHtml(htmlContent) : htmlContent;
     
-    // Check for browser environment (DOMParser is browser-only)
-    const hasHtmlTag = htmlContent.includes('<html');
-    const hasBodyTag = htmlContent.includes('<body');
-    
-    // Preserve original HTML for debugging
-    let originalHtml = htmlContent;
-    
-    // Extract body content if we have a complete HTML document and in browser environment
-    if (hasHtmlTag && hasBodyTag && typeof window !== 'undefined' && typeof DOMParser !== 'undefined') {
-      try {
-        // Create a DOM parser to extract just the body content
-        const parser = new DOMParser();
-        const doc = parser.parseFromString(htmlContent, 'text/html');
-        const bodyContent = doc.body.innerHTML;
-        
-        if (bodyContent) {
-          console.log('Extracted body content from HTML document, length:', bodyContent.length);
-          htmlContent = bodyContent;
-        }
-      } catch (error) {
-        console.error('Error extracting body content:', error);
-      }
-    }
-    
-    // Use the centralized sanitizeHtml function
-    let sanitizedContent = sanitizeHtml(htmlContent);
-    
-    console.log('After sanitizeHtml:', {
-      originalLength: originalHtml.length,
+    // Log content changes from sanitization
+    console.log('HTML sanitization results:', {
+      originalLength: originalContent.length,
       sanitizedLength: sanitizedContent.length,
-      difference: originalHtml.length - sanitizedContent.length,
-      percentRemoved: ((originalHtml.length - sanitizedContent.length) / originalHtml.length * 100).toFixed(2) + '%',
-      containsForwardedMessage: sanitizedContent.includes('---------- Forwarded message ----------'),
-      hasTable: sanitizedContent.includes('<table'),
-      hasBlockquote: sanitizedContent.includes('<blockquote')
+      difference: originalContent.length - sanitizedContent.length,
+      percentRemoved: ((originalContent.length - sanitizedContent.length) / originalContent.length * 100).toFixed(2) + '%',
+      isEmpty: !sanitizedContent || sanitizedContent.trim().length === 0
     });
     
-    // Fix URL encoding issues and clean up content
-    try {
-      if (typeof window !== 'undefined' && typeof document !== 'undefined') {
-        // Temporary element to manipulate the HTML
-        const tempDiv = document.createElement('div');
-        tempDiv.innerHTML = sanitizedContent;
-        
-        // Fix all links that might have been double-encoded
-        const links = tempDiv.querySelectorAll('a');
-        links.forEach(link => {
-          const href = link.getAttribute('href');
-          if (href && href.includes('%')) {
-            try {
-              // Try to decode URLs that might have been double-encoded
-              const decodedHref = decodeURIComponent(href);
-              link.setAttribute('href', decodedHref);
-            } catch (e) {
-              // If decoding fails, keep the original
-              console.warn('Failed to decode href:', href);
-            }
-          }
-        });
-        
-        // Fix image URLs - preserve cid: URLs for email attachments
-        const images = tempDiv.querySelectorAll('img');
-        images.forEach(img => {
-          const src = img.getAttribute('src');
-          if (src) {
-            // Don't modify cid: URLs as they are handled specially in email clients
-            if (src.startsWith('cid:')) {
-              // Keep cid: URLs as they are
-              console.log('Preserving CID reference:', src);
-            } 
-            // Fix http:// URLs to https:// for security
-            else if (src.startsWith('http://')) {
-              img.setAttribute('src', src.replace('http://', 'https://'));
-            }
-            // Handle relative URLs that might be broken
-            else if (!src.startsWith('https://') && !src.startsWith('data:')) {
-              if (src.startsWith('/')) {
-                img.setAttribute('src', `https://example.com${src}`);
-              } else {
-                img.setAttribute('src', `https://example.com/${src}`);
-              }
-            }
-          }
-        });
-        
-        // Clean up excessive whitespace and empty elements
-        // Find all text nodes and normalize whitespace
-        const walker = document.createTreeWalker(
-          tempDiv,
-          NodeFilter.SHOW_TEXT,
-          null
-        );
-        
-        const textNodes = [];
-        while (walker.nextNode()) {
-          textNodes.push(walker.currentNode);
-        }
-        
-        // Process text nodes to normalize whitespace
-        textNodes.forEach(node => {
-          if (node.nodeValue) {
-            // Replace sequences of whitespace with a single space
-            node.nodeValue = node.nodeValue.replace(/\s+/g, ' ').trim();
-          }
-        });
-        
-        // Remove empty paragraphs and divs that contain only whitespace
-        const emptyElements = tempDiv.querySelectorAll('p, div, span');
-        emptyElements.forEach(el => {
-          if (el.innerHTML.trim() === '' || el.innerHTML === '&nbsp;') {
-            el.parentNode?.removeChild(el);
-          }
-        });
-        
-        // Remove excessive consecutive <br> tags (more than 2)
-        let html = tempDiv.innerHTML;
-        html = html.replace(/(<br\s*\/?>\s*){3,}/gi, '<br><br>');
-        tempDiv.innerHTML = html;
-        
-        // Get the fixed HTML
-        sanitizedContent = tempDiv.innerHTML;
-      }
-    } catch (e) {
-      console.error('Error fixing content:', e);
-    }
-    
     // Fix common email client quirks without breaking cid: URLs
-    return sanitizedContent
+    sanitizedContent = sanitizedContent
       // Fix for Outlook WebVML content
       .replace(/<!--\[if\s+gte\s+mso/g, '<!--[if gte mso')
-      // Fix for broken image paths WITHOUT replacing cid: URLs
-      .replace(/(src|background)="(?!(?:https?:|data:|cid:))/gi, '$1="https://')
-      // Fix for base64 images that might be broken across lines
-      .replace(/src="data:image\/[^;]+;base64,\s*([^"]+)\s*"/gi, (match, p1) => {
-        return `src="data:image/png;base64,${p1.replace(/\s+/g, '')}"`;
-      })
+      // Fix for broken image paths starting with // (add https:)
+      .replace(/src="\/\//g, 'src="https://')
+      // Handle mixed content issues by converting http:// to https://
+      .replace(/src="http:\/\//g, 'src="https://')
+      // Turn the "--" / "-- " signature delimiter followed by <br> into a horizontal rule (matches anywhere in the markup)
+      .replace(/--<br>/g, '<hr style="border-top: 1px solid #ccc; margin: 15px 0;">')
+      .replace(/-- <br>/g, '<hr style="border-top: 1px solid #ccc; margin: 15px 0;">')
       // Remove excessive whitespace from the HTML string itself
       .replace(/>\s+</g, '> <');
+
+    return {
+      sanitizedContent,
+      hasImages: sanitizedContent.includes('<img'),
+      hasExternalContent: sanitizedContent.includes('https://'),
+      direction: detectTextDirection(sanitizedContent)
+    };
   } catch (error) {
     console.error('Error processing HTML content:', error);
-    return htmlContent;
+    return {
+      sanitizedContent: htmlContent,
+      hasImages: false,
+      hasExternalContent: false,
+      direction: 'ltr',
+    };
   }
 }
 
diff --git a/lib/utils/email-utils.ts b/lib/utils/email-utils.ts
index c0f35326..bc7c0343 100644
--- a/lib/utils/email-utils.ts
+++ b/lib/utils/email-utils.ts
@@ -381,6 +381,98 @@ export function formatReplyEmail(originalEmail: EmailMessage | LegacyEmailMessag
   return result;
 }
 
+/**
+ * Replace `cid:` image sources with base64 `data:` URLs built from the email's attachments.
+ *
+ * Must run BEFORE sanitization, so the untrusted HTML is never parsed here: the rewrite is
+ * purely string based. Assigning the raw markup to `innerHTML` of an element owned by the
+ * live document would start image loads and could fire inline handlers such as `onerror`,
+ * and a DOM-based rewrite cannot run where `document` is unavailable (e.g. under Node.js).
+ *
+ * @param htmlContent - Raw (unsanitized) HTML body of the email.
+ * @param attachments - Attachments of the email; `content` is expected to be base64 text and
+ *   `contentId` may be wrapped in angle brackets.
+ * @returns The HTML with every resolvable `<img src="cid:…">` pointing at a data URL. The input
+ *   is returned unchanged when there is nothing to replace or when processing fails.
+ */
+export function processCidReferences(htmlContent: string, attachments?: Array<{
+  filename?: string;
+  name?: string;
+  contentType?: string;
+  content?: string;
+  contentId?: string;
+}>): string {
+  if (!htmlContent || !attachments || !attachments.length) {
+    return htmlContent;
+  }
+
+  console.log(`Processing CID references with ${attachments.length} attachments available`);
+
+  try {
+    // Index the usable attachments by their bare content ID
+    const cidMap = new Map<string, { contentType: string; content: string }>();
+    for (const att of attachments) {
+      if (!att.contentId || typeof att.content !== 'string') continue;
+
+      // The payload ends up inside a quoted attribute, so accept strict base64 only
+      const content = att.content.replace(/\s+/g, '');
+      if (!content || !/^[A-Za-z0-9+\/]+={0,2}$/.test(content)) continue;
+
+      // The MIME type is sender controlled as well: fall back to a neutral type when it
+      // contains anything that could break out of the attribute value
+      const declaredType = (att.contentType || '').trim();
+      const contentType = /^[\w.+-]+\/[\w.+-]+$/.test(declaredType)
+        ? declaredType
+        : 'application/octet-stream';
+
+      // Content ID sometimes has <> brackets which need to be removed
+      const cleanCid = att.contentId.replace(/[<>]/g, '').trim();
+      if (cleanCid) {
+        cidMap.set(cleanCid, { contentType, content });
+      }
+    }
+
+    // If we have no content IDs mapped, return original content
+    if (cidMap.size === 0) {
+      console.log('No CID references found in attachments');
+      return htmlContent;
+    }
+
+    // Look a reference up as written first, then percent-decoded (RFC 2392 cid: URLs)
+    const findAttachment = (cid: string) => {
+      const direct = cidMap.get(cid);
+      if (direct) return direct;
+      try {
+        return cidMap.get(decodeURIComponent(cid));
+      } catch {
+        return undefined; // malformed escape sequence: treat as an unknown reference
+      }
+    };
+
+    // Rewrite only the quoted src value of <img> tags; everything else is left untouched
+    let replacedCount = 0;
+    const cidImagePattern = /(<img\b[^>]*?\ssrc\s*=\s*)(["'])cid:([^"'>]+)\2/gi;
+    const result = htmlContent.replace(
+      cidImagePattern,
+      (match: string, prefix: string, quote: string, rawCid: string) => {
+        const attachment = findAttachment(rawCid.trim());
+        if (!attachment) {
+          console.log(`No matching attachment found for CID: ${rawCid}`);
+          return match;
+        }
+        replacedCount++;
+        return `${prefix}${quote}data:${attachment.contentType};base64,${attachment.content}${quote}`;
+      }
+    );
+
+    console.log(`Replaced ${replacedCount} CID references with data URLs`);
+    return result;
+  } catch (error) {
+    console.error('Error processing CID references:', error);
+    return htmlContent;
+  }
+}
+
 /**
  * Format email for forwarding
  */
@@ -453,6 +545,12 @@ export function formatForwardedEmail(originalEmail: EmailMessage | LegacyEmailMe
     }
   }
 
+  // Process embedded images with CID references
+  if (htmlContent && email.attachments && email.attachments.length > 0) {
+    console.log('Processing CID references before sanitization');
+    htmlContent = processCidReferences(htmlContent, email.attachments);
+  }
+
   // Create the forwarded email HTML content
   if (htmlContent) {
     console.log('Formatting HTML forward, original content length:', htmlContent.length);