// Neah/lib/utils/email-content.ts

/**
 * Centralized Email Content Utilities
 *
 * This file contains all core functions for email content processing:
 * - Content extraction
 * - HTML sanitization
 * - Text direction handling
 * - URL fixing
 *
 * Other modules should import from this file rather than implementing their own versions.
 */

import { sanitizeHtml } from './dom-purify-config';
import { detectTextDirection } from './text-direction';
import { EmailContent } from '@/types/email';

/**
 * Extract the plain-text and HTML bodies from an email object of unknown shape.
 * Centralized implementation to reduce duplication across the codebase.
 *
 * Lookup order (the first source that yields any content wins):
 * 1. `email.content` as an object: `text` / `html`, then `body` (a string or a
 *    `{ text, html }` object), then a string `data` property.
 * 2. `email.content` as a string, classified with `isHtmlContent`.
 * 3. Top-level `email.html` / `email.text`.
 * 4. Less common top-level fields: `body.html`, `bodyHtml`, `htmlBody` and
 *    `body.text`, `bodyText`, `plainText`.
 *
 * Steps 3 and 4 are also tried when `email.content` is present but yields
 * nothing, so an empty `content` no longer hides top-level bodies.
 * Non-string values are ignored so the result always contains strings.
 * When only HTML is found, `text` is derived from it via `extractTextFromHtml`.
 *
 * @param email - Email-like object; may be null or undefined.
 * @returns An object whose `text` and `html` are always strings (empty when absent).
 */
export function extractEmailContent(email: any): { text: string; html: string } {
  // Early exit if no email
  if (!email) {
    console.log('extractEmailContent: No email provided');
    return { text: '', html: '' };
  }

  // Only strings are usable as content; anything else is treated as missing.
  const asString = (value: unknown): string => (typeof value === 'string' ? value : '');

  let textContent = '';
  let htmlContent = '';

  // Route a string of unknown kind to the HTML slot or the text slot.
  const assignByKind = (raw: string): void => {
    if (isHtmlContent(raw)) {
      htmlContent = raw;
    } else {
      textContent = raw;
    }
  };

  try {
    const content = email.content;

    if (content && typeof content === 'object') {
      // Standard format with a content object
      textContent = asString(content.text);
      htmlContent = asString(content.html);

      // Some formats nest the content inside `body`
      if (!textContent && !htmlContent && content.body) {
        if (typeof content.body === 'string') {
          assignByKind(content.body);
        } else if (typeof content.body === 'object') {
          htmlContent = asString(content.body.html);
          textContent = asString(content.body.text);
        }
      }

      // Some email services use a `data` property instead
      if (!textContent && !htmlContent && typeof content.data === 'string') {
        assignByKind(content.data);
      }
    } else if (typeof content === 'string') {
      assignByKind(content);
    }

    // Fall back to top-level properties, including when `content` was empty
    if (!textContent && !htmlContent) {
      htmlContent = asString(email.html);
      textContent = asString(email.text);
    }

    // Last resort: property names used by other email clients
    if (!textContent && !htmlContent) {
      htmlContent =
        asString(email.body?.html) || asString(email.bodyHtml) || asString(email.htmlBody);
      textContent =
        asString(email.body?.text) || asString(email.bodyText) || asString(email.plainText);
    }
  } catch (error) {
    // Best effort: keep whatever was extracted before the failure
    console.error('Error extracting email content:', error);
  }

  // Ensure we always have at least some text content
  if (!textContent && htmlContent) {
    textContent = extractTextFromHtml(htmlContent);
  }

  // Log extraction results
  console.log('Extracted email content:', {
    hasHtml: !!htmlContent,
    htmlLength: htmlContent.length,
    hasText: !!textContent,
    textLength: textContent.length
  });

  return { text: textContent, html: htmlContent };
}

/**
 * Extract plain text from HTML content.
 *
 * In a browser the markup is parsed with `DOMParser` into a separate document
 * rather than being assigned to `innerHTML` of an element from the live page:
 * the input is unsanitized, and the `innerHTML` route would fire inline
 * handlers such as `<img onerror>` and start fetching images.
 * Outside a browser a regex-based fallback is used.
 *
 * In both paths the contents of `<script>` and `<style>` elements are dropped
 * so code and CSS do not leak into the extracted text.
 *
 * @param html - HTML markup; may be empty.
 * @returns The text content, or '' for empty input.
 */
export function extractTextFromHtml(html: string): string {
  if (!html) return '';

  try {
    if (typeof window !== 'undefined' && typeof DOMParser !== 'undefined') {
      const doc = new DOMParser().parseFromString(html, 'text/html');
      doc.querySelectorAll('script, style').forEach(node => node.remove());
      return doc.body?.textContent || '';
    }

    // Regex fallback for non-browser environments
    return html
      // Drop script/style elements together with their contents
      .replace(/<(script|style)\b[^>]*>[\s\S]*?<\/\1\s*>/gi, ' ')
      // Drop comments first: they may contain '>' and confuse the tag regex
      .replace(/<!--[\s\S]*?-->/g, ' ')
      .replace(/<[^>]*>/g, ' ')
      .replace(/&nbsp;/g, ' ')
      .replace(/&lt;/g, '<')
      .replace(/&gt;/g, '>')
      .replace(/&quot;/g, '"')
      .replace(/&#0*39;|&apos;/g, "'")
      // '&amp;' must be decoded last so '&amp;lt;' becomes '&lt;', not '<'
      .replace(/&amp;/g, '&')
      .replace(/\s+/g, ' ')
      .trim();
  } catch (e) {
    console.error('Error extracting text from HTML:', e);
    // Fallback to basic strip
    return html.replace(/<[^>]*>/g, ' ').trim();
  }
}

/**
 * Check if a string is likely HTML content.
 *
 * Heuristic: the trimmed string must start with '<' and contain at least one
 * of the tags `html`, `body`, `div`, `p` or `br`. Matching is case-insensitive,
 * and `p` / `br` may carry attributes or be self-closing (`<p style="…">`,
 * `<br/>`). Other tags that merely start with "p" (e.g. `<pre>`) do not count.
 *
 * @param content - Candidate string; non-string or empty input yields false.
 * @returns true when the content looks like HTML.
 */
export function isHtmlContent(content: string): boolean {
  // Callers pass untyped email fields, so guard against non-strings at runtime
  if (typeof content !== 'string' || !content) return false;

  if (!content.trim().startsWith('<')) return false;

  // `html`, `body`, `div` match as prefixes; `p` and `br` need a tag boundary
  return /<(?:html|body|div)|<(?:p|br)(?=[\s\/>])/i.test(content);
}

/**
 * Format and standardize email content for display.
 * This is the main entry point for rendering email content.
 *
 * - With an HTML body: the HTML is run through `processHtmlContent` and wrapped
 *   in a styled `.email-content` container whose `dir` attribute is the result
 *   of `detectTextDirection` on the text body.
 * - With only a text body: delegates to `formatPlainTextToHtml`.
 * - With neither: returns a "No content available" placeholder.
 * - On any exception: returns an error box; the error message is HTML-escaped
 *   before being embedded.
 *
 * @param email - Email-like object; may be null or undefined.
 * @returns An HTML string, or '' when no email is given.
 */
export function formatEmailContent(email: any): string {
  if (!email) {
    console.log('formatEmailContent: No email provided');
    return '';
  }

  try {
    // Extract content from email
    const { text, html } = extractEmailContent(email);

    // If we have HTML content, sanitize and standardize it
    if (html) {
      const processedHtml = processHtmlContent(html, text);

      // Apply styling
      return `<div class="email-content" style="font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Helvetica, Arial, sans-serif; line-height: 1.6; color: #333; max-width: 100%; overflow-x: auto; overflow-wrap: break-word; word-wrap: break-word;" dir="${detectTextDirection(text)}">${processedHtml}</div>`;
    }

    // If we only have text content, format it properly
    if (text) {
      return formatPlainTextToHtml(text);
    }

    // Default case: empty or unrecognized content
    return '<div class="email-content-empty" style="padding: 20px; text-align: center; color: #666;">No content available</div>';
  } catch (error) {
    console.error('formatEmailContent: Error formatting email content:', error);

    // The message ends up inside markup, so it must not be interpreted as HTML
    const rawMessage = error instanceof Error ? error.message : 'Unknown error';
    const safeMessage = rawMessage
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&#039;');

    return `<div class="email-content-error" style="padding: 15px; color: #721c24; background-color: #f8d7da; border: 1px solid #f5c6cb; border-radius: 4px;"><p>Error displaying email content</p><p style="font-size: 12px; margin-top: 10px;">${safeMessage}</p></div>`;
  }
}

/**
 * Process HTML content to fix common email rendering issues.
 *
 * Steps:
 * 1. In a browser, if the input is a complete document (`<html` and `<body`
 *    present), keep only the body's inner HTML.
 * 2. Sanitize with the shared `sanitizeHtml`.
 * 3. In a browser, fix URLs on the sanitized DOM:
 *    - links: collapse double percent-encoding (`%2520` -> `%20`) only. The
 *      href is never fully decoded, because that would corrupt encoded query
 *      values and could turn `javascript%3A...` into a live `javascript:` URL
 *      after sanitization has already run;
 *    - images: `cid:` untouched, `http://` upgraded to `https://`,
 *      protocol-relative `//host/...` given an `https:` scheme.
 * 4. String-level fixes: Outlook conditional-comment whitespace, scheme-less
 *    `src`/`background` values, whitespace inside base64 data URIs.
 *
 * Root-relative URLs (`/path`) are left as they are: an email carries no base
 * URL, so there is no correct host to resolve them against.
 *
 * If processing fails, only content that already passed sanitization is
 * returned ('' when sanitization itself failed); raw input is never returned.
 *
 * @param htmlContent - Raw HTML of the email.
 * @param textContent - Plain-text alternative; currently not used here.
 * @returns Sanitized, display-ready HTML.
 */
export function processHtmlContent(htmlContent: string, textContent?: string): string {
  if (!htmlContent) return '';

  // Stays empty until sanitization succeeds so the error path cannot leak raw HTML
  let sanitizedContent = '';

  try {
    let workingHtml = htmlContent;

    const hasHtmlTag = workingHtml.includes('<html');
    const hasBodyTag = workingHtml.includes('<body');

    // Extract body content if we have a complete HTML document (DOMParser is browser-only)
    if (hasHtmlTag && hasBodyTag && typeof window !== 'undefined' && typeof DOMParser !== 'undefined') {
      try {
        const doc = new DOMParser().parseFromString(workingHtml, 'text/html');
        const bodyContent = doc.body.innerHTML;

        if (bodyContent) {
          workingHtml = bodyContent;
        }
      } catch (error) {
        console.error('Error extracting body content:', error);
      }
    }

    // Use the centralized sanitizeHtml function
    sanitizedContent = sanitizeHtml(workingHtml);

    // Fix URL encoding issues (best effort; failures keep the sanitized markup)
    try {
      if (typeof window !== 'undefined' && typeof document !== 'undefined') {
        const tempDiv = document.createElement('div');
        tempDiv.innerHTML = sanitizedContent;

        // Collapse double-encoded escapes only; single-encoded ones must stay intact
        tempDiv.querySelectorAll('a').forEach(link => {
          const href = link.getAttribute('href');
          if (href && href.includes('%25')) {
            link.setAttribute('href', href.replace(/%25([0-9a-f]{2})/gi, '%$1'));
          }
        });

        tempDiv.querySelectorAll('img').forEach(img => {
          const src = img.getAttribute('src');
          if (!src) return;

          if (src.startsWith('cid:')) {
            // cid: URLs reference inline attachments and are resolved elsewhere
            console.log('Preserving CID reference:', src);
          } else if (src.startsWith('http://')) {
            // Upgrade to https to avoid mixed-content loads
            img.setAttribute('src', src.replace('http://', 'https://'));
          } else if (src.startsWith('//')) {
            // Protocol-relative URL: pin the scheme to https
            img.setAttribute('src', `https:${src}`);
          }
        });

        sanitizedContent = tempDiv.innerHTML;
      }
    } catch (e) {
      console.error('Error fixing URLs in content:', e);
    }

    // Fix common email client quirks without breaking cid: URLs
    return sanitizedContent
      // Normalize whitespace in Outlook conditional comments
      .replace(/<!--\[if\s+gte\s+mso/g, '<!--[if gte mso')
      // Protocol-relative URLs get an https scheme
      .replace(/(src|background)="\/\/(?!\/)/gi, '$1="https://')
      // Scheme-less, non-rooted values (e.g. "www.host.com/a.png") are treated as hosts;
      // values with any scheme, root-relative paths, fragments and empty values are kept
      .replace(/(src|background)="(?!(?:[a-z][a-z0-9+.-]*:|[\/#"]))/gi, '$1="https://')
      // Remove whitespace from base64 payloads broken across lines, keeping the MIME type
      .replace(
        /src="(data:image\/[^;"]+;base64,)\s*([^"]*)"/gi,
        (_match: string, prefix: string, payload: string) =>
          `src="${prefix}${payload.replace(/\s+/g, '')}"`
      );
  } catch (error) {
    console.error('Error processing HTML content:', error);
    // Fail closed: hand back only what has already been sanitized
    return sanitizedContent;
  }
}

/**
 * Format plain text to HTML with proper line breaks and styling.
 *
 * The text is HTML-escaped, single newlines become `<br>`, and runs of two or
 * more newlines become paragraph breaks. Line breaks directly after an opening
 * `<p>` or directly before a closing `</p>` are removed; this cleanup runs
 * after the outer `<p>…</p>` wrapper is added so it also covers a leading or
 * trailing newline in the input.
 *
 * @param text - Plain text body; may be empty.
 * @returns A styled `.email-content` container, or '' for empty input. The
 *          `dir` attribute is the result of `detectTextDirection(text)`.
 */
export function formatPlainTextToHtml(text: string): string {
  if (!text) return '';

  // Detect text direction
  const direction = detectTextDirection(text);

  // Escape HTML characters to prevent XSS ('&' first so later entities are not re-escaped)
  const escapedText = text
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#039;');

  // After escaping, the only tags in the string are the ones generated below
  const withBreaks = escapedText
    .replace(/\r\n|\r|\n/g, '<br>') // Convert all newlines to <br>
    .replace(/(?:<br>){2,}/g, '</p><p>'); // Convert multiple newlines to paragraphs

  const paragraphs = `<p>${withBreaks}</p>`
    .replace(/<br><\/p>/g, '</p>') // Drop a line break right before a paragraph end
    .replace(/<p><br>/g, '<p>'); // Drop a line break right after a paragraph start

  return `<div class="email-content" style="font-family: -apple-system, BlinkMacSystemFont, Menlo, Monaco, Consolas, 'Courier New', monospace; white-space: pre-wrap; line-height: 1.5; color: #333; padding: 15px; max-width: 100%; overflow-wrap: break-word;" dir="${direction}">${paragraphs}</div>`;
}