/**
 * Centralized Email Content Utilities
 *
 * This file contains all core functions for email content processing:
 * - Content extraction
 * - HTML sanitization
 * - Text direction handling
 * - URL fixing
 *
 * Other modules should import from this file rather than implementing their own versions.
 */

import { sanitizeHtml } from './dom-purify-config';
import { detectTextDirection } from './text-direction';
import { EmailContent } from '@/types/email';

/**
 * Matches an opening/closing tag, a doctype declaration or an HTML comment.
 * Deliberately has no `g` flag so `.test()` stays stateless between calls.
 */
const HTML_TAG_PATTERN = /<\/?[a-zA-Z][^<>]*>|<!doctype|<!--/i;

/**
 * Entity decoding table for the non-DOM fallback of `extractTextFromHtml`.
 * `&amp;` must stay last: decoding it first would turn `&amp;lt;` into `<`
 * instead of the literal text `&lt;`.
 */
const HTML_ENTITY_REPLACEMENTS: ReadonlyArray<readonly [RegExp, string]> = [
  [/&nbsp;/gi, ' '],
  [/&lt;/gi, '<'],
  [/&gt;/gi, '>'],
  [/&quot;/gi, '"'],
  [/&#39;/g, "'"],
  [/&amp;/gi, '&'],
];

/**
 * Return the first candidate that is a non-empty string, or '' when none is.
 * Non-string values are ignored so the extraction result is always made of strings.
 */
function firstNonEmptyString(...candidates: unknown[]): string {
  for (const candidate of candidates) {
    if (typeof candidate === 'string' && candidate.length > 0) {
      return candidate;
    }
  }
  return '';
}

/** Put a raw string into the `html` or `text` slot depending on `isHtmlContent`. */
function splitRawContent(raw: string): { text: string; html: string } {
  return isHtmlContent(raw) ? { text: '', html: raw } : { text: raw, html: '' };
}

/** Escape a value so it can be embedded as text inside returned HTML markup. */
function escapeForHtmlText(value: string): string {
  return value
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}

/**
 * Extract content from various possible email formats.
 * Centralized implementation to reduce duplication across the codebase.
 *
 * Lookup order:
 * 1. `email` itself when it is a string (classified as HTML or text).
 * 2. `email.content` object: `text` / `html`, then `body` (string or
 *    `{ html, text }`), then a string `data` property.
 * 3. `email.content` string (classified as HTML or text).
 * 4. `email.html` / `email.text`, then `body.html | bodyHtml | htmlBody` and
 *    `body.text | bodyText | plainText`.
 *
 * When only HTML is found, `text` is derived from it via `extractTextFromHtml`.
 *
 * @param email - Email object (or raw content string) in any supported shape.
 * @returns The plain-text and HTML content; both are '' when nothing is found.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- accepts arbitrary provider payloads
export function extractEmailContent(email: any): { text: string; html: string } {
  let textContent = '';
  let htmlContent = '';

  // Early exit if no email
  if (!email) {
    console.log('extractEmailContent: No email provided');
    return { text: '', html: '' };
  }

  try {
    if (typeof email === 'string') {
      // Raw content passed directly instead of an email object
      ({ text: textContent, html: htmlContent } = splitRawContent(email));
    } else if (email.content && typeof email.content === 'object') {
      // Standard format with content object
      const content = email.content;
      textContent = firstNonEmptyString(content.text);
      htmlContent = firstNonEmptyString(content.html);

      // Handle complex email formats where content might be nested
      if (!textContent && !htmlContent) {
        const body = content.body;
        if (typeof body === 'string') {
          ({ text: textContent, html: htmlContent } = splitRawContent(body));
        } else if (body && typeof body === 'object') {
          // Some email formats nest content inside body
          htmlContent = firstNonEmptyString(body.html);
          textContent = firstNonEmptyString(body.text);
        }

        // Check for data property which some email services use
        if (!textContent && !htmlContent && typeof content.data === 'string') {
          ({ text: textContent, html: htmlContent } = splitRawContent(content.data));
        }
      }
    } else if (typeof email.content === 'string') {
      ({ text: textContent, html: htmlContent } = splitRawContent(email.content));
    } else {
      // Check other common properties
      htmlContent = firstNonEmptyString(email.html);
      textContent = firstNonEmptyString(email.text);

      // If still no content, try additional properties that some email clients use
      if (!htmlContent && !textContent) {
        htmlContent = firstNonEmptyString(email.body?.html, email.bodyHtml, email.htmlBody);
        textContent = firstNonEmptyString(email.body?.text, email.bodyText, email.plainText);
      }
    }
  } catch (error) {
    // Best effort: property access on exotic objects (throwing getters) must not break rendering
    console.error('Error extracting email content:', error);
  }

  // Ensure we always have at least some text content
  if (!textContent && htmlContent) {
    textContent = extractTextFromHtml(htmlContent);
  }

  // Log extraction results
  console.log('Extracted email content:', {
    hasHtml: !!htmlContent,
    htmlLength: htmlContent.length,
    hasText: !!textContent,
    textLength: textContent.length,
  });

  return { text: textContent, html: htmlContent };
}

/**
 * Extract plain text from HTML content.
 *
 * Uses `DOMParser` when it exists: the parsed document is inert, so scripts,
 * event-handler attributes and image loads in untrusted email markup are never
 * triggered (unlike assigning `innerHTML` on an element of the live document).
 * Without a DOM, tags are stripped with a regex, common entities are decoded
 * and whitespace is collapsed.
 *
 * @param html - HTML markup; falsy input yields ''.
 * @returns The text content of the markup.
 */
export function extractTextFromHtml(html: string): string {
  if (!html) return '';

  try {
    if (typeof DOMParser !== 'undefined') {
      const parsed = new DOMParser().parseFromString(html, 'text/html');
      return parsed.body?.textContent ?? '';
    }

    // Simple regex fallback for non-browser environments
    let stripped = html.replace(/<[^>]*>/g, ' ');
    for (const [pattern, replacement] of HTML_ENTITY_REPLACEMENTS) {
      stripped = stripped.replace(pattern, replacement);
    }
    return stripped.replace(/\s+/g, ' ').trim();
  } catch (e) {
    console.error('Error extracting text from HTML:', e);
    // Fallback to basic strip
    return html.replace(/<[^>]*>/g, ' ').trim();
  }
}

/**
 * Check if a string is likely HTML content.
 *
 * The trimmed string must start with `<` AND contain at least one real tag,
 * doctype or comment, so text such as `<3 you` or `< 5 items` is not treated
 * as markup.
 *
 * @param content - Candidate string; non-strings and '' yield false.
 */
export function isHtmlContent(content: string): boolean {
  if (typeof content !== 'string' || !content) return false;

  const trimmed = content.trim();
  return trimmed.startsWith('<') && HTML_TAG_PATTERN.test(trimmed);
}

/**
 * Format and standardize email content for display following email industry standards.
 * This is the main entry point for rendering email content.
 *
 * HTML content is passed through `processHtmlContent`; text-only content is
 * passed through `formatPlainTextToHtml`; otherwise a placeholder is returned.
 * Any exception is logged and converted into an error message with the
 * exception text HTML-escaped.
 *
 * @param email - Email object (or raw content string); falsy input yields ''.
 * @returns Markup ready for display.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- accepts arbitrary provider payloads
export function formatEmailContent(email: any): string {
  if (!email) {
    console.log('formatEmailContent: No email provided');
    return '';
  }

  try {
    // Extract content from email
    const { text, html } = extractEmailContent(email);

    console.log('formatEmailContent processing:', {
      hasHtml: !!html,
      htmlLength: html?.length || 0,
      hasText: !!text,
      textLength: text?.length || 0,
      emailType: typeof email === 'string' ? 'string' : 'object',
    });

    // If we have HTML content, sanitize and standardize it
    if (html) {
      const processedHtml = processHtmlContent(html, text);

      console.log('HTML content processed:', {
        processedLength: processedHtml?.length || 0,
        isEmpty: !processedHtml || processedHtml.trim().length === 0,
      });

      // NOTE(review): the wrapper markup around the processed HTML appears to have been
      // stripped from this literal — confirm against the original file.
      return `
${processedHtml}
`;
    }

    // If we only have text content, format it properly
    if (text) {
      console.log('Using plain text formatting');
      return formatPlainTextToHtml(text);
    }

    // Default case: empty or unrecognized content
    // NOTE(review): surrounding markup appears to be missing from this literal — confirm.
    return '\nNo content available\n';
  } catch (error) {
    console.error('formatEmailContent: Error formatting email content:', error);
    // The message is escaped because exception text can carry email-derived markup.
    return `

Error displaying email content

${escapeForHtmlText(error instanceof Error ? error.message : 'Unknown error')}

`; } } /** * Process HTML content to fix common email rendering issues */ export function processHtmlContent(htmlContent: string, textContent?: string): string { if (!htmlContent) return ''; try { console.log('processHtmlContent input:', { length: htmlContent.length, startsWithHtml: htmlContent.trim().startsWith(' { const href = link.getAttribute('href'); if (href && href.includes('%')) { try { // Try to decode URLs that might have been double-encoded const decodedHref = decodeURIComponent(href); link.setAttribute('href', decodedHref); } catch (e) { // If decoding fails, keep the original console.warn('Failed to decode href:', href); } } }); // Fix image URLs - preserve cid: URLs for email attachments const images = tempDiv.querySelectorAll('img'); images.forEach(img => { const src = img.getAttribute('src'); if (src) { // Don't modify cid: URLs as they are handled specially in email clients if (src.startsWith('cid:')) { // Keep cid: URLs as they are console.log('Preserving CID reference:', src); } // Fix http:// URLs to https:// for security else if (src.startsWith('http://')) { img.setAttribute('src', src.replace('http://', 'https://')); } // Handle relative URLs that might be broken else if (!src.startsWith('https://') && !src.startsWith('data:')) { if (src.startsWith('/')) { img.setAttribute('src', `https://example.com${src}`); } else { img.setAttribute('src', `https://example.com/${src}`); } } } }); // Clean up excessive whitespace and empty elements // Find all text nodes and normalize whitespace const walker = document.createTreeWalker( tempDiv, NodeFilter.SHOW_TEXT, null ); const textNodes = []; while (walker.nextNode()) { textNodes.push(walker.currentNode); } // Process text nodes to normalize whitespace textNodes.forEach(node => { if (node.nodeValue) { // Replace sequences of whitespace with a single space node.nodeValue = node.nodeValue.replace(/\s+/g, ' ').trim(); } }); // Remove empty paragraphs and divs that contain only whitespace const emptyElements 
= tempDiv.querySelectorAll('p, div, span'); emptyElements.forEach(el => { if (el.innerHTML.trim() === '' || el.innerHTML === ' ') { el.parentNode?.removeChild(el); } }); // Remove excessive consecutive
tags (more than 2) let html = tempDiv.innerHTML; html = html.replace(/(\s*){3,}/gi, '

'); tempDiv.innerHTML = html; // Get the fixed HTML sanitizedContent = tempDiv.innerHTML; } } catch (e) { console.error('Error fixing content:', e); } // Fix common email client quirks without breaking cid: URLs return sanitizedContent // Fix for Outlook WebVML content .replace(/