Neah/lib/utils/email-content.ts
2025-05-01 12:30:13 +02:00

312 lines
11 KiB
TypeScript

/**
* Centralized Email Content Utilities
*
* This file contains all core functions for email content processing:
* - Content extraction
* - HTML sanitization
* - Text direction handling
* - URL fixing
*
* Other modules should import from this file rather than implementing their own versions.
*/
import { sanitizeHtml } from './dom-purify-config';
import { detectTextDirection } from './text-direction';
import { EmailContent } from '@/types/email';
/**
 * Extract the plain-text and HTML bodies from an email object of unknown shape.
 *
 * Lookup order (the first source that yields any content wins):
 *  1. `email.content.text` / `email.content.html`
 *  2. `email.content.body` (string, or object with `html` / `text`)
 *  3. `email.content.data` (string)
 *  4. `email.content` itself when it is a non-empty string
 *  5. `email.html` / `email.text`
 *  6. `email.body.html|text`, `email.bodyHtml|bodyText`, `email.htmlBody`, `email.plainText`
 *
 * Steps 5 and 6 are always tried when the earlier steps found nothing, so an
 * empty `content` object or empty `content` string no longer hides content
 * stored in the top-level fields.
 *
 * If only HTML is found, the text part is derived from it with
 * `extractTextFromHtml`.
 *
 * @param email - Email-like object; falsy values yield empty strings.
 * @returns The extracted `text` and `html` (each `''` when not found).
 */
export function extractEmailContent(email: any): { text: string; html: string } {
  if (!email) {
    console.log('extractEmailContent: No email provided');
    return { text: '', html: '' };
  }

  let textContent = '';
  let htmlContent = '';

  // Route a bare string to the HTML or text slot depending on what it looks like.
  const assignByMarkup = (value: string): void => {
    if (isHtmlContent(value)) {
      htmlContent = value;
    } else {
      textContent = value;
    }
  };

  try {
    const content = email.content;

    if (content && typeof content === 'object') {
      // Standard format with a content object.
      textContent = content.text || '';
      htmlContent = content.html || '';

      // Some formats nest the payload one level deeper, under `body`.
      if (!textContent && !htmlContent && content.body) {
        if (typeof content.body === 'string') {
          assignByMarkup(content.body);
        } else if (typeof content.body === 'object') {
          htmlContent = content.body.html || '';
          textContent = content.body.text || '';
        }
      }

      // Some email services put the payload in a `data` string.
      if (!textContent && !htmlContent && content.data && typeof content.data === 'string') {
        assignByMarkup(content.data);
      }
    } else if (typeof content === 'string' && content) {
      assignByMarkup(content);
    }

    // Top-level fallbacks, tried whenever nothing was found above.
    if (!htmlContent && !textContent) {
      htmlContent = email.html || '';
      textContent = email.text || '';
    }

    // Less common property names used by some email clients.
    if (!htmlContent && !textContent) {
      htmlContent = email.body?.html || email.bodyHtml || email.htmlBody || '';
      textContent = email.body?.text || email.bodyText || email.plainText || '';
    }
  } catch (error) {
    // Best effort: a malformed object must not break rendering.
    console.error('Error extracting email content:', error);
  }

  // Ensure we always have at least some text content.
  if (!textContent && htmlContent) {
    textContent = extractTextFromHtml(htmlContent);
  }

  console.log('Extracted email content:', {
    hasHtml: !!htmlContent,
    htmlLength: htmlContent?.length || 0,
    hasText: !!textContent,
    textLength: textContent?.length || 0
  });

  return { text: textContent, html: htmlContent };
}
/**
 * Convert an HTML fragment or document to plain text.
 *
 * In a browser the markup is parsed with `DOMParser` rather than by assigning
 * to `innerHTML` on a live element: assigning untrusted markup to an element
 * of the current document can trigger resource loads and inline event
 * handlers such as `<img onerror>`, whereas a `DOMParser` document is not
 * rendered and has scripting disabled. `<script>` and `<style>` elements are
 * dropped so their source does not leak into the text.
 *
 * Outside the browser a regex fallback removes comments, script/style blocks
 * and tags, decodes the most common entities, and collapses whitespace.
 *
 * @param html - HTML source; falsy input returns `''`.
 * @returns The text content of `html`.
 */
export function extractTextFromHtml(html: string): string {
  if (!html) return '';

  try {
    if (typeof window !== 'undefined' && typeof DOMParser !== 'undefined') {
      const doc = new DOMParser().parseFromString(html, 'text/html');
      doc.querySelectorAll('script, style').forEach(node => node.remove());
      return doc.body.textContent ?? '';
    }

    return html
      .replace(/<!--[\s\S]*?-->/g, ' ')
      .replace(/<(script|style)\b[^>]*>[\s\S]*?<\/\1\s*>/gi, ' ')
      .replace(/<[^>]*>/g, ' ')
      .replace(/&nbsp;/g, ' ')
      .replace(/&lt;/g, '<')
      .replace(/&gt;/g, '>')
      .replace(/&quot;/g, '"')
      .replace(/&#0*39;|&apos;/g, "'")
      // `&amp;` is decoded last so `&amp;lt;` becomes the literal text `&lt;`, not `<`.
      .replace(/&amp;/g, '&')
      .replace(/\s+/g, ' ')
      .trim();
  } catch (e) {
    console.error('Error extracting text from HTML:', e);
    // Fallback to basic tag strip.
    return html.replace(/<[^>]*>/g, ' ').trim();
  }
}
/**
 * Heuristically decide whether a string is HTML rather than plain text.
 *
 * The string must start with `<` (after trimming) and contain at least one
 * recognisable tag. Matching is case-insensitive and accepts attributes and
 * self-closing forms, so `<P style="…">`, `<br/>` and `<TABLE …>` count.
 * Apart from the `<html`, `<body`, `<div` prefixes and the doctype, a tag name
 * must be followed by whitespace, `/` or `>`, so an address such as
 * `<a@example.com>` at the start of a plain-text mail is not mistaken for an
 * `<a>` tag.
 *
 * @param content - Candidate string; falsy or non-string input returns `false`.
 * @returns `true` when the content looks like HTML.
 */
export function isHtmlContent(content: string): boolean {
  if (!content || typeof content !== 'string') return false;

  const trimmed = content.trim();
  if (!trimmed.startsWith('<')) return false;

  // Prefix matches for document-level tags.
  const documentTag = /<(?:html|body|div)|<!doctype\s+html/i;
  // Common block/inline tags, with optional attributes or self-closing slash.
  const commonTag =
    /<(?:p|br|span|table|tr|td|a|img|ul|ol|li|h[1-6]|b|i|u|strong|em|font|center|blockquote|pre|head|meta|style)(?:\s[^>]*)?\/?>/i;

  return documentTag.test(trimmed) || commonTag.test(trimmed);
}
/**
 * Main entry point for turning an email object into display-ready HTML.
 *
 * - HTML body present: it is run through `processHtmlContent` and wrapped in a
 *   styled container whose `dir` comes from `detectTextDirection` of the text part.
 * - Text body only: delegated to `formatPlainTextToHtml`.
 * - Neither: a "No content available" placeholder.
 *
 * Any exception is logged and converted into an inline error box.
 *
 * @param email - Email-like object; falsy values return `''`.
 * @returns An HTML string.
 */
export function formatEmailContent(email: any): string {
  if (!email) {
    console.log('formatEmailContent: No email provided');
    return '';
  }

  try {
    const extracted = extractEmailContent(email);
    const plainBody = extracted.text;
    const richBody = extracted.html;

    if (!richBody) {
      // Text-only mail, or nothing usable at all.
      return plainBody
        ? formatPlainTextToHtml(plainBody)
        : '<div class="email-content-empty" style="padding: 20px; text-align: center; color: #666;">No content available</div>';
    }

    // Sanitize and repair first, then determine direction for the wrapper.
    const cleanedBody = processHtmlContent(richBody, plainBody);
    const direction = detectTextDirection(plainBody);
    return `<div class="email-content" style="font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Helvetica, Arial, sans-serif; line-height: 1.6; color: #333; max-width: 100%; overflow-x: auto; overflow-wrap: break-word; word-wrap: break-word;" dir="${direction}">${cleanedBody}</div>`;
  } catch (error) {
    console.error('formatEmailContent: Error formatting email content:', error);
    const detail = error instanceof Error ? error.message : 'Unknown error';
    return `<div class="email-content-error" style="padding: 15px; color: #721c24; background-color: #f8d7da; border: 1px solid #f5c6cb; border-radius: 4px;"><p>Error displaying email content</p><p style="font-size: 12px; margin-top: 10px;">${detail}</p></div>`;
  }
}
/**
 * Sanitize an HTML email body and repair common rendering problems.
 *
 * Steps:
 *  1. In a browser, reduce a full HTML document to the inner HTML of `<body>`.
 *  2. Sanitize with the shared `sanitizeHtml`.
 *  3. In a browser, repair links and images on the sanitized DOM:
 *     - undo one level of double percent-encoding in link hrefs
 *       (`%2520` becomes `%20`);
 *     - leave `cid:` image sources alone, upgrade `http://` and
 *       protocol-relative sources to `https://`, and prefix other
 *       non-`https:`/`data:` sources with `https://example.com`.
 *  4. Apply string-level fixes for Outlook conditional comments, scheme-less
 *     `src`/`background` attributes, and base64 data URIs broken across lines.
 *
 * Safety notes:
 *  - Hrefs are never fully URL-decoded after sanitization. Decoding could turn
 *    a harmless-looking value such as `javascript%3A…` into a live
 *    `javascript:` URL (NOTE(review): assumes `sanitizeHtml` validates URL
 *    schemes on the encoded value, as DOMPurify does — confirm), and it
 *    corrupts legitimately encoded query strings.
 *  - The function fails closed: if processing throws, it returns escaped plain
 *    text instead of the raw, unsanitized input.
 *
 * @param htmlContent - Raw HTML body; falsy input returns `''`.
 * @param textContent - Optional plain-text alternative, used only as the
 *   fail-closed fallback.
 * @returns Sanitized, repaired HTML, or escaped text if processing failed.
 */
export function processHtmlContent(htmlContent: string, textContent?: string): string {
  if (!htmlContent) return '';

  // Escape text for safe inclusion in HTML (used by the fail-closed fallback).
  const escapeForHtml = (value: string): string =>
    value
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&#039;');

  let workingHtml = htmlContent;

  try {
    const hasHtmlTag = /<html/i.test(workingHtml);
    const hasBodyTag = /<body/i.test(workingHtml);

    // Extract body content from a complete HTML document (DOMParser is browser-only).
    if (hasHtmlTag && hasBodyTag && typeof window !== 'undefined' && typeof DOMParser !== 'undefined') {
      try {
        const doc = new DOMParser().parseFromString(workingHtml, 'text/html');
        const bodyContent = doc.body.innerHTML;
        if (bodyContent) {
          workingHtml = bodyContent;
        }
      } catch (error) {
        console.error('Error extracting body content:', error);
      }
    }

    // Use the centralized sanitizeHtml function.
    let sanitizedContent = sanitizeHtml(workingHtml);

    // Repair URLs on the sanitized DOM (browser only; best effort).
    try {
      if (typeof window !== 'undefined' && typeof document !== 'undefined') {
        const tempDiv = document.createElement('div');
        tempDiv.innerHTML = sanitizedContent;

        tempDiv.querySelectorAll('a').forEach(link => {
          const href = link.getAttribute('href');
          // `%25XX` is an encoded percent sign followed by two hex digits,
          // i.e. a percent-escape that has been encoded a second time.
          if (href && /%25[0-9a-f]{2}/i.test(href)) {
            link.setAttribute('href', href.replace(/%25([0-9a-f]{2})/gi, '%$1'));
          }
        });

        tempDiv.querySelectorAll('img').forEach(img => {
          const src = img.getAttribute('src');
          if (!src) return;

          if (src.startsWith('cid:')) {
            // cid: URLs reference inline attachments and are resolved elsewhere.
            console.log('Preserving CID reference:', src);
          } else if (src.startsWith('http://')) {
            img.setAttribute('src', src.replace('http://', 'https://'));
          } else if (src.startsWith('//')) {
            // Protocol-relative URL: keep the host, force https.
            img.setAttribute('src', `https:${src}`);
          } else if (!src.startsWith('https://') && !src.startsWith('data:')) {
            // Relative URL with no base available in an email.
            img.setAttribute(
              'src',
              src.startsWith('/') ? `https://example.com${src}` : `https://example.com/${src}`
            );
          }
        });

        sanitizedContent = tempDiv.innerHTML;
      }
    } catch (e) {
      console.error('Error fixing URLs in content:', e);
    }

    // Fix common email client quirks without breaking cid: URLs.
    return sanitizedContent
      // Normalize whitespace in Outlook conditional comments.
      .replace(/<!--\[if\s+gte\s+mso/g, '<!--[if gte mso')
      // Give scheme-less src/background values an https scheme; a leading `//`
      // is consumed so protocol-relative URLs do not end up with four slashes.
      .replace(/(src|background)="(?!(?:https?:|data:|cid:))(?:\/\/)?/gi, '$1="https://')
      // Remove whitespace from base64 payloads, keeping the declared MIME type.
      .replace(
        /src="(data:image\/[^;"]+;base64,)\s*([^"]+)\s*"/gi,
        (_match: string, prefix: string, payload: string) =>
          `src="${prefix}${payload.replace(/\s+/g, '')}"`
      );
  } catch (error) {
    console.error('Error processing HTML content:', error);
    // Fail closed: never hand back unsanitized markup.
    const fallbackText =
      textContent || htmlContent.replace(/<[^>]*>/g, ' ').replace(/\s+/g, ' ').trim();
    return escapeForHtml(fallbackText).replace(/\r\n|\r|\n/g, '<br>');
  }
}
/**
 * Render a plain-text body as HTML.
 *
 * The text is HTML-escaped, every newline becomes `<br>`, runs of two or more
 * `<br>` become paragraph breaks, and the result is wrapped in a styled
 * container whose `dir` attribute comes from `detectTextDirection`.
 *
 * @param text - Plain-text body; falsy input returns `''`.
 * @returns An HTML string.
 */
export function formatPlainTextToHtml(text: string): string {
  if (!text) return '';

  const direction = detectTextDirection(text);

  // Apply [pattern, replacement] pairs in order.
  const applyRules = (input: string, rules: ReadonlyArray<readonly [RegExp, string]>): string =>
    rules.reduce((acc, [pattern, replacement]) => acc.replace(pattern, replacement), input);

  // Escape HTML characters to prevent XSS; `&` must come first.
  const escapedText = applyRules(text, [
    [/&/g, '&amp;'],
    [/</g, '&lt;'],
    [/>/g, '&gt;'],
    [/"/g, '&quot;'],
    [/'/g, '&#039;'],
  ]);

  // Line breaks and paragraphs.
  const formattedText = applyRules(escapedText, [
    [/\r\n|\r|\n/g, '<br>'], // every newline becomes <br>
    [/((?:<br>){2,})/g, '</p><p>'], // blank line(s) become a paragraph break
    [/<br><\/p>/g, '</p>'], // tidy <br></p>
    [/<p><br>/g, '<p>'], // tidy <p><br>
  ]);

  return `<div class="email-content" style="font-family: -apple-system, BlinkMacSystemFont, Menlo, Monaco, Consolas, 'Courier New', monospace; white-space: pre-wrap; line-height: 1.5; color: #333; padding: 15px; max-width: 100%; overflow-wrap: break-word;" dir="${direction}"><p>${formattedText}</p></div>`;
}