312 lines
11 KiB
TypeScript
312 lines
11 KiB
TypeScript
/**
 * Centralized Email Content Utilities
 *
 * This file contains all core functions for email content processing:
 * - Content extraction
 * - HTML sanitization
 * - Text direction handling
 * - URL fixing
 *
 * Other modules should import from this file rather than implementing their own versions.
 */
|
|
|
|
import { sanitizeHtml } from './dom-purify-config';
|
|
import { detectTextDirection } from './text-direction';
|
|
import { EmailContent } from '@/types/email';
|
|
|
|
/**
 * Extract the plain-text and HTML bodies from an email object of unknown shape.
 *
 * Sources are tried in priority order; the first one that yields any content wins:
 *   1. `email.content.text` / `email.content.html`
 *   2. `email.content.body` (a string, or an object with `html` / `text`)
 *   3. `email.content.data` (string)
 *   4. `email.content` itself when it is a string
 *   5. `email.html` / `email.text`
 *   6. `email.body.html|text`, `email.bodyHtml|bodyText`, `email.htmlBody|plainText`
 *
 * Untyped strings (sources 2-4) are classified as HTML or text with `isHtmlContent`.
 * If only HTML is found, the text part is derived from it with `extractTextFromHtml`.
 *
 * @param email - Email-like object; any falsy value yields empty content.
 * @returns An object whose `text` and `html` are always strings (possibly empty).
 */
export function extractEmailContent(email: any): { text: string; html: string } {
  // Early exit if no email
  if (!email) {
    console.log('extractEmailContent: No email provided');
    return { text: '', html: '' };
  }

  let textContent = '';
  let htmlContent = '';

  // Only real strings are accepted as content. Anything else (objects, numbers,
  // `false`, ...) is treated as missing, so the declared return type holds and
  // extractTextFromHtml below never receives a non-string.
  const asString = (value: unknown): string => (typeof value === 'string' ? value : '');

  // Route a string of unknown flavour to the html or text slot.
  const assignUntyped = (raw: string): void => {
    if (isHtmlContent(raw)) {
      htmlContent = raw;
    } else {
      textContent = raw;
    }
  };

  try {
    const content = email.content;

    if (content && typeof content === 'object') {
      // Standard format with a content object
      textContent = asString(content.text);
      htmlContent = asString(content.html);

      // Some formats nest the payload one level deeper, under `body`
      if (!textContent && !htmlContent) {
        const body = content.body;
        if (typeof body === 'string') {
          assignUntyped(body);
        } else if (body && typeof body === 'object') {
          htmlContent = asString(body.html);
          textContent = asString(body.text);
        }
      }

      // Some services put the payload in a `data` string instead
      if (!textContent && !htmlContent && typeof content.data === 'string') {
        assignUntyped(content.data);
      }
    } else if (typeof content === 'string') {
      assignUntyped(content);
    }

    // Fall back to top-level properties whenever `content` produced nothing,
    // including the case where `content` exists but is empty or unrecognised.
    if (!textContent && !htmlContent) {
      htmlContent = asString(email.html);
      textContent = asString(email.text);
    }

    // Less common property names used by some email clients
    if (!textContent && !htmlContent) {
      htmlContent =
        asString(email.body?.html) || asString(email.bodyHtml) || asString(email.htmlBody);
      textContent =
        asString(email.body?.text) || asString(email.bodyText) || asString(email.plainText);
    }
  } catch (error) {
    console.error('Error extracting email content:', error);
  }

  // Ensure we always have at least some text content when HTML is available
  if (!textContent && htmlContent) {
    textContent = extractTextFromHtml(htmlContent);
  }

  // Log extraction results
  console.log('Extracted email content:', {
    hasHtml: !!htmlContent,
    htmlLength: htmlContent.length,
    hasText: !!textContent,
    textLength: textContent.length
  });

  return { text: textContent, html: htmlContent };
}
|
|
|
|
/**
 * Extract plain text from HTML content.
 *
 * In a browser the markup is parsed with `DOMParser`, which builds a separate,
 * inert document: unlike assigning to `innerHTML` of an element created from
 * the live document, parsing does not run inline event handlers such as
 * `<img onerror=...>` found in untrusted email HTML. Outside a browser, or if
 * DOM parsing throws, a regex-based fallback is used.
 *
 * `<script>` and `<style>` elements are dropped in both paths so that code and
 * CSS do not leak into the extracted text.
 *
 * @param html - HTML markup; empty or non-string input yields `''`.
 * @returns The text content. The regex path collapses whitespace runs and trims;
 *          the DOM path returns the body's `textContent` unchanged.
 */
export function extractTextFromHtml(html: string): string {
  if (!html || typeof html !== 'string') return '';

  // Regex-based stripping for non-browser environments and as the error fallback.
  // `&amp;` is decoded last so that input like `&amp;lt;` becomes `&lt;`, not `<`.
  const stripWithRegex = (markup: string): string =>
    markup
      .replace(/<(script|style)\b[^>]*>[\s\S]*?<\/\1\s*>/gi, ' ')
      .replace(/<[^>]*>/g, ' ')
      .replace(/&nbsp;/g, ' ')
      .replace(/&lt;/g, '<')
      .replace(/&gt;/g, '>')
      .replace(/&quot;/g, '"')
      .replace(/&#0*39;/g, "'")
      .replace(/&amp;/g, '&')
      .replace(/\s+/g, ' ')
      .trim();

  try {
    // Use the DOM API if available
    if (typeof window !== 'undefined' && typeof DOMParser !== 'undefined') {
      const parsed = new DOMParser().parseFromString(html, 'text/html');
      parsed.querySelectorAll('script, style').forEach((node) => node.remove());
      return parsed.body.textContent || '';
    }

    return stripWithRegex(html);
  } catch (e) {
    console.error('Error extracting text from HTML:', e);
    return stripWithRegex(html);
  }
}
|
|
|
|
/**
 * Check whether a string is likely HTML content.
 *
 * The string must start with `<` (ignoring leading whitespace) and contain at
 * least one well-known HTML tag. Tag matching is case-insensitive and accepts
 * attributes and self-closing forms (`<P>`, `<p style="...">`, `<br/>`). It also
 * covers elements common in email markup such as `<table>` and `<span>`.
 * A tag name must be followed by whitespace, `/` or `>`, so an address-like
 * prefix such as `<alice@example.com>` is not mistaken for an `<a>` tag.
 *
 * @param content - Candidate string; empty or non-string input yields `false`.
 * @returns `true` when the content looks like HTML, otherwise `false`.
 */
export function isHtmlContent(content: string): boolean {
  if (!content || typeof content !== 'string') return false;

  // Cheap pre-filter: HTML bodies start with a tag, doctype or comment.
  if (!content.trim().startsWith('<')) return false;

  const knownTag =
    /<(?:!doctype|html|head|body|div|p|br|span|table|a|img|h[1-6]|ul|ol|li|font|center|blockquote|pre|strong|em|b|i|u)(?=[\s/>])/i;

  return knownTag.test(content);
}
|
|
|
|
/**
 * Format and standardize email content for display.
 * This is the main entry point for rendering email content.
 *
 * - HTML content is run through `processHtmlContent` and wrapped in a styled
 *   `<div class="email-content">` whose `dir` attribute comes from
 *   `detectTextDirection` applied to the text part.
 * - Text-only content is rendered with `formatPlainTextToHtml`.
 * - With no content at all, a "No content available" placeholder is returned.
 * - If anything throws, a styled error box is returned instead of propagating.
 *
 * @param email - Email-like object; a falsy value yields `''`.
 * @returns An HTML string for display.
 */
export function formatEmailContent(email: any): string {
  if (!email) {
    console.log('formatEmailContent: No email provided');
    return '';
  }

  try {
    // Extract content from email
    const { text, html } = extractEmailContent(email);

    // HTML content: process it and wrap it in the styled container
    if (html) {
      const processedHtml = processHtmlContent(html, text);

      return `<div class="email-content" style="font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Helvetica, Arial, sans-serif; line-height: 1.6; color: #333; max-width: 100%; overflow-x: auto; overflow-wrap: break-word; word-wrap: break-word;" dir="${detectTextDirection(text)}">${processedHtml}</div>`;
    }

    // Text-only content
    if (text) {
      return formatPlainTextToHtml(text);
    }

    // Default case: empty or unrecognized content
    return '<div class="email-content-empty" style="padding: 20px; text-align: center; color: #666;">No content available</div>';
  } catch (error) {
    console.error('formatEmailContent: Error formatting email content:', error);

    // The message is placed inside markup, so escape it: error text may echo
    // parts of the (untrusted) email being processed.
    const rawMessage = error instanceof Error ? error.message : 'Unknown error';
    const safeMessage = rawMessage
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&#39;');

    return `<div class="email-content-error" style="padding: 15px; color: #721c24; background-color: #f8d7da; border: 1px solid #f5c6cb; border-radius: 4px;"><p>Error displaying email content</p><p style="font-size: 12px; margin-top: 10px;">${safeMessage}</p></div>`;
  }
}
|
|
|
|
/**
 * Process HTML content to fix common email rendering issues.
 *
 * Steps, in order:
 *   1. In a browser, if the input is a full document (`<html` and `<body` both
 *      present), keep only the body's inner HTML.
 *   2. Pass the markup through the imported `sanitizeHtml`.
 *   3. In a browser, repair link and image URLs via a temporary element.
 *   4. Apply string-level fixes: conditional-comment whitespace, scheme-less
 *      `src` / `background` values, and base64 data split across lines.
 *
 * @param htmlContent - Raw email HTML; an empty value yields `''`.
 * @param textContent - Accepted for interface compatibility; not read here.
 * @returns The processed HTML. If processing fails, an HTML-escaped copy of the
 *          input is returned, so markup that did not pass sanitization is never
 *          emitted as live HTML.
 */
export function processHtmlContent(htmlContent: string, textContent?: string): string {
  if (!htmlContent) return '';

  try {
    const hasHtmlTag = htmlContent.includes('<html');
    const hasBodyTag = htmlContent.includes('<body');

    // Extract body content if we have a complete HTML document (DOMParser is browser-only)
    if (hasHtmlTag && hasBodyTag && typeof window !== 'undefined' && typeof DOMParser !== 'undefined') {
      try {
        const parsedDoc = new DOMParser().parseFromString(htmlContent, 'text/html');
        const bodyContent = parsedDoc.body.innerHTML;

        if (bodyContent) {
          htmlContent = bodyContent;
        }
      } catch (error) {
        console.error('Error extracting body content:', error);
      }
    }

    // Use the centralized sanitizeHtml function
    let sanitizedContent = sanitizeHtml(htmlContent);

    // Fix URL issues on the sanitized markup (browser only)
    try {
      if (typeof window !== 'undefined' && typeof document !== 'undefined') {
        // Temporary element to manipulate the HTML
        const tempDiv = document.createElement('div');
        tempDiv.innerHTML = sanitizedContent;

        // Undo one level of double percent-encoding (`%2520` -> `%20`) and nothing
        // else. Fully decoding the href would corrupt encoded query values
        // (`%26` -> `&`) and, because it runs after sanitization, could turn a
        // harmless `javascript%3A...` into a live `javascript:` URL.
        tempDiv.querySelectorAll('a').forEach((link) => {
          const href = link.getAttribute('href');
          if (href && href.includes('%25')) {
            link.setAttribute('href', href.replace(/%25([0-9A-Fa-f]{2})/g, '%$1'));
          }
        });

        // Fix image URLs - preserve cid: URLs for email attachments
        tempDiv.querySelectorAll('img').forEach((img) => {
          const src = img.getAttribute('src');
          if (!src) return;

          if (src.startsWith('cid:')) {
            // Keep cid: URLs as they are
            console.log('Preserving CID reference:', src);
            return;
          }

          if (src.startsWith('http://')) {
            // Upgrade http:// to https://
            img.setAttribute('src', src.replace('http://', 'https://'));
            return;
          }

          if (src.startsWith('//')) {
            // Protocol-relative URL: it already names a host, so only add the scheme
            img.setAttribute('src', `https:${src}`);
            return;
          }

          if (!src.startsWith('https://') && !src.startsWith('data:')) {
            // NOTE(review): relative paths are anchored to the placeholder host
            // example.com, as before; confirm the intended base URL.
            const path = src.startsWith('/') ? src : `/${src}`;
            img.setAttribute('src', `https://example.com${path}`);
          }
        });

        // Get the fixed HTML
        sanitizedContent = tempDiv.innerHTML;
      }
    } catch (e) {
      console.error('Error fixing URLs in content:', e);
    }

    // Fix common email client quirks without breaking cid: URLs
    return sanitizedContent
      // Normalize whitespace in Outlook conditional comments
      .replace(/<!--\[if\s+gte\s+mso/g, '<!--[if gte mso')
      // Protocol-relative URLs only need a scheme
      .replace(/(src|background)="\/\//gi, '$1="https://')
      // Prefix scheme-less values WITHOUT touching http(s):, data: or cid: URLs.
      // NOTE(review): this treats the value as starting with a host name; a bare
      // relative path ends up as `https://<path>`. Confirm this is intended.
      .replace(/(src|background)="(?!(?:https?:|data:|cid:))/gi, '$1="https://')
      // Remove whitespace from base64 payloads broken across lines, keeping the
      // original MIME type (it must not be rewritten to image/png).
      .replace(
        /src="(data:image\/[^;"]+;base64,)\s*([^"]+)\s*"/gi,
        (_match, prefix: string, payload: string) => `src="${prefix}${payload.replace(/\s+/g, '')}"`
      );
  } catch (error) {
    console.error('Error processing HTML content:', error);

    // Fail closed: the input may not have passed sanitization, so return it as
    // escaped text rather than as live markup.
    if (typeof htmlContent !== 'string') return '';
    return htmlContent
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;');
  }
}
|
|
|
|
/**
 * Format plain text as HTML with line breaks, paragraphs and styling.
 *
 * The text is HTML-escaped first, so any markup it contains is displayed
 * literally instead of being interpreted. Single newlines become `<br>`; runs of
 * two or more newlines become paragraph boundaries. The result is wrapped in a
 * styled `<div class="email-content">` whose `dir` attribute is the value
 * returned by `detectTextDirection` for the text.
 *
 * @param text - Plain-text email body; an empty value yields `''`.
 * @returns An HTML string for display.
 */
export function formatPlainTextToHtml(text: string): string {
  if (!text) return '';

  // Detect text direction
  const direction = detectTextDirection(text);

  // Escape HTML characters to prevent XSS. `&` must be escaped first so the
  // entities produced by the following replacements are not escaped again.
  const escapedText = text
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');

  // Format plain text with proper line breaks and paragraphs
  const formattedText = escapedText
    .replace(/\r\n|\r|\n/g, '<br>') // Convert all newlines to <br>
    .replace(/((?:<br>){2,})/g, '</p><p>') // Convert multiple newlines to paragraphs
    .replace(/<br><\/p>/g, '</p>') // Fix any <br></p> combinations
    .replace(/<p><br>/g, '<p>'); // Fix any <p><br> combinations

  return `<div class="email-content" style="font-family: -apple-system, BlinkMacSystemFont, Menlo, Monaco, Consolas, 'Courier New', monospace; white-space: pre-wrap; line-height: 1.5; color: #333; padding: 15px; max-width: 100%; overflow-wrap: break-word;" dir="${direction}"><p>${formattedText}</p></div>`;
}
|