/**
 * Centralized Email Content Utilities
 *
 * This file contains all core functions for email content processing:
 * - Content extraction
 * - HTML sanitization
 * - Text direction handling
 * - URL fixing
 *
 * Other modules should import from this file rather than implementing their own versions.
 */
|
|
|
|
import { sanitizeHtml } from './dom-purify-config';
|
|
import { detectTextDirection } from './text-direction';
|
|
import { EmailContent } from '@/types/email';
|
|
import { processCidReferences } from './email-utils';
|
|
|
|
/**
 * Extract content from the various shapes an email object may take.
 * Centralized implementation to reduce duplication across the codebase.
 *
 * Lookup order:
 *  1. `email.content` as an object: `text` / `html`, then `content.body`
 *     (string or nested object), then a string `content.data`.
 *  2. `email.content` as a raw string.
 *  3. Top-level `email.html` / `email.text`, then `email.body.html|text`,
 *     `bodyHtml`, `htmlBody`, `bodyText`, `plainText`.
 *
 * Raw strings are classified as HTML or text with `isHtmlContent`. When only
 * HTML was found, `text` is derived from it with `extractTextFromHtml`.
 *
 * The result always honours the declared return type: values that are not
 * strings are treated as missing, `isHtml` is coerced to a boolean and any
 * direction other than `'rtl'` (case-insensitive) becomes `'ltr'`.
 *
 * @param email - Email-like object in any of the supported shapes.
 * @returns The extracted text, HTML, HTML flag and text direction. All fields
 *          are empty / `false` / `'ltr'` when nothing usable was found.
 */
export function extractEmailContent(email: any): { text: string; html: string; isHtml: boolean; direction: 'ltr' | 'rtl'; } {
  // Early exit if no email
  if (!email) {
    console.log('extractEmailContent: No email provided');
    return { text: '', html: '', isHtml: false, direction: 'ltr' };
  }

  // Anything that is not a string cannot be rendered as content; treat it as
  // absent so later `.length` / `.replace` calls never see a non-string.
  const asString = (value: unknown): string => (typeof value === 'string' ? value : '');
  const asDirection = (value: unknown): 'ltr' | 'rtl' =>
    typeof value === 'string' && value.toLowerCase() === 'rtl' ? 'rtl' : 'ltr';

  let textContent = '';
  let htmlContent = '';
  let isHtml = false;
  let direction: 'ltr' | 'rtl' = 'ltr';

  // Reads html/text/isHtml/direction from an object that carries them directly.
  const readStructured = (source: any): void => {
    htmlContent = asString(source.html);
    textContent = asString(source.text);
    isHtml = Boolean(source.isHtml) || htmlContent !== '';
    direction = asDirection(source.direction);
  };

  // Stores a raw string as either HTML or text depending on what it looks like.
  const readRaw = (raw: string): void => {
    if (isHtmlContent(raw)) {
      htmlContent = raw;
      isHtml = true;
    } else {
      textContent = raw;
      isHtml = false;
    }
  };

  try {
    const content = email.content;

    if (content && typeof content === 'object') {
      // Standard format with content object
      readStructured(content);

      // Handle complex email formats where content might be nested
      if (!textContent && !htmlContent) {
        const body = content.body;
        if (typeof body === 'string') {
          if (body) readRaw(body);
        } else if (body && typeof body === 'object') {
          // Some email formats nest content inside body
          readStructured(body);
        }

        // Check for data property which some email services use
        const data = content.data;
        if (!textContent && !htmlContent && typeof data === 'string' && data) {
          readRaw(data);
        }
      }
    } else if (typeof content === 'string') {
      readRaw(content);
    } else {
      // Check other common top-level properties
      readStructured(email);

      // If still no content, check for less common properties
      if (!htmlContent && !textContent) {
        const body = email.body;
        htmlContent = asString(body?.html) || asString(email.bodyHtml) || asString(email.htmlBody);
        textContent = asString(body?.text) || asString(email.bodyText) || asString(email.plainText);
        isHtml = Boolean(body?.isHtml) || htmlContent !== '';
        direction = asDirection(body?.direction);
      }
    }
  } catch (error) {
    // Best effort: keep whatever was extracted before the failure.
    console.error('Error extracting email content:', error);
  }

  // Ensure we always have at least some text content
  if (!textContent && htmlContent) {
    textContent = extractTextFromHtml(htmlContent);
  }

  // Log extraction results
  console.log('Extracted email content:', {
    hasHtml: !!htmlContent,
    htmlLength: htmlContent.length,
    hasText: !!textContent,
    textLength: textContent.length,
    isHtml,
    direction
  });

  return { text: textContent, html: htmlContent, isHtml, direction };
}
|
|
|
|
/**
 * Extract plain text from HTML content.
 *
 * In a browser the markup is parsed with `DOMParser` into a separate document
 * instead of being assigned to `innerHTML` of an element owned by the live
 * page, so untrusted email markup is not attached to the current document
 * (per the HTML spec, DOMParser documents have scripting disabled).
 * Outside a browser a regex fallback strips tags, decodes the basic entities
 * (`&nbsp;`, `&lt;`, `&gt;`, `&quot;`, `&#39;`, `&amp;`) and collapses
 * whitespace.
 *
 * @param html - HTML markup; empty or non-string input yields `''`.
 * @returns The text content of the markup.
 */
export function extractTextFromHtml(html: string): string {
  if (!html || typeof html !== 'string') return '';

  const stripTags = (markup: string): string => markup.replace(/<[^>]*>/g, ' ');

  try {
    // Use DOM API if available
    if (typeof window !== 'undefined' && typeof window.DOMParser === 'function') {
      const parsed = new window.DOMParser().parseFromString(html, 'text/html');
      return parsed.body?.textContent || '';
    }

    // Simple regex fallback for non-browser environments
    return stripTags(html)
      .replace(/&nbsp;/g, ' ')
      .replace(/&lt;/g, '<')
      .replace(/&gt;/g, '>')
      .replace(/&quot;/g, '"')
      .replace(/&#39;/g, "'")
      // `&amp;` is decoded last so that `&amp;lt;` becomes `&lt;`, not `<`.
      .replace(/&amp;/g, '&')
      .replace(/\s+/g, ' ')
      .trim();
  } catch (e) {
    console.error('Error extracting text from HTML:', e);
    // Fallback to basic strip
    return stripTags(html).trim();
  }
}
|
|
|
|
/**
 * Check if a string is likely HTML content.
 *
 * The string must start with `<` (ignoring leading whitespace) and contain at
 * least one recognisable HTML construct. Matching is case-insensitive.
 * `<!doctype`, `<html`, `<body` and `<div` are matched as prefixes; every
 * other listed tag must either close immediately (`<p>`, `<br/>`, `<br />`)
 * or carry an attribute assignment (`<p style="…">`), so plain text such as
 * `<john@example.com> wrote:` or `a <b c` is not mistaken for markup.
 *
 * @param content - Candidate string; empty or non-string input yields `false`.
 * @returns `true` when the string looks like HTML.
 */
export function isHtmlContent(content: string): boolean {
  if (!content || typeof content !== 'string') return false;
  if (!content.trim().startsWith('<')) return false;

  const knownTag =
    /<(?:!doctype|html|body|div|(?:head|span|p|br|hr|table|tbody|tr|td|th|a|img|ul|ol|li|h[1-6]|blockquote|pre|font|center|b|i|u|strong|em|style|meta)(?=\s*\/?>|\s+[a-z:-]+\s*=))/i;

  return knownTag.test(content);
}
|
|
|
|
/**
 * Format and standardize email content for display.
 * This is the main entry point for rendering email content.
 *
 * Content is obtained via `extractEmailContent`. HTML, when present, is run
 * through `processHtmlContent` with sanitization enabled and wrapped in a
 * styled `div.email-content` whose `dir` is the direction reported by that
 * call. Otherwise plain text is rendered with `formatPlainTextToHtml`. When
 * neither is available a "No content available" placeholder is returned.
 *
 * Any exception is logged and turned into an error box. The error message is
 * HTML-escaped before being embedded, because it can contain fragments of the
 * (untrusted) email being processed.
 *
 * @param email - Email-like object; a falsy value yields `''`.
 * @returns An HTML string ready for insertion into the page.
 */
export function formatEmailContent(email: any): string {
  if (!email) {
    console.log('formatEmailContent: No email provided');
    return '';
  }

  try {
    // Extract content from email
    const { text, html, isHtml, direction } = extractEmailContent(email);

    console.log('formatEmailContent processing:', {
      hasHtml: !!html,
      htmlLength: html?.length || 0,
      hasText: !!text,
      textLength: text?.length || 0,
      emailType: typeof email === 'string' ? 'string' : 'object',
      isHtml,
      direction
    });

    // If we have HTML content, sanitize and standardize it
    if (html) {
      const processed = processHtmlContent(html, { sanitize: true });

      console.log('HTML content processed:', {
        processedLength: processed.sanitizedContent?.length || 0,
        isEmpty: !processed.sanitizedContent || processed.sanitizedContent.trim().length === 0
      });

      // Apply styling
      return `<div class="email-content" style="font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Helvetica, Arial, sans-serif; line-height: 1.6; color: #333; max-width: 100%; overflow-x: auto; overflow-wrap: break-word; word-wrap: break-word;" dir="${processed.direction}">${processed.sanitizedContent}</div>`;
    }

    // If we only have text content, format it properly
    if (text) {
      console.log('Using plain text formatting');
      return formatPlainTextToHtml(text);
    }

    // Default case: empty or unrecognized content
    return '<div class="email-content-empty" style="padding: 20px; text-align: center; color: #666;">No content available</div>';
  } catch (error) {
    console.error('formatEmailContent: Error formatting email content:', error);

    const rawMessage = error instanceof Error ? error.message : 'Unknown error';
    // Escape before embedding: the message must never be interpreted as markup.
    const safeMessage = rawMessage
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&#39;');

    return `<div class="email-content-error" style="padding: 15px; color: #721c24; background-color: #f8d7da; border: 1px solid #f5c6cb; border-radius: 4px;"><p>Error displaying email content</p><p style="font-size: 12px; margin-top: 10px;">${safeMessage}</p></div>`;
  }
}
|
|
|
|
/**
 * Process HTML content to ensure safe rendering and proper formatting.
 *
 * Steps, in order:
 *  1. Resolve `cid:` references via `processCidReferences` when attachments
 *     are supplied.
 *  2. Sanitize with `sanitizeHtml` unless `options.sanitize === false`.
 *  3. For forwarded / reply content, add default inline styles to `<table>`
 *     and `<td>` tags that do not already carry a `style` attribute.
 *  4. Apply small fix-ups (protocol-relative and `http://` `src` URLs become
 *     `https://`, signature separators become `<hr>`, whitespace between tags
 *     is collapsed).
 *  5. Add a default inline style to `<blockquote>` tags that have none.
 *
 * Failure handling is fail-closed: if sanitization throws, the markup is
 * reduced to HTML-escaped plain text instead of being returned raw. If a
 * later step throws, the already-sanitized (but un-enhanced) content is
 * returned.
 *
 * Note: `blockExternalContent` is accepted and logged but no code in this
 * function acts on it.
 *
 * @param htmlContent - Raw HTML of the email body.
 * @param options - Processing options. A string is accepted for backward
 *                  compatibility with the legacy `textContent` parameter and
 *                  is treated as `{ sanitize: true }`.
 * @returns The processed markup plus flags derived from it: `hasImages`
 *          (contains `<img`), `hasExternalContent` (contains `https://`) and
 *          the direction reported by `detectTextDirection`.
 */
export function processHtmlContent(
  htmlContent: string,
  options?: {
    sanitize?: boolean;
    blockExternalContent?: boolean;
    preserveReplyFormat?: boolean;
    attachments?: Array<{
      filename?: string;
      name?: string;
      contentType?: string;
      content?: string;
      contentId?: string;
    }>;
  } | string // Support for legacy textContent parameter
): {
  sanitizedContent: string;
  hasImages: boolean;
  hasExternalContent: boolean;
  direction: 'ltr' | 'rtl';
} {
  // Handle legacy string parameter (textContent) without mutating the argument
  const opts: Exclude<typeof options, string> =
    typeof options === 'string' ? { sanitize: true } : options;

  console.log('Processing HTML content:', {
    contentLength: htmlContent?.length || 0,
    startsWithHtml: htmlContent?.startsWith('<html'),
    startsWithDiv: htmlContent?.startsWith('<div'),
    containsForwardedMessage: htmlContent?.includes('---------- Forwarded message ----------'),
    containsQuoteHeader: htmlContent?.includes('<div class="gmail_quote"'),
    sanitize: opts?.sanitize,
    preserveReplyFormat: opts?.preserveReplyFormat,
    blockExternalContent: opts?.blockExternalContent,
    hasAttachments: opts?.attachments?.length || 0
  });

  if (!htmlContent) {
    return {
      sanitizedContent: '',
      hasImages: false,
      hasExternalContent: false,
      direction: 'ltr',
    };
  }

  // Store the original content for comparison
  const originalContent = htmlContent;
  let workingHtml = htmlContent;

  // Process CID references before sanitization
  if (opts?.attachments?.length) {
    console.log('Processing CID references in processHtmlContent');
    workingHtml = processCidReferences(workingHtml, opts.attachments);
  }

  // Special handling for reply/forwarded content with less aggressive sanitization
  const isReplyOrForward = opts?.preserveReplyFormat === true;
  // Apply sanitization by default unless explicitly turned off
  const shouldSanitize = opts?.sanitize !== false;

  // Reduces markup to escaped plain text; the result contains no `<` at all.
  const toInertText = (markup: string): string =>
    markup
      .replace(/<[^>]*>/g, ' ')
      .replace(/\s+/g, ' ')
      .trim()
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&#39;');

  // Adds `style` to every `<tag …>` that does not already have one, so a tag
  // never ends up with two `style` attributes.
  const withDefaultStyle = (markup: string, tag: string, style: string): string =>
    markup.replace(new RegExp(`<${tag}([^>]*)>`, 'g'), (match: string, attrs: string) =>
      /(?:^|\s)style\s*=/i.test(attrs) ? match : `<${tag}${attrs} style="${style}">`
    );

  let sanitized: string;
  try {
    sanitized = shouldSanitize
      ? sanitizeHtml(workingHtml, { preserveReplyFormat: isReplyOrForward })
      : workingHtml;
  } catch (error) {
    console.error('Error processing HTML content:', error);
    // Fail closed: never hand unsanitized markup back to the caller.
    return {
      sanitizedContent: toInertText(workingHtml),
      hasImages: false,
      hasExternalContent: false,
      direction: 'ltr',
    };
  }

  try {
    let sanitizedContent = sanitized;

    // Log content changes from sanitization
    console.log('HTML sanitization results:', {
      originalLength: originalContent.length,
      sanitizedLength: sanitizedContent.length,
      difference: originalContent.length - sanitizedContent.length,
      percentRemoved: ((originalContent.length - sanitizedContent.length) / originalContent.length * 100).toFixed(2) + '%',
      isEmpty: !sanitizedContent || sanitizedContent.trim().length === 0,
      isReplyOrForward: isReplyOrForward
    });

    // Detect if content is a forwarded message to ensure special handling for tables
    const isForwardedEmail =
      sanitizedContent.includes('---------- Forwarded message ----------') ||
      sanitizedContent.includes('Forwarded message') ||
      (sanitizedContent.includes('From:') && sanitizedContent.includes('Date:') &&
        sanitizedContent.includes('Subject:') && sanitizedContent.includes('To:'));

    // Special processing for forwarded email styling
    if (isForwardedEmail || isReplyOrForward) {
      console.log('Detected forwarded email or reply content, enhancing structure');
      // Default styling for header tables; blockquotes are handled further down.
      sanitizedContent = withDefaultStyle(
        sanitizedContent,
        'table',
        'margin: 10px 0; border-collapse: collapse; font-size: 13px; color: #333;'
      );
      sanitizedContent = withDefaultStyle(
        sanitizedContent,
        'td',
        'padding: 3px 5px; vertical-align: top;'
      );
    }

    // Fix common email client quirks without breaking cid: URLs
    sanitizedContent = sanitizedContent
      // Fix for Outlook WebVML content
      .replace(/<!--\[if\s+gte\s+mso/g, '<!--[if gte mso')
      // Fix for broken image paths starting with // (add https:)
      .replace(/src="\/\//g, 'src="https://')
      // Handle mixed content issues by converting http:// to https://
      .replace(/src="http:\/\//g, 'src="https://')
      // Fix email signature line breaks
      .replace(/--<br>/g, '<hr style="border-top: 1px solid #ccc; margin: 15px 0;">')
      .replace(/-- <br>/g, '<hr style="border-top: 1px solid #ccc; margin: 15px 0;">')
      // Remove excessive whitespace from the HTML string itself
      .replace(/>\s+</g, '> <');

    // Additional processing for quoted content in replies/forwards
    if (sanitizedContent.includes('blockquote')) {
      console.log('Enhancing blockquote styling');
      sanitizedContent = withDefaultStyle(
        sanitizedContent,
        'blockquote',
        'margin: 0; padding-left: 10px; border-left: 3px solid #ddd; color: #505050; background-color: #f9f9f9; padding: 10px;'
      );
    }

    return {
      sanitizedContent,
      hasImages: sanitizedContent.includes('<img'),
      hasExternalContent: sanitizedContent.includes('https://'),
      direction: detectTextDirection(sanitizedContent)
    };
  } catch (error) {
    console.error('Error processing HTML content:', error);
    // Post-processing is cosmetic; fall back to the sanitizer's own output.
    return {
      sanitizedContent: sanitized,
      hasImages: false,
      hasExternalContent: false,
      direction: 'ltr',
    };
  }
}
|
|
|
|
/**
 * Format plain text to HTML with proper line breaks and styling.
 *
 * The text is HTML-escaped first (`&`, `<`, `>`, `"`, `'`), so nothing in it
 * can be interpreted as markup. Newlines then become `<br>` and runs of two
 * or more become paragraph breaks. The result is wrapped in a styled
 * `div.email-content` whose `dir` comes from `detectTextDirection`.
 *
 * @param text - Plain-text body; an empty value yields `''`.
 * @returns An HTML string ready for insertion into the page.
 */
export function formatPlainTextToHtml(text: string): string {
  if (!text) return '';

  // Detect text direction
  const direction = detectTextDirection(text);

  // Escape HTML characters to prevent XSS. `&` goes first so the entities
  // produced by the later replacements are not escaped a second time.
  const escapedText = text
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');

  // Format plain text with proper line breaks and paragraphs
  const formattedText = escapedText
    .replace(/\r\n|\r|\n/g, '<br>') // Convert all newlines to <br>
    .replace(/((?:<br>){2,})/g, '</p><p>') // Convert multiple newlines to paragraphs
    .replace(/<br><\/p>/g, '</p>') // Fix any <br></p> combinations
    .replace(/<p><br>/g, '<p>'); // Fix any <p><br> combinations

  return `<div class="email-content" style="font-family: -apple-system, BlinkMacSystemFont, Menlo, Monaco, Consolas, 'Courier New', monospace; white-space: pre-wrap; line-height: 1.5; color: #333; padding: 15px; max-width: 100%; overflow-wrap: break-word;" dir="${direction}"><p>${formattedText}</p></div>`;
}
|