Neah/lib/utils/text-direction.ts
2025-05-01 11:37:05 +02:00

341 lines
12 KiB
TypeScript

/**
* Text Direction Utilities
*
* Centralized utilities for handling text direction (RTL/LTR)
* to ensure consistent behavior across the application.
*/
import { sanitizeHtml } from './dom-purify-config';
import { EmailContent } from '@/types/email';
/**
 * Decides whether a piece of text should be rendered left-to-right or right-to-left.
 *
 * The decision is binary and presence-based: a single matching RTL character anywhere
 * in the text is enough to classify the whole text as RTL. No ratio or "first strong
 * character" logic is applied.
 *
 * @param text Text to inspect; `null`, `undefined` and the empty string are treated as LTR
 * @returns 'rtl' when at least one RTL character is found, otherwise 'ltr'
 */
export function detectTextDirection(text: string | undefined | null): 'ltr' | 'rtl' {
  if (!text) {
    return 'ltr';
  }

  // Character class, in order of appearance:
  // - U+0591–U+07FF: Hebrew, Arabic, Syriac, Arabic Supplement, Thaana, NKo
  // - U+200F, U+202B, U+202E: right-to-left mark, embedding and override controls
  // - U+FB1D–U+FDFD: Hebrew and Arabic presentation forms (A)
  // - U+FE70–U+FEFC: Arabic presentation forms (B)
  const rtlCharacters = /[\u0591-\u07FF\u200F\u202B\u202E\uFB1D-\uFDFD\uFE70-\uFEFC]/;

  if (rtlCharacters.test(text)) {
    return 'rtl';
  }
  return 'ltr';
}
/**
 * Ensures a piece of HTML carries a `dir` attribute chosen from its content.
 *
 * Outcomes, in order of precedence:
 * - empty input yields an empty string;
 * - markup that already contains `dir="rtl"` or `dir="ltr"` anywhere is returned untouched;
 * - markup that starts with a `<div class="email-content…` wrapper gets the attribute
 *   added to that opening tag;
 * - anything else is wrapped in a new `<div class="email-content" dir="…">`.
 *
 * @param htmlContent HTML to annotate with a direction
 * @param textContent Optional plain-text version; when given it is used for the
 *   direction analysis instead of text derived from the HTML
 * @returns The HTML with a direction attribute (or the unchanged input, see above)
 */
export function applyTextDirection(htmlContent: string, textContent?: string): string {
  if (!htmlContent) {
    return '';
  }

  // Pick the text sample to analyse: the caller's plain text when available,
  // otherwise the markup with tags removed and a few common entities decoded.
  let sample: string;
  if (textContent) {
    sample = textContent;
  } else {
    const withoutTags = htmlContent.replace(/<[^>]*>/g, '');
    sample = withoutTags
      .replace(/&nbsp;/g, ' ')
      .replace(/&lt;/g, '<')
      .replace(/&gt;/g, '>')
      .replace(/&amp;/g, '&');
  }
  const direction = detectTextDirection(sample);

  // An explicit direction in the markup wins over the detected one.
  const explicitMarkers = ['dir="rtl"', 'dir="ltr"'];
  if (explicitMarkers.some((marker) => htmlContent.includes(marker))) {
    return htmlContent;
  }

  // No existing wrapper: add one that carries the direction.
  if (!htmlContent.startsWith('<div class="email-content')) {
    return `<div class="email-content" dir="${direction}">${htmlContent}</div>`;
  }

  // Existing wrapper: inject the attribute right after its class attribute.
  const wrapperOpening = /<div class="email-content([^"]*)"/;
  const wrapperWithDir = `<div class="email-content$1" dir="${direction}"`;
  return htmlContent.replace(wrapperOpening, wrapperWithDir);
}
/** Heuristic: the string contains `<` and at least one of `<html`, `<body` or `<div`. */
function looksLikeHtmlMarkup(value: string): boolean {
  return (
    value.includes('<') &&
    (value.includes('<html') || value.includes('<body') || value.includes('<div'))
  );
}

/** Files a string of unknown kind under `html` or `text` using the markup heuristic. */
function classifyEmailString(value: string): { text: string; html: string } {
  return looksLikeHtmlMarkup(value) ? { text: '', html: value } : { text: value, html: '' };
}

/**
 * Derives plain text from email HTML that has NOT been sanitized yet.
 *
 * The markup is parsed with `DOMParser` rather than assigned to `innerHTML` of an
 * element created by the live document: the latter can trigger inline handlers such
 * as `<img onerror>` even on a detached element, whereas the document produced by
 * `DOMParser` is separate from the page. Where `DOMParser` is unavailable (or fails),
 * tags are stripped with a regex instead.
 */
function emailHtmlToPlainText(html: string): string {
  if (typeof DOMParser !== 'undefined') {
    try {
      const parsed = new DOMParser().parseFromString(html, 'text/html');
      return parsed.documentElement.textContent ?? '';
    } catch (e) {
      // Deliberate best effort: fall through to the regex-based extraction below.
    }
  }
  return html.replace(/<[^>]*>/g, ' ')
    .replace(/\s+/g, ' ')
    .trim() || '[Email content]';
}

/**
 * Extracts the plain-text and HTML bodies from an email-like object.
 *
 * Locations are probed in this order; the first one that yields content wins:
 * 1. `email.content` as an object: `text` / `html`, then `body` (string or
 *    `{ html, text }`), then a string `data`; if the object is non-empty but none of
 *    these yield content, a fixed placeholder text is returned.
 * 2. `email.content` as a string, classified as HTML or text by a simple heuristic.
 * 3. Top-level `html` / `text`, then `body.html` / `bodyHtml` / `htmlBody` and
 *    `body.text` / `bodyText` / `plainText`.
 *
 * When only HTML was found, a text version is derived from it without inserting the
 * unsanitized markup into the live document.
 *
 * @param email Email-like object in any of the shapes above; falsy values are accepted
 * @returns The extracted `text` and `html`; both are empty strings when nothing is found.
 *   The returned HTML is not sanitized by this function.
 */
export function extractEmailContent(email: any): { text: string; html: string } {
  let textContent = '';
  let htmlContent = '';

  if (email) {
    const content = email.content;

    if (typeof content === 'object' && content) {
      // Standard format with a content object
      textContent = content.text || '';
      htmlContent = content.html || '';

      if (!textContent && !htmlContent) {
        // Some formats nest the content one level deeper, under `body`
        const body = content.body;
        if (body) {
          if (typeof body === 'string') {
            ({ text: textContent, html: htmlContent } = classifyEmailString(body));
          } else if (typeof body === 'object') {
            htmlContent = body.html || '';
            textContent = body.text || '';
          }
        }

        // Some email services put the payload in a string `data` property
        if (!textContent && !htmlContent && content.data && typeof content.data === 'string') {
          ({ text: textContent, html: htmlContent } = classifyEmailString(content.data));
        }

        // Last resort: a non-empty object in an unknown shape gets a placeholder
        if (!textContent && !htmlContent) {
          try {
            const contentStr = JSON.stringify(content);
            if (contentStr && contentStr !== '{}') {
              textContent = `[Complex email content - please view in original format]`;
            }
          } catch (e) {
            // JSON.stringify throws on circular structures, for example
            console.error('Error extracting content from complex object:', e);
          }
        }
      }
    } else if (typeof content === 'string') {
      ({ text: textContent, html: htmlContent } = classifyEmailString(content));
    } else {
      // No usable `content`: look at the common top-level properties
      htmlContent = email.html || '';
      textContent = email.text || '';

      if (!htmlContent && !textContent) {
        // Less common property names used by some email clients
        htmlContent = email.body?.html || email.bodyHtml || email.htmlBody || '';
        textContent = email.body?.text || email.bodyText || email.plainText || '';
      }
    }
  }

  // Ensure there is a text version whenever HTML is available
  if (!textContent && htmlContent) {
    textContent = emailHtmlToPlainText(htmlContent);
  }

  // Debug logging (sizes only, no content) to help troubleshoot extraction
  console.log('Extracted email content:', {
    hasHtml: !!htmlContent,
    htmlLength: htmlContent.length,
    hasText: !!textContent,
    textLength: textContent.length
  });

  return { text: textContent, html: htmlContent };
}
/** Escapes the characters that are significant in HTML so plain text can be embedded as text. */
function escapePlainTextForHtml(text: string): string {
  return text
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}

/** Renders plain text as a direction-aware HTML fragment: escaped, newlines turned into `<br>`. */
function plainTextToDirectionalHtml(text: string, direction: 'ltr' | 'rtl'): string {
  return `<div dir="${direction}">${escapePlainTextForHtml(text).replace(/\n/g, '<br>')}</div>`;
}

/**
 * Derives text from HTML that has NOT been sanitized yet.
 *
 * Uses `DOMParser` instead of assigning to `innerHTML` of an element created by the
 * live document, because the latter can trigger inline handlers such as `<img onerror>`
 * even on a detached element. Without `DOMParser` (non-browser environments) the tags
 * are stripped with a regex and a few common entities are decoded.
 */
function extractTextFromUntrustedHtml(html: string): string {
  if (typeof DOMParser !== 'undefined') {
    const parsed = new DOMParser().parseFromString(html, 'text/html');
    return parsed.documentElement.textContent || '';
  }
  return html.replace(/<[^>]*>/g, ' ')
    .replace(/&nbsp;/g, ' ')
    .replace(/&lt;/g, '<')
    .replace(/&gt;/g, '>')
    .replace(/&amp;/g, '&')
    .replace(/\s+/g, ' ')
    .trim();
}

/**
 * Processes email content in one step:
 * - splits the input into text and HTML (a string is treated as HTML only when it
 *   contains `<html`, `<body`, `<div` or `<p>`; anything else is plain text),
 * - detects the text direction from the text,
 * - sanitizes HTML with `sanitizeHtml` and applies the direction to it,
 * - or, when there is no HTML, renders the plain text as an HTML fragment.
 *
 * Plain text is HTML-escaped before it is placed into the generated fragment, so
 * characters such as `<` and `&` (e.g. `John <john@example.com>`) are displayed
 * literally and can never be interpreted as markup. The returned `text` is not escaped.
 *
 * @param content A string, an `EmailContent` object, or `null` / `undefined`
 * @returns `html` ready for display, the plain `text`, and the detected `direction`;
 *   empty strings and 'ltr' when there is no content
 */
export function processContentWithDirection(content: string | EmailContent | null | undefined): {
  html: string;
  text: string;
  direction: 'ltr' | 'rtl';
} {
  // Handle null/undefined/empty input
  if (!content) {
    return { html: '', text: '', direction: 'ltr' };
  }

  let textContent = '';
  let htmlContent = '';

  if (typeof content === 'string') {
    // Simple string content: decide between HTML and plain text
    const looksLikeHtml =
      content.includes('<') &&
      (content.includes('<html') ||
        content.includes('<body') ||
        content.includes('<div') ||
        content.includes('<p>'));
    if (looksLikeHtml) {
      htmlContent = content;
    } else {
      textContent = content;
    }
  } else {
    // EmailContent object
    textContent = content.text || '';
    htmlContent = content.html || '';
  }

  // Objects that carry neither `text` nor `html`: look for a nested `body`
  if (!textContent && !htmlContent && typeof content === 'object') {
    console.log('Processing complex content object:', content);
    try {
      // Type assertion to 'any' since the shape is unknown at this point
      const contentAny = content as any;
      const body = contentAny.body;
      if (body) {
        if (typeof body === 'string') {
          const bodyLooksLikeHtml =
            body.includes('<') &&
            (body.includes('<html') || body.includes('<body') || body.includes('<div'));
          if (bodyLooksLikeHtml) {
            htmlContent = body;
          } else {
            textContent = body;
          }
        } else if (typeof body === 'object') {
          htmlContent = body.html || '';
          textContent = body.text || '';
        }
      }

      // Still nothing: log a preview of the structure and use a placeholder
      if (!textContent && !htmlContent) {
        try {
          const contentStr = JSON.stringify(content);
          console.log('Complex content structure:', contentStr.slice(0, 300) + '...');
          textContent = '[Complex email content]';
        } catch (e) {
          console.error('Failed to stringify complex content:', e);
        }
      }
    } catch (error) {
      console.error('Error processing complex content:', error);
    }
  }

  // Always ensure we have text for direction detection
  if (!textContent && htmlContent) {
    try {
      // htmlContent is still unsanitized here, so it must not touch the live DOM
      textContent = extractTextFromUntrustedHtml(htmlContent);
    } catch (e) {
      console.error('Error extracting text from HTML:', e);
      textContent = 'Failed to extract text content';
    }
  }

  const direction = detectTextDirection(textContent);

  if (htmlContent) {
    try {
      // Sanitize first, then apply the direction to the sanitized markup
      htmlContent = sanitizeHtml(htmlContent);
      htmlContent = applyTextDirection(htmlContent, textContent);
    } catch (error) {
      console.error('Error sanitizing HTML content:', error);
      // Fallback: never emit the unsanitized markup; show the (escaped) text instead
      htmlContent = plainTextToDirectionalHtml(
        textContent || 'Could not process HTML content',
        direction
      );
    }
  } else if (textContent) {
    // Convert plain text to HTML with proper direction
    htmlContent = plainTextToDirectionalHtml(textContent, direction);
  }

  // Debug logging (sizes only) for troubleshooting
  console.log('Processed content:', {
    direction,
    htmlLength: htmlContent.length,
    textLength: textContent.length,
    hasHtml: !!htmlContent,
    hasText: !!textContent
  });

  return {
    text: textContent,
    html: htmlContent,
    direction
  };
}