// Neah/lib/utils/text-direction.ts

/**
 * Text Direction Utilities
 *
 * Centralized utilities for handling text direction (RTL/LTR)
 * to ensure consistent behavior across the application.
 */

import { sanitizeHtml } from './dom-purify-config';
import { EmailContent } from '@/types/email';

/**
 * Decides whether a piece of text should be rendered right-to-left.
 *
 * The decision is "any hit wins": a single matching code point anywhere in the
 * input is enough to report 'rtl'. The code points checked are:
 * - U+0591–U+07FF (Hebrew, Arabic, Syriac, Arabic Supplement, Thaana, NKo)
 * - U+200F, U+202B, U+202E (right-to-left mark, embedding and override)
 * - U+FB1D–U+FDFD (Hebrew and Arabic presentation forms)
 * - U+FE70–U+FEFC (Arabic presentation forms B)
 *
 * @param text Text to inspect; empty, null and undefined are treated as LTR
 * @returns 'rtl' when at least one of the code points above occurs, else 'ltr'
 */
export function detectTextDirection(text: string | undefined | null): 'ltr' | 'rtl' {
  const rtlCodePoints = /[\u0591-\u07FF\u200F\u202B\u202E\uFB1D-\uFDFD\uFE70-\uFEFC]/;

  // Nothing to inspect means nothing can be RTL.
  if (text && rtlCodePoints.test(text)) {
    return 'rtl';
  }

  return 'ltr';
}

/**
 * Makes sure an HTML fragment carries a `dir` attribute.
 *
 * Behaviour, in order:
 * 1. Empty input yields an empty string.
 * 2. If the markup already contains `dir="rtl"` or `dir="ltr"` anywhere, it is
 *    returned untouched.
 * 3. If the markup starts with a `<div class="email-content…` wrapper, the
 *    detected direction is added to that opening tag.
 * 4. Otherwise the markup is wrapped in a new `email-content` div that carries
 *    the detected direction.
 *
 * @param htmlContent HTML fragment to tag with a direction
 * @param textContent Optional plain-text version; when non-empty it is used for
 *   direction detection instead of a tag-stripped copy of the HTML
 * @returns The HTML with a direction attribute in place
 */
export function applyTextDirection(htmlContent: string, textContent?: string): string {
  if (!htmlContent) {
    return '';
  }

  // Prefer the caller-supplied text; fall back to a crude tag strip of the
  // markup with the most common entities decoded.
  let analysisText = textContent;
  if (!analysisText) {
    const withoutTags = htmlContent.replace(/<[^>]*>/g, '');
    analysisText = withoutTags
      .replace(/&nbsp;/g, ' ')
      .replace(/&lt;/g, '<')
      .replace(/&gt;/g, '>')
      .replace(/&amp;/g, '&');
  }

  const direction = detectTextDirection(analysisText);

  // An explicit direction anywhere in the markup takes precedence.
  const explicitDirAttributes = ['dir="rtl"', 'dir="ltr"'];
  if (explicitDirAttributes.some((attribute) => htmlContent.includes(attribute))) {
    return htmlContent;
  }

  // No existing wrapper: add one that carries the direction.
  if (!htmlContent.startsWith('<div class="email-content')) {
    return `<div class="email-content" dir="${direction}">${htmlContent}</div>`;
  }

  // Existing wrapper: extend its opening tag, keeping any extra class names.
  const wrapperOpening = /<div class="email-content([^"]*)"/;
  return htmlContent.replace(
    wrapperOpening,
    `<div class="email-content$1" dir="${direction}"`
  );
}

/**
 * Heuristic used to decide whether a string of unknown origin is HTML:
 * true when it contains an `<html`, `<body` or `<div` opening.
 */
function looksLikeHtmlMarkup(value: string): boolean {
  return ['<html', '<body', '<div'].some((marker) => value.includes(marker));
}

/**
 * Produces a plain-text rendering of an HTML string without executing it.
 *
 * When `DOMParser` is available the markup is parsed into a separate, inert
 * document and the text of its body is returned (possibly empty). Email HTML is
 * untrusted, so it must not be assigned to `innerHTML` of an element owned by
 * the live page: handlers such as `<img onerror>` can run even while the
 * element is detached.
 *
 * Without `DOMParser` (for example on the server), or if parsing throws, tags
 * are stripped with a regex, the four most common entities are decoded,
 * whitespace is collapsed, and `'[Email content]'` is returned if nothing is
 * left.
 */
function htmlToPlainText(html: string): string {
  if (typeof DOMParser !== 'undefined') {
    try {
      const parsed = new DOMParser().parseFromString(html, 'text/html');
      return parsed.body?.textContent ?? '';
    } catch {
      // Fall through to the regex-based extraction below.
    }
  }

  const stripped = html
    .replace(/<[^>]*>/g, ' ')
    .replace(/&nbsp;/g, ' ')
    .replace(/&lt;/g, '<')
    .replace(/&gt;/g, '>')
    // `&amp;` is decoded last so that `&amp;lt;` becomes `&lt;`, not `<`.
    .replace(/&amp;/g, '&')
    .replace(/\s+/g, ' ')
    .trim();

  return stripped || '[Email content]';
}

/**
 * Pulls the text and HTML bodies out of an email object whose shape is not
 * known in advance.
 *
 * Lookup order:
 * 1. `email.content` as an object: `content.text` / `content.html`; if both are
 *    empty, `content.body` (string or `{ html, text }`), then a string
 *    `content.data`; if still empty and the object serialises to something
 *    other than `{}`, a placeholder text is used.
 * 2. `email.content` as a string: classified as HTML or text by sniffing for
 *    `<html`, `<body` or `<div`.
 * 3. Anything else: `email.html` / `email.text`, then `email.body.html`,
 *    `email.bodyHtml`, `email.htmlBody` and `email.body.text`,
 *    `email.bodyText`, `email.plainText`.
 *
 * If HTML was found but no text, the text is derived from the HTML without
 * executing it. A summary (presence flags and lengths only) is written to
 * `console.log`.
 *
 * @param email Email-like value of arbitrary shape; falsy values are accepted
 * @returns The extracted `text` and `html`, each `''` when nothing was found
 */
export function extractEmailContent(email: any): { text: string; html: string } {
  let textContent = '';
  let htmlContent = '';

  if (email) {
    const content = email.content;

    if (typeof content === 'object' && content) {
      // Standard format with a content object.
      textContent = content.text || '';
      htmlContent = content.html || '';

      if (!textContent && !htmlContent) {
        // Some formats nest the payload under `body`.
        const body = content.body;
        if (body) {
          if (typeof body === 'string') {
            if (looksLikeHtmlMarkup(body)) {
              htmlContent = body;
            } else {
              textContent = body;
            }
          } else if (typeof body === 'object') {
            htmlContent = body.html || '';
            textContent = body.text || '';
          }
        }

        // Others use a string `data` property.
        if (!textContent && !htmlContent && content.data) {
          if (typeof content.data === 'string') {
            if (looksLikeHtmlMarkup(content.data)) {
              htmlContent = content.data;
            } else {
              textContent = content.data;
            }
          }
        }

        // Last resort: a non-empty object we do not understand gets a
        // placeholder so the caller has something to display.
        if (!textContent && !htmlContent) {
          try {
            const contentStr = JSON.stringify(content);
            if (contentStr && contentStr !== '{}') {
              textContent = `[Complex email content - please view in original format]`;
            }
          } catch (e) {
            console.error('Error extracting content from complex object:', e);
          }
        }
      }
    } else if (typeof content === 'string') {
      if (looksLikeHtmlMarkup(content)) {
        htmlContent = content;
      } else {
        textContent = content;
      }
    } else {
      // No usable `content`: try top-level properties.
      htmlContent = email.html || '';
      textContent = email.text || '';

      if (!htmlContent && !textContent) {
        htmlContent = email.body?.html || email.bodyHtml || email.htmlBody || '';
        textContent = email.body?.text || email.bodyText || email.plainText || '';
      }
    }
  }

  // Derive text from the HTML when only HTML was found.
  if (!textContent && htmlContent) {
    textContent = htmlToPlainText(htmlContent);
  }

  // Debug summary to help troubleshoot extraction; logs sizes, not content.
  console.log('Extracted email content:', {
    hasHtml: !!htmlContent,
    htmlLength: htmlContent.length,
    hasText: !!textContent,
    textLength: textContent.length
  });

  return { text: textContent, html: htmlContent };
}

/**
 * Escapes the characters that have special meaning in HTML so that plain text
 * can be embedded in markup and rendered literally.
 */
function escapeTextForHtml(text: string): string {
  return text
    // `&` first, otherwise the entities produced below would be re-escaped.
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}

/**
 * Renders plain text as a direction-tagged `<div>`: the text is HTML-escaped
 * and newlines become `<br>`.
 */
function renderPlainTextAsHtml(text: string, direction: 'ltr' | 'rtl'): string {
  return `<div dir="${direction}">${escapeTextForHtml(text).replace(/\n/g, '<br>')}</div>`;
}

/**
 * Turns raw email content into display-ready HTML plus its plain text and
 * text direction.
 *
 * Steps:
 * 1. Split the input into text and HTML. A string counts as HTML when it
 *    contains `<html`, `<body`, `<div` or `<p>`; an object contributes its
 *    `text` / `html`, falling back to a `body` property (string or
 *    `{ html, text }`) and finally to a placeholder text.
 * 2. If only HTML is available, derive text from it without executing it.
 * 3. Detect the direction from the text with `detectTextDirection`.
 * 4. HTML is passed through `sanitizeHtml` and then `applyTextDirection`.
 *    Plain text is HTML-escaped and wrapped in a `<div dir="…">`; it is never
 *    interpreted as markup, because it does not pass through the sanitizer.
 *
 * Diagnostic information is written to `console.log` / `console.error`.
 *
 * @param content Raw string, an `EmailContent` object, or null/undefined
 * @returns `html` for display, `text` as plain text, and the detected
 *   `direction`; all empty with direction 'ltr' when `content` is falsy
 */
export function processContentWithDirection(content: string | EmailContent | null | undefined): {
  html: string;
  text: string;
  direction: 'ltr' | 'rtl';
} {
  // Default result with fallbacks
  const result = {
    html: '',
    text: '',
    direction: 'ltr' as const
  };

  // Handle null/undefined cases
  if (!content) return result;

  // Extract text and HTML content based on input type
  let textContent = '';
  let htmlContent = '';

  if (typeof content === 'string') {
    // Simple string content (check if it's HTML or plain text)
    if (content.includes('<') && (
      content.includes('<html') ||
      content.includes('<body') ||
      content.includes('<div') ||
      content.includes('<p>')
    )) {
      htmlContent = content;
    } else {
      textContent = content;
    }
  } else {
    // EmailContent object
    textContent = content.text || '';
    htmlContent = content.html || '';
  }

  // Handle complex email content that might not be properly detected
  if (!textContent && !htmlContent && typeof content === 'object') {
    console.log('Processing complex content object:', content);

    // Try to extract content from complex object structure
    try {
      // Type assertion to 'any' since we need to handle various email formats
      const contentAny = content as any;

      if (contentAny.body) {
        if (typeof contentAny.body === 'string') {
          // Detect if body is HTML or text
          if (contentAny.body.includes('<') && (
            contentAny.body.includes('<html') ||
            contentAny.body.includes('<body') ||
            contentAny.body.includes('<div')
          )) {
            htmlContent = contentAny.body;
          } else {
            textContent = contentAny.body;
          }
        } else if (typeof contentAny.body === 'object' && contentAny.body) {
          // Extract from nested body object
          htmlContent = contentAny.body.html || '';
          textContent = contentAny.body.text || '';
        }
      }

      // Nothing recognisable: log a truncated dump and use a placeholder
      if (!textContent && !htmlContent) {
        try {
          const contentStr = JSON.stringify(content);
          console.log('Complex content structure:', contentStr.slice(0, 300) + '...');
          textContent = '[Complex email content]';
        } catch (e) {
          console.error('Failed to stringify complex content:', e);
        }
      }
    } catch (error) {
      console.error('Error processing complex content:', error);
    }
  }

  // Always ensure we have text for direction detection
  if (!textContent && htmlContent) {
    try {
      if (typeof DOMParser !== 'undefined') {
        // This runs BEFORE sanitization, so the markup is still untrusted.
        // Parse it into a separate inert document rather than assigning it to
        // innerHTML of an element owned by the live page, where handlers such
        // as <img onerror> could execute.
        const parsed = new DOMParser().parseFromString(htmlContent, 'text/html');
        textContent = parsed.body?.textContent ?? '';
      } else {
        // Simple regex fallback for non-browser environments
        textContent = htmlContent.replace(/<[^>]*>/g, ' ')
                                .replace(/&nbsp;/g, ' ')
                                .replace(/&lt;/g, '<')
                                .replace(/&gt;/g, '>')
                                .replace(/&amp;/g, '&')
                                .replace(/\s+/g, ' ')
                                .trim();
      }
    } catch (e) {
      console.error('Error extracting text from HTML:', e);
      textContent = 'Failed to extract text content';
    }
  }

  // Detect direction from text
  const direction = detectTextDirection(textContent);

  if (htmlContent) {
    try {
      // Sanitize HTML first using the centralized function
      htmlContent = sanitizeHtml(htmlContent);

      // Then apply direction
      htmlContent = applyTextDirection(htmlContent, textContent);
    } catch (error) {
      console.error('Error sanitizing HTML content:', error);
      // Never fall back to the unsanitized markup: rebuild the HTML from the
      // (escaped) text, or from a fixed message when there is no text.
      htmlContent = textContent
        ? renderPlainTextAsHtml(textContent, direction)
        : `<div dir="${direction}">Could not process HTML content</div>`;
    }
  } else if (textContent) {
    // Plain text only: escape it so characters like `<` and `&` are shown
    // literally (e.g. "John <john@example.com>") and cannot inject markup.
    htmlContent = renderPlainTextAsHtml(textContent, direction);
  }

  // Add debug logging for troubleshooting
  console.log('Processed content:', {
    direction,
    htmlLength: htmlContent.length,
    textLength: textContent.length,
    hasHtml: !!htmlContent,
    hasText: !!textContent
  });

  // Return processed content
  return {
    text: textContent,
    html: htmlContent,
    direction
  };
}