// Neah/lib/utils/email-mime-decoder.ts

/**
 * Email MIME Decoder
 *
 * This module provides functions to decode MIME-encoded email content
 * for proper display in a frontend application.
 */

/**
 * Decode a transfer-encoded string by dispatching on its encoding name.
 *
 * 'quoted-printable' and 'base64' are decoded (the encoding name is compared
 * case-insensitively); any other encoding returns the text untouched. If the
 * selected decoder throws, the error is logged and the input is returned as-is.
 * @param {string} text - The encoded text
 * @param {string} encoding - The encoding type ('quoted-printable', 'base64', etc)
 * @param {string} charset - The character set (utf-8, iso-8859-1, etc); a falsy value is treated as 'utf-8'
 * @returns {string} - The decoded text ('' for falsy input)
 */
export function decodeMIME(text: string, encoding?: string, charset = 'utf-8'): string {
  if (!text) {
    return '';
  }

  // Work on lower-cased copies so the comparisons below are case-insensitive
  const transferEncoding = (encoding || '').toLowerCase();
  const normalizedCharset = (charset || 'utf-8').toLowerCase();

  try {
    switch (transferEncoding) {
      case 'quoted-printable':
        return decodeQuotedPrintable(text, normalizedCharset);
      case 'base64':
        return decodeBase64(text, normalizedCharset);
      default:
        // Plain text or an encoding this module does not handle
        return text;
    }
  } catch (error) {
    console.error('Error decoding MIME:', error);
    // Best effort: hand back the undecoded input
    return text;
  }
}

/**
 * Decode a quoted-printable encoded string.
 *
 * Soft line breaks are removed, every `=XX` escape becomes one byte, and the
 * resulting bytes are interpreted in `charset` (UTF-8 included, so multi-byte
 * sequences such as `=C3=A9` yield a single character).
 *
 * Fallbacks, all of which return the byte-per-character string:
 *  - `TextDecoder` is not available;
 *  - the text already contains characters above U+00FF (it cannot be a byte
 *    string, so it is left alone rather than truncated);
 *  - the charset is UTF-8 but the bytes are not valid UTF-8 (e.g. Latin-1
 *    content with a missing or wrong charset label);
 *  - the charset label is not supported (a warning is logged).
 * @param {string} text - The quoted-printable encoded text
 * @param {string} charset - The character set of the encoded bytes; empty is treated as 'utf-8'
 * @returns {string} - The decoded text
 */
export function decodeQuotedPrintable(text: string, charset: string): string {
  // Remove soft line breaks (=\r\n or =\n) so escapes split across lines rejoin
  const unfolded = text.replace(/=(?:\r\n|\n)/g, '');

  // Turn each =XX escape into one byte-valued character
  const binary = unfolded.replace(/=([0-9A-F]{2})/gi, (_match, hex: string) =>
    String.fromCharCode(parseInt(hex, 16))
  );

  if (typeof TextDecoder === 'undefined' || /[^\u0000-\u00ff]/.test(binary)) {
    return binary;
  }

  const bytes = Uint8Array.from(binary, (ch) => ch.charCodeAt(0));
  const label = (charset || 'utf-8').trim().toLowerCase();
  const isUtf8 = label === 'utf-8' || label === 'utf8';

  try {
    // Strict mode for UTF-8 lets invalid input be detected and handled by the
    // fallback below instead of being replaced with U+FFFD.
    return new TextDecoder(label, { fatal: isUtf8 }).decode(bytes);
  } catch (e) {
    // For UTF-8 a failure only means "bytes are not UTF-8", which is an
    // expected case; anything else (unsupported label) is worth a warning.
    if (!isUtf8) {
      console.warn('TextDecoder error:', e);
    }
    return binary;
  }
}

/**
 * Decode a base64 encoded string.
 *
 * Whitespace is stripped, the payload is decoded with `atob`, and the bytes
 * are interpreted in `charset`. UTF-8 is decoded strictly: when the bytes are
 * not valid UTF-8 (binary payloads such as images, or mislabelled text) the
 * byte-per-character string produced by `atob` is returned instead, so
 * binary content is not corrupted.
 *
 * If `TextDecoder` is unavailable the byte-per-character string is returned.
 * If the base64 is malformed or the charset label is unsupported, the error
 * is logged and the original `text` is returned.
 * @param {string} text - The base64 encoded text
 * @param {string} charset - The character set of the encoded bytes; empty is treated as 'utf-8'
 * @returns {string} - The decoded text
 */
export function decodeBase64(text: string, charset: string): string {
  // Base64 bodies are line-wrapped; atob does not accept every whitespace form
  const cleanText = text.replace(/\s/g, '');

  try {
    const binary = atob(cleanText);
    if (typeof TextDecoder === 'undefined') {
      return binary;
    }

    const bytes = Uint8Array.from(binary, (ch) => ch.charCodeAt(0));
    const label = (charset || 'utf-8').trim().toLowerCase();

    if (label === 'utf-8' || label === 'utf8') {
      try {
        return new TextDecoder('utf-8', { fatal: true }).decode(bytes);
      } catch {
        // Not valid UTF-8: keep the raw byte string rather than emitting U+FFFD
        return binary;
      }
    }

    return new TextDecoder(label).decode(bytes);
  } catch (e) {
    console.error('Base64 decoding error:', e);
    return text;
  }
}

/**
 * Parse email headers to extract content type, encoding and charset.
 *
 * Only the `Content-Type` and `Content-Transfer-Encoding` header fields are
 * read. Each is matched at the start of a line, and a folded `Content-Type`
 * value (continuation lines starting with a space or tab) is unfolded before
 * its parameters are inspected. The `charset` parameter is found wherever it
 * appears in the parameter list. All returned values are lower-cased.
 *
 * Defaults when a field is absent: 'text/plain', 'quoted-printable', 'utf-8'.
 * @param {string} headers - The raw email headers
 * @returns {Object} - Object containing content type, encoding and charset
 */
export function parseEmailHeaders(headers: string): {
  contentType: string;
  encoding: string;
  charset: string;
} {
  const result = {
    contentType: 'text/plain',
    // NOTE(review): RFC 2045 specifies '7bit' when the header is absent; the
    // existing 'quoted-printable' default is kept to preserve behavior.
    encoding: 'quoted-printable',
    charset: 'utf-8'
  };

  // Capture the header value on its own line plus any folded continuation lines
  const contentTypeMatch = headers.match(
    /^[ \t]*Content-Type:[ \t]*([^\r\n]*(?:\r?\n[ \t]+[^\r\n]*)*)/im
  );
  if (contentTypeMatch) {
    const value = (contentTypeMatch[1] ?? '').replace(/\r?\n[ \t]+/g, ' ');

    // The media type is everything before the first parameter separator
    const mediaType = (value.split(';')[0] ?? '').trim().toLowerCase();
    if (mediaType) {
      result.contentType = mediaType;
    }

    // charset may be any parameter, quoted or not
    const charsetMatch = value.match(/;\s*charset\s*=\s*["']?([^"';\s]+)/i);
    if (charsetMatch?.[1]) {
      result.charset = charsetMatch[1].toLowerCase();
    }
  }

  // The value must sit on the same line as the field name
  const encodingMatch = headers.match(
    /^[ \t]*Content-Transfer-Encoding:[ \t]*([^\s;]+)/im
  );
  if (encodingMatch?.[1]) {
    result.encoding = encodingMatch[1].toLowerCase();
  }

  return result;
}

/**
 * Decode a single-part email (or one MIME part) based on its headers.
 *
 * The input is split once at the first blank line: everything before it is
 * the header section, everything after it is the body, kept verbatim
 * (later blank lines and their original line endings are not rewritten).
 * When no blank line exists the whole input is treated as headers and the
 * body is empty.
 * @param {string} emailRaw - The raw email content (headers + body)
 * @returns {Object} - The parsed content type, charset and transfer encoding,
 *   the decoded body, and the raw header section
 */
export function decodeEmail(emailRaw: string): {
  contentType: string;
  charset: string;
  encoding: string;
  decodedBody: string;
  headers: string;
} {
  // Locate the header/body separator (first empty line)
  const separator = /\r?\n\r?\n/.exec(emailRaw);
  const headers = separator ? emailRaw.slice(0, separator.index) : emailRaw;
  const body = separator
    ? emailRaw.slice(separator.index + separator[0].length)
    : '';

  // Parse headers
  const { contentType, encoding, charset } = parseEmailHeaders(headers);

  // Decode the body according to its transfer encoding and charset
  const decodedBody = decodeMIME(body, encoding, charset);

  return {
    contentType,
    charset,
    encoding,
    decodedBody,
    headers
  };
}

/**
 * Result of parsing an email: its plain-text body, its HTML body and any
 * attachment parts that were found.
 */
interface EmailContent {
  /** Decoded plain-text body; initialised to '' by the parsers in this file */
  text: string;
  /** Decoded HTML body; initialised to '' by the parsers in this file */
  html: string;
  /** Attachment parts, in the order they were encountered */
  attachments: {
    /** Content type of the attachment part */
    contentType: string;
    /** Decoded body of the attachment part */
    content: string;
    /** File name of the attachment, when one is known */
    filename?: string;
  }[];
}

/**
 * Process a multipart email to extract text, HTML and attachment parts.
 *
 * The raw content is split on the boundary delimiter lines and each piece is
 * decoded on its own. 'text/plain' and 'text/html' pieces fill `text` and
 * `html` (a later piece of the same type replaces an earlier one);
 * 'image/*' and 'application/*' pieces are collected as attachments.
 * A piece that is itself multipart with a different boundary is processed
 * recursively and its results are merged in.
 * @param {string} emailRaw - The raw email content
 * @param {string} boundary - The multipart boundary (without the leading '--')
 * @returns {Object} - Object containing text, html and attachments
 */
export function processMultipartEmail(emailRaw: string, boundary: string): EmailContent {
  const result: EmailContent = {
    text: '',
    html: '',
    attachments: []
  };

  // Boundaries routinely contain regex metacharacters ('.', '+', '=', '?',
  // '(' ...); escape them so the boundary is matched literally.
  const escapedBoundary = boundary.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const boundaryRegex = new RegExp(
    `--${escapedBoundary}\\r?\\n|--${escapedBoundary}--\\r?\\n?`,
    'g'
  );
  const parts = emailRaw.split(boundaryRegex).filter(part => part.trim());

  for (const part of parts) {
    const decoded = decodeEmail(part);

    if (decoded.contentType.startsWith('multipart/')) {
      // Nested container (e.g. multipart/alternative inside multipart/mixed).
      // The piece holding the message's own headers also lands here, with the
      // boundary currently being processed; skipping an identical boundary
      // keeps the recursion from looping on it.
      const nestedMatch = decoded.headers.match(/boundary\s*=\s*["']?([^"';\r\n]+)/i);
      const nestedBoundary = nestedMatch?.[1]?.trim();
      if (nestedBoundary && nestedBoundary !== boundary.trim()) {
        const nested = processMultipartEmail(part, nestedBoundary);
        if (nested.text) result.text = nested.text;
        if (nested.html) result.html = nested.html;
        result.attachments.push(...nested.attachments);
      }
      continue;
    }

    if (decoded.contentType === 'text/plain') {
      result.text = decoded.decodedBody;
    } else if (decoded.contentType === 'text/html') {
      result.html = decoded.decodedBody;
    } else if (
      decoded.contentType.startsWith('image/') ||
      decoded.contentType.startsWith('application/')
    ) {
      // Use the filename parameter from the part headers when present
      const filenameMatch = decoded.headers.match(/filename=["']?([^"';\r\n]+)/i);
      const filename = filenameMatch ? filenameMatch[1] : 'attachment';

      result.attachments.push({
        contentType: decoded.contentType,
        content: decoded.decodedBody,
        filename
      });
    }
  }

  return result;
}

/**
 * Extract the boundary parameter from a Content-Type header value.
 *
 * A quoted boundary is returned exactly as written between its quotes (it
 * may contain spaces). An unquoted boundary ends at the first whitespace,
 * semicolon or quote, so trailing spaces and line breaks are never part of
 * the result.
 * @param {string} contentType - The Content-Type header value
 * @returns {string|null} - The boundary string or null if not found
 */
export function extractBoundary(contentType: string): string | null {
  // Alternatives: "double quoted" | 'single quoted' | bare token (tolerating
  // a stray opening quote with no closing one)
  const boundaryMatch = contentType.match(
    /boundary\s*=\s*(?:"([^"]+)"|'([^']+)'|["']?([^\s;"']+))/i
  );
  if (!boundaryMatch) {
    return null;
  }

  const value = (boundaryMatch[1] ?? boundaryMatch[2] ?? boundaryMatch[3] ?? '').trim();
  return value || null;
}

/**
 * Parse an email from its raw content.
 *
 * The header section (everything before the first blank line) is inspected
 * for a Content-Type header. The header value is read together with its
 * folded continuation lines, so a `boundary=` parameter placed on the line
 * after `multipart/...;` is still found. Multipart messages with a boundary
 * are handed to `processMultipartEmail`; anything else is decoded as a
 * single part and stored as `html` when its content type contains
 * 'text/html', otherwise as `text`.
 *
 * On any error the failure is logged and the raw input is returned as `text`.
 * @param {string} rawEmail - The raw email content
 * @returns {Object} - The parsed email with text, html and attachments
 */
export function parseRawEmail(rawEmail: string): EmailContent {
  // Default result structure
  const result: EmailContent = {
    text: '',
    html: '',
    attachments: []
  };

  try {
    // Header section ends at the first blank line (or spans the whole input)
    const headerEnd = rawEmail.search(/\r?\n\r?\n/);
    const headers = headerEnd === -1 ? rawEmail : rawEmail.slice(0, headerEnd);

    // Capture the header value plus any continuation lines, then unfold it
    const contentTypeHeader = headers.match(
      /Content-Type:[ \t]*([^\r\n]*(?:\r?\n[ \t]+[^\r\n]*)*)/i
    );
    const contentTypeValue = (contentTypeHeader?.[1] ?? '').replace(/\r?\n[ \t]+/g, ' ');

    // Media types are case-insensitive
    if (contentTypeValue.toLowerCase().includes('multipart/')) {
      const boundary = extractBoundary(contentTypeValue);

      if (boundary) {
        return processMultipartEmail(rawEmail, boundary);
      }
    }

    // Not multipart (or no usable boundary): decode as a single part
    const { contentType, decodedBody } = decodeEmail(rawEmail);

    if (contentType.includes('text/html')) {
      result.html = decodedBody;
    } else {
      result.text = decodedBody;
    }

    return result;
  } catch (error) {
    console.error('Error parsing raw email:', error);
    // Return raw content as text on error
    result.text = rawEmail;
    return result;
  }
}