// Neah/lib/utils/email-mime-decoder.ts

/**
 * Infomaniak Email MIME Decoder
 *
 * This module provides specialized functions to decode MIME-encoded email content
 * from Infomaniak servers for proper display in a frontend application.
 * It handles multipart messages, different encodings, and character set conversions.
 */

import { LegacyEmailMessage } from '@/types/email';

/**
 * Decoded view of a MIME message, as produced by the decoder functions in
 * this module.
 */
export interface DecodedEmail {
  /** `Subject` header with RFC 2047 encoded-words decoded; empty string when the header is absent. */
  subject: string;
  /** `From` header with encoded-words decoded; empty string when absent. */
  from: string;
  /** `To` header with encoded-words decoded; empty string when absent. */
  to: string;
  /** `Cc` header with encoded-words decoded; only set when the header is present. */
  cc?: string;
  /** `Bcc` header with encoded-words decoded; only set when the header is present. */
  bcc?: string;
  /** Raw `Date` header value (not parsed or decoded); empty string when absent. */
  date: string;
  /** Decoded body taken from a `text/plain` part, if one was found. */
  text?: string;
  /** Decoded body taken from a `text/html` part, if one was found. */
  html?: string;
  /**
   * Parts whose `Content-Disposition` contains "attachment".
   * The multipart path initialises this to an empty array; the single-part
   * path leaves it unset.
   */
  attachments?: Array<{
    /** File name extracted from the part's `Content-Disposition` header. */
    filename: string;
    /** Raw `Content-Type` header value of the part (parameters included). */
    contentType: string;
    /** `Content-Transfer-Encoding` declared by the part. */
    encoding?: string;
    /** Part body after transfer decoding (no charset conversion applied). */
    content?: string;
  }>;
  /** Top-level headers keyed by lower-cased name; values are unfolded but not RFC 2047-decoded. */
  headers?: Record<string, string>;
}

/**
 * Content-related header facts extracted from a message or from one MIME part.
 */
export interface EmailHeaderInfo {
  /** Raw `Content-Type` header value including parameters; defaults to `text/plain`. */
  contentType: string;
  /** `Content-Transfer-Encoding` header value; defaults to `7bit`. */
  encoding: string;
  /** `charset` parameter of the content type; defaults to `utf-8`. */
  charset: string;
  /** `boundary` parameter of the content type; only set when one is found. */
  boundary?: string;
}

/**
 * Entry point: decode a raw MIME message.
 *
 * The top-level headers choose the path: a Content-Type containing
 * "multipart" goes to the multipart processor, anything else is handled as a
 * single-part message.
 *
 * @param rawEmailContent - Complete raw message (headers, blank line, body).
 * @returns The decoded message.
 * @throws Error from the multipart path when no boundary is declared.
 */
export function decodeInfomaniakEmail(rawEmailContent: string): DecodedEmail {
  const topLevelInfo = parseEmailHeaders(extractHeaders(rawEmailContent));
  const isMultipart = topLevelInfo.contentType.includes('multipart');

  return isMultipart
    ? processMultipartEmail(rawEmailContent, topLevelInfo)
    : processSinglePartEmail(rawEmailContent, topLevelInfo);
}

/**
 * Decode a message that consists of one body part only.
 *
 * Everything before the first blank line is the header block; the remainder
 * is the body, which is transfer-decoded and then charset-converted. The body
 * is stored as `text` for text/plain, as `html` for text/html, and dropped
 * for any other content type.
 *
 * @param rawEmail - Complete raw message.
 * @param headerInfo - Content type, encoding and charset of the message.
 * @returns The decoded message (no `attachments` field is set on this path).
 */
function processSinglePartEmail(rawEmail: string, headerInfo: EmailHeaderInfo): DecodedEmail {
  const [headerBlock, ...bodyChunks] = rawEmail.split('\r\n\r\n');

  const headerMap = parseHeadersToObject(headerBlock);
  const bodyText = convertCharset(
    decodeMimeContent(bodyChunks.join('\r\n\r\n'), headerInfo.encoding),
    headerInfo.charset
  );

  // Optional address headers stay undefined when the header is missing.
  const decodeOptional = (raw: string | undefined): string | undefined =>
    raw ? decodeHeaderValue(raw) : undefined;

  const email: DecodedEmail = {
    subject: decodeHeaderValue(headerMap['subject'] || ''),
    from: decodeHeaderValue(headerMap['from'] || ''),
    to: decodeHeaderValue(headerMap['to'] || ''),
    cc: decodeOptional(headerMap['cc']),
    bcc: decodeOptional(headerMap['bcc']),
    date: headerMap['date'] || '',
    headers: headerMap
  };

  const { contentType } = headerInfo;
  if (contentType.includes('text/plain')) {
    email.text = bodyText;
  } else if (contentType.includes('text/html')) {
    email.html = bodyText;
  }

  return email;
}

/**
 * Process a multipart email
 *
 * Splits the message on its boundary delimiter and walks the parts:
 * - a nested multipart part is decoded recursively and its html, text and
 *   attachments are merged into the result;
 * - a part whose Content-Disposition contains "attachment" is stored in
 *   `attachments` with its transfer-decoded content (no charset conversion);
 * - a text/plain or text/html part becomes `text` / `html` (a later part of
 *   the same type overwrites an earlier one);
 * - any other part is ignored.
 *
 * Parsing stops at the closing delimiter (`--boundary--`). A message that was
 * truncated before its closing delimiter still has its last part processed.
 *
 * @param rawEmail - Raw message, or raw nested multipart part.
 * @param headerInfo - Header info for `rawEmail`; must carry the boundary.
 * @returns The decoded message.
 * @throws Error when `headerInfo.boundary` is missing.
 */
function processMultipartEmail(rawEmail: string, headerInfo: EmailHeaderInfo): DecodedEmail {
  if (!headerInfo.boundary) {
    throw new Error('Multipart email missing boundary');
  }

  const delimiter = '--' + headerInfo.boundary;
  const parsedHeaders = parseHeadersToObject(extractHeaders(rawEmail));
  const attachments: NonNullable<DecodedEmail['attachments']> = [];

  const result: DecodedEmail = {
    subject: decodeHeaderValue(parsedHeaders['subject'] || ''),
    from: decodeHeaderValue(parsedHeaders['from'] || ''),
    to: decodeHeaderValue(parsedHeaders['to'] || ''),
    cc: parsedHeaders['cc'] ? decodeHeaderValue(parsedHeaders['cc']) : undefined,
    bcc: parsedHeaders['bcc'] ? decodeHeaderValue(parsedHeaders['bcc']) : undefined,
    date: parsedHeaders['date'] || '',
    attachments,
    headers: parsedHeaders
  };

  // Segment 0 is the header block plus any preamble; real parts start at 1.
  const segments = rawEmail.split(delimiter);

  for (let i = 1; i < segments.length; i++) {
    const part = segments[i];

    // The closing delimiter is "--boundary--", so the segment after it starts
    // with "--". Everything from there on is epilogue and is ignored. Checking
    // for it (rather than blindly skipping the last segment) keeps the final
    // part of a message whose closing delimiter is missing.
    if (part.startsWith('--')) {
      break;
    }

    const partHeaders = extractHeaders(part);
    const partHeaderInfo = parseEmailHeaders(partHeaders);

    // Handle sub-multipart (nested multipart)
    if (partHeaderInfo.contentType.includes('multipart') && partHeaderInfo.boundary) {
      const nested = processMultipartEmail(part, partHeaderInfo);
      if (nested.html) result.html = nested.html;
      if (nested.text) result.text = nested.text;
      if (nested.attachments) attachments.push(...nested.attachments);
      continue;
    }

    // Body of the part: everything after its first blank line.
    const partContent = part.split('\r\n\r\n').slice(1).join('\r\n\r\n');
    const decodedContent = decodeMimeContent(partContent, partHeaderInfo.encoding);

    const contentDisposition = getHeaderValue(partHeaders, 'Content-Disposition') || '';

    if (contentDisposition.includes('attachment')) {
      const filename = extractFilename(contentDisposition);
      if (filename) {
        attachments.push({
          filename,
          contentType: partHeaderInfo.contentType,
          encoding: partHeaderInfo.encoding,
          content: decodedContent
        });
      }
      continue;
    }

    // Charset conversion is only meaningful for the textual body parts, so it
    // is not run on attachment bytes.
    if (partHeaderInfo.contentType.includes('text/plain')) {
      result.text = convertCharset(decodedContent, partHeaderInfo.charset);
    } else if (partHeaderInfo.contentType.includes('text/html')) {
      result.html = convertCharset(decodedContent, partHeaderInfo.charset);
    }
  }

  return result;
}

/**
 * Return the header block of a message or MIME part.
 *
 * The header block is everything before the first blank line (CRLF CRLF).
 * When no blank line exists, the whole input is returned.
 *
 * @param content - Raw message or part.
 * @returns The header block without the terminating blank line.
 */
function extractHeaders(content: string): string {
  const separatorAt = content.indexOf('\r\n\r\n');
  return separatorAt === -1 ? content : content.slice(0, separatorAt);
}

/**
 * Turn a CRLF-separated header block into a name → value map.
 *
 * Names are lower-cased. A line starting with a space or tab is a folded
 * continuation and is appended to the current value after a single space.
 * When a header name occurs more than once, the last occurrence wins. Lines
 * without a colon are skipped.
 *
 * @param headers - Header block (no terminating blank line required).
 * @returns Map of lower-cased header names to trimmed, unfolded values.
 */
function parseHeadersToObject(headers: string): Record<string, string> {
  const parsed: Record<string, string> = {};
  let name = '';
  let value = '';

  // Store the header collected so far, if any.
  const flush = (): void => {
    if (name) {
      parsed[name.toLowerCase()] = value;
    }
  };

  headers.split('\r\n').forEach((rawLine) => {
    if (/^[ \t]/.test(rawLine)) {
      // Folded continuation of the current header.
      value = `${value} ${rawLine.trim()}`;
      return;
    }

    flush();

    const separatorAt = rawLine.indexOf(':');
    if (separatorAt === -1) {
      return;
    }
    name = rawLine.slice(0, separatorAt).trim();
    value = rawLine.slice(separatorAt + 1).trim();
  });

  flush();
  return parsed;
}

/**
 * Extract the content-related facts from a header block.
 *
 * Missing headers fall back to `text/plain` (content type), `7bit`
 * (transfer encoding) and `utf-8` (charset). `boundary` stays undefined
 * unless the content type carries a boundary parameter.
 *
 * @param headers - Header block of a message or part.
 * @returns Content type, transfer encoding, charset and optional boundary.
 */
function parseEmailHeaders(headers: string): EmailHeaderInfo {
  const contentType = getHeaderValue(headers, 'Content-Type') || 'text/plain';
  const encoding = getHeaderValue(headers, 'Content-Transfer-Encoding') || '7bit';

  // Pull one parameter value out of the Content-Type header.
  const readParam = (pattern: RegExp): string | undefined => {
    const found = contentType.match(pattern);
    return found ? found[1] : undefined;
  };

  const charset = readParam(/charset\s*=\s*["']?([^"';\s]+)/i) || 'utf-8';
  const boundary = readParam(/boundary\s*=\s*["']?([^"';\s]+)/i);

  return { contentType, encoding, charset, boundary };
}

/**
 * Get a specific header value
 *
 * The header name is matched case-insensitively and only at the start of a
 * line, so looking up `Content-Type` does not pick up `X-Content-Type`.
 * Folded continuation lines (lines starting with a space or tab) belong to
 * the value and are unfolded into a single line. This matters for headers
 * such as `Content-Type: multipart/mixed;` followed by a folded `boundary=`.
 *
 * @param headers - Header block to search.
 * @param name - Header name, without the colon.
 * @returns The trimmed, unfolded value, or null when the header is missing or empty.
 */
function getHeaderValue(headers: string, name: string): string | null {
  // Escape the name so it is matched literally inside the pattern.
  const escapedName = name.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const pattern = new RegExp(
    `^${escapedName}[ \\t]*:[ \\t]*([^\\r\\n]*(?:\\r?\\n[ \\t][^\\r\\n]*)*)`,
    'im'
  );

  const match = headers.match(pattern);
  if (!match) {
    return null;
  }

  // Unfold: a line break followed by whitespace collapses to one space.
  const value = match[1].replace(/\r?\n[ \t]+/g, ' ').trim();
  return value === '' ? null : value;
}

/**
 * Extract filename from Content-Disposition header
 *
 * Lookup order:
 * 1. the extended `filename*=charset'lang'value` form, percent-decoded
 *    (used first when present, since it carries the non-ASCII name);
 * 2. a quoted `filename="..."` / `filename='...'` value, kept whole so that
 *    names containing spaces are not truncated;
 * 3. an unquoted `filename=token` value.
 *
 * @param contentDisposition - Value of the Content-Disposition header.
 * @returns The file name, or the literal `attachment` when none is present.
 */
function extractFilename(contentDisposition: string): string {
  // Extended form; the literal "*" keeps this from matching a plain filename=.
  const extended = contentDisposition.match(/filename\*\s*=\s*[^']*'[^']*'([^;]+)/i);
  const encoded = extended ? extended[1].trim() : '';
  if (encoded) {
    try {
      return decodeURIComponent(encoded);
    } catch (e) {
      // Malformed escapes or a non-UTF-8 charset: return the value as sent.
      return encoded;
    }
  }

  const doubleQuoted = contentDisposition.match(/filename\s*=\s*"([^"]*)"/i);
  if (doubleQuoted && doubleQuoted[1]) {
    return doubleQuoted[1];
  }

  const singleQuoted = contentDisposition.match(/filename\s*=\s*'([^']*)'/i);
  if (singleQuoted && singleQuoted[1]) {
    return singleQuoted[1];
  }

  // Unquoted token (also covers a value whose closing quote is missing).
  const bare = contentDisposition.match(/filename\s*=\s*["']?([^"';\s]+)/i);
  if (bare) {
    return bare[1];
  }

  return 'attachment';
}

/**
 * Undo the Content-Transfer-Encoding of a body.
 *
 * `quoted-printable` and `base64` (matched case-insensitively) are decoded;
 * `7bit`, `8bit`, `binary` and any unrecognised encoding return the content
 * unchanged.
 *
 * @param content - Encoded body.
 * @param encoding - Content-Transfer-Encoding value.
 * @returns The transfer-decoded body.
 */
function decodeMimeContent(content: string, encoding: string): string {
  const scheme = encoding.toLowerCase();

  if (scheme === 'quoted-printable') {
    return decodeQuotedPrintable(content);
  }
  if (scheme === 'base64') {
    return decodeBase64(content);
  }

  // Identity encodings and unknown values: pass through.
  return content;
}

/**
 * Decode quoted-printable content
 *
 * Soft line breaks (`=` at the end of a line) are removed first, then every
 * `=XX` hex escape is replaced by the character with that code. The result is
 * a byte string: each character stands for one decoded byte, so multi-byte
 * charsets still need charset conversion afterwards.
 *
 * Soft breaks are recognised with CRLF or bare LF line endings, and with
 * trailing spaces or tabs between the `=` and the line break.
 *
 * @param content - Quoted-printable encoded text.
 * @returns The decoded byte string.
 */
function decodeQuotedPrintable(content: string): string {
  return content
    .replace(/=[ \t]*\r?\n/g, '')
    .replace(/=([0-9A-Fa-f]{2})/g, (_match: string, hex: string) =>
      String.fromCharCode(parseInt(hex, 16))
    );
}

/**
 * Decode a base64 body.
 *
 * All whitespace (including the line breaks of wrapped base64) is removed
 * before decoding. When decoding fails, the error is logged and the original,
 * unmodified content is returned.
 *
 * @param content - Base64 text, possibly wrapped over several lines.
 * @returns The decoded byte string, or `content` itself on failure.
 */
function decodeBase64(content: string): string {
  const compact = content.split(/\s+/).join('');

  try {
    return atob(compact);
  } catch (e) {
    console.error('Error decoding base64', e);
    return content;
  }
}

/**
 * Convert content from specified charset to UTF-8
 *
 * `content` is expected to be a byte string as produced by the transfer
 * decoders: every character code (0-255) stands for one byte. Those bytes are
 * decoded with `TextDecoder` using `charset`.
 *
 * UTF-8 is decoded as well: returning it untouched would leave every
 * multi-byte character as mojibake ("Ã©" instead of "é"). To avoid damaging
 * text that is already proper Unicode, the input is returned unchanged when
 * - it contains a character above 0xFF (so it cannot be a byte string), or
 * - it is declared UTF-8 but is pure ASCII or not valid UTF-8.
 *
 * @param content - Byte string (or already-decoded text).
 * @param charset - Charset label from the headers (case-insensitive).
 * @returns The decoded text, or `content` unchanged when it cannot be converted.
 */
function convertCharset(content: string, charset: string): string {
  const label = charset.toLowerCase();
  const isUtf8 = label === 'utf-8' || label === 'utf8';

  // One pass to learn whether this is a byte string and whether it has any
  // non-ASCII bytes.
  let isByteString = true;
  let hasHighByte = false;
  for (let i = 0; i < content.length; i++) {
    const code = content.charCodeAt(i);
    if (code > 0xff) {
      isByteString = false;
      break;
    }
    if (code > 0x7f) {
      hasHighByte = true;
    }
  }

  // Already Unicode text: squeezing it into bytes would corrupt it.
  if (!isByteString) {
    return content;
  }
  // ASCII is identical in UTF-8. Other charsets (e.g. 7-bit ISO-2022-JP) can
  // still need decoding, so this shortcut is limited to UTF-8.
  if (isUtf8 && !hasHighByte) {
    return content;
  }

  try {
    // For runtimes that support TextDecoder
    if (typeof TextDecoder !== 'undefined') {
      const bytes = new Uint8Array(content.length);
      for (let i = 0; i < content.length; i++) {
        bytes[i] = content.charCodeAt(i);
      }

      // fatal for UTF-8: malformed input throws instead of producing U+FFFD,
      // which lets the catch below hand back the untouched input.
      const decoder = new TextDecoder(charset, { fatal: isUtf8 });
      return decoder.decode(bytes);
    }
  } catch (e) {
    if (isUtf8) {
      // Not valid UTF-8 bytes, so the text was most likely decoded already.
      return content;
    }
    console.warn('TextDecoder not supported or failed for charset:', charset);
  }

  if (isUtf8) {
    return content;
  }

  // Latin-1 bytes map one-to-one onto the first 256 code points.
  if (label === 'iso-8859-1' || label === 'latin1') {
    return content;
  }

  console.warn('Unsupported charset:', charset);
  return content;  // Return as-is if we can't convert
}

/**
 * Decode encoded header values (RFC 2047)
 *
 * Replaces every encoded-word (`=?charset?B?...?=` or `=?charset?Q?...?=`)
 * with its decoded text; all other text is kept as-is.
 *
 * Whitespace that only separates two adjacent encoded-words is dropped before
 * decoding. Long non-ASCII headers are sent as several encoded-words, and
 * keeping that separator would put spurious spaces into the decoded text.
 *
 * @param value - Raw (unfolded) header value.
 * @returns The header value with encoded-words decoded.
 */
function decodeHeaderValue(value: string): string {
  // Join adjacent encoded-words. The lookahead leaves the second word in
  // place so a run of three or more words is joined as well.
  const joined = value.replace(
    /(=\?[^?]+\?[BQ]\?[^?]*\?=)\s+(?==\?[^?]+\?[BQ]\?[^?]*\?=)/gi,
    '$1'
  );

  return joined.replace(
    /=\?([^?]+)\?([BQ])\?([^?]*)\?=/gi,
    (_match: string, charsetLabel: string, encoding: string, text: string) => {
      // An RFC 2231 language suffix ("utf-8*en") is not part of the charset.
      const charset = charsetLabel.split('*')[0];
      const scheme = encoding.toUpperCase();

      if (scheme === 'B') {
        // Base64 encoding
        try {
          return convertCharset(atob(text), charset);
        } catch (e) {
          return text;
        }
      }

      if (scheme === 'Q') {
        // "Q" encoding: quoted-printable where "_" stands for a space.
        try {
          const decoded = text
            .replace(/_/g, ' ')
            .replace(/=([\da-fA-F]{2})/g, (_m: string, hex: string) =>
              String.fromCharCode(parseInt(hex, 16))
            );
          return convertCharset(decoded, charset);
        } catch (e) {
          return text;
        }
      }

      return text;
    }
  );
}

/**
 * Clean HTML content for safe rendering
 *
 * Best-effort, regex-based stripping of the most common script vectors:
 * - `<script>...</script>` elements, in any letter case and with whitespace
 *   or attributes in the closing tag;
 * - stray or unclosed `<script ...>` / `</script>` tags;
 * - inline `on*=` event-handler attributes, in any letter case, with quoted
 *   or unquoted values and optional whitespace around `=`.
 *
 * Handler attributes are only matched after whitespace, a quote or a slash,
 * i.e. where an attribute can start. Ordinary text such as `?conversion=5`
 * inside a URL is therefore left intact.
 *
 * SECURITY NOTE: this is not a complete sanitizer. `javascript:` URLs,
 * `<iframe>`/`<object>` elements, CSS-based vectors and entity-obfuscated
 * payloads are NOT handled. Use a real sanitizer (e.g. DOMPurify) before
 * rendering untrusted email HTML.
 *
 * @param html - Untrusted HTML.
 * @returns The HTML with the vectors listed above removed.
 */
export function cleanHtml(html: string): string {
  return html
    // Whole script elements, up to the first closing tag.
    .replace(/<script\b[\s\S]*?<\/script\b[^>]*>/gi, '')
    // Left-over opening or closing script tags.
    .replace(/<\/?script\b[^>]*>/gi, '')
    // Inline event handlers; the separator before the attribute is kept.
    .replace(/([\s"'\/])on[a-z]+\s*=\s*(?:"[^"]*"|'[^']*'|[^\s>]+)/gi, '$1');
}

/**
 * Check if email content is likely in MIME format
 *
 * The content counts as MIME when it mentions both a `Content-Type:` and a
 * `MIME-Version:` header and contains a blank line (CRLF CRLF) separating
 * headers from body. Header names are matched case-insensitively, because
 * header names are case-insensitive and spellings such as `Mime-Version:` or
 * `Content-type:` are common.
 *
 * @param content - Candidate raw message.
 * @returns true when the content looks like a raw MIME message.
 */
export function isMimeFormat(content: string | undefined): boolean {
  if (!content) return false;

  return (
    /content-type:/i.test(content) &&
    /mime-version:/i.test(content) &&
    content.includes('\r\n\r\n')
  );
}

/**
 * Return a copy of a legacy email whose fields come from its decoded MIME content.
 *
 * The email is returned untouched when `content` is empty, is not a string,
 * or does not look like MIME, and also when decoding throws (the error is
 * logged). Otherwise `html`, `text`, `subject` and `content` are overwritten
 * from the decoded message; `content` becomes the decoded body, preferring
 * HTML over plain text.
 *
 * @param legacyEmail - Email whose `content` may hold a raw MIME message.
 * @returns The adapted copy, or `legacyEmail` itself when nothing was decoded.
 */
export function adaptMimeEmail(legacyEmail: LegacyEmailMessage): LegacyEmailMessage {
  const raw = legacyEmail.content;
  if (typeof raw !== 'string' || !raw || !isMimeFormat(raw)) {
    return legacyEmail;
  }

  try {
    const { html, text, subject } = decodeInfomaniakEmail(raw);

    return {
      ...legacyEmail,
      html,
      text: text || '',
      subject: subject || legacyEmail.subject,
      // The raw MIME source is replaced by the decoded body.
      content: html || text || ''
    };
  } catch (e) {
    console.error('Failed to decode MIME email:', e);
    return legacyEmail;
  }
}