// NeahOpti/lib/infomaniak-mime-decoder.ts

// Infomaniak-specific MIME decoder functions

/**
 * Decodes a quoted-printable encoded string and interprets the resulting
 * bytes in the given charset.
 *
 * Soft line breaks (`=` followed by a line ending) are removed, then every
 * `=XX` hex escape is turned into the corresponding byte in a single pass, so
 * that the output of one escape (for example the `=` produced by `=3D`) is
 * never re-read as the start of another escape.
 *
 * @param text - Quoted-printable encoded input.
 * @param charset - Encoding label passed to `TextDecoder`.
 * @returns The decoded text. If `TextDecoder` is unavailable, rejects the
 *   charset, or the input contains characters that cannot be a single byte,
 *   the escape-decoded string is returned without charset conversion.
 */
export function decodeQuotedPrintable(text: string, charset: string): string {
  if (!text) return '';

  const decoded = text
    // Soft line breaks: "=\r\n", "=\n" or "=\r".
    .replace(/=(?:\r\n|\n|\r)/g, '')
    // All hex escapes in one pass; each char of the result holds one byte value.
    .replace(/=([0-9A-F]{2})/gi, (_match, hex: string) =>
      String.fromCharCode(parseInt(hex, 16)),
    );

  try {
    if (typeof TextDecoder === 'undefined') return decoded;

    const bytes = new Uint8Array(decoded.length);
    for (let i = 0; i < decoded.length; i++) {
      const code = decoded.charCodeAt(i);
      // A code unit above 0xFF is not a raw byte: the input already contains
      // decoded Unicode text, and squeezing it into a byte would corrupt it.
      if (code > 0xff) return decoded;
      bytes[i] = code;
    }
    return new TextDecoder(charset).decode(bytes);
  } catch (e) {
    console.warn('Charset conversion error:', e);
    return decoded;
  }
}

/**
 * Decodes a base64 string and interprets the resulting bytes in the given
 * charset.
 *
 * Base64 decoding and charset conversion are handled separately so that a
 * charset problem does not throw away a successfully decoded payload.
 *
 * @param text - Base64 input; any whitespace (including line breaks) is ignored.
 * @param charset - Encoding label passed to `TextDecoder`.
 * @returns The decoded text. If the input is not valid base64 the original
 *   `text` is returned unchanged. If `TextDecoder` is unavailable or rejects
 *   the charset, the decoded bytes are returned as a binary string (one
 *   character per byte).
 */
export function decodeBase64(text: string, charset: string): string {
  if (!text) return '';

  let binary: string;
  try {
    // Encoded bodies are line-wrapped; strip all whitespace before decoding.
    binary = atob(text.replace(/\s+/g, ''));
  } catch (e) {
    console.warn('Base64 decoding error:', e);
    return text;
  }

  if (typeof TextDecoder === 'undefined') return binary;

  try {
    const bytes = new Uint8Array(binary.length);
    for (let i = 0; i < binary.length; i++) {
      bytes[i] = binary.charCodeAt(i);
    }
    return new TextDecoder(charset).decode(bytes);
  } catch (e) {
    // Unknown charset label: keep the decoded payload instead of the base64 text.
    console.warn('Charset conversion error:', e);
    return binary;
  }
}

/**
 * Re-interprets a binary string (one character per byte) in the given charset.
 *
 * @param text - Input whose characters each represent one byte.
 * @param charset - Charset name from the message; matched case-insensitively.
 * @returns The decoded text. The input is returned unchanged when
 *   `TextDecoder` is unavailable, when the charset is rejected, or when the
 *   input contains characters above U+00FF (it is then already decoded text
 *   rather than raw bytes).
 */
export function convertCharset(text: string, charset: string): string {
  if (!text) return '';

  try {
    if (typeof TextDecoder === 'undefined') return text;

    // Aliases kept from the original implementation. ISO-8859-15 is
    // deliberately NOT aliased to windows-1252: the two encodings differ
    // (e.g. byte 0xA4 is the euro sign in ISO-8859-15), so the label is
    // passed to TextDecoder as-is.
    const aliases = new Map<string, string>([
      ['iso-8859-1', 'windows-1252'],
      ['us-ascii', 'utf-8'],
    ]);
    const label = charset.toLowerCase();
    const normalizedCharset = aliases.get(label) ?? label;

    const bytes = new Uint8Array(text.length);
    for (let i = 0; i < text.length; i++) {
      const code = text.charCodeAt(i);
      // Not a raw byte: converting would truncate the character.
      if (code > 0xff) return text;
      bytes[i] = code;
    }
    return new TextDecoder(normalizedCharset).decode(bytes);
  } catch (e) {
    console.warn('Charset conversion error:', e);
    return text;
  }
}

/**
 * Tags that are replaced by fixed text, as
 * [tag name, replacement for the opening tag, replacement for the closing tag].
 */
const CLEAN_HTML_TAG_RULES: ReadonlyArray<readonly [string, string, string]> = [
  ['html', '', ''],
  // Tables
  ['table', '\n', '\n'],
  ['tr', '\n', '\n'],
  ['td', ' ', ' '],
  ['th', ' ', ' '],
  // Lists
  ['ul', '\n', '\n'],
  ['ol', '\n', '\n'],
  ['li', '• ', '\n'],
  // Other block elements
  ['div', '\n', '\n'],
  ['p', '\n', '\n'],
  // Inline elements
  ['span', '', ''],
  ['a', '', ''],
  ['strong', '**', '**'],
  ['b', '**', '**'],
  ['em', '*', '*'],
  ['i', '*', '*'],
];

/**
 * Builds a pattern for opening tags whose name is exactly `name`.
 * The lookahead stops `<b` from matching `<body>` or `<br>`, `<i` from
 * matching `<img>`, and so on.
 */
function cleanHtmlOpeningTag(name: string): RegExp {
  return new RegExp(`<${name}(?=[\\s/>])[^>]*>`, 'gi');
}

/**
 * Flattens an HTML email body into lightly marked-up text wrapped in a
 * `<div>` carrying the detected text direction.
 *
 * Steps: repair `=3D"http…"` attribute values left over from quoted-printable
 * encoding, decode remaining `=XX` escapes, drop `<style>`, `<script>`,
 * `<title>` and `<head>` with their content as well as `<meta>`, `<link>` and
 * `<base>`, normalise `<body>` to carry only a `dir` attribute, replace the
 * known structural/inline tags with plain-text equivalents, decode a handful
 * of entities and collapse whitespace.
 *
 * NOTE(review): this is regex-based clean-up, not an HTML sanitizer. Tags that
 * are not listed above pass through untouched and entity decoding can produce
 * new markup, so the result should be sanitized before being inserted into a
 * DOM — confirm how callers render it.
 *
 * NOTE(review): the `=XX` pass runs over the whole document, so literal text
 * such as `x=10` is also treated as an escape. Kept as in the original.
 *
 * @param html - HTML source of the message body.
 * @returns `''` for empty input, otherwise the cleaned content wrapped in
 *   `<div dir="rtl|ltr">…</div>`.
 */
export function cleanHtml(html: string): string {
  if (!html) return '';

  // Direction is 'rtl' as soon as the input contains any character from the
  // ranges below, otherwise 'ltr'.
  const hasRtlChars = /[\u0591-\u07FF\u200F\u202B\u202E\uFB1D-\uFDFD\uFE70-\uFEFC]/.test(html);
  const defaultDir = hasRtlChars ? 'rtl' : 'ltr';

  let cleaned = html
    // `=3D"` is the quoted-printable form of `="`: keep the `=` so the
    // attribute stays well-formed. URLs that cannot be decoded are dropped.
    .replace(/=3D"(http[^"]+)"/g, (_match, url: string) => {
      try {
        return `="${decodeURIComponent(url)}"`;
      } catch {
        return '';
      }
    })
    // Remaining quoted-printable escapes.
    .replace(/=([0-9A-F]{2})/gi, (_match, hex: string) =>
      String.fromCharCode(parseInt(hex, 16)),
    );

  // Elements removed together with their content.
  for (const name of ['style', 'script', 'title', 'head']) {
    cleaned = cleaned.replace(
      new RegExp(`<${name}(?=[\\s/>])[^>]*>[\\s\\S]*?</${name}>`, 'gi'),
      '',
    );
  }

  // Metadata tags without content.
  for (const name of ['meta', 'link', 'base']) {
    cleaned = cleaned.replace(cleanHtmlOpeningTag(name), '');
  }

  // Keep <body>, but only with its direction (explicit one wins over detected).
  cleaned = cleaned
    .replace(cleanHtmlOpeningTag('body'), (tag) => {
      const dir = tag.match(/dir=["'](rtl|ltr)["']/i)?.[1] || defaultDir;
      return `<body dir="${dir}">`;
    })
    .replace(/<\/body>/gi, '');

  for (const [name, openText, closeText] of CLEAN_HTML_TAG_RULES) {
    cleaned = cleaned
      .replace(cleanHtmlOpeningTag(name), openText)
      .replace(new RegExp(`</${name}>`, 'gi'), closeText);
  }

  cleaned = cleaned
    .replace(cleanHtmlOpeningTag('br'), '\n')
    .replace(cleanHtmlOpeningTag('hr'), '\n')
    // Entities: `&amp;` is decoded last so that `&amp;lt;` yields the literal
    // text `&lt;` instead of being unescaped twice into `<`.
    .replace(/&nbsp;/g, ' ')
    .replace(/&lt;/g, '<')
    .replace(/&gt;/g, '>')
    .replace(/&quot;/g, '"')
    .replace(/&#39;/g, "'")
    .replace(/&amp;/g, '&')
    // Collapse whitespace
    .replace(/\s+/g, ' ')
    .trim();

  // Wrap in a div with the detected direction
  return `<div dir="${defaultDir}">${cleaned}</div>`;
}

/**
 * Extracts the media type, charset and transfer encoding from a block of
 * MIME headers.
 *
 * The Content-Type value is read up to the end of its header, including
 * folded continuation lines (lines starting with a space or tab), and never
 * runs into the following header. The `charset` parameter is found wherever
 * it appears among the Content-Type parameters, quoted or unquoted.
 *
 * @param headers - Raw header text.
 * @returns Lower-cased `contentType`, `encoding` and `charset`; fields that
 *   are missing or empty keep the defaults `text/plain`, `7bit` and `utf-8`.
 */
export function parseEmailHeaders(headers: string): { contentType: string; encoding: string; charset: string } {
  const result = {
    contentType: 'text/plain',
    encoding: '7bit',
    charset: 'utf-8'
  };

  // Capture the whole header value: everything on the line plus any line
  // break that is immediately followed by a space or tab (header folding).
  const contentTypeMatch = headers.match(/Content-Type:[ \t]*((?:[^\r\n]|\r?\n[ \t])*)/i);
  if (contentTypeMatch) {
    const headerValue = contentTypeMatch[1] ?? '';

    // The media type is everything before the first parameter separator.
    const mediaType = (headerValue.split(';', 1)[0] ?? '').trim().toLowerCase();
    if (mediaType) {
      result.contentType = mediaType;
    }

    const charsetMatch = headerValue.match(/;\s*charset=(?:"([^"]*)"|([^;"\s]+))/i);
    const charset = (charsetMatch?.[1] ?? charsetMatch?.[2] ?? '').trim().toLowerCase();
    if (charset) {
      result.charset = charset;
    }
  }

  // Only horizontal whitespace may separate the name from the value, so an
  // empty header cannot pick up the start of the next line.
  const encodingMatch = headers.match(/Content-Transfer-Encoding:[ \t]*([^\s;\r\n]+)/i);
  if (encodingMatch?.[1]) {
    result.encoding = encodingMatch[1].trim().toLowerCase();
  }

  return result;
}

/**
 * Finds the value of the first `boundary=` parameter in a block of headers.
 *
 * A first attempt accepts an optionally quoted value and stops at a quote,
 * semicolon or line break; if that finds nothing, a second attempt takes
 * everything up to the next semicolon or line break.
 *
 * @param headers - Raw header text.
 * @returns The boundary with surrounding whitespace removed, or `null` when
 *   no boundary parameter is present.
 */
export function extractBoundary(headers: string): string | null {
  const lenientPattern = /boundary="?([^"\r\n;]+)"?/i;
  const rawPattern = /boundary=([^\r\n;]+)/i;

  let hit = headers.match(lenientPattern);
  if (!hit) {
    hit = headers.match(rawPattern);
  }

  if (!hit) {
    return null;
  }
  return hit[1].trim();
}

/**
 * Extracts an attachment file name from a block of headers.
 *
 * The `filename=` parameter is preferred; `name=` is used as a fallback.
 * A fully quoted value is taken verbatim up to its closing quote, so
 * semicolons inside the quotes are kept. An unquoted (or unterminated) value
 * ends at the next quote, semicolon or line break.
 *
 * @param headers - Raw header text.
 * @returns The file name with surrounding whitespace removed, or
 *   `'attachment'` when no non-empty name is found.
 */
export function extractFilename(headers: string): string {
  const patterns = [
    /filename=(?:"([^"\r\n]+)"|"?([^"\r\n;]+))/i,
    /name=(?:"([^"\r\n]+)"|"?([^"\r\n;]+))/i,
  ];

  for (const pattern of patterns) {
    const match = headers.match(pattern);
    // Group 1 is the quoted form, group 2 the unquoted one.
    const value = (match?.[1] ?? match?.[2] ?? '').trim();
    if (value) return value;
  }

  return 'attachment';
}

/**
 * Returns the value of the first header with the given name.
 *
 * The name is matched literally and case-insensitively at the start of a
 * line. Folded continuation lines (a line break followed by a space or tab)
 * are treated as part of the value and joined with a single space.
 *
 * @param headers - Raw header text.
 * @param headerName - Header name without the trailing colon.
 * @returns The trimmed header value, or `''` when the header is absent or
 *   empty.
 */
export function extractHeader(headers: string, headerName: string): string {
  // Escape regex metacharacters so the name is matched as plain text.
  const escapedName = headerName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

  // `[ \t]*` (not `\s*`) keeps an empty header from swallowing the next line.
  const pattern = new RegExp(
    `^${escapedName}:[ \\t]*((?:[^\\r\\n]|\\r?\\n[ \\t])*)`,
    'im',
  );
  const match = headers.match(pattern);
  if (!match) return '';

  return (match[1] ?? '').replace(/\r?\n[ \t]+/g, ' ').trim();
}