// Neah/lib/infomaniak-mime-decoder.ts

// Infomaniak-specific MIME decoder functions

/**
 * Decodes a quoted-printable body and converts the resulting bytes to text.
 *
 * The input is treated as a "binary string": after soft line breaks are
 * removed and `=XX` escapes are expanded, every character whose code is
 * <= 0xFF is taken to be one raw byte and is decoded with `charset`.
 * Characters above 0xFF cannot be bytes, so they are passed through
 * unchanged instead of being truncated to a single (wrong) byte.
 *
 * @param text - Quoted-printable encoded text.
 * @param charset - Encoding label handed to `TextDecoder` (e.g. `utf-8`).
 * @returns The decoded text. If `TextDecoder` is unavailable or rejects
 *   `charset`, the escape-expanded string is returned without charset
 *   conversion. Empty input yields `''`.
 */
export function decodeQuotedPrintable(text: string, charset: string): string {
  if (!text) return '';

  // Soft line breaks ("=" followed by CRLF, LF or CR) only wrap long lines.
  const unfolded = text.replace(/=(?:\r\n|\n|\r)/g, '');

  // Expand each =XX escape into a single char whose code is the byte value.
  const binary = unfolded.replace(/=([0-9A-F]{2})/gi, (_match, hex: string) =>
    String.fromCharCode(parseInt(hex, 16)),
  );

  // No TextDecoder available: charset conversion is skipped.
  if (typeof TextDecoder === 'undefined') return binary;

  try {
    // Throws a RangeError for unknown charset labels (handled below).
    const decoder = new TextDecoder(charset);

    // Decode maximal runs of byte-valued chars (code <= 0xFF). Anything
    // wider is already real text and is left exactly where it is.
    return binary.replace(/[^\u0100-\uFFFF]+/g, (run) => {
      const bytes = new Uint8Array(run.length);
      for (let i = 0; i < run.length; i++) {
        bytes[i] = run.charCodeAt(i);
      }
      return decoder.decode(bytes);
    });
  } catch (e) {
    console.warn('Charset conversion error:', e);
    return binary;
  }
}

/**
 * Decodes a base64 body and converts the resulting bytes to text.
 *
 * Base64 failure and charset failure are handled separately: only when the
 * payload itself cannot be base64-decoded is the original text returned.
 * If the payload decodes but `charset` is rejected by `TextDecoder`, the
 * decoded binary string is returned (one char per byte), matching what
 * `decodeQuotedPrintable` does on a charset error.
 *
 * @param text - Base64 text; whitespace and line breaks are ignored.
 * @param charset - Encoding label handed to `TextDecoder`.
 * @returns The decoded text, the undecoded binary string when charset
 *   conversion is unavailable or fails, or the original `text` when it is
 *   not valid base64. Empty input yields `''`.
 */
export function decodeBase64(text: string, charset: string): string {
  if (!text) return '';

  let binary: string;
  try {
    // MIME base64 is line-wrapped; strip all whitespace before decoding.
    binary = atob(text.replace(/\s+/g, ''));
  } catch (e) {
    console.warn('Base64 decoding error:', e);
    return text;
  }

  // No TextDecoder available: return the raw binary string.
  if (typeof TextDecoder === 'undefined') return binary;

  try {
    // atob yields one char per byte, so every char code fits in a byte.
    const bytes = new Uint8Array(binary.length);
    for (let i = 0; i < binary.length; i++) {
      bytes[i] = binary.charCodeAt(i);
    }
    return new TextDecoder(charset).decode(bytes);
  } catch (e) {
    // Unknown charset label: keep the decoded payload rather than handing
    // the caller the still-encoded base64 text.
    console.warn('Charset conversion error:', e);
    return binary;
  }
}

/**
 * Re-interprets a "binary string" (one char per byte) as text in `charset`.
 *
 * Every character whose code is <= 0xFF is treated as one raw byte and
 * decoded with `charset`. Characters above 0xFF cannot be bytes, so they
 * are passed through unchanged instead of being truncated to a single
 * (wrong) byte.
 *
 * @param text - String whose byte-valued characters hold encoded bytes.
 * @param charset - Encoding label handed to `TextDecoder`.
 * @returns The converted text, or `text` unchanged when `TextDecoder` is
 *   unavailable or rejects `charset`. Empty input yields `''`.
 */
export function convertCharset(text: string, charset: string): string {
  if (!text) return '';

  // No TextDecoder available: nothing can be converted.
  if (typeof TextDecoder === 'undefined') return text;

  try {
    // Throws a RangeError for unknown charset labels (handled below).
    const decoder = new TextDecoder(charset);

    // Decode maximal runs of byte-valued chars; leave wider chars in place.
    return text.replace(/[^\u0100-\uFFFF]+/g, (run) => {
      const bytes = new Uint8Array(run.length);
      for (let i = 0; i < run.length; i++) {
        bytes[i] = run.charCodeAt(i);
      }
      return decoder.decode(bytes);
    });
  } catch (e) {
    console.warn('Charset conversion error:', e);
    return text;
  }
}

/**
 * Reduces an HTML e-mail body to lightly formatted content: repairs
 * leftover quoted-printable escapes, removes document-level tags
 * (`style`, `script`, `meta`, `link`, `base`, `title`, `head`, `body`,
 * `html`), turns `br`/`div`/`p` into line breaks, decodes a handful of
 * entities and collapses blank lines.
 *
 * NOTE(review): this is regex-based clean-up, not a security sanitizer.
 * Tags outside the list above and all attributes (including event
 * handlers) pass through untouched; run a real sanitizer before inserting
 * the result into a page.
 *
 * @param html - Raw HTML fragment or document.
 * @returns The cleaned, trimmed string; `''` for empty input.
 */
export function cleanHtml(html: string): string {
  if (!html) return '';

  // Repair quoted-printable-escaped URL attributes such as href=3D"http…".
  // The "=" of the attribute must be kept, otherwise the tag is corrupted.
  // NOTE(review): decodeURIComponent also decodes %22 to a literal quote,
  // which can end the attribute value early — confirm this is intended.
  let cleaned = html.replace(/=3D"(http[^"]+)"/g, (_match, url: string) => {
    try {
      return `="${decodeURIComponent(url)}"`;
    } catch {
      // Malformed percent-encoding: the URL is dropped.
      return '';
    }
  });

  // Expand any remaining =XX quoted-printable escapes.
  // Caveat: this also rewrites legitimate text shaped like "=10".
  cleaned = cleaned.replace(/=([0-9A-F]{2})/gi, (_match, hex: string) =>
    String.fromCharCode(parseInt(hex, 16)),
  );

  return (
    cleaned
      // Tag names are matched on a word boundary so that e.g. the <p> rule
      // does not also consume <pre>, <param> or <picture>.
      .replace(/<style\b[^>]*>[\s\S]*?<\/style>/gi, '')
      .replace(/<script\b[^>]*>[\s\S]*?<\/script>/gi, '')
      .replace(/<meta\b[^>]*>/gi, '')
      .replace(/<link\b[^>]*>/gi, '')
      .replace(/<base\b[^>]*>/gi, '')
      .replace(/<title\b[^>]*>[\s\S]*?<\/title>/gi, '')
      .replace(/<head\b[^>]*>[\s\S]*?<\/head>/gi, '')
      .replace(/<body\b[^>]*>/gi, '')
      .replace(/<\/body>/gi, '')
      .replace(/<html\b[^>]*>/gi, '')
      .replace(/<\/html>/gi, '')
      // Block-level structure becomes line breaks.
      .replace(/<br\s*\/?>/gi, '\n')
      .replace(/<div\b[^>]*>/gi, '\n')
      .replace(/<\/div>/gi, '')
      .replace(/<p\b[^>]*>/gi, '\n')
      .replace(/<\/p>/gi, '')
      // Entity decoding. &amp; is decoded LAST so that an escaped entity
      // such as "&amp;quot;" yields "&quot;" and is not decoded twice.
      .replace(/&nbsp;/g, ' ')
      .replace(/&zwnj;/g, '')
      .replace(/&raquo;/g, '»')
      .replace(/&laquo;/g, '«')
      .replace(/&gt;/g, '>')
      .replace(/&lt;/g, '<')
      .replace(/&quot;/g, '"')
      .replace(/&amp;/g, '&')
      // Blank out whitespace-only lines, then cap consecutive newlines at 2.
      .replace(/^\s+$/gm, '')
      .replace(/\n{3,}/g, '\n\n')
      .trim()
  );
}

/**
 * Extracts the media type, charset and transfer encoding from a block of
 * MIME headers.
 *
 * Folded header lines (a line break followed by space or tab) are unfolded
 * first. `Content-Type` and `Content-Transfer-Encoding` are matched only at
 * the start of a line, so headers that merely end in those names (e.g.
 * `X-Original-Content-Type`) are ignored. The `charset` parameter is
 * looked up anywhere in the Content-Type parameter list, quoted or not.
 *
 * @param headers - Raw header text of a message or MIME part.
 * @returns Lower-cased `contentType`, `encoding` and `charset`; missing
 *   values default to `text/plain`, `7bit` and `utf-8` respectively.
 */
export function parseEmailHeaders(headers: string): { contentType: string; encoding: string; charset: string } {
  const result = {
    contentType: 'text/plain',
    encoding: '7bit',
    charset: 'utf-8'
  };

  if (!headers) return result;

  // Unfold continuation lines so each header sits on a single line.
  const unfolded = headers.replace(/\r?\n[ \t]+/g, ' ');

  // Group 1: media type (stops at ";" or end of line, never crossing into
  // the next header). Group 2: the rest of the line, i.e. the parameters.
  const contentTypeMatch = unfolded.match(/^Content-Type:[ \t]*([^;\r\n]*)([^\r\n]*)/im);
  if (contentTypeMatch) {
    const mediaType = (contentTypeMatch[1] ?? '').trim().toLowerCase();
    if (mediaType) {
      result.contentType = mediaType;
    }

    // charset may appear after other parameters (e.g. format=flowed).
    const charsetMatch = (contentTypeMatch[2] ?? '').match(/;\s*charset\s*=\s*(?:"([^"]*)"|([^\s;"]+))/i);
    if (charsetMatch) {
      const charset = (charsetMatch[1] ?? charsetMatch[2] ?? '').trim().toLowerCase();
      if (charset) {
        result.charset = charset;
      }
    }
  }

  // [ \t]* (not \s*) so an empty value cannot pull in the next header line.
  const encodingMatch = unfolded.match(/^Content-Transfer-Encoding:[ \t]*([^\s;]+)/im);
  if (encodingMatch) {
    result.encoding = (encodingMatch[1] ?? '').trim().toLowerCase() || result.encoding;
  }

  return result;
}

/**
 * Finds the `boundary` parameter in a block of MIME headers.
 *
 * Two patterns are tried in order: the first accepts an optionally quoted
 * value free of quotes, semicolons and line breaks; the second accepts any
 * run of characters up to a semicolon or line break.
 *
 * @param headers - Raw header text to search.
 * @returns The trimmed boundary value, or `null` when none is present.
 */
export function extractBoundary(headers: string): string | null {
  const candidates = [
    /boundary="?([^"\r\n;]+)"?/i,
    /boundary=([^\r\n;]+)/i,
  ];

  for (const pattern of candidates) {
    const found = headers.match(pattern);
    if (found) {
      return found[1].trim();
    }
  }

  return null;
}

/**
 * Extracts an attachment file name from MIME part headers.
 *
 * The `filename=` parameter is preferred; `name=` is the fallback. A fully
 * quoted value is taken verbatim up to its closing quote, so it may contain
 * semicolons. An unquoted value (or one with a missing closing quote) ends
 * at the first quote, semicolon or line break and is trimmed.
 *
 * @param headers - Raw header text of the MIME part.
 * @returns The file name, or `'attachment'` when none is found.
 */
export function extractFilename(headers: string): string {
  // Alternative 1 (group 1): properly quoted value.
  // Alternative 2 (group 2): bare value, tolerating a stray opening quote.
  const filenameMatch =
    headers.match(/filename=(?:"([^"\r\n]+)"|"?([^"\r\n;]+))/i) ||
    headers.match(/name=(?:"([^"\r\n]+)"|"?([^"\r\n;]+))/i);

  if (!filenameMatch) return 'attachment';

  if (filenameMatch[1] !== undefined) {
    return filenameMatch[1];
  }

  // Whitespace around a bare token is not part of the name.
  const bare = (filenameMatch[2] ?? '').trim();
  return bare || 'attachment';
}

/**
 * Returns the value of the first header named `headerName`.
 *
 * The name is matched case-insensitively at the start of a line and is
 * escaped before being placed in the pattern, so characters such as `.`
 * are taken literally. Folded continuation lines (a line break followed by
 * space or tab) are joined to the value with a single space. An empty
 * header yields `''` rather than the content of the following line.
 *
 * @param headers - Raw header text to search.
 * @param headerName - Header field name without the trailing colon.
 * @returns The trimmed header value, or `''` when the header is absent.
 */
export function extractHeader(headers: string, headerName: string): string {
  // Escape regex metacharacters so the name is matched literally.
  const escapedName = headerName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

  // [ \t]* after the colon keeps the match on the header's own line; the
  // optional group then collects folded continuation lines.
  const regex = new RegExp(`^${escapedName}:[ \\t]*(.*(?:\\r?\\n[ \\t]+.*)*)$`, 'im');
  const match = headers.match(regex);
  if (!match) return '';

  // Unfold: a line break plus leading whitespace collapses to one space.
  return (match[1] ?? '').replace(/\r?\n[ \t]+/g, ' ').trim();
}