// Neah/lib/infomaniak-mime-decoder.ts

// Infomaniak-specific MIME decoder functions

/**
 * Decodes a quoted-printable body and converts the resulting bytes to text.
 *
 * @param text - Quoted-printable encoded input.
 * @param charset - Encoding label handed to `TextDecoder` for the decoded bytes.
 * @returns The decoded text. When `TextDecoder` is unavailable, the label is
 *   rejected, or the input already contains characters above U+00FF, the
 *   escape-decoded string is returned without charset conversion.
 */
export function decodeQuotedPrintable(text: string, charset: string): string {
  if (!text) return '';

  const decoded = text
    // Soft line breaks: "=" (optionally followed by transport padding) + EOL.
    .replace(/=[ \t]*(?:\r\n|\n|\r)/g, '')
    // Every =XX escape is decoded in ONE pass. Decoding "=3D" separately first
    // would let the produced "=" combine with the following characters and be
    // decoded a second time (e.g. "=3D41" must yield "=41", not "A").
    .replace(/=([0-9A-F]{2})/gi, (_match: string, hex: string) =>
      String.fromCharCode(parseInt(hex, 16))
    );

  if (typeof TextDecoder === 'undefined') {
    return decoded;
  }

  // Only a "binary string" (all code units <= 0xFF) maps 1:1 onto bytes.
  // Anything wider is already Unicode text; squeezing it into a Uint8Array
  // would silently truncate each character to its low byte.
  if (/[^\u0000-\u00ff]/.test(decoded)) {
    return decoded;
  }

  try {
    const bytes = new Uint8Array(decoded.length);
    for (let i = 0; i < decoded.length; i++) {
      bytes[i] = decoded.charCodeAt(i);
    }
    return new TextDecoder(charset).decode(bytes);
  } catch (e) {
    // TextDecoder throws on labels it does not know.
    console.warn('Charset conversion error:', e);
    return decoded;
  }
}

/**
 * Decodes a base64 body and converts the resulting bytes to text.
 *
 * @param text - Base64 input; whitespace and line breaks are ignored.
 * @param charset - Encoding label handed to `TextDecoder` for the decoded bytes.
 * @returns The decoded text. If the input is not valid base64 the original
 *   `text` is returned. If only the charset conversion fails (or `TextDecoder`
 *   is unavailable) the decoded binary string is returned instead.
 */
export function decodeBase64(text: string, charset: string): string {
  if (!text) return '';

  let binary: string;
  try {
    binary = atob(text.replace(/\s+/g, ''));
  } catch (e) {
    console.warn('Base64 decoding error:', e);
    return text;
  }

  if (typeof TextDecoder === 'undefined') {
    return binary;
  }

  // Charset handling gets its own try block: an unsupported label must not
  // discard a payload that was base64-decoded successfully.
  try {
    const bytes = new Uint8Array(binary.length);
    for (let i = 0; i < binary.length; i++) {
      bytes[i] = binary.charCodeAt(i);
    }
    return new TextDecoder(charset).decode(bytes);
  } catch (e) {
    console.warn('Charset conversion error:', e);
    return binary;
  }
}

/**
 * Re-interprets a binary string (one character per byte) in the given charset.
 *
 * @param text - String whose character codes are treated as raw byte values.
 * @param charset - Charset name; matched case-insensitively after trimming.
 * @returns The decoded text. The input is returned unchanged when it contains
 *   characters above U+00FF, when `TextDecoder` is unavailable, or when the
 *   charset label is rejected.
 */
export function convertCharset(text: string, charset: string): string {
  if (!text) return '';

  if (typeof TextDecoder === 'undefined') {
    return text;
  }

  // A character above 0xFF cannot be a byte value, so the text has already
  // been decoded. Forcing it through a Uint8Array would keep only the low byte
  // of each character and corrupt it.
  if (/[^\u0000-\u00ff]/.test(text)) {
    return text;
  }

  try {
    const label = charset.trim().toLowerCase();

    // Charset names that are decoded with a different TextDecoder label.
    // NOTE(review): ISO-8859-15 differs from windows-1252 in a few code points
    // (e.g. 0xA4); the mapping is kept from the existing behaviour — confirm
    // whether it is intentional.
    let decoderLabel = label;
    if (label === 'iso-8859-1' || label === 'iso-8859-15') {
      decoderLabel = 'windows-1252';
    } else if (label === 'us-ascii') {
      decoderLabel = 'utf-8';
    }

    const bytes = new Uint8Array(text.length);
    for (let i = 0; i < text.length; i++) {
      bytes[i] = text.charCodeAt(i);
    }
    return new TextDecoder(decoderLabel).decode(bytes);
  } catch (e) {
    console.warn('Charset conversion error:', e);
    return text;
  }
}

/** Named character references decoded by cleanHtml, keyed by entity name. */
const CLEAN_HTML_ENTITIES = new Map<string, string>([
  ['nbsp', ' '],
  ['zwnj', ''],
  ['raquo', '»'],
  ['laquo', '«'],
  ['gt', '>'],
  ['lt', '<'],
  ['amp', '&'],
  ['quot', '"'],
  ['eacute', 'é'],
  ['egrave', 'è'],
  ['ecirc', 'ê'],
  ['euml', 'ë'],
  ['agrave', 'à'],
  ['acirc', 'â'],
  ['auml', 'ä'],
  ['icirc', 'î'],
  ['iuml', 'ï'],
  ['ocirc', 'ô'],
  ['ouml', 'ö'],
  ['ucirc', 'û'],
  ['uuml', 'ü'],
  ['ccedil', 'ç'],
  ['OElig', 'Œ'],
  ['oelig', 'œ'],
  ['AElig', 'Æ'],
  ['aelig', 'æ'],
]);

/** Builds the ordered (pattern, replacement) list cleanHtml applies to tags. */
function buildCleanHtmlTagRules(): Array<[RegExp, string]> {
  // The tag name must be followed by whitespace, "/" or ">" so that a rule for
  // <i> does not also hit <img>, <b> does not hit <br ...>, <th> does not hit
  // <thead>, <p> does not hit <pre>, and so on.
  const nameEnd = '(?=[\\s/>])';
  const open = (tag: string): RegExp => new RegExp(`<${tag}${nameEnd}[^>]*>`, 'gi');
  const close = (tag: string): RegExp => new RegExp(`</${tag}\\s*>`, 'gi');
  const element = (tag: string): RegExp =>
    new RegExp(`<${tag}${nameEnd}[^>]*>[\\s\\S]*?</${tag}\\s*>`, 'gi');

  const rules: Array<[RegExp, string]> = [];
  const pair = (tag: string, openText: string, closeText: string): void => {
    rules.push([open(tag), openText], [close(tag), closeText]);
  };

  // Document scaffolding: dropped entirely (with content where it is not text).
  rules.push([element('style'), ''], [element('script'), '']);
  rules.push([open('meta'), ''], [open('link'), ''], [open('base'), '']);
  rules.push([element('title'), ''], [element('head'), '']);
  pair('body', '', '');
  pair('html', '', '');

  // Tables: rows become lines, cells are separated by spaces.
  pair('table', '\n', '\n');
  pair('tr', '\n', '\n');
  pair('td', ' ', ' ');
  pair('th', ' ', ' ');
  pair('tbody', '', '');
  pair('thead', '', '');
  pair('tfoot', '', '');

  // Block-level structure becomes line breaks / list and quote markers.
  rules.push([open('br'), '\n']);
  pair('div', '\n', '\n');
  pair('p', '\n', '\n');
  pair('h[1-6]', '\n', '\n');
  pair('ul', '\n', '\n');
  pair('ol', '\n', '\n');
  pair('li', '\n• ', '\n');
  pair('blockquote', '\n> ', '\n');

  // Inline emphasis is rendered with Markdown-style markers.
  pair('span', '', '');
  pair('strong', '**', '**');
  pair('b', '**', '**');
  pair('em', '*', '*');
  pair('i', '*', '*');

  return rules;
}

/** Tag rewrite rules, compiled once; applied in order by cleanHtml. */
const CLEAN_HTML_TAG_RULES: ReadonlyArray<readonly [RegExp, string]> = buildCleanHtmlTagRules();

/**
 * Reduces an HTML email body to lightly marked-up text.
 *
 * Steps, in order: repair `=3D"http…"` attribute values left over from
 * quoted-printable encoding, decode remaining `=XX` escapes, rewrite or drop
 * the known structural/inline tags, decode a fixed set of named entities, and
 * collapse blank lines. Tags that have no rule (for example `<a>` or `<img>`)
 * are left in the output untouched.
 *
 * @param html - HTML source, possibly still containing quoted-printable escapes.
 * @returns The cleaned text, trimmed; `''` for empty input.
 */
export function cleanHtml(html: string): string {
  if (!html) return '';

  // Repair attribute values whose "=" is still quoted-printable encoded. The
  // "=" is put back so `href=3D"…"` becomes `href="…"`; a URL that cannot be
  // percent-decoded is removed.
  let cleaned = html.replace(/=3D"(http[^"]+)"/g, (_match: string, url: string) => {
    try {
      return `="${decodeURIComponent(url)}"`;
    } catch {
      return '';
    }
  });

  // Decode any remaining quoted-printable escapes.
  // NOTE(review): this also rewrites legitimate "=XX" text such as `width=50`
  // or `?id=42` when the input was already decoded — confirm callers only pass
  // bodies that may still be quoted-printable encoded.
  cleaned = cleaned.replace(/=([0-9A-F]{2})/gi, (_match: string, hex: string) =>
    String.fromCharCode(parseInt(hex, 16))
  );

  for (const [pattern, replacement] of CLEAN_HTML_TAG_RULES) {
    cleaned = cleaned.replace(pattern, replacement);
  }

  // Entities are decoded in a single pass so the "&" produced from "&amp;"
  // cannot start a second entity ("&amp;quot;" must stay "&quot;").
  cleaned = cleaned.replace(/&([A-Za-z]+);/g, (whole: string, name: string) => {
    const replacement = CLEAN_HTML_ENTITIES.get(name);
    return replacement === undefined ? whole : replacement;
  });

  return cleaned
    // Blank out whitespace-only lines, then cap runs of newlines at two.
    .replace(/^\s+$/gm, '')
    .replace(/\n{3,}/g, '\n\n')
    .trim();
}

/**
 * Extracts the media type, charset and transfer encoding from a header block.
 *
 * Header names are matched case-insensitively at the start of a line, and
 * folded continuation lines (starting with a space or tab) are treated as part
 * of the header value. All returned values are lower-cased.
 *
 * @param headers - Raw header text.
 * @returns `contentType` (default `'text/plain'`), `encoding` (default
 *   `'7bit'`) and `charset` (default `'utf-8'`).
 */
export function parseEmailHeaders(headers: string): { contentType: string; encoding: string; charset: string } {
  const result = {
    contentType: 'text/plain',
    encoding: '7bit',
    charset: 'utf-8'
  };

  if (!headers) return result;

  // Capture the whole Content-Type value: the rest of its line plus any folded
  // continuation lines, but never the next header.
  const typeHeader =
    /^[ \t]*Content-Type:[ \t]*([^\r\n]*(?:(?:\r\n|\r|\n)[ \t]+[^\r\n]*)*)/im.exec(headers);
  if (typeHeader) {
    const value = typeHeader[1] ?? '';

    // The media type is everything before the first parameter separator.
    const semicolon = value.indexOf(';');
    const mediaType = (semicolon === -1 ? value : value.slice(0, semicolon)).trim().toLowerCase();
    if (mediaType) {
      result.contentType = mediaType;
    }

    // charset may be any parameter, quoted or bare, not only the first one.
    const charsetParam = /;\s*charset\s*=\s*(?:"([^"]*)"|([^\s;"]+))/i.exec(value);
    const charset = (charsetParam?.[1] ?? charsetParam?.[2] ?? '').trim().toLowerCase();
    if (charset) {
      result.charset = charset;
    }
  }

  // The encoding token may sit on the header line or on a folded continuation.
  const encodingHeader =
    /^[ \t]*Content-Transfer-Encoding:[ \t]*(?:(?:\r\n|\r|\n)[ \t]+)?([^\s;]+)/im.exec(headers);
  const encoding = encodingHeader?.[1];
  if (encoding) {
    result.encoding = encoding.toLowerCase();
  }

  return result;
}

/**
 * Finds the `boundary=` parameter in a header block.
 *
 * @param headers - Raw header text to search (case-insensitive).
 * @returns The boundary value with surrounding whitespace removed, or `null`
 *   when no `boundary=` parameter is present.
 */
export function extractBoundary(headers: string): string | null {
  // First attempt: value with an optional surrounding double quote.
  let found = /boundary="?([^"\r\n;]+)"?/i.exec(headers);

  // Second attempt: take everything up to the end of line or next ";".
  if (!found) {
    found = /boundary=([^\r\n;]+)/i.exec(headers);
  }

  if (!found) {
    return null;
  }
  return found[1].trim();
}

/**
 * Extracts an attachment file name from a header block.
 *
 * The `filename=` parameter is preferred; `name=` is used when `filename=` is
 * absent or empty. A fully quoted value is returned verbatim (it may contain
 * `;`); an unquoted value ends at the next `;` or line break and is trimmed.
 *
 * @param headers - Raw header text to search (case-insensitive).
 * @returns The file name, or `'attachment'` when none is found.
 */
export function extractFilename(headers: string): string {
  // Group 1: properly quoted value. Group 2: bare value (a stray opening quote
  // is tolerated). `\b` keeps "name=" from matching inside "filename=".
  const patterns = [
    /\bfilename=(?:"([^"\r\n]*)"|"?([^"\r\n;]+))/i,
    /\bname=(?:"([^"\r\n]*)"|"?([^"\r\n;]+))/i,
  ];

  for (const pattern of patterns) {
    const found = pattern.exec(headers);
    if (!found) continue;

    const quoted = found[1];
    const value = quoted !== undefined ? quoted : (found[2] ?? '').trim();
    if (value) {
      return value;
    }
  }

  return 'attachment';
}

/**
 * Returns the value of the first header with the given name.
 *
 * The name is matched literally and case-insensitively at the start of a line.
 * Folded continuation lines (starting with a space or tab) are joined onto the
 * value.
 *
 * @param headers - Raw header text.
 * @param headerName - Header field name without the trailing colon.
 * @returns The trimmed header value, or `''` when the header is absent or empty.
 */
export function extractHeader(headers: string, headerName: string): string {
  // Escape regex metacharacters so a name such as "X-A.B" only matches itself.
  const escapedName = headerName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

  // Only spaces/tabs may follow the colon on the header line; allowing any
  // whitespace would let an empty header swallow the next header line.
  const regex = new RegExp(
    `^${escapedName}:[ \\t]*([^\\r\\n]*(?:(?:\\r\\n|\\r|\\n)[ \\t]+[^\\r\\n]*)*)`,
    'im'
  );
  const match = regex.exec(headers);
  if (!match) return '';

  // Unfold: drop each line break that is followed by leading whitespace.
  return (match[1] ?? '').replace(/(?:\r\n|\r|\n)(?=[ \t])/g, '').trim();
}