// Neah/lib/email-parser.ts

/**
 * The subset of email header fields produced by `parseEmailHeaders`.
 */
interface EmailHeaders {
  /** Value of the `From` header; empty string when the header is absent. */
  from: string;

  /** Value of the `Subject` header; empty string when the header is absent. */
  subject: string;

  /**
   * Value of the `Date` header exactly as it appeared in the message, or an
   * ISO-8601 timestamp of the parse time when the header is absent.
   */
  date: string;

  /** Value of the `To` header; left undefined when the header is absent. */
  to?: string;
}

/**
 * Parses the header section of a raw email and extracts the From, Subject,
 * Date and To fields.
 *
 * - Header names are matched case-insensitively; whitespace around the name
 *   (e.g. `From : x`) is ignored.
 * - Lines starting with whitespace are treated as folded continuations and
 *   are joined to the preceding header value with a single space.
 * - When a header occurs more than once, the last occurrence wins.
 * - Blank lines before the first header are skipped. The first blank line
 *   after a header ends the header section, so header-looking lines in the
 *   message body can never override the real headers.
 *
 * @param headerContent - Raw header text (LF or CRLF line endings). May be
 *   followed by the message body, which is ignored.
 * @returns The extracted fields. `from` and `subject` default to `''`,
 *   `date` defaults to the current time as an ISO string, and `to` is
 *   `undefined` when absent.
 */
export function parseEmailHeaders(headerContent: string): EmailHeaders {
  const headerLine = /^([^:]+):\s*(.*)$/;
  const continuationLine = /^\s/;

  const headers = new Map<string, string>();
  let currentHeader = '';
  let currentValue = '';
  let seenHeader = false;

  // Stores the header being accumulated (if it has a value) and resets the
  // accumulator so later continuation lines cannot attach to a stale header.
  const commit = (): void => {
    if (currentHeader && currentValue) {
      headers.set(currentHeader.toLowerCase(), currentValue.trim());
    }
    currentHeader = '';
    currentValue = '';
  };

  for (const line of headerContent.split(/\r?\n/)) {
    if (line === '') {
      // An empty line separates headers from the body; tolerate leading
      // blank lines that appear before any header has been seen.
      if (seenHeader) {
        break;
      }
      continue;
    }

    if (continuationLine.test(line)) {
      // Folded line: only meaningful when there is a header to continue.
      if (currentHeader) {
        currentValue += ' ' + line.trim();
      }
      continue;
    }

    commit();

    const match = headerLine.exec(line);
    if (match) {
      currentHeader = match[1].trim();
      currentValue = match[2];
      seenHeader = true;
    }
  }

  // Save the header that was still being accumulated when input ended.
  commit();

  return {
    from: headers.get('from') || '',
    subject: headers.get('subject') || '',
    date: headers.get('date') || new Date().toISOString(),
    to: headers.get('to'),
  };
}

/**
 * Normalises an email body part and, for HTML parts, reduces it to text.
 *
 * The body is normalised by converting CRLF to LF and undoing a small subset
 * of quoted-printable escapes: soft line breaks (`=` at end of line), `=3D`
 * (equals sign) and `=09` (tab). The escapes are decoded in a single pass so
 * that the output of one replacement is never re-interpreted as another
 * escape (e.g. `=3D09` decodes to the literal text `=09`, not a tab).
 *
 * NOTE(review): this function is not told the part's transfer encoding, so
 * the escapes above are undone for every body, as in the original code.
 *
 * @param content - Raw body text of the part.
 * @param contentType - The part's Content-Type value; compared
 *   case-insensitively against `text/html`.
 * @returns Plain text for HTML parts, otherwise the normalised content. If
 *   anything throws, the error is logged and the best available content is
 *   returned.
 */
export function decodeEmailBody(content: string, contentType: string): string {
  let decoded = content;
  try {
    decoded = content
      .replace(/\r\n/g, '\n')
      .replace(/=(\n|3D|09)/g, (_escape: string, token: string): string => {
        if (token === '\n') {
          return ''; // soft line break
        }
        return token === '3D' ? '=' : '\t';
      });

    // Media types are case-insensitive, so compare in lower case.
    if (contentType.toLowerCase().includes('text/html')) {
      return extractTextFromHtml(decoded);
    }

    return decoded;
  } catch (error) {
    console.error('Error decoding email body:', error);
    return decoded;
  }
}

/**
 * Reduces an HTML fragment to plain text.
 *
 * Steps, in order:
 *  1. drop `<script>` and `<style>` elements together with their contents;
 *  2. turn `<br>`, `<p>` and `</p>` into newlines;
 *  3. strip every remaining tag;
 *  4. decode character references in a single pass, so the result of one
 *     replacement is never decoded again (`&amp;lt;` yields `&lt;`, not `<`).
 *     Handles `&nbsp;`, `&amp;`, `&lt;`, `&gt;`, `&quot;`, `&apos;` and
 *     numeric references (`&#39;`, `&#x27;`). Unknown or out-of-range
 *     references are left untouched;
 *  5. collapse blank-line runs to a single empty line and trim the result.
 *
 * @param html - HTML markup to convert.
 * @returns The extracted text.
 */
function extractTextFromHtml(html: string): string {
  const namedEntities = new Map<string, string>([
    ['nbsp', ' '],
    ['amp', '&'],
    ['lt', '<'],
    ['gt', '>'],
    ['quot', '"'],
    ['apos', "'"],
  ]);

  const decodeEntity = (entity: string, body: string): string => {
    if (body.charAt(0) !== '#') {
      const named = namedEntities.get(body);
      return named === undefined ? entity : named;
    }

    const isHex = body.charAt(1) === 'x' || body.charAt(1) === 'X';
    const codePoint = isHex
      ? parseInt(body.slice(2), 16)
      : parseInt(body.slice(1), 10);

    // String.fromCodePoint throws on values beyond U+10FFFF; leave such
    // references (and NUL) as written.
    if (!(codePoint > 0 && codePoint <= 0x10ffff)) {
      return entity;
    }
    // Treat a numeric no-break space like &nbsp;.
    return codePoint === 0xa0 ? ' ' : String.fromCodePoint(codePoint);
  };

  const text = html
    // Remove scripts and style blocks, including their contents
    .replace(/<script[^>]*>[\s\S]*?<\/script>/gi, '')
    .replace(/<style[^>]*>[\s\S]*?<\/style>/gi, '')
    // Convert <br> and <p> to newlines
    .replace(/<br[^>]*>/gi, '\n')
    .replace(/<p[^>]*>/gi, '\n')
    .replace(/<\/p>/gi, '\n')
    // Remove all other HTML tags
    .replace(/<[^>]+>/g, '')
    // Decode character references in one pass
    .replace(/&(#x[0-9a-f]+|#\d+|[a-z]+);/gi, decodeEntity);

  // Clean up whitespace
  return text.replace(/\n\s*\n/g, '\n\n').trim();
}