NeahOpti/lib/infomaniak-mime-decoder.ts
2025-04-22 12:49:37 +02:00

221 lines
6.8 KiB
TypeScript

// Infomaniak-specific MIME decoder functions
/**
 * Decodes a quoted-printable (RFC 2045) encoded string and interprets the
 * resulting bytes in the given charset.
 *
 * Soft line breaks (`=` followed by CRLF, LF or CR) are removed and every
 * `=XX` hex escape is turned into the byte it encodes. Decoding happens in a
 * single pass so that the output of one escape is never re-read as the start
 * of another (e.g. `=3D20` must yield the literal text `=20`, not a space).
 *
 * @param text - Quoted-printable encoded input. Empty input yields `''`.
 * @param charset - Encoding label passed to `TextDecoder` (e.g. `utf-8`).
 * @returns The decoded text. If `TextDecoder` is unavailable, the charset is
 *   not supported, or the input already contains characters that do not fit
 *   in a single byte, the escape-decoded string is returned without charset
 *   conversion.
 */
export function decodeQuotedPrintable(text: string, charset: string): string {
  if (!text) return '';

  // One pass over the input: a soft line break disappears, a hex escape
  // becomes the corresponding byte value, anything else is left untouched.
  const decoded = text.replace(
    /=(?:\r\n|\n|\r|([0-9A-F]{2}))/gi,
    (_match, hex: string | undefined) =>
      hex === undefined ? '' : String.fromCharCode(parseInt(hex, 16))
  );

  if (typeof TextDecoder === 'undefined') {
    return decoded;
  }

  // Treat each character as one raw byte. A character above 0xFF cannot be a
  // raw byte (the input was already Unicode text), and squeezing it into a
  // Uint8Array would silently corrupt it, so skip the conversion instead.
  const bytes = new Uint8Array(decoded.length);
  for (let i = 0; i < decoded.length; i++) {
    const code = decoded.charCodeAt(i);
    if (code > 0xff) {
      return decoded;
    }
    bytes[i] = code;
  }

  try {
    return new TextDecoder(charset).decode(bytes);
  } catch (e) {
    // Typically a RangeError for an unknown charset label.
    console.warn('Charset conversion error:', e);
    return decoded;
  }
}
/**
 * Decodes a Base64 encoded string and interprets the resulting bytes in the
 * given charset.
 *
 * @param text - Base64 input; whitespace and line breaks are ignored.
 *   Empty input yields `''`.
 * @param charset - Encoding label passed to `TextDecoder` (e.g. `utf-8`).
 * @returns The decoded text. If the input is not valid Base64 the original
 *   `text` is returned unchanged. If the Base64 is valid but the charset
 *   cannot be applied (unknown label, or no `TextDecoder`), the raw decoded
 *   binary string is returned rather than the still-encoded input.
 */
export function decodeBase64(text: string, charset: string): string {
  if (!text) return '';

  let binary: string;
  try {
    // Base64 bodies are usually wrapped at 76 columns; strip all whitespace.
    binary = atob(text.replace(/\s+/g, ''));
  } catch (e) {
    console.warn('Base64 decoding error:', e);
    return text;
  }

  if (typeof TextDecoder === 'undefined') {
    return binary;
  }

  // Kept separate from the Base64 step so that a bad charset label does not
  // throw away a successful Base64 decode.
  try {
    const bytes = new Uint8Array(binary.length);
    for (let i = 0; i < binary.length; i++) {
      bytes[i] = binary.charCodeAt(i);
    }
    return new TextDecoder(charset).decode(bytes);
  } catch (e) {
    console.warn('Charset conversion error:', e);
    return binary;
  }
}
/**
 * Charset labels that are rewritten before being handed to `TextDecoder`.
 * `iso-8859-15` is intentionally absent: it differs from `windows-1252`
 * (for example byte 0xA4 is the euro sign) and must be decoded as itself.
 */
const CHARSET_ALIASES: ReadonlyMap<string, string> = new Map([
  ['iso-8859-1', 'windows-1252'],
  ['us-ascii', 'utf-8'],
]);

/**
 * Re-interprets a "binary string" (one character per byte) in the given
 * charset.
 *
 * @param text - Input whose characters each represent one raw byte.
 *   Empty input yields `''`.
 * @param charset - Charset label; matched case-insensitively.
 * @returns The converted text. The input is returned unchanged when
 *   `TextDecoder` is unavailable, when the charset is missing or unsupported,
 *   or when the input contains characters above 0xFF and therefore is not
 *   byte data that could be re-decoded.
 */
export function convertCharset(text: string, charset: string): string {
  if (!text) return '';
  if (typeof TextDecoder === 'undefined') return text;

  try {
    const requested = charset.trim().toLowerCase();
    const label = CHARSET_ALIASES.get(requested) ?? requested;

    const bytes = new Uint8Array(text.length);
    for (let i = 0; i < text.length; i++) {
      const code = text.charCodeAt(i);
      // Not a raw byte: the text is already Unicode, truncating it to 8 bits
      // would corrupt it.
      if (code > 0xff) {
        return text;
      }
      bytes[i] = code;
    }

    return new TextDecoder(label).decode(bytes);
  } catch (e) {
    console.warn('Charset conversion error:', e);
    return text;
  }
}
/** An ordered text substitution: every match of the pattern is replaced. */
type ReplacementRule = readonly [RegExp, string];

/**
 * Matches the opening tag `<name ...>`. The lookahead requires the tag name
 * to end right there, so `b` does not match `<body>` or `<br>`, `i` does not
 * match `<img>`, `p` does not match `<pre>`, and so on.
 */
function openTagPattern(name: string): RegExp {
  return new RegExp(`<${name}(?=[\\s/>])[^>]*>`, 'gi');
}

/** Matches the closing tag `</name>`. */
function closeTagPattern(name: string): RegExp {
  return new RegExp(`</${name}\\s*>`, 'gi');
}

/** Matches a whole element, from `<name ...>` through the next `</name>`. */
function elementPattern(name: string): RegExp {
  return new RegExp(`<${name}(?=[\\s/>])[^>]*>[\\s\\S]*?</${name}\\s*>`, 'gi');
}

/** Characters from right-to-left scripts and RTL directional marks. */
const RTL_CHAR_PATTERN = /[\u0591-\u07FF\u200F\u202B\u202E\uFB1D-\uFDFD\uFE70-\uFEFC]/;

/** First `<body ...>` opening tag, used to read an explicit `dir` attribute. */
const BODY_OPEN_TAG = /<body(?=[\s/>])[^>]*>/i;

/** Elements that carry no visible content and are dropped entirely. */
const NON_CONTENT_RULES: ReadonlyArray<ReplacementRule> = [
  [elementPattern('style'), ''],
  [elementPattern('script'), ''],
  [openTagPattern('meta'), ''],
  [openTagPattern('link'), ''],
  [openTagPattern('base'), ''],
  [elementPattern('title'), ''],
  [elementPattern('head'), ''],
];

/** Structural and inline tags mapped to plain-text / Markdown-like markers. */
const MARKUP_RULES: ReadonlyArray<ReplacementRule> = [
  // Document wrappers
  [openTagPattern('body'), ''],
  [closeTagPattern('body'), ''],
  [openTagPattern('html'), ''],
  [closeTagPattern('html'), ''],
  // Tables
  [openTagPattern('table'), '\n'],
  [closeTagPattern('table'), '\n'],
  [openTagPattern('tr'), '\n'],
  [closeTagPattern('tr'), '\n'],
  [openTagPattern('td'), ' '],
  [closeTagPattern('td'), ' '],
  [openTagPattern('th'), ' '],
  [closeTagPattern('th'), ' '],
  // Lists
  [openTagPattern('ul'), '\n'],
  [closeTagPattern('ul'), '\n'],
  [openTagPattern('ol'), '\n'],
  [closeTagPattern('ol'), '\n'],
  [openTagPattern('li'), '• '],
  [closeTagPattern('li'), '\n'],
  // Other block elements
  [openTagPattern('div'), '\n'],
  [closeTagPattern('div'), '\n'],
  [openTagPattern('p'), '\n'],
  [closeTagPattern('p'), '\n'],
  [openTagPattern('br'), '\n'],
  [openTagPattern('hr'), '\n'],
  // Inline elements
  [openTagPattern('span'), ''],
  [closeTagPattern('span'), ''],
  [openTagPattern('a'), ''],
  [closeTagPattern('a'), ''],
  [openTagPattern('strong'), '**'],
  [closeTagPattern('strong'), '**'],
  [openTagPattern('b'), '**'],
  [closeTagPattern('b'), '**'],
  [openTagPattern('em'), '*'],
  [closeTagPattern('em'), '*'],
  [openTagPattern('i'), '*'],
  [closeTagPattern('i'), '*'],
];

/**
 * Character references to decode. `&amp;` must stay last: decoding it first
 * would turn `&amp;lt;` into `&lt;` and then into `<` (double unescaping).
 */
const ENTITY_RULES: ReadonlyArray<ReplacementRule> = [
  [/&nbsp;/g, ' '],
  [/&lt;/g, '<'],
  [/&gt;/g, '>'],
  [/&quot;/g, '"'],
  [/&#39;/g, "'"],
  [/&amp;/g, '&'],
];

/** Applies each rule in order to the running text. */
function applyRules(input: string, rules: ReadonlyArray<ReplacementRule>): string {
  return rules.reduce((text, [pattern, replacement]) => text.replace(pattern, replacement), input);
}

/**
 * Reduces an HTML e-mail body to lightly marked-up text wrapped in a single
 * `<div dir="...">`.
 *
 * Steps, in order:
 * 1. Detect the text direction from the characters in the input.
 * 2. Repair leftover quoted-printable artifacts (`=3D"http..."` and `=XX`).
 * 3. Drop `<style>`, `<script>`, `<meta>`, `<link>`, `<base>`, `<title>` and
 *    `<head>`.
 * 4. Read an explicit `dir` from the `<body>` tag, if any; it takes
 *    precedence over the detected direction.
 * 5. Replace table, list, block and inline tags by newlines, spaces, bullets
 *    or `*` / `**` markers, decode a few character references, and collapse
 *    all whitespace runs to a single space.
 *
 * NOTE(review): this is not an HTML sanitizer. Tags that are not listed in
 * the rules pass through unchanged, and decoding `&lt;` / `&gt;` can produce
 * new markup. The result must be sanitized before being rendered as HTML
 * from an untrusted message.
 *
 * @param html - HTML source. Empty input yields `''`.
 * @returns The cleaned content wrapped in `<div dir="rtl|ltr">…</div>`.
 */
export function cleanHtml(html: string): string {
  if (!html) return '';

  const detectedDir = RTL_CHAR_PATTERN.test(html) ? 'rtl' : 'ltr';

  // `=3D` is the quoted-printable form of `=`, so the attribute must keep its
  // equals sign. A URL that cannot be percent-decoded is removed.
  let text = html.replace(/=3D"(http[^"]+)"/g, (_match, url: string) => {
    try {
      return `="${decodeURIComponent(url)}"`;
    } catch {
      return '';
    }
  });

  // NOTE(review): this also rewrites legitimate `=XX` sequences such as a
  // query string `?id=AB`; kept because callers rely on the artifact cleanup.
  text = text.replace(/=([0-9A-F]{2})/gi, (_match, hex: string) =>
    String.fromCharCode(parseInt(hex, 16))
  );

  text = applyRules(text, NON_CONTENT_RULES);

  // Honor an explicit direction on <body> before the tag itself is stripped.
  const bodyTag = text.match(BODY_OPEN_TAG)?.[0];
  const explicitDir = bodyTag?.match(/\sdir\s*=\s*["']?(rtl|ltr)\b/i)?.[1]?.toLowerCase();
  const dir = explicitDir ?? detectedDir;

  text = applyRules(text, MARKUP_RULES);
  text = applyRules(text, ENTITY_RULES);
  text = text.replace(/\s+/g, ' ').trim();

  return `<div dir="${dir}">${text}</div>`;
}
/**
 * Extracts the media type, charset and transfer encoding from a block of
 * MIME headers.
 *
 * The `Content-Type` value is read up to the end of its header, including
 * folded continuation lines (lines starting with a space or tab), and the
 * `charset` parameter is found wherever it appears among the parameters.
 * Header names are matched case-insensitively at the start of a line.
 *
 * @param headers - Raw header text, one header per line.
 * @returns `contentType`, `encoding` and `charset`, all lower-cased. Missing
 *   values default to `text/plain`, `7bit` and `utf-8` respectively.
 */
export function parseEmailHeaders(headers: string): { contentType: string; encoding: string; charset: string } {
  const result = {
    contentType: 'text/plain',
    encoding: '7bit',
    charset: 'utf-8'
  };

  // Capture the header value plus any folded continuation lines, but never
  // run into the next header.
  const contentTypeMatch = headers.match(
    /^Content-Type:[ \t]*([^\r\n]*(?:(?:\r\n|\r|\n)[ \t]+[^\r\n]*)*)/im
  );
  if (contentTypeMatch) {
    // Unfold so parameters on continuation lines are seen as one value.
    const value = (contentTypeMatch[1] ?? '').replace(/(?:\r\n|\r|\n)[ \t]+/g, ' ');

    const mediaType = (value.split(';')[0] ?? '').trim().toLowerCase();
    if (mediaType) {
      result.contentType = mediaType;
    }

    // charset may be quoted or bare and need not be the first parameter.
    const charsetMatch = value.match(/;\s*charset\s*=\s*(?:"([^"]*)"|([^\s;"]+))/i);
    const charset = (charsetMatch?.[1] ?? charsetMatch?.[2] ?? '').trim().toLowerCase();
    if (charset) {
      result.charset = charset;
    }
  }

  const encodingMatch = headers.match(/^Content-Transfer-Encoding:[ \t]*([^\s;]+)/im);
  if (encodingMatch) {
    result.encoding = (encodingMatch[1] ?? '').trim().toLowerCase() || result.encoding;
  }

  return result;
}
/**
 * Finds the `boundary` parameter in a block of MIME headers.
 *
 * The optionally-quoted form is tried first; if it does not match, a bare
 * `boundary=` value running to the next `;` or line break is used.
 *
 * @param headers - Raw header text.
 * @returns The boundary value with surrounding whitespace removed, or `null`
 *   when no boundary parameter is present.
 */
export function extractBoundary(headers: string): string | null {
  const optionallyQuoted = /boundary="?([^"\r\n;]+)"?/i;
  const bareValue = /boundary=([^\r\n;]+)/i;

  const found = headers.match(optionallyQuoted) ?? headers.match(bareValue);
  if (found === null) {
    return null;
  }
  return found[1].trim();
}
/**
 * Reads an attachment's file name from its MIME headers.
 *
 * A `filename=` parameter is preferred; a `name=` parameter is used when no
 * `filename=` is found. Surrounding double quotes are not part of the result.
 *
 * @param headers - Raw header text.
 * @returns The captured name exactly as written, or `'attachment'` when
 *   neither parameter is present.
 */
export function extractFilename(headers: string): string {
  const candidates = [
    /filename="?([^"\r\n;]+)"?/i,
    /name="?([^"\r\n;]+)"?/i,
  ];

  for (const pattern of candidates) {
    const hit = headers.match(pattern);
    if (hit) {
      return hit[1];
    }
  }

  return 'attachment';
}
/**
 * Returns the value of the first header called `headerName`.
 *
 * The name is matched literally (regex metacharacters in it are escaped),
 * case-insensitively, at the start of a line. Folded continuation lines
 * (lines starting with a space or tab) are joined onto the value. A header
 * that is present but empty yields `''` rather than the text of the next
 * header.
 *
 * @param headers - Raw header text, one header per line.
 * @param headerName - Header field name without the trailing colon.
 * @returns The trimmed header value, or `''` when the header is absent.
 */
export function extractHeader(headers: string, headerName: string): string {
  // The name comes from the caller; escape it so characters such as "." or
  // "+" are not interpreted as regex syntax.
  const escapedName = headerName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

  // [ \t]* (not \s*) keeps the match on the header's own line; continuation
  // lines are only accepted when they start with whitespace.
  const regex = new RegExp(
    `^${escapedName}:[ \\t]*([^\\r\\n]*(?:(?:\\r\\n|\\r|\\n)[ \\t]+[^\\r\\n]*)*)`,
    'im'
  );

  const match = headers.match(regex);
  if (!match) {
    return '';
  }

  // Unfold: drop each line break that is followed by whitespace.
  return (match[1] ?? '').replace(/(?:\r\n|\r|\n)(?=[ \t])/g, '').trim();
}