239 lines
7.2 KiB
TypeScript
239 lines
7.2 KiB
TypeScript
// Infomaniak-specific MIME decoder functions
|
|
|
|
/**
 * Decodes a quoted-printable encoded string and converts the resulting bytes
 * to text using the given charset.
 *
 * Soft line breaks (`=` followed by CRLF, LF or CR) are removed and every
 * `=XX` hex escape is turned into the byte it represents. Both are handled in
 * a single pass so that the output of one escape is never re-interpreted as
 * the start of another (e.g. `=3D41` decodes to `=41`, not `A`).
 *
 * @param text - Quoted-printable encoded input.
 * @param charset - Encoding label passed to `TextDecoder`.
 * @returns The decoded text. If `TextDecoder` is unavailable or rejects the
 *   charset, the escape-decoded string is returned without charset conversion.
 *   Empty input yields `''`.
 */
export function decodeQuotedPrintable(text: string, charset: string): string {
  if (!text) return '';

  // One pass: the capture group is only set for "=XX" escapes; a match
  // without it is a soft line break and is dropped.
  const decoded = text.replace(
    /=(?:\r\n|\n|\r|([0-9A-F]{2}))/gi,
    (_match: string, hex: string | undefined) =>
      hex === undefined ? '' : String.fromCharCode(parseInt(hex, 16))
  );

  try {
    if (typeof TextDecoder !== 'undefined') {
      // Each character of the decoded string stands for one byte.
      const bytes = new Uint8Array(Array.from(decoded).map(c => c.charCodeAt(0)));
      return new TextDecoder(charset).decode(bytes);
    }
    return decoded;
  } catch (e) {
    // Unknown charset label: keep the escape-decoded text.
    console.warn('Charset conversion error:', e);
    return decoded;
  }
}
|
|
|
|
/**
 * Decodes a base64 string and converts the resulting bytes to text using the
 * given charset.
 *
 * Base64 decoding and charset conversion fail independently: an unknown
 * charset label no longer throws away a successful base64 decode.
 *
 * @param text - Base64 input; whitespace and line breaks are ignored.
 * @param charset - Encoding label passed to `TextDecoder`.
 * @returns The decoded text. If the input is not valid base64 the original
 *   `text` is returned. If `TextDecoder` is unavailable or rejects the
 *   charset, the decoded bytes are returned as a binary string. Empty input
 *   yields `''`.
 */
export function decodeBase64(text: string, charset: string): string {
  if (!text) return '';

  let binary: string;
  try {
    // Strip whitespace and line breaks before decoding.
    binary = atob(text.replace(/\s+/g, ''));
  } catch (e) {
    console.warn('Base64 decoding error:', e);
    return text;
  }

  // Fallback when no TextDecoder exists in this runtime.
  if (typeof TextDecoder === 'undefined') {
    return binary;
  }

  try {
    const bytes = new Uint8Array(binary.length);
    for (let i = 0; i < binary.length; i++) {
      bytes[i] = binary.charCodeAt(i);
    }
    return new TextDecoder(charset).decode(bytes);
  } catch (e) {
    // Unknown charset label: keep the decoded bytes rather than the base64 text.
    console.warn('Charset conversion error:', e);
    return binary;
  }
}
|
|
|
|
/**
 * Re-interprets a binary string (one character per byte) as text in the given
 * charset.
 *
 * Two aliases are applied before handing the label to `TextDecoder`:
 * `iso-8859-1` is decoded as `windows-1252` and `us-ascii` as `utf-8`.
 * `iso-8859-15` is passed through unchanged: it is a distinct encoding that
 * `TextDecoder` supports, and decoding it as windows-1252 would turn bytes
 * such as 0xA4 (the euro sign) into the wrong character.
 *
 * @param text - Binary string whose character codes are the raw bytes.
 * @param charset - Encoding label (case-insensitive, surrounding whitespace ignored).
 * @returns The converted text, or `text` unchanged if `TextDecoder` is
 *   unavailable or the conversion throws. Empty input yields `''`.
 */
export function convertCharset(text: string, charset: string): string {
  if (!text) return '';

  try {
    if (typeof TextDecoder !== 'undefined') {
      const label = charset.trim().toLowerCase();

      let normalizedCharset = label;
      if (label === 'iso-8859-1') {
        normalizedCharset = 'windows-1252';
      } else if (label === 'us-ascii') {
        normalizedCharset = 'utf-8';
      }

      const bytes = new Uint8Array(Array.from(text).map(c => c.charCodeAt(0)));
      return new TextDecoder(normalizedCharset).decode(bytes);
    }
    return text;
  } catch (e) {
    console.warn('Charset conversion error:', e);
    return text;
  }
}
|
|
|
|
/** Named HTML entities decoded by cleanHtml, keyed by entity name (without `&` and `;`). */
const CLEAN_HTML_ENTITIES: ReadonlyMap<string, string> = new Map([
  ['nbsp', ' '],
  ['zwnj', ''],
  ['raquo', '\u00BB'],
  ['laquo', '\u00AB'],
  ['gt', '>'],
  ['lt', '<'],
  ['amp', '&'],
  ['quot', '"'],
  ['eacute', '\u00E9'],
  ['egrave', '\u00E8'],
  ['ecirc', '\u00EA'],
  ['euml', '\u00EB'],
  ['agrave', '\u00E0'],
  ['acirc', '\u00E2'],
  ['auml', '\u00E4'],
  ['icirc', '\u00EE'],
  ['iuml', '\u00EF'],
  ['ocirc', '\u00F4'],
  ['ouml', '\u00F6'],
  ['ucirc', '\u00FB'],
  ['uuml', '\u00FC'],
  ['ccedil', '\u00E7'],
  ['OElig', '\u0152'],
  ['oelig', '\u0153'],
  ['AElig', '\u00C6'],
  ['aelig', '\u00E6'],
]);

/**
 * Pattern for the opening tag `name`. The lookahead requires the tag name to
 * end right there, so `b` does not match `<button>` and `i` does not match `<img>`.
 */
function cleanHtmlOpenTag(name: string): RegExp {
  return new RegExp(`<${name}(?=[\\s/>])[^>]*>`, 'gi');
}

/** Pattern for the closing tag `name`. */
function cleanHtmlCloseTag(name: string): RegExp {
  return new RegExp(`</${name}>`, 'gi');
}

/** Pattern for a whole `name` element including its content (non-greedy). */
function cleanHtmlElement(name: string): RegExp {
  return new RegExp(`<${name}(?=[\\s/>])[^>]*>[\\s\\S]*?</${name}>`, 'gi');
}

/** Rules replacing the opening tag `name` with `open` and its closing tag with `close`. */
function cleanHtmlTagPair(name: string, open: string, close: string): Array<[RegExp, string]> {
  return [
    [cleanHtmlOpenTag(name), open],
    [cleanHtmlCloseTag(name), close],
  ];
}

/** Ordered tag rewrite rules applied by cleanHtml. */
const CLEAN_HTML_TAG_RULES: ReadonlyArray<readonly [RegExp, string]> = [
  // Document-level and non-content elements are dropped.
  [cleanHtmlElement('style'), ''],
  [cleanHtmlElement('script'), ''],
  [cleanHtmlOpenTag('meta'), ''],
  [cleanHtmlOpenTag('link'), ''],
  [cleanHtmlOpenTag('base'), ''],
  [cleanHtmlElement('title'), ''],
  [cleanHtmlElement('head'), ''],
  ...cleanHtmlTagPair('body', '', ''),
  ...cleanHtmlTagPair('html', '', ''),
  // Tables: rows become lines, cells are separated by spaces.
  ...cleanHtmlTagPair('table', '\n', '\n'),
  ...cleanHtmlTagPair('tr', '\n', '\n'),
  ...cleanHtmlTagPair('td', ' ', ' '),
  ...cleanHtmlTagPair('th', ' ', ' '),
  ...cleanHtmlTagPair('tbody', '', ''),
  ...cleanHtmlTagPair('thead', '', ''),
  ...cleanHtmlTagPair('tfoot', '', ''),
  // Other structural elements become line breaks.
  [cleanHtmlOpenTag('br'), '\n'],
  ...cleanHtmlTagPair('div', '\n', '\n'),
  ...cleanHtmlTagPair('p', '\n', '\n'),
  ...cleanHtmlTagPair('h[1-6]', '\n', '\n'),
  ...cleanHtmlTagPair('ul', '\n', '\n'),
  ...cleanHtmlTagPair('ol', '\n', '\n'),
  ...cleanHtmlTagPair('li', '\n\u2022 ', '\n'),
  ...cleanHtmlTagPair('blockquote', '\n> ', '\n'),
  // Inline elements: emphasis is rendered with asterisks.
  ...cleanHtmlTagPair('span', '', ''),
  ...cleanHtmlTagPair('strong', '**', '**'),
  ...cleanHtmlTagPair('b', '**', '**'),
  ...cleanHtmlTagPair('em', '*', '*'),
  ...cleanHtmlTagPair('i', '*', '*'),
];

/**
 * Reduces an HTML e-mail body to lightly formatted text.
 *
 * Steps, in order:
 * 1. Repairs quoted-printable encoded URL attributes (`=3D"http..."`),
 *    URI-decoding the URL; a URL that cannot be decoded is removed.
 * 2. Decodes any remaining `=XX` quoted-printable escapes.
 * 3. Drops style/script/head content and document-level tags, turns block and
 *    table tags into line breaks or spaces, and renders bold/italic as `**` / `*`.
 *    Tags not listed in the rules (for example `<a>` or `<img>`) are left as is.
 * 4. Decodes a fixed set of named HTML entities in a single pass, so
 *    `&amp;quot;` becomes `&quot;` rather than `"`.
 * 5. Blanks whitespace-only lines, collapses 3+ newlines to two and trims.
 *
 * @param html - HTML source, possibly with quoted-printable residue.
 * @returns The cleaned text, or `''` for empty input.
 */
export function cleanHtml(html: string): string {
  if (!html) return '';

  let text = html
    // "=3D" is a quoted-printable "=": keep the "=" so the attribute stays well-formed.
    .replace(/=3D"(http[^"]+)"/g, (_match: string, url: string) => {
      try {
        return `="${decodeURIComponent(url)}"`;
      } catch {
        return '';
      }
    })
    // NOTE(review): this also rewrites a literal "=" followed by two hex digits
    // (e.g. "?id=42" in a URL); only safe on input that is still QP-encoded.
    .replace(/=([0-9A-F]{2})/gi, (_match: string, hex: string) =>
      String.fromCharCode(parseInt(hex, 16))
    );

  for (const [pattern, replacement] of CLEAN_HTML_TAG_RULES) {
    text = text.replace(pattern, replacement);
  }

  return text
    // Literal no-break spaces and zero-width non-joiners get the same
    // treatment as their entity forms.
    .replace(/\u00A0/g, ' ')
    .replace(/\u200C/g, '')
    .replace(/&([A-Za-z]+);/g, (entity: string, name: string) => {
      const decoded = CLEAN_HTML_ENTITIES.get(name);
      return decoded === undefined ? entity : decoded;
    })
    // Clean up whitespace
    .replace(/^\s+$/gm, '')
    .replace(/\n{3,}/g, '\n\n')
    .trim();
}
|
|
|
|
/**
 * Extracts the media type, charset and transfer encoding from a block of MIME
 * headers.
 *
 * The Content-Type value is read up to the end of its header, including
 * folded continuation lines, so a Content-Type without parameters no longer
 * swallows the headers that follow it. The `charset` parameter is found
 * wherever it appears among the Content-Type parameters, quoted or not.
 *
 * @param headers - Raw header block.
 * @returns Lower-cased `contentType`, `encoding` and `charset`. Anything that
 *   is missing keeps its default: `text/plain`, `7bit` and `utf-8`.
 */
export function parseEmailHeaders(headers: string): { contentType: string; encoding: string; charset: string } {
  const result = {
    contentType: 'text/plain',
    encoding: '7bit',
    charset: 'utf-8'
  };

  if (!headers) return result;

  // Header value = rest of the line plus any continuation lines (lines that
  // start with a space or tab).
  const contentTypeMatch = /^Content-Type:[ \t]*([^\r\n]*(?:(?:\r\n|\r|\n)[ \t]+[^\r\n]*)*)/im.exec(headers);
  if (contentTypeMatch) {
    const value = contentTypeMatch[1] || '';

    // The media type is everything before the first parameter separator.
    const mediaType = (value.split(';')[0] || '').trim().toLowerCase();
    if (mediaType) {
      result.contentType = mediaType;
    }

    const charsetMatch = /;\s*charset\s*=\s*(?:"([^"]*)"|([^;"\s]+))/i.exec(value);
    const charset = charsetMatch
      ? (charsetMatch[1] || charsetMatch[2] || '').trim().toLowerCase()
      : '';
    if (charset) {
      result.charset = charset;
    }
  }

  // Only horizontal whitespace may precede the value, so an empty header
  // cannot pick up the next header line.
  const encodingMatch = /^Content-Transfer-Encoding:[ \t]*([^\s;]+)/im.exec(headers);
  if (encodingMatch && encodingMatch[1]) {
    result.encoding = encodingMatch[1].toLowerCase();
  }

  return result;
}
|
|
|
|
/**
 * Extracts the multipart `boundary` parameter from a header block.
 *
 * The value may be quoted or unquoted; surrounding whitespace is trimmed.
 * An empty value (for example `boundary=""`) is treated as absent instead of
 * returning the quote characters themselves.
 *
 * @param headers - Raw header block.
 * @returns The boundary string, or `null` if no non-empty boundary is present.
 */
export function extractBoundary(headers: string): string | null {
  if (!headers) return null;

  const boundaryMatch = /boundary="?([^"\r\n;]+)"?/i.exec(headers);
  const boundary = boundaryMatch ? (boundaryMatch[1] || '').trim() : '';

  return boundary ? boundary : null;
}
|
|
|
|
/**
 * Extracts an attachment file name from a header block.
 *
 * The `filename` parameter is preferred; the `name` parameter is used as a
 * fallback. A quoted value is read up to its closing quote, so a file name
 * containing `;` is not truncated. An unquoted value ends at the next `;` or
 * line break. Surrounding whitespace is trimmed.
 *
 * @param headers - Raw header block.
 * @returns The file name, or `'attachment'` if none is present.
 */
export function extractFilename(headers: string): string {
  if (!headers) return 'attachment';

  // First alternative: quoted value (closing quote optional to tolerate
  // truncated headers). Second alternative: unquoted value.
  const patterns = [
    /filename=[ \t]*(?:"([^"\r\n]+)"?|([^"\r\n;]+))/i,
    /name=[ \t]*(?:"([^"\r\n]+)"?|([^"\r\n;]+))/i,
  ];

  for (const pattern of patterns) {
    const match = pattern.exec(headers);
    const value = match ? (match[1] || match[2] || '').trim() : '';
    if (value) return value;
  }

  return 'attachment';
}
|
|
|
|
/**
 * Returns the value of the first header named `headerName` in a header block.
 *
 * The name is matched case-insensitively at the start of a line and is
 * treated literally (regular-expression metacharacters in it are escaped).
 * Folded continuation lines (lines starting with a space or tab) are joined
 * to the value. An empty header yields `''` rather than the next header line.
 *
 * @param headers - Raw header block.
 * @param headerName - Header field name without the trailing colon.
 * @returns The trimmed, unfolded header value, or `''` if the header is absent.
 */
export function extractHeader(headers: string, headerName: string): string {
  if (!headers || !headerName) return '';

  // Header names may legally contain characters such as "+" or "."; escape
  // them so they are not interpreted as regex operators.
  const escapedName = headerName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const regex = new RegExp(
    `^${escapedName}:[ \\t]*([^\\r\\n]*(?:(?:\\r\\n|\\r|\\n)[ \\t]+[^\\r\\n]*)*)`,
    'im'
  );

  const match = regex.exec(headers);
  if (!match) return '';

  // Unfold: drop each line break that is followed by whitespace.
  return (match[1] || '').replace(/(?:\r\n|\r|\n)(?=[ \t])/g, '').trim();
}
|