// Infomaniak-specific MIME decoder functions
/**
 * Decodes a quoted-printable string and interprets the resulting bytes in the
 * given charset.
 *
 * Soft line breaks (`=` followed by CRLF, LF or CR) are removed, then every
 * `=XX` hex escape is turned into the corresponding byte in a single pass.
 * A single pass matters: decoding `=3D` to `=` first and then running a second
 * `=XX` pass would decode `=3D41` to `A` instead of the correct `=41`.
 *
 * @param text - Quoted-printable encoded input.
 * @param charset - Encoding label handed to `TextDecoder`.
 * @returns The decoded text. If `TextDecoder` is unavailable, or it rejects
 *   the charset, the escape-decoded string is returned without charset
 *   conversion (a warning is logged in the latter case). Empty input yields ''.
 */
export function decodeQuotedPrintable(text: string, charset: string): string {
  if (!text) return '';

  const decoded = text
    // Soft line breaks carry no content.
    .replace(/=(?:\r\n|\n|\r)/g, '')
    // One pass over all hex escapes so decoded output is never re-decoded.
    .replace(/=([0-9A-F]{2})/gi, (_match, hex: string) =>
      String.fromCharCode(parseInt(hex, 16)),
    );

  try {
    if (typeof TextDecoder !== 'undefined') {
      // Each character is treated as one byte (its low 8 bits are kept).
      const bytes = new Uint8Array(Array.from(decoded).map(c => c.charCodeAt(0)));
      return new TextDecoder(charset).decode(bytes);
    }
    return decoded;
  } catch (e) {
    console.warn('Charset conversion error:', e);
    return decoded;
  }
}
|
|
|
|
/**
 * Decodes a base64 payload and interprets the resulting bytes in the given
 * charset.
 *
 * @param text - Base64 input; any whitespace (including line breaks) is ignored.
 * @param charset - Encoding label handed to `TextDecoder`.
 * @returns The decoded text, or the raw binary string when `TextDecoder` is
 *   not available. If decoding throws (invalid base64, unknown charset) a
 *   warning is logged and the original `text` is returned unchanged.
 */
export function decodeBase64(text: string, charset: string): string {
  if (!text) {
    return '';
  }

  try {
    // Strip whitespace before decoding.
    const compact = text.replace(/\s+/g, '');
    const raw = atob(compact);

    // Without a decoder the best we can do is hand back the binary string.
    if (typeof TextDecoder === 'undefined') {
      return raw;
    }

    // atob yields one character per byte, so each char code is the byte value.
    const octets = Uint8Array.from(raw, ch => ch.charCodeAt(0));
    return new TextDecoder(charset).decode(octets);
  } catch (e) {
    console.warn('Base64 decoding error:', e);
    return text;
  }
}
|
|
|
|
/**
 * Re-interprets a "binary string" (one character per byte) in the given
 * charset.
 *
 * Alias handling before the label is passed to `TextDecoder`:
 * - `iso-8859-1` is decoded as `windows-1252`.
 * - `us-ascii` is decoded as `utf-8`.
 * - every other label, including `iso-8859-15`, is passed through as-is.
 *   ISO-8859-15 is not the same code page as windows-1252 (for example byte
 *   0xA4 is the euro sign in ISO-8859-15), so it must not be aliased to it.
 *
 * @param text - Input whose characters are each treated as a single byte
 *   (the low 8 bits of the char code are kept).
 * @param charset - Charset label; surrounding whitespace and case are ignored.
 * @returns The converted text. If `TextDecoder` is unavailable or rejects the
 *   label, `text` is returned unchanged (a warning is logged on errors).
 *   Empty input yields ''.
 */
export function convertCharset(text: string, charset: string): string {
  if (!text) return '';

  try {
    if (typeof TextDecoder !== 'undefined') {
      const label = charset.trim().toLowerCase();

      let normalizedCharset = label;
      if (label === 'iso-8859-1') {
        normalizedCharset = 'windows-1252';
      } else if (label === 'us-ascii') {
        normalizedCharset = 'utf-8';
      }

      const bytes = new Uint8Array(Array.from(text).map(c => c.charCodeAt(0)));
      return new TextDecoder(normalizedCharset).decode(bytes);
    }
    return text;
  } catch (e) {
    console.warn('Charset conversion error:', e);
    return text;
  }
}
|
|
|
|
/**
 * Pattern for an opening (or self-closing) tag with exactly this name.
 * The lookahead requires whitespace, "/" or ">" right after the name, so that
 * e.g. "b" does not match <body> or <blockquote> and "i" does not match <img>.
 */
function openTagPattern(tagName: string): RegExp {
  return new RegExp(`<${tagName}(?=[\\s/>])[^>]*>`, 'gi');
}

/** Pattern for the closing tag `</name>`. */
function closeTagPattern(tagName: string): RegExp {
  return new RegExp(`</${tagName}>`, 'gi');
}

/** Pattern for a whole element: opening tag, content (non-greedy), closing tag. */
function wholeElementPattern(tagName: string): RegExp {
  return new RegExp(`<${tagName}(?=[\\s/>])[^>]*>[\\s\\S]*?</${tagName}>`, 'gi');
}

/**
 * Simplifies an HTML email body into lightly marked-up text wrapped in a
 * `<div dir="...">`.
 *
 * Steps, in order:
 * 1. Detect RTL characters to choose the wrapper's `dir`.
 * 2. Repair `=3D"http..."` attribute values left over from quoted-printable.
 * 3. Decode any remaining `=XX` quoted-printable escapes.
 * 4. Drop style/script/title/head elements and meta/link/base/html tags;
 *    reduce `<body ...>` to `<body dir="...">`.
 * 5. Replace table, list, block and inline tags with plain-text markers.
 * 6. Collapse all whitespace runs to a single space and trim.
 *
 * Tags not listed here are left in the output untouched, and HTML entities
 * are not decoded (the result is returned as markup).
 *
 * NOTE(review): this is regex-based clean-up, not a sanitizer. Unlisted tags
 * and event-handler attributes pass through, so run the result through a real
 * HTML sanitizer before inserting it into a document.
 * NOTE(review): step 3 also rewrites legitimate `=XX` sequences such as
 * `width=50`; callers should pass content that still contains QP artifacts.
 *
 * @param html - HTML fragment or document.
 * @returns The cleaned content wrapped in a `<div>` carrying the detected
 *   direction, or '' for empty input.
 */
export function cleanHtml(html: string): string {
  if (!html) return '';

  // Detect text direction from the content.
  const hasRtlChars = /[\u0591-\u07FF\u200F\u202B\u202E\uFB1D-\uFDFD\uFE70-\uFEFC]/.test(html);
  const defaultDir = hasRtlChars ? 'rtl' : 'ltr';

  // Repair attribute values whose "=" is still QP-encoded as "=3D". The "="
  // is kept so the attribute stays well-formed; values that cannot be
  // URI-decoded are removed.
  let cleaned = html.replace(/=3D"(http[^"]+)"/g, (_match, url: string) => {
    try {
      return `="${decodeURIComponent(url)}"`;
    } catch {
      return '';
    }
  });

  // Decode any remaining quoted-printable escapes.
  cleaned = cleaned.replace(/=([0-9A-F]{2})/gi, (_match, hex: string) =>
    String.fromCharCode(parseInt(hex, 16)),
  );

  // Remove non-content elements and document-level tags.
  for (const tagName of ['style', 'script']) {
    cleaned = cleaned.replace(wholeElementPattern(tagName), '');
  }
  for (const tagName of ['meta', 'link', 'base']) {
    cleaned = cleaned.replace(openTagPattern(tagName), '');
  }
  for (const tagName of ['title', 'head']) {
    cleaned = cleaned.replace(wholeElementPattern(tagName), '');
  }

  cleaned = cleaned
    // Keep only the direction attribute of <body>.
    .replace(openTagPattern('body'), bodyTag => {
      const dir = bodyTag.match(/dir=["'](rtl|ltr)["']/i)?.[1] || defaultDir;
      return `<body dir="${dir}">`;
    })
    .replace(closeTagPattern('body'), '')
    .replace(openTagPattern('html'), '')
    .replace(closeTagPattern('html'), '');

  // [tag name, replacement for the opening tag, replacement for the closing tag]
  const textMarkers: ReadonlyArray<readonly [string, string, string]> = [
    // Tables
    ['table', '\n', '\n'],
    ['tr', '\n', '\n'],
    ['td', ' ', ' '],
    ['th', ' ', ' '],
    // Lists
    ['ul', '\n', '\n'],
    ['ol', '\n', '\n'],
    ['li', '• ', '\n'],
    // Other block elements
    ['div', '\n', '\n'],
    ['p', '\n', '\n'],
    // Inline elements
    ['span', '', ''],
    ['a', '', ''],
    ['strong', '**', '**'],
    ['b', '**', '**'],
    ['em', '*', '*'],
    ['i', '*', '*'],
  ];
  for (const [tagName, openText, closeText] of textMarkers) {
    cleaned = cleaned
      .replace(openTagPattern(tagName), openText)
      .replace(closeTagPattern(tagName), closeText);
  }

  // Void elements that act as line breaks.
  for (const tagName of ['br', 'hr']) {
    cleaned = cleaned.replace(openTagPattern(tagName), '\n');
  }

  // Collapse whitespace (including the line breaks inserted above).
  cleaned = cleaned.replace(/\s+/g, ' ').trim();

  // Wrap in a div with the detected direction.
  return `<div dir="${defaultDir}">${cleaned}</div>`;
}
|
|
|
|
/**
 * Extracts the media type, charset and transfer encoding from a block of MIME
 * headers.
 *
 * The Content-Type header value is taken up to the end of its line plus any
 * folded continuation lines (lines starting with a space or tab). The media
 * type is the part before the first `;`, and `charset` is looked up among all
 * parameters of that header, quoted or unquoted, in any position.
 *
 * @param headers - Raw header text.
 * @returns An object with lower-cased `contentType`, `encoding` and `charset`.
 *   Values that are missing default to `text/plain`, `7bit` and `utf-8`.
 */
export function parseEmailHeaders(headers: string): { contentType: string; encoding: string; charset: string } {
  const result = {
    contentType: 'text/plain',
    encoding: '7bit',
    charset: 'utf-8',
  };

  // Capture the header value without running into the following header line.
  const contentTypeMatch = headers.match(/Content-Type:[ \t]*([^\r\n]*(?:\r?\n[ \t]+[^\r\n]*)*)/i);
  if (contentTypeMatch) {
    const headerValue = contentTypeMatch[1] ?? '';

    const mediaType = (headerValue.split(';')[0] ?? '').trim().toLowerCase();
    if (mediaType) {
      result.contentType = mediaType;
    }

    // charset may follow other parameters (e.g. format=flowed, boundary=...).
    const charsetMatch = headerValue.match(/;\s*charset\s*=\s*(?:"([^"]*)"|([^;"\s]+))/i);
    const charset = (charsetMatch?.[1] ?? charsetMatch?.[2] ?? '').trim().toLowerCase();
    if (charset) {
      result.charset = charset;
    }
  }

  // Extract content transfer encoding.
  const encodingMatch = headers.match(/Content-Transfer-Encoding:\s*([^\s;\r\n]+)/i);
  if (encodingMatch) {
    result.encoding = encodingMatch[1].trim().toLowerCase();
  }

  return result;
}
|
|
|
|
/**
 * Finds the `boundary` parameter in a block of MIME headers.
 *
 * @param headers - Raw header text.
 * @returns The boundary value with surrounding whitespace removed, or `null`
 *   when no `boundary=` parameter is present.
 */
export function extractBoundary(headers: string): string | null {
  // Preferred form: optional surrounding quotes, value free of quotes and ";".
  const quotedOrBare = /boundary="?([^"\r\n;]+)"?/i;
  // Fallback: anything up to ";" or the end of the line.
  const bareOnly = /boundary=([^\r\n;]+)/i;

  const found = quotedOrBare.exec(headers) ?? bareOnly.exec(headers);
  if (found === null) {
    return null;
  }
  return found[1].trim();
}
|
|
|
|
/**
 * Extracts an attachment file name from MIME headers.
 *
 * The `filename=` parameter is tried first, then `name=`. For each, a fully
 * quoted value is preferred so that characters such as `;` inside the quotes
 * are kept; otherwise the value runs up to the next quote, `;` or line end.
 * Surrounding whitespace is removed from the result.
 *
 * @param headers - Raw header text.
 * @returns The file name, or `'attachment'` when none is found.
 */
export function extractFilename(headers: string): string {
  const parameterPatterns: ReadonlyArray<RegExp> = [
    /filename=(?:"([^"\r\n]+)"|"?([^"\r\n;]+)"?)/i,
    /name=(?:"([^"\r\n]+)"|"?([^"\r\n;]+)"?)/i,
  ];

  for (const pattern of parameterPatterns) {
    const found = pattern.exec(headers);
    // Group 1 is the quoted form, group 2 the unquoted/unterminated form.
    const value = (found?.[1] ?? found?.[2] ?? '').trim();
    if (value) {
      return value;
    }
  }

  return 'attachment';
}
|
|
|
|
/**
 * Returns the value of the first header line with the given name.
 *
 * The name is matched case-insensitively at the start of a line and is treated
 * literally (regex metacharacters in it are escaped). Folded continuation
 * lines (lines starting with a space or tab) are joined to the value with a
 * single space. A header with an empty value yields '' rather than the text
 * of the following line.
 *
 * @param headers - Raw header text.
 * @param headerName - Header field name without the trailing colon.
 * @returns The trimmed header value, or '' when the header is absent.
 */
export function extractHeader(headers: string, headerName: string): string {
  // Escape the name so characters like "." or "(" cannot alter the pattern.
  const escapedName = headerName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  // [ \t]* (not \s*) keeps the match on the header's own line; the optional
  // group picks up folded continuation lines.
  const regex = new RegExp(`^${escapedName}:[ \\t]*(.*(?:\\r?\\n[ \\t]+.*)*)`, 'im');
  const match = headers.match(regex);
  if (!match) {
    return '';
  }
  return match[1].replace(/\r?\n[ \t]+/g, ' ').trim();
}
|