diff --git a/lib/infomaniak-mime-decoder.ts b/lib/infomaniak-mime-decoder.ts index 6b97f47c..bcd21264 100644 --- a/lib/infomaniak-mime-decoder.ts +++ b/lib/infomaniak-mime-decoder.ts @@ -95,6 +95,7 @@ export function cleanHtml(html: string): string { // Clean up any remaining HTML issues html = html + // Remove style and script tags .replace(/]*>[\s\S]*?<\/style>/gi, '') .replace(/]*>[\s\S]*?<\/script>/gi, '') .replace(/]*>/gi, '') @@ -106,11 +107,49 @@ export function cleanHtml(html: string): string { .replace(/<\/body>/gi, '') .replace(/]*>/gi, '') .replace(/<\/html>/gi, '') + // Handle tables + .replace(/]*>/gi, '\n') + .replace(/<\/table>/gi, '\n') + .replace(/]*>/gi, '\n') + .replace(/<\/tr>/gi, '\n') + .replace(/]*>/gi, ' ') + .replace(/<\/td>/gi, ' ') + .replace(/]*>/gi, ' ') + .replace(/<\/th>/gi, ' ') + .replace(/]*>/gi, '') + .replace(/<\/tbody>/gi, '') + .replace(/]*>/gi, '') + .replace(/<\/thead>/gi, '') + .replace(/]*>/gi, '') + .replace(/<\/tfoot>/gi, '') + // Handle other structural elements .replace(//gi, '\n') .replace(/]*>/gi, '\n') - .replace(/<\/div>/gi, '') + .replace(/<\/div>/gi, '\n') .replace(/]*>/gi, '\n') - .replace(/<\/p>/gi, '') + .replace(/<\/p>/gi, '\n') + .replace(/]*>/gi, '\n') + .replace(/<\/h[1-6]>/gi, '\n') + .replace(/]*>/gi, '\n') + .replace(/<\/ul>/gi, '\n') + .replace(/]*>/gi, '\n') + .replace(/<\/ol>/gi, '\n') + .replace(/]*>/gi, '\n• ') + .replace(/<\/li>/gi, '\n') + .replace(/]*>/gi, '\n> ') + .replace(/<\/blockquote>/gi, '\n') + // Handle inline elements + .replace(/]*>/gi, '') + .replace(/<\/span>/gi, '') + .replace(/]*>/gi, '**') + .replace(/<\/strong>/gi, '**') + .replace(/]*>/gi, '**') + .replace(/<\/b>/gi, '**') + .replace(/]*>/gi, '*') + .replace(/<\/em>/gi, '*') + .replace(/]*>/gi, '*') + .replace(/<\/i>/gi, '*') + // Handle HTML entities .replace(/ /g, ' ') .replace(/‌/g, '') .replace(/»/g, '»') @@ -119,6 +158,7 @@ export function cleanHtml(html: string): string { .replace(/</g, '<') .replace(/&/g, '&') .replace(/"/g, '"') + // Clean up whitespace .replace(/^\s+$/gm, '') .replace(/\n{3,}/g, '\n\n') .trim();