Neah/lib/utils/dom-sanitizer.ts
2025-05-01 09:54:17 +02:00

83 lines
2.9 KiB
TypeScript

/**
* DOM Sanitizer
*
* Centralized DOMPurify configuration for consistent HTML sanitization
* throughout the application. This ensures all sanitized content follows
* the same rules for security and presentation.
*/
import DOMPurify from 'isomorphic-dompurify';
// Reset any existing hooks to start with a clean slate
DOMPurify.removeAllHooks();

// Configure DOMPurify with settings appropriate for email content.
// This runs once at module load as an import side effect.
DOMPurify.setConfig({
  // Allow these attributes on all elements (added on top of DOMPurify's defaults)
  ADD_ATTR: [
    'dir', // For text direction
    'lang', // For language specification
    'style', // For inline styles (NOTE(review): nothing in this config filters the CSS itself)
    'class', 'id', // For CSS targeting
    'title', // For tooltips
    'target', 'rel', // For links
    'colspan', 'rowspan', // For tables
    'width', 'height', 'align', 'valign', // Basic layout
    'alt', 'src', // For images
    'href' // For links
    // data-* attributes are enabled by ALLOW_DATA_ATTR below; ADD_ATTR takes
    // literal attribute names only, so a 'data-*' entry here matched nothing useful.
  ],
  // Allow these HTML tags (added on top of DOMPurify's defaults)
  ADD_TAGS: [
    'html', 'head', 'body', 'style', 'link', 'meta', 'title',
    'table', 'caption', 'col', 'colgroup', 'thead', 'tbody', 'tfoot', 'tr', 'td', 'th',
    'div', 'span', 'img', 'br', 'hr', 'section', 'article', 'header', 'footer',
    'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'blockquote', 'pre', 'code',
    'ul', 'ol', 'li', 'dl', 'dt', 'dd', 'a', 'b', 'i', 'u', 'em',
    'strong', 'del', 'ins', 'mark', 'small', 'sub', 'sup', 'q', 'abbr'
  ],
  // Explicitly forbid these dangerous tags
  FORBID_TAGS: [
    'script', 'iframe', 'object', 'embed', 'form',
    'input', 'button', 'select', 'textarea'
  ],
  // Explicitly forbid these dangerous attributes
  FORBID_ATTR: [
    'onerror', 'onload', 'onclick', 'onmouseover', 'onmouseout',
    'onkeydown', 'onkeypress', 'onkeyup', 'onchange'
  ],
  // Other configuration options
  KEEP_CONTENT: true, // Keep content of removed tags
  WHOLE_DOCUMENT: false, // Don't require a full HTML document
  ALLOW_DATA_ATTR: true, // Allow data-* attributes
  // Keep unknown URI schemes blocked: email HTML is untrusted, and arbitrary
  // custom protocol handlers in href/src are an attack surface. The cid: scheme
  // used by inline email images is already accepted by DOMPurify's default URI
  // allow-list (see the DOMPurify README), so it does not need this switch.
  ALLOW_UNKNOWN_PROTOCOLS: false,
  FORCE_BODY: false // Don't force content to be wrapped in <body>
});
/**
 * Escapes the HTML-significant characters (&, <, >, ", ') so that a string
 * is rendered as inert text rather than parsed as markup.
 */
function escapeHtml(text: string): string {
  return text
    // '&' must be replaced first so the entities produced below are not re-escaped.
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}

/**
 * Sanitizes an HTML string with the module's DOMPurify configuration and then
 * applies two email-specific string fix-ups to the sanitized output.
 *
 * @param html - Untrusted HTML (e.g. an email body).
 * @returns The sanitized HTML; an empty string for falsy input. If the
 *   sanitizer throws, the input is returned HTML-escaped (as plain text)
 *   instead of being passed through partially filtered.
 */
export function sanitizeHtml(html: string): string {
  if (!html) return '';
  try {
    // Use DOMPurify with our configured settings
    const clean = DOMPurify.sanitize(html);
    // Fix common email rendering issues
    return clean
      // Normalize whitespace in Outlook "<!--[if gte mso" conditional-comment openers
      .replace(/<!--\[if\s+gte\s+mso/g, '<!--[if gte mso')
      // Prefix "https://" to src/background values that do not start with
      // http, https, data or cid (case-insensitive)
      .replace(/(src|background)="(?!http|data|https|cid)/gi, '$1="https://');
  } catch (e: unknown) {
    console.error('Error sanitizing HTML:', e);
    // Fail closed. Regex-based stripping of <script>/on*= is easy to bypass
    // (unquoted or single-quoted handlers, <iframe>, <svg onload>, ...), so
    // never return markup that the real sanitizer did not approve.
    return escapeHtml(html);
  }
}