# Source code for feincms3.cleanse

"""
HTML cleansing is by no means only useful for user generated content.
Managers also copy-paste content from word processing programs, the rich
text editor's output isn't always (almost never) in the shape we want it
to be, and a strict allowlist based HTML sanitizer is the best answer
I have.
"""

import warnings

from ckeditor.fields import RichTextField
from html_sanitizer.django import get_sanitizer


# Public API of this module: the model field and the standalone helper.
__all__ = ("CleansedRichTextField", "cleanse_html")

# Emitted at module level, i.e. as a side effect of importing this module.
# NOTE(review): stacklevel=2 is presumably meant to attribute the warning to
# the code importing feincms3.cleanse rather than to this file -- confirm.
warnings.warn(
    "The feincms3.cleanse module has been deprecated and will be removed"
    " in the close future. Sorry for the inconvenience.",
    DeprecationWarning,
    stacklevel=2,
)


def cleanse_html(html):
    """
    Pass ugly HTML, get nice HTML back.

    The markup is run through the sanitizer object returned by
    ``html_sanitizer.django.get_sanitizer()`` (called without arguments),
    and whatever its ``sanitize`` method returns is handed back unchanged.

    :param html: The HTML to sanitize.
    :return: The result of ``sanitize(html)``.
    """
    sanitizer = get_sanitizer()
    return sanitizer.sanitize(html)
class CleansedRichTextField(RichTextField):
    """
    A `django-ckeditor
    <https://github.com/django-ckeditor/django-ckeditor>`_ ``RichTextField``
    subclass whose ``clean`` result is passed through a cleansing function.

    The recommended configuration is as follows::

        CKEDITOR_CONFIGS = {
            "default": {
                "toolbar": "Custom",
                "format_tags": "h1;h2;h3;p;pre",
                "toolbar_Custom": [[
                    "Format", "RemoveFormat", "-",
                    "Bold", "Italic", "Subscript", "Superscript", "-",
                    "NumberedList", "BulletedList", "-",
                    "Anchor", "Link", "Unlink", "-",
                    "HorizontalRule", "SpecialChar", "-",
                    "Source",
                ]],
            },
        }

        # Settings for feincms3.plugins.richtext.RichText
        CKEDITOR_CONFIGS["richtext-plugin"] = CKEDITOR_CONFIGS["default"]

    The corresponding ``HTML_SANITIZERS`` configuration for `html-sanitizer
    <https://pypi.org/project/html-sanitizer>`_ would look as follows::

        HTML_SANITIZERS = {
            "default": {
                "tags": {
                    "a", "h1", "h2", "h3", "strong", "em", "p", "ul", "ol",
                    "li", "br", "sub", "sup", "hr",
                },
                "attributes": {
                    "a": ("href", "name", "target", "title", "id", "rel"),
                },
                "empty": {"hr", "a", "br"},
                "separate": {"a", "p", "li"},
                # Additional default settings not listed here.
            },
        }

    At the time of writing those are the defaults of html-sanitizer, so you
    don't have to do anything.

    If you want or require a different cleansing function, simply override
    the default with ``CleansedRichTextField(cleanse=your_function)``. The
    cleansing function receives the HTML as its first and only argument and
    returns the cleansed HTML.
    """

    def __init__(self, *args, **kwargs):
        """
        Remove the optional ``cleanse`` keyword argument and forward
        everything else to the parent field.

        ``cleanse`` defaults to :func:`cleanse_html` and is stored on the
        instance as ``self.cleanse``.
        """
        cleanse_function = kwargs.pop("cleanse", cleanse_html)
        # Stored before delegating to the parent, which never receives
        # the ``cleanse`` keyword.
        self.cleanse = cleanse_function
        super().__init__(*args, **kwargs)

    def clean(self, value, instance):
        """
        Run the parent field's ``clean`` and return its result passed
        through ``self.cleanse``.
        """
        cleaned_value = super().clean(value, instance)
        return self.cleanse(cleaned_value)