Coverage for src/mesh/views/components/ckeditor_config.py: 90%
78 statements
« prev ^ index » next coverage.py v7.8.2, created at 2025-06-03 13:52 +0000
« prev ^ index » next coverage.py v7.8.2, created at 2025-06-03 13:52 +0000
1"""
2CKEditor and corresponding sanitizer configs using the `CKEditorConfig` class.
4We define a very precise list of allowed tags with corresponding attributes and styles
5for the various HTML inputs.
7This specification is used for both:
8 - Front-end: the config of CKEditors (using `allowedContent` config parameter).
9 The editor will discard everything outside of the specified parameters.
10 - Back-end: the server sanitization of the inputs (using nh3/ammonia library).
11 It will discard every unallowed tag with their content, along with non-allowed
12 attributes.
13"""
15import re
16from copy import deepcopy
17from dataclasses import dataclass, field
18from typing import Any
20import nh3
21from django.utils.translation import gettext_lazy as _
# Inline (phrasing-level) elements that may ever be accepted in an HTML input.
ALLOWED_INLINE_TAGS = {
    "a", "abbr", "b", "bdo", "br", "cite", "code",
    "del", "dfn", "em", "i", "ins", "kbd", "mark",
    "meter", "q", "ruby", "s", "samp", "small", "span",
    "strong", "sub", "sup", "time", "u", "var", "wbr",
}

# Media / embedded-content elements that may ever be accepted.
# NOTE(review): `object` and `svg` are permissive for user-supplied HTML;
# confirm they are intentionally part of the upper bound.
ALLOWED_MEDIA_TAGS = {
    "audio", "img", "figcaption", "figure", "object",
    "picture", "svg", "track", "video",
}

# Block-level (flow, sectioning, list and table) elements that may ever be accepted.
ALLOWED_BLOCK_TAGS = {
    # Sectioning and generic containers
    "address", "aside", "blockquote", "div", "footer", "form",
    "header", "hgroup", "hr", "p", "pre", "section",
    # Headings
    "h1", "h2", "h3", "h4", "h5", "h6",
    # Lists
    "dd", "dl", "dt", "li", "ol", "ul",
    # Tables
    "caption", "col", "colgroup", "table", "tbody", "td",
    "tfoot", "th", "thead", "tr",
}

# Upper bound of every tag the sanitizer may let through: the tag set requested
# by a caller is intersected with this one before sanitizing.
BASE_ALLOWED_TAGS = ALLOWED_INLINE_TAGS | ALLOWED_BLOCK_TAGS | ALLOWED_MEDIA_TAGS
# Tags enabled by default for an editor.
DEFAULT_ALLOWED_TAGS = {
    # inline
    "b", "i", "sub", "sup", "span", "u", "a", "br",
    # block
    "p", "hr", "ul", "li", "ol", "div",
}

# Default allowed attributes per tag; the "*" entry applies to every tag.
DEFAULT_ALLOWED_ATTRIBUTES = {
    "*": {"id", "class", "style"},
    "a": {"href"},
}

# Default allowed values per attribute per tag: the `style` attribute is
# restricted to the text-alignment declarations listed here.
DEFAULT_ALLOWED_ATTRIBUTES_VALUES = {
    "*": {
        "style": {
            "text-align: left",
            "text-align: right",
            "text-align: center",
            "text-align: justify",
            "text-align: start",
            "text-align: end",
        },
    },
}

# Default allowed CSS properties per tag (mirrors the `style` values above).
DEFAULT_ALLOWED_STYLES = {"*": {"text-align"}}

# Default URL schemes accepted in URL-carrying attributes.
DEFAULT_URL_SCHEMES = {"http", "https", "mailto", "tel"}

# Regex fragment matching one "space-like" unit; used to trim sanitized values.
# NOTE(review): the first alternative is already covered by `\s`. If it was
# meant to be the `&nbsp;` entity, confirm against the repository source.
space_char = r"( |\s)"
def sanitize_html_input(
    value: str,
    tags: set[str] | None = None,
    tag_attributes: dict[str, set[str]] | None = None,
    tag_attribute_values: dict[str, dict[str, set[str]]] | None = None,
    url_schemes: set[str] | None = None,
    trim_spaces=True,
) -> str:
    """
    Clean an HTML string with the allow-list based nh3 sanitizer.
    Cf. https://nh3.readthedocs.io/en/latest/

    Params:
        - `value`                   The raw HTML to sanitize.
        - `tags`                    The requested allowed tags. They are intersected
                                    with `BASE_ALLOWED_TAGS`, so a caller can only
                                    narrow the global allow-list. `None` means the
                                    whole `BASE_ALLOWED_TAGS`.
        - `tag_attributes`          Forwarded to nh3 as `attributes`.
        - `tag_attribute_values`    Forwarded to nh3 as `tag_attribute_values`.
        - `url_schemes`             Forwarded to nh3 as `url_schemes`.
        - `trim_spaces`             Whether to strip the leading and trailing
                                    `space_char` occurrences of the cleaned value.

    Returns the sanitized HTML string.

    TODO: Clean the content of empty-like tags (tags with nothing but space characters).
    Ex: `<p> </p>` is often generated by CKEditor 4.
    """
    effective_tags = (
        BASE_ALLOWED_TAGS if tags is None else tags.intersection(BASE_ALLOWED_TAGS)
    )

    sanitized = nh3.clean(
        value,
        tags=effective_tags,
        attributes=tag_attributes,
        tag_attribute_values=tag_attribute_values,
        url_schemes=url_schemes,
    )

    if not trim_spaces:
        return sanitized

    # Strip the trailing run first, then the leading one.
    # NOTE(review): if the alternatives of `space_char` overlap, a long run of
    # spaces in the middle of the text can make the `$`-anchored pattern
    # backtrack heavily - worth confirming on large inputs.
    without_trailing = re.sub(f"{space_char}*$", "", sanitized)
    return re.sub(f"^{space_char}*", "", without_trailing)
# Base CKEditor (JavaScript side) configuration shared by every editor.
# Each editor starts from a copy of this dict and overrides what it needs.
DEFAULT_JAVASCRIPT_CONFIG = {
    # Placeholder displayed while the editor is empty (translated lazily).
    "editorplaceholder": _("Write your text here..."),
    # Auto-grow plugin settings.
    "autoGrow_minHeight": 100,
    "autoGrow_maxHeight": 400,
    "autoGrow_onStartup": True,
    # Plugins added to / removed from the editor build.
    "extraPlugins": ["mathjax", "autogrow"],
    # MathJax plugin settings.
    "mathJaxLib": "https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.4/latest.js?config=TeX-AMS_HTML",
    "mathJaxClass": "mathjax-formula",
    "removeDialogTabs": "table:advanced;link:advanced;link:target",
    "removePlugins": ["exportpdf"],
    # Name of the toolbar to use; its button groups are defined in `toolbar_Basic`.
    "toolbar": "Basic",
    "skin": "moono-lisa",
    "width": "100%",
    "toolbar_Basic": [
        ["Bold", "Italic", "Subscript", "Superscript"],
        ["JustifyLeft", "JustifyCenter"],
        ["Link"],
        ["HorizontalRule", "BulletedList", "NumberedList"],
        ["SpecialChar", "Mathjax"],
        ["Undo", "Redo"],
    ],
    # Presumably turns off CKEditor's own version-check notification - TODO confirm.
    "versionCheck": False,
}
@dataclass
class CKEditorConfig:
    """
    Dataclass wrapper for a CKEditor config.

    A single instance drives both sides of an HTML input:
        - `javascript_config()` builds the front-end CKEditor configuration,
          including the `allowedContent` rules.
        - `sanitize_value()` applies the same allow-lists on the server.

    Every default is a deep copy of the matching module-level constant, so
    mutating an instance (including its nested sets and lists) never leaks into
    the shared defaults or into other instances.
    """

    # Id of the CKEditor
    id: str
    # Traditional config of the CKEditor
    base_config: dict = field(default_factory=lambda: deepcopy(DEFAULT_JAVASCRIPT_CONFIG))
    # Allowed HTML tags used by both the CKEditor and the sanitizer
    allowed_tags: set = field(default_factory=lambda: deepcopy(DEFAULT_ALLOWED_TAGS))
    # Allowed attributes per HTML tag used by both the CKEditor and the sanitizer
    allowed_attributes: dict[str, set[str]] = field(
        default_factory=lambda: deepcopy(DEFAULT_ALLOWED_ATTRIBUTES)
    )
    # Allowed values per attributes per HTML tag. Only used by the sanitizer, CKEditor
    # ACR does not offer this feature.
    # This should contain the same `style` entry as the `allowed_styles` for coherency
    # between the CKEditor and the sanitizer.
    allowed_attributes_values: dict[str, dict] = field(
        default_factory=lambda: deepcopy(DEFAULT_ALLOWED_ATTRIBUTES_VALUES)
    )
    # Allowed CSS styles per HTML tag.
    # Only for the CKEditor that handles style attribute separately.
    allowed_styles: dict[str, set[str]] = field(
        default_factory=lambda: deepcopy(DEFAULT_ALLOWED_STYLES)
    )
    # Allowed URL schemes for `href` and `src` attributes
    allowed_url_schemes: set[str] = field(
        default_factory=lambda: deepcopy(DEFAULT_URL_SCHEMES)
    )

    def allowed_content(self) -> str:
        """
        Generate the CKEditor 4 allowed content property from our custom rules.
        Syntax: cf. https://ckeditor.com/docs/ckeditor4/latest/guide/dev_allowed_content_rules.html#string-format
        `tag [attrs]{styles}(classes)`

        The rules are built from the instance fields:
            - `allowed_tags`        One rule is emitted per tag, in sorted order.
            - `allowed_attributes`  The attributes of the `"*"` entry are merged with
                                    the ones of the tag itself.
            - `allowed_styles`      Same merge as the attributes, for CSS styles.

        Returns the rules joined with `;`.
        Ex: `a[class,href]{text-align}(*);p[class]{text-align}(*)`
        """
        attrs = self.allowed_attributes
        styles = self.allowed_styles

        base_attrs = set(attrs.get("*", ()))
        base_styles = set(styles.get("*", ()))

        rules: list[str] = []
        for tag in sorted(self.allowed_tags):
            # Set union so that a name listed under both "*" and the tag is only
            # emitted once; sorting keeps the generated string deterministic.
            tag_attrs = sorted(base_attrs | set(attrs.get(tag, ())))
            # Per-tag styles come from `styles`, not from `attrs`.
            tag_styles = sorted(base_styles | set(styles.get(tag, ())))

            # NOTE(review): an empty list falls back to the `*` wildcard, i.e. the
            # editor accepts everything while the sanitizer accepts nothing for
            # that tag. Kept as-is; confirm this is the intended behavior.
            attrs_rule = ",".join(tag_attrs) if tag_attrs else "*"
            styles_rule = ",".join(tag_styles) if tag_styles else "*"

            # Classes are always unrestricted: `(*)`.
            rules.append(f"{tag}[{attrs_rule}]{{{styles_rule}}}(*)")

        return ";".join(rules)

    def javascript_config(self) -> dict[str, Any]:
        """
        The javascript config of a CKEditor.

        Returns a deep copy of `base_config` with the `allowedContent` entry set
        from `allowed_content()`; `base_config` itself is left untouched.
        """
        config = deepcopy(self.base_config)
        config["allowedContent"] = self.allowed_content()
        return config

    def sanitize_value(self, value: str) -> str:
        """
        Sanitize the given value according to the editor config.

        Delegates to `sanitize_html_input` with the tags, attributes, attribute
        values and URL schemes of this config, and with space trimming enabled.
        """
        return sanitize_html_input(
            value,
            tags=self.allowed_tags,
            tag_attributes=self.allowed_attributes,
            tag_attribute_values=self.allowed_attributes_values,
            url_schemes=self.allowed_url_schemes,
            trim_spaces=True,
        )
#### [BEGIN] Default editor ####
# Generic editor: only the id is given, every other field uses the
# `CKEditorConfig` defaults.
DEFAULT_CKEDITOR_CONFIG = CKEditorConfig("mesh_default")
#### [END] Default editor ####
#### [BEGIN] Submission name editor ####
# Allowed tags for the submission name: inline formatting only.
# CKEditor is configured to not create <p> tags on enter but add <br /> tags instead.
# NOTE(review): `br` is not part of this set, so the tag list handed to the
# sanitizer does not include line breaks - confirm this is intended for titles.
SUBMISSION_NAME_TAGS = {"b", "i", "sub", "sup", "span", "u"}
SUBMISSION_NAME_CKEDITOR_CONFIG = CKEditorConfig(
    id="submission_name",
    # Start from a private copy of the shared config, then apply the overrides.
    base_config={
        **deepcopy(DEFAULT_JAVASCRIPT_CONFIG),
        "editorplaceholder": _("Write your title here..."),
        "autoGrow_minHeight": 70,
        "autoGrow_maxHeight": 250,
        # Insert a <br /> tag on Enter instead of creating a new <p> element.
        "enterMode": 2,
        # Reduced toolbar: no alignment, link or list buttons.
        "toolbar_Basic": [
            ["Bold", "Italic", "Subscript", "Superscript"],
            ["SpecialChar", "Mathjax"],
            ["Undo", "Redo"],
        ],
    },
    allowed_tags=set(SUBMISSION_NAME_TAGS),
)
#### [END] Submission name editor ####
#### [BEGIN] Submission abstract editor ####
# Allowed tags for the submission description.
SUBMISSION_ABSTRACT_TAGS = {
    # inline
    "b", "i", "sub", "sup", "span", "u", "a", "br",
    # block
    "p", "hr", "ul", "ol", "li", "div",
}
SUBMISSION_ABSTRACT_CKEDITOR_CONFIG = CKEditorConfig(
    id="submission_abstract",
    # Private copy of the shared config with a dedicated placeholder.
    base_config={
        **deepcopy(DEFAULT_JAVASCRIPT_CONFIG),
        "editorplaceholder": _("Write your description here..."),
    },
    allowed_tags=set(SUBMISSION_ABSTRACT_TAGS),
)
#### [END] Submission abstract editor ####
#### [BEGIN] E-mail editor ####
# Allowed tags for the e-mail body.
# `ul` is part of the set: the e-mail editor keeps the default toolbar, which
# offers a "BulletedList" button, so bulleted lists must be accepted by the
# sanitizer just like numbered ones (`ol`). The previous list had `div` twice
# and no `ul`.
EMAIL_TAGS = set(
    ["b", "i", "sub", "sup", "span", "u", "a", "br"]  # inline
    + ["p", "div", "hr", "ol", "li", "ul", "blockquote"]  # block
)
EMAIL_CKEDITOR_CONFIG = CKEditorConfig(
    id="email",
    base_config=deepcopy(DEFAULT_JAVASCRIPT_CONFIG),
    allowed_tags=deepcopy(EMAIL_TAGS),
)
EMAIL_CKEDITOR_CONFIG.base_config.update(
    {
        "enterMode": 2,  # Insert a <br /> tag on Enter instead of creating a new <p> element.
    }
)
#### [END] E-mail editor ####
# Every editor config declared in this module.
ALL_EDITOR_CONFIGS = [
    DEFAULT_CKEDITOR_CONFIG,
    SUBMISSION_NAME_CKEDITOR_CONFIG,
    SUBMISSION_ABSTRACT_CKEDITOR_CONFIG,
    EMAIL_CKEDITOR_CONFIG,
]

# JavaScript config of each editor, keyed by editor id.
MESH_CKEDITOR_CONFIGS = {
    editor.id: editor.javascript_config() for editor in ALL_EDITOR_CONFIGS
}