Coverage for src/mesh/views/components/ckeditor_config.py: 90%

78 statements  

« prev     ^ index     » next       coverage.py v7.8.2, created at 2025-06-03 13:52 +0000

1""" 

2CKEditor and corresponding sanitizer configs using the `CKEditorConfig` class. 

3 

4We define a very precise list of allowed tags with corresponding attributes and styles 

5for the various HTML inputs. 

6 

7This specification is used for both: 

8 - Front-end: the config of CKEditors (using `allowedContent` config parameter). 

9 The editor will discard everything outside of the specified parameters. 

10 - Back-end: the server sanitization of the inputs (using the nh3/ammonia library). 

11 It will discard every unallowed tag with their content, along with non-allowed 

12 attributes. 

13""" 

14 

15import re 

16from copy import deepcopy 

17from dataclasses import dataclass, field 

18from typing import Any 

19 

20import nh3 

21from django.utils.translation import gettext_lazy as _ 

22 

# Inline tags that an editor config may allow.
ALLOWED_INLINE_TAGS = {
    "a", "abbr", "b", "bdo", "br", "cite", "code", "del", "dfn", "em",
    "i", "ins", "kbd", "mark", "meter", "q", "ruby", "s", "samp", "small",
    "span", "strong", "sub", "sup", "time", "u", "var", "wbr",
}  # fmt: skip

# Media tags that an editor config may allow.
ALLOWED_MEDIA_TAGS = {
    "audio", "img", "figcaption", "figure", "object",
    "picture", "svg", "track", "video",
}  # fmt: skip

# Block tags that an editor config may allow.
ALLOWED_BLOCK_TAGS = {
    "address", "aside", "blockquote", "caption", "col", "colgroup",
    "dd", "div", "dl", "dt", "footer", "form",
    "h1", "h2", "h3", "h4", "h5", "h6",
    "header", "hgroup", "hr", "li", "ol", "p", "pre", "section",
    "table", "tbody", "td", "th", "thead", "tr", "tfoot", "ul",
}  # fmt: skip

# Union of the three families above: the widest set of tags the sanitizer
# will ever accept.
BASE_ALLOWED_TAGS = ALLOWED_INLINE_TAGS | ALLOWED_BLOCK_TAGS | ALLOWED_MEDIA_TAGS

104 

# Tags allowed when an editor config does not specify its own set.
DEFAULT_ALLOWED_TAGS = {
    # inline
    "b", "i", "sub", "sup", "span", "u", "a", "br",
    # block
    "p", "hr", "ul", "li", "ol", "div",
}  # fmt: skip

# Allowed attributes per tag name; the "*" entry is the one applied to every tag.
DEFAULT_ALLOWED_ATTRIBUTES = {
    "*": {"id", "class", "style"},
    "a": {"href"},
}

# Allowed attribute values, keyed by tag name and then by attribute name.
DEFAULT_ALLOWED_ATTRIBUTES_VALUES = {
    "*": {
        "style": {
            "text-align: center",
            "text-align: start",
            "text-align: end",
            "text-align: left",
            "text-align: right",
            "text-align: justify",
        },
    },
}

# Allowed CSS properties per tag name, mirroring the `style` values above.
DEFAULT_ALLOWED_STYLES = {"*": {"text-align"}}

# URL schemes accepted by the sanitizer.
DEFAULT_URL_SCHEMES = {"http", "https", "mailto", "tel"}

# Regex fragment matching one space-like character; used to trim sanitized output.
space_char = r"( |\s)"

130 

131 

def sanitize_html_input(
    value: str,
    tags: set[str] | None = None,
    tag_attributes: dict[str, set[str]] | None = None,
    tag_attribute_values: dict[str, dict[str, set[str]]] | None = None,
    url_schemes: set[str] | None = None,
    trim_spaces=True,
) -> str:
    """
    Clean an HTML string with the allow-list based nh3 sanitizer.
    Cf. https://nh3.readthedocs.io/en/latest/

    Params:
        - `value`                   The raw HTML to sanitize.
        - `tags`                    Requested allowed tags. They are intersected with
                                    `BASE_ALLOWED_TAGS`; `None` means the whole base set.
        - `tag_attributes`          Forwarded to nh3 as `attributes`.
        - `tag_attribute_values`    Forwarded to nh3 as `tag_attribute_values`.
        - `url_schemes`             Forwarded to nh3 as `url_schemes`.
        - `trim_spaces`             When true, strip leading and trailing characters
                                    matching `space_char` from the cleaned output.

    Returns the sanitized HTML string.

    NOTE(review): nh3's `tag_attribute_values` appears to add to what `attributes`
    already permits rather than restrict it - confirm that a blanket `style`
    permission does not bypass the intended value allow-list.

    TODO: Clean the content of empty-like tags (tags with nothing but space characters).
    Ex: `<p>&nbsp;</p>` is often generated by CKEditor 4.
    """
    # Callers may only narrow the base allow-list, never widen it.
    effective_tags = (
        BASE_ALLOWED_TAGS if tags is None else tags.intersection(BASE_ALLOWED_TAGS)
    )

    sanitized = nh3.clean(
        value,
        tags=effective_tags,
        attributes=tag_attributes,
        tag_attribute_values=tag_attribute_values,
        url_schemes=url_schemes,
    )

    if not trim_spaces:
        return sanitized

    # Strip the trailing run first, then the leading one.
    trailing_spaces = re.compile(f"{space_char}*$")
    leading_spaces = re.compile(f"^{space_char}*")
    without_trailing = trailing_spaces.sub("", sanitized)
    return leading_spaces.sub("", without_trailing)

164 

165 

# Base CKEditor javascript config; each editor config starts from a copy of it.
DEFAULT_JAVASCRIPT_CONFIG = {
    "editorplaceholder": _("Write your text here..."),
    # Auto-grow plugin settings.
    "autoGrow_minHeight": 100,
    "autoGrow_maxHeight": 400,
    "autoGrow_onStartup": True,
    # Plugins and MathJax settings.
    "extraPlugins": ["mathjax", "autogrow"],
    "mathJaxLib": "https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.4/latest.js?config=TeX-AMS_HTML",
    "mathJaxClass": "mathjax-formula",
    "removeDialogTabs": "table:advanced;link:advanced;link:target",
    "removePlugins": ["exportpdf"],
    # Appearance and toolbar layout.
    "toolbar": "Basic",
    "skin": "moono-lisa",
    "width": "100%",
    "toolbar_Basic": [
        ["Bold", "Italic", "Subscript", "Superscript"],
        ["JustifyLeft", "JustifyCenter"],
        ["Link"],
        ["HorizontalRule", "BulletedList", "NumberedList"],
        ["SpecialChar", "Mathjax"],
        ["Undo", "Redo"],
    ],
    "versionCheck": False,
}

189 

190 

@dataclass
class CKEditorConfig:
    """
    Dataclass wrapper for a CKEditor config.

    The same allow-lists drive both the CKEditor `allowedContent` rule
    (see `allowed_content`) and the server-side sanitizer (see `sanitize_value`).

    Every default is a deep copy of the matching module-level constant, so
    mutating one instance (including its nested sets/dicts) never leaks into
    the defaults or into other instances.
    """

    # Id of the CKEditor
    id: str
    # Traditional config of the CKEditor
    base_config: dict = field(default_factory=lambda: deepcopy(DEFAULT_JAVASCRIPT_CONFIG))
    # Allowed HTML tags used by both the CKEditor and the sanitizer
    allowed_tags: set = field(default_factory=lambda: deepcopy(DEFAULT_ALLOWED_TAGS))
    # Allowed attributes per HTML tag used by both the CKEditor and the sanitizer
    allowed_attributes: dict[str, set[str]] = field(
        default_factory=lambda: deepcopy(DEFAULT_ALLOWED_ATTRIBUTES)
    )
    # Allowed values per attributes per HTML tag. Only used by the sanitizer, CKEditor
    # ACR does not offer this feature.
    # This should contain the same `style` entry as the `allowed_styles` for coherency
    # between the CKEditor and the sanitizer.
    allowed_attributes_values: dict[str, dict] = field(
        default_factory=lambda: deepcopy(DEFAULT_ALLOWED_ATTRIBUTES_VALUES)
    )
    # Allowed CSS styles per HTML tag.
    # Only for the CKEditor that handles style attribute separately.
    allowed_styles: dict[str, set[str]] = field(
        default_factory=lambda: deepcopy(DEFAULT_ALLOWED_STYLES)
    )
    # Allowed URL schemes for `href` and `src` attributes
    allowed_url_schemes: set[str] = field(
        default_factory=lambda: deepcopy(DEFAULT_URL_SCHEMES)
    )

    def allowed_content(self) -> str:
        """
        Generate the CKEditor 4 allowed content property from our custom rules.
        Syntax: cf. https://ckeditor.com/docs/ckeditor4/latest/guide/dev_allowed_content_rules.html#string-format
            `tag [attrs]{styles}(classes)`

        Reads:
            - `self.allowed_tags`        The tags to emit, one rule per tag, sorted by name.
            - `self.allowed_attributes`  Allowed attributes per tag name; the `*` entry
                                         is merged into every tag.
            - `self.allowed_styles`      Allowed CSS styles per tag name, same layout.

        Returns the rules joined with `;`. Attribute and style names are
        de-duplicated and sorted. Classes are always emitted as `(*)`.
        """
        attrs = self.allowed_attributes
        styles = self.allowed_styles

        base_attrs = set(attrs.get("*", ()))
        base_styles = set(styles.get("*", ()))

        tag_values: list[str] = []
        for tag in sorted(self.allowed_tags):
            tag_attrs = sorted(base_attrs | set(attrs.get(tag, ())))
            # Per-tag styles come from `styles`, not from `attrs`.
            tag_styles = sorted(base_styles | set(styles.get(tag, ())))

            # NOTE(review): an empty allow-list is emitted as `*`, which CKEditor
            # reads as "allow everything" - confirm this fallback is intended.
            attrs_rule = ",".join(tag_attrs) if tag_attrs else "*"
            styles_rule = ",".join(tag_styles) if tag_styles else "*"

            tag_values.append(f"{tag}[{attrs_rule}]{{{styles_rule}}}(*)")

        return ";".join(tag_values)

    def javascript_config(self) -> dict[str, Any]:
        """
        The javascript config of a CKEditor.

        Returns a deep copy of `base_config` with `allowedContent` set from
        `allowed_content()`; `base_config` itself is left untouched.
        """
        config = deepcopy(self.base_config)
        config["allowedContent"] = self.allowed_content()
        return config

    def sanitize_value(self, value: str) -> str:
        """
        Sanitize the given value according to the editor config.

        Delegates to `sanitize_html_input` with this config's tags, attributes,
        attribute values and URL schemes, trimming surrounding spaces.
        """
        return sanitize_html_input(
            value,
            tags=self.allowed_tags,
            tag_attributes=self.allowed_attributes,
            tag_attribute_values=self.allowed_attributes_values,
            url_schemes=self.allowed_url_schemes,
            trim_spaces=True,
        )

285 

286 

#### [BEGIN] Default editor ####
# Relies on every `CKEditorConfig` default.
DEFAULT_CKEDITOR_CONFIG = CKEditorConfig(id="mesh_default")
#### [END] Default editor ####


#### [BEGIN] Submission name editor ####
# Allowed tags for the submission name.
# CKEditor is configured to not create <p> tags on enter but add <br /> tags instead.
SUBMISSION_NAME_TAGS = {"b", "i", "sub", "sup", "span", "u"}
SUBMISSION_NAME_CKEDITOR_CONFIG = CKEditorConfig(
    id="submission_name",
    # Start from a private copy of the defaults, then override the title-specific keys.
    base_config={
        **deepcopy(DEFAULT_JAVASCRIPT_CONFIG),
        "editorplaceholder": _("Write your title here..."),
        "autoGrow_minHeight": 70,
        "autoGrow_maxHeight": 250,
        "enterMode": 2,  # Insert a <br /> tag on Enter instead of creating a new <p> element.
        "toolbar_Basic": [
            ["Bold", "Italic", "Subscript", "Superscript"],
            ["SpecialChar", "Mathjax"],
            ["Undo", "Redo"],
        ],
    },
    allowed_tags=SUBMISSION_NAME_TAGS.copy(),
)
#### [END] Submission name editor ####

315 

316 

#### [BEGIN] Submission abstract editor ####
# Allowed tags for the submission description.
SUBMISSION_ABSTRACT_TAGS = {
    # inline
    "b", "i", "sub", "sup", "span", "u", "a", "br",
    # block
    "p", "hr", "ul", "ol", "li", "div",
}  # fmt: skip
SUBMISSION_ABSTRACT_CKEDITOR_CONFIG = CKEditorConfig(
    id="submission_abstract",
    # Private copy of the defaults with only the placeholder overridden.
    base_config={
        **deepcopy(DEFAULT_JAVASCRIPT_CONFIG),
        "editorplaceholder": _("Write your description here..."),
    },
    allowed_tags=SUBMISSION_ABSTRACT_TAGS.copy(),
)
#### [END] Submission abstract editor ####

333 

334 

#### [BEGIN] E-mail editor ####
# NOTE(review): "ul" is not allowed here although the default toolbar exposes
# "BulletedList"; the original list named "div" twice, so one of them may have
# been meant to be "ul" - confirm.
EMAIL_TAGS = {
    # inline
    "b", "i", "sub", "sup", "span", "u", "a", "br",
    # block
    "p", "div", "hr", "ol", "li", "blockquote",
}  # fmt: skip
EMAIL_CKEDITOR_CONFIG = CKEditorConfig(
    id="email",
    base_config={
        **deepcopy(DEFAULT_JAVASCRIPT_CONFIG),
        "enterMode": 2,  # Insert a <br /> tag on Enter instead of creating a new <p> element.
    },
    allowed_tags=EMAIL_TAGS.copy(),
)
#### [END] E-mail editor ####

351 

352 

# The editor configs declared in this module.
ALL_EDITOR_CONFIGS = [
    DEFAULT_CKEDITOR_CONFIG,
    SUBMISSION_NAME_CKEDITOR_CONFIG,
    SUBMISSION_ABSTRACT_CKEDITOR_CONFIG,
    EMAIL_CKEDITOR_CONFIG,
]

# Javascript config of each editor, keyed by the editor id.
MESH_CKEDITOR_CONFIGS = {
    editor.id: editor.javascript_config() for editor in ALL_EDITOR_CONFIGS
}