Project

General

Profile

« Previous | Next » 

Revision e2d26bad

Added by koszko almost 2 years ago

Fix sanitizing of non-HTML XMLDocument's

View differences:

build.sh
201 201

  
202 202
    if [ "$BROWSER" = "chromium" ]; then
203 203
	CHROMIUM_KEY="$(dd if=/dev/urandom bs=32 count=1 2>/dev/null | base64)"
204
	echo "chromium key is" $CHROMIUM_KEY
205
	CHROMIUM_KEY="chromium-key-dummy-file-$CHROMIUM_KEY"
206
	CHROMIUM_KEY=$(echo $CHROMIUM_KEY | tr / -);
204
	CHROMIUM_KEY=$(echo chromium-key-dummy-file-$CHROMIUM_KEY | tr / -)
207 205
	touch $BUILDDIR/$CHROMIUM_KEY
208 206

  
209 207
	CHROMIUM_KEY="\n\
common/misc.js
36 36
    return returnValue;
37 37
}
38 38

  
39
function gen_nonce(length) // Default 16
39
function gen_nonce(length=16)
40 40
{
41
    let randomData = new Uint8Array(length || 16);
41
    let randomData = new Uint8Array(length);
42 42
    crypto.getRandomValues(randomData);
43 43
    return Uint8toHex(randomData);
44 44
}
content/activity_info_server.js
44 44
    report_activity("settings", settings);
45 45
}
46 46

  
47
function report_content_type(content_type)
47
function report_document_type(is_html)
48 48
{
49
    report_activity("content_type", content_type);
49
    report_activity("is_html", is_html);
50 50
}
51 51

  
52 52
function report_repo_query_action(update, port)
......
96 96
 * EXPORT start_activity_info_server
97 97
 * EXPORT report_script
98 98
 * EXPORT report_settings
99
 * EXPORT report_content_type
99
 * EXPORT report_document_type
100 100
 * EXPORTS_END
101 101
 */
content/main.js
22 22
 * IMPORTS_END
23 23
 */
24 24

  
25
document.content_loaded = document.readyState === "complete";
26
const wait_loaded = e => e.content_loaded ? Promise.resolve() :
27
      new Promise(c => e.addEventListener("DOMContentLoaded", c, {once: true}));
28

  
29
wait_loaded(document).then(() => document.content_loaded = true);
30

  
25 31
function extract_cookie_policy(cookie, min_time)
26 32
{
27 33
    let best_result = {time: -1};
......
86 92
}
87 93

  
88 94
/*
95
 * In the case of HTML documents:
89 96
 * 1. When injecting some payload we need to sanitize <meta> CSP tags before
90 97
 *    they reach the document.
91 98
 * 2. Only <meta> tags inside <head> are considered valid by the browser and
92 99
 *    need to be considered.
93 100
 * 3. We want to detach <html> from document, wait until its <head> completes
94 101
 *    loading, sanitize it and re-attach <html>.
95
 * 4. Browsers are eager to add <meta>'s that appear after `</head>' but before
96
 *    `<body>'. Due to this behavior the `DOMContentLoaded' event is considered
97
 *    unreliable (although it could still work properly, it is just problematic
98
 *    to verify).
99
 * 5. We shall wait for anything to appear in or after <body> and take that as
100
 *    a sign <head> has _really_ finished loading.
102
 * 4. We shall wait for anything to appear in or after <body> and take that as
103
 *    a sign <head> has finished loading.
104
 * 5. Otherwise, getting the `DOMContentLoaded' event on the document shall also
105
 *    be a sign that <head> is fully loaded.
101 106
 */
102 107

  
103 108
function make_body_start_observer(DOM_element, waiting)
......
123 128

  
124 129
function finish_waiting(waiting)
125 130
{
131
    if (waiting.finished)
132
	return;
133
    waiting.finished = true;
126 134
    waiting.observers.forEach(observer => observer.disconnect());
127
    waiting.doc.removeEventListener("DOMContentLoaded", waiting.loaded_cb);
128 135
    setTimeout(waiting.callback, 0);
129 136
}
130 137

  
......
132 139
{
133 140
    const waiting = {doc, detached_html, callback, observers: []};
134 141

  
135
    /*
136
     * For XML and SVG documents, instead of waiting for `<head>', we wait
137
     * for the entire document to finish loading.
138
     */
139
    if (doc instanceof HTMLDocument) {
140
	if (try_body_started(waiting))
141
	    return;
142
    if (try_body_started(waiting))
143
	return;
142 144

  
143
	waiting.observers = [make_body_start_observer(detached_html, waiting)];
144
    }
145
    waiting.observers = [make_body_start_observer(detached_html, waiting)];
145 146

  
146
    waiting.loaded_cb = () => finish_waiting(waiting);
147
    doc.addEventListener("DOMContentLoaded", waiting.loaded_cb);
147
    wait_loaded(doc).then(() => finish_waiting(waiting));
148 148
}
149 149

  
150 150
function wait_for_head(doc, detached_html)
......
154 154

  
155 155
const blocked_str = "blocked";
156 156

  
157
function block_attribute(node, attr)
157
function block_attribute(node, attr, ns=null)
158 158
{
159
    const [hasa, geta, seta, rema] = ["has", "get", "set", "remove"]
160
	  .map(m => (n, ...args) => typeof ns === "string" ?
161
	       n[`${m}AttributeNS`](ns, ...args) : n[`${m}Attribute`](...args));
159 162
    /*
160
     * Disabling attributes this way allows them to still be relatively
161
     * easily accessed in case they contain some useful data.
163
     * Disabling attributes by prepending `blocked-' allows them to still be
164
     * relatively easily accessed in case they contain some useful data.
162 165
     */
163 166
    const construct_name = [attr];
164
    while (node.hasAttribute(construct_name.join("")))
167
    while (hasa(node, construct_name.join("")))
165 168
	construct_name.unshift(blocked_str);
166 169

  
167 170
    while (construct_name.length > 1) {
168 171
	construct_name.shift();
169 172
	const name = construct_name.join("");
170
	node.setAttribute(`${blocked_str}-${name}`, node.getAttribute(name));
173
	seta(node, `${blocked_str}-${name}`, geta(node, name));
171 174
    }
172

  
173
    node.removeAttribute(attr);
174 175
}
175 176

  
176 177
function sanitize_meta(meta, policy)
177 178
{
178
    const http_equiv = meta.getAttribute("http-equiv");
179
    const value = meta.content;
179
    const value = meta.content || "";
180 180

  
181
    if (!value || !is_csp_header_name(http_equiv, true))
181
    if (!value || !is_csp_header_name(meta.httpEquiv || "", true))
182 182
	return;
183 183

  
184 184
    block_attribute(meta, "content");
185

  
186
    if (is_csp_header_name(http_equiv, false))
187
	meta.content = sanitize_csp_header({value}, policy).value;
188 185
}
189 186

  
187
/*
188
 * Used to disable <script> that has not yet been added to live DOM (doesn't
189
 * work for those already added).
190
 */
190 191
function sanitize_script(script)
191 192
{
192
    script.hachette_blocked_type = script.type;
193
    script.hachette_blocked_type = script.getAttribute("type");
193 194
    script.type = "text/plain";
194 195
}
195 196

  
......
201 202
{
202 203
    script.setAttribute("type", script.hachette_blocked_type);
203 204

  
204
    if (script.hachette_blocked_type === undefined)
205
    if (script.hachette_blocked_type === null)
205 206
	script.removeAttribute("type");
206 207

  
207 208
    delete script.hachette_blocked_type;
208 209
}
209 210

  
210
function apply_hachette_csp_rules(doc, head, policy)
211
{
212
    const meta = doc.createElement("meta");
213
    meta.setAttribute("http-equiv", "Content-Security-Policy");
214
    meta.setAttribute("content", csp_rule(policy.nonce));
215
    head.append(meta);
216
    /* CSP is already in effect, we can remove the <meta> now. */
217
    meta.remove();
218
}
219

  
211
const bad_url_reg = /^data:([^,;]*ml|unknown-content-type)/i;
220 212
function sanitize_urls(element)
221 213
{
222
    for (const attribute of [...element.attributes]) {
223
	if (/^(href|src|data)$/i.test(attribute.localName) &&
224
	    /^data:([^,;]*ml|unknown-content-type)/i.test(attribute.value))
225
	    block_attribute(element, attribute.localName);
226
    }
214
    for (const attr of [...element.attributes || []]
215
	       .filter(attr => /^(href|src|data)$/i.test(attr.localName))
216
	       .filter(attr => bad_url_reg.test(attr.value)))
217
	block_attribute(element, attr.localName, attr.namespaceURI);
227 218
}
228 219

  
229 220
function start_data_urls_sanitizing(doc)
230 221
{
231 222
    doc.querySelectorAll("*[href], *[src], *[data]").forEach(sanitize_urls);
232
    const mutation_handler = m => m.addedNodes.forEach(sanitize_urls);
233
    const mo = new MutationObserver(ms => ms.forEach(mutation_handler));
234
    mo.observe(doc, {childList: true, subtree: true});
223
    if (!doc.content_loaded) {
224
	const mutation_handler = m => m.addedNodes.forEach(sanitize_urls);
225
	const mo = new MutationObserver(ms => ms.forEach(mutation_handler));
226
	mo.observe(doc, {childList: true, subtree: true});
227
	wait_loaded(doc).then(() => mo.disconnect());
228
    }
235 229
}
236 230

  
237
function apply_intrinsics_sanitizing(root_element)
231
/*
232
 * Normally, we block scripts with CSP. However, Mozilla does optimizations that
233
 * cause part of the DOM to be loaded when our content scripts get to run. Thus,
234
 * before the CSP rules we inject (for non-HTTP pages) become effective, we need
235
 * to somehow block the execution of `<script>'s and intrinsics that were
236
 * already there.
237
 */
238
function mozilla_initial_block(doc)
238 239
{
239
    for (const subelem of root_element.querySelectorAll("*")) {
240
	[...subelem.attributes]
241
	    .filter(a => /^on/i.test(a.localName))
242
	    .filter(a => /^javascript:/i.test(a.value))
243
	    .forEach(a => block_attribute(subelem, a.localName));
244
    }
240
    const blocker = e => e.preventDefault();
241
    doc.addEventListener("beforescriptexecute", blocker);
242
    setTimeout(() => doc.removeEventListener("beforescriptexecute", blocker));
243

  
244
    [...doc.all].flatMap(ele => [...ele.attributes].map(attr => [ele, attr]))
245
	.map(([ele, attr]) => [ele, attr.localName])
246
	.filter(([ele, attr]) => /^on/.test(attr) && ele.wrappedJSObject[attr])
247
	.forEach(([ele, attr]) => ele.wrappedJSObject[attr] = null);
245 248
}
246 249

  
250
/*
251
 * Here we block all scripts of a document which might be either and
252
 * HTMLDocument or an XMLDocument. Modifying an XML document might disrupt
253
 * Mozilla's XML preview. This is an unfortunate thing we have to accept for
254
 * now. XML documents *have to* be sanitized as well because they might
255
 * contain `<script>' tags (or on* attributes) with namespace declared as
256
 * "http://www.w3.org/1999/xhtml" or "http://www.w3.org/2000/svg" which allows
257
 * javascript execution.
258
 */
247 259
async function sanitize_document(doc, policy)
248 260
{
249 261
    /*
250 262
     * Blocking of scripts that are in the DOM from the beginning. Needed for
251
     * Mozilla, harmless on Chromium.
252
     * Note that at least in SVG documents the `src' attr on `<script>'s seems
253
     * to be ignored by Firefox, so we don't need to sanitize it.
263
     * Mozilla.
254 264
     */
255
    for (const script of document.getElementsByTagName("script")) {
256
	const old_children = [...script.childNodes];
257
	script.innerHTML = "";
258
	setTimeout(() => old_children.forEach(c => script.append(c)), 0);
259
    }
265
    if (is_mozilla)
266
	mozilla_initial_block(doc);
260 267

  
261 268
    /*
262 269
     * Ensure our CSP rules are employed from the beginning. This CSP injection
263 270
     * method is, when possible, going to be applied together with CSP rules
264 271
     * injected using webRequest.
265
     * For non-HTML documents this is just a dummy operation of adding and
266
     * removing `head'.
272
     * Using elements namespaced as HTML makes this CSP injection also work for
273
     * non-HTML documents.
267 274
     */
268
    let added_head = doc.createElement("head");
269
    if (!doc.head)
270
	doc.documentElement.prepend(added_head);
271

  
272
    apply_hachette_csp_rules(doc, added_head, policy);
273

  
274
    /* Proceed with DOM in its initial state. */
275
    added_head.remove();
275
    const html = new DOMParser().parseFromString(`<html><head><meta \
276
http-equiv="Content-Security-Policy" content="${csp_rule(policy.nonce)}"\
277
/></head><body>Loading...</body></html>`, "text/html").documentElement;
276 278

  
277 279
    /*
278
     * <html> node gets hijacked now, to be re-attached after <head> is loaded
280
     * Root node gets hijacked now, to be re-attached after <head> is loaded
279 281
     * and sanitized.
280 282
     */
281
    const old_html = doc.documentElement;
282
    const new_html = doc.createElement("html");
283
    old_html.replaceWith(new_html);
283
    const root = doc.documentElement;
284
    root.replaceWith(html);
284 285

  
285
    await wait_for_head(doc, old_html);
286

  
287
    for (const meta of old_html.querySelectorAll("head meta"))
288
	sanitize_meta(meta, policy);
289

  
290
    for (const script of old_html.querySelectorAll("script"))
291
	sanitize_script(script, policy);
292

  
293
    if (!(doc instanceof HTMLDocument))
294
	apply_intrinsics_sanitizing(old_html);
286
    /*
287
     * For XML documents, we don't intend to inject payload, so we neither block
288
     * document's CSP `<meta>' tags nor wait for `<head>' to be parsed.
289
     */
290
    if (document instanceof HTMLDocument) {
291
	await wait_for_head(doc, root);
295 292

  
296
    new_html.replaceWith(old_html);
293
	root.querySelectorAll("head meta")
294
	    .forEach(m => sanitize_meta(m, policy));
295
    }
297 296

  
298
    for (const script of old_html.querySelectorAll("script"))
299
	desanitize_script(script, policy);
297
    root.querySelectorAll("script").forEach(s => sanitize_script(s, policy));
298
    html.replaceWith(root);
299
    root.querySelectorAll("script").forEach(s => desanitize_script(s, policy));
300 300

  
301 301
    start_data_urls_sanitizing(doc);
302 302
}
......
329 329
    }
330 330

  
331 331
    if (!policy) {
332
	console.warn("Using fallback policy!");
332
	console.debug("Using fallback policy!");
333 333
	policy = {allow: false, nonce: gen_nonce()};
334 334
    }
335 335

  
336
    console.debug("current policy", policy);
337

  
336 338
    const doc_ready = Promise.all([
337
	policy.allow ? Promise.resolve : sanitize_document(document, policy),
338
	new Promise(cb => document.addEventListener("DOMContentLoaded",
339
						    cb, {once: true}))
339
	policy.allow ? Promise.resolve() : sanitize_document(document, policy),
340
	wait_loaded(document)
340 341
    ]);
341 342

  
342 343
    handle_page_actions(policy.nonce, policy_received_callback, doc_ready);
content/page_actions.js
11 11
 * IMPORT browser
12 12
 * IMPORT report_script
13 13
 * IMPORT report_settings
14
 * IMPORT report_content_type
14
 * IMPORT report_document_type
15 15
 * IMPORTS_END
16 16
 */
17 17

  
......
70 70
			     doc_ready_promise) {
71 71
    policy_received_callback = policy_received_cb;
72 72
    url = document.URL;
73
    is_html = /html/.test(document.contentType);
74
    report_content_type(document.contentType);
73
    is_html = document instanceof HTMLDocument;
74
    report_document_type(is_html);
75 75

  
76 76
    doc_ready_promise.then(document_ready);
77 77

  
html/display-panel.js
276 276
	template.script_contents.textContent = data;
277 277
	container_for_injected.appendChild(template.div);
278 278
    }
279
    if (type === "content_type") {
280
	if (!/html/.test(data))
279
    if (type === "is_html") {
280
	if (!data)
281 281
	    content_type_cell.classList.remove("hide");
282 282
    }
283 283
    if (type === "repo_query_action") {

Also available in: Unified diff