Project

General

Profile

Download (15.8 KB) Statistics
| Branch: | Tag: | Revision:

haketilo / content / policy_enforcing.js @ 95bc9b67

1
/**
 * This file is part of Haketilo.
 *
 * Function: Enforcing script blocking rules on a given page, working from a
 *           content script.
 *
 * Copyright (C) 2021,2022 Wojtek Kosior
 * Copyright (C) 2021 jahoti
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * As additional permission under GNU GPL version 3 section 7, you
 * may distribute forms of that code without the copy of the GNU
 * GPL normally required by section 4, provided you include this
 * license notice and, in case of non-source distribution, a URL
 * through which recipients can access the Corresponding Source.
 * If you modify file(s) with this exception, you may extend this
 * exception to your version of the file(s), but you are not
 * obligated to do so. If you do not wish to do so, delete this
 * exception statement from your version.
 *
 * As a special exception to the GPL, any HTML file which merely
 * makes function calls to this code, and for that purpose
 * includes it by reference shall be deemed a separate work for
 * copyright law purposes. If you modify this code, you may extend
 * this exception to your version of the code, but you are not
 * obligated to do so. If you do not wish to do so, delete this
 * exception statement from your version.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 *
 * I, Wojtek Kosior, thereby promise not to sue for violation of this file's
 * license. Although I request that you do not make use of this code in a
 * proprietary program, I am not going to enforce this in court.
 */
45

    
46
#FROM common/misc.js IMPORT csp_header_regex
47

    
48
/* Namespaces whose `<script>' elements get blocked by sanitize_document(). */
const html_ns = "http://www.w3.org/1999/xhtml";
const svg_ns = "http://www.w3.org/2000/svg";

/*
 * `content_loaded' is a custom flag kept on the document object. It starts out
 * true only if the document has already reached the "complete" ready state and
 * is set to true below once wait_loaded() resolves.
 */
document.content_loaded = document.readyState === "complete";

/*
 * Return a promise that resolves once `e' has its content loaded: right away
 * when the `content_loaded' flag of `e' is already set, otherwise upon the next
 * `DOMContentLoaded' event dispatched on `e'.
 */
const wait_loaded = e => e.content_loaded ? Promise.resolve() :
      new Promise(c => e.addEventListener("DOMContentLoaded", c, {once: true}));

wait_loaded(document).then(() => document.content_loaded = true);
56

    
57
/*
 * In the case of HTML documents:
 * 1. When injecting some payload we need to sanitize <meta> CSP tags before
 *    they reach the document.
 * 2. Only <meta> tags inside <head> are considered valid by the browser and
 *    need to be considered.
 * 3. We want to detach <html> from document, wait until its <head> completes
 *    loading, sanitize it and re-attach <html>.
 * 4. We shall wait for anything to appear in or after <body> and take that as
 *    a sign <head> has finished loading.
 * 5. Otherwise, getting the `DOMContentLoaded' event on the document shall also
 *    be a sign that <head> is fully loaded.
 */
70

    
71
/*
 * Create a MutationObserver that watches the direct children of `DOM_element'
 * and re-runs try_body_started() on `waiting' after every change. The observer
 * is returned so that the caller is able to disconnect it later.
 */
function make_body_start_observer(DOM_element, waiting) {
    const on_children_changed = () => try_body_started(waiting);
    const observer = new MutationObserver(on_children_changed);

    observer.observe(DOM_element, {childList: true});

    return observer;
}
76

    
77
/*
 * Check whether parsing has progressed past <head>, i.e. whether the <body> of
 * the detached <html> has any child or following sibling, or whether anything
 * appeared after the root element of `waiting.doc'.
 *
 * If so, the wait is finished and `true' is returned. Otherwise `false' is
 * returned and, when <body> already exists and fewer than 2 observers are
 * registered in `waiting.observers', <body> starts being observed as well.
 */
function try_body_started(waiting) {
    const body = waiting.detached_html.querySelector("body");

    const body_started = body && (body.firstChild || body.nextSibling);

    if (body_started || waiting.doc.documentElement.nextSibling) {
	finish_waiting(waiting);
	return true;
    }

    if (body && waiting.observers.length < 2)
	waiting.observers.push(make_body_start_observer(body, waiting));

    return false;
}
89

    
90
/*
 * Conclude the wait described by `waiting': disconnect all of its observers
 * and schedule `waiting.callback' to run asynchronously. Only the first call
 * has any effect; this is tracked with the `finished' flag.
 */
function finish_waiting(waiting) {
    if (waiting.finished)
	return;

    waiting.finished = true;

    for (const observer of waiting.observers)
	observer.disconnect();

    setTimeout(waiting.callback, 0);
}
97

    
98
/*
 * Callback-based implementation of wait_for_head(). `callback' gets scheduled
 * once something shows up in or after <body> of `detached_html', once something
 * shows up after the root element of `doc' or once `doc' has its content
 * loaded - whichever happens first.
 */
function _wait_for_head(doc, detached_html, callback) {
    const waiting = {doc, detached_html, callback, observers: []};

    if (try_body_started(waiting))
	return;

    /*
     * try_body_started() might have already registered an observer for <body>.
     * Append instead of overwriting the array so that this observer remains
     * tracked and gets disconnected by finish_waiting().
     */
    waiting.observers.push(make_body_start_observer(detached_html, waiting));

    wait_loaded(doc).then(() => finish_waiting(waiting));
}
108

    
109
/*
 * Promise-returning wrapper around _wait_for_head(). The returned promise
 * resolves when _wait_for_head() invokes the callback it was given.
 */
function wait_for_head(doc, detached_html) {
    return new Promise(resolve => _wait_for_head(doc, detached_html, resolve));
}
112

    
113
const blocked_str = "blocked";

/*
 * Disable attribute `attr' of `node' by moving its value to an attribute named
 * `blocked-<attr>'. If that name is already taken, the existing values are
 * moved one `blocked-' prefix further first, so nothing gets overwritten.
 *
 * When `ns' is a string, the namespace-aware DOM methods (`*AttributeNS') are
 * used with that namespace. When `replace_with' is not null, `attr' is set to
 * that value after the original attribute has been removed.
 */
function block_attribute(node, attr, ns=null, replace_with=null) {
    const call = (method, ...args) => typeof ns === "string" ?
	  node[`${method}AttributeNS`](ns, ...args) :
	  node[`${method}Attribute`](...args);

    /*
     * Disabling attributes by prepending `blocked-' allows them to still be
     * relatively easily accessed in case they contain some useful data.
     */
    const construct_name = [attr];
    while (call("has", construct_name.join("-")))
	construct_name.unshift(blocked_str);

    /* Walk from the longest occupied name down to `attr', shifting values. */
    while (construct_name.length > 1) {
	construct_name.shift();
	const name = construct_name.join("-");
	call("set", `${blocked_str}-${name}`, call("get", name));
    }

    call("remove", attr);
    if (replace_with !== null)
	call("set", attr, replace_with);
}
137

    
138
/*
 * Used to disable `<script>'s and `<meta>'s that have not yet been added to
 * live DOM (doesn't work for those already added).
 */

/*
 * Block the `content' attribute of `meta' if its `http-equiv' value matches
 * csp_header_regex and its content is non-empty. Other `<meta>' elements are
 * left untouched.
 */
function sanitize_meta(meta) {
    const is_csp_meta = csp_header_regex.test(meta.httpEquiv);

    if (is_csp_meta && meta.content)
	block_attribute(meta, "content");
}
146

    
147
/*
 * Remember the current `type' attribute of `script' in its
 * `haketilo_blocked_type' property and switch the type to "text/plain".
 * desanitize_script() reverses this.
 */
function sanitize_script(script) {
    script.haketilo_blocked_type = script.getAttribute("type");
    script.type = "text/plain";
}
151

    
152
/*
 * Executed after `<script>' has been connected to the DOM, when it is no longer
 * eligible for being executed by the browser.
 *
 * Restores the `type' attribute saved by sanitize_script() in the
 * `haketilo_blocked_type' property. The attribute is removed when the saved
 * value is null or undefined. The helper property gets deleted afterwards.
 */
function desanitize_script(script) {
    const original_type = script.haketilo_blocked_type;

    /* Decide up front so that `type' is never temporarily set to "null". */
    if (original_type === null || original_type === undefined)
	script.removeAttribute("type");
    else
	script.setAttribute("type", original_type);

    delete script.haketilo_blocked_type;
}
164

    
165
/*
 * The following will only be run on pages without payload.
 *
 * Make the contents of a `<noscript>' element effective. Elements with other
 * tag names are ignored. The contents are re-parsed inside a fresh
 * `<haketilo-noscript>' element and stripped of `<script>' elements. When the
 * `<noscript>' lives under document's <head>, the resulting nodes are inserted
 * directly before it and the `<noscript>' is removed. Otherwise the
 * `<noscript>' is replaced with the `<haketilo-noscript>' element.
 */
function force_noscript_tag(element) {
    if (element.tagName !== "NOSCRIPT")
	return;

    let under_head = false;
    for (let ancestor = element.parentElement;
	 ancestor !== null;
	 ancestor = ancestor.parentElement) {
	if (ancestor === document.head) {
	    under_head = true;
	    break;
	}
    }

    const replacement = document.createElement('haketilo-noscript');
    replacement.innerHTML = element.innerHTML;

    for (const script of [...replacement.querySelectorAll('script')])
	script.remove();

    if (under_head) {
	/*
	 * `childNodes' is a live list and before() moves each node out of it.
	 * Iterate over a snapshot so that no child is skipped.
	 */
	for (const child of [...replacement.childNodes])
	    element.before(child);

	element.remove();
    } else {
	element.replaceWith(replacement);
    }
}
199

    
200
/*
 * Blocking certain attributes that might allow 'javascript:' URLs. Some of
 * these are: <iframe>'s 'src' attributes (would normally execute js in URL upon
 * frame's load), <object>'s 'data' attribute (would also execute upon load) and
 * <a>'s 'href' attribute (would execute upon link click).
 */
const bad_url_reg = /^data:([^,;]*ml|unknown-content-type)|^javascript:/i;

/*
 * Block every 'href', 'src' and 'data' attribute of `element' whose value
 * matches bad_url_reg. Each element is processed at most once, which is tracked
 * with the `haketilo_sanitized_urls' property set on it.
 */
function sanitize_element_urls(element) {
    if (element.haketilo_sanitized_urls)
	return;

    element.haketilo_sanitized_urls = true;

    const bad_attrs = [...(element.attributes || [])]
	  .filter(attr => /^(href|src|data)$/i.test(attr.localName))
	  .filter(attr => bad_url_reg.test(attr.value));

    for (const attr of bad_attrs) {
	/*
	 * Under some browsers (Mozilla) removing attributes doesn't stop their
	 * javascript from executing, but replacing them does. For 'src' and
	 * 'data' I chose to replace the attribute with a 'data:' URL and have
	 * it replace bad <iframe>'s/<object>'s contents with a "blocked"
	 * string. For 'href' (which appears on <a>'s) I chose to use a
	 * 'javascript:' URL to avoid having the page reloaded upon a link
	 * click.
	 */
	const replacement_value = /^href$/i.test(attr.localName) ?
	      "javascript:void('blocked');" : "data:text/plain,blocked";

	block_attribute(element, attr.localName, attr.namespaceURI,
			replacement_value);
    }

    /*
     * Trial and error shows that under certain browsers additional element
     * removal and re-addition might be necessary to prevent execution of a
     * 'javascript:' URL (Parabola's Iceweasel 75 requires it for 'src' URL of
     * an <iframe>).
     */
    if (bad_attrs.length > 0) {
	const replacement_elem = document.createElement("a");

	/* Prevent this node from being processed by our observer. */
	replacement_elem.haketilo_trusted_node = true;

	element.replaceWith(replacement_elem);
	replacement_elem.replaceWith(element);
    }
}
252

    
253
/*
 * Run sanitize_element_urls() on every descendant of `root' that carries a
 * non-namespaced 'href', 'src' or 'data' attribute.
 */
function sanitize_tree_urls(root) {
    for (const element of root.querySelectorAll("*[href], *[src], *[data]"))
	sanitize_element_urls(element);
}
257

    
258
#IF MOZILLA
259
/*
 * Neutralize intrinsic event handlers (on* attributes) of `element'. Only
 * attributes whose lowercased name starts with "on" and names a property of
 * the element are affected. For each of them the corresponding property of the
 * page-side object (`wrappedJSObject') is set to null and the attribute is
 * blocked. Each element is processed at most once, which is tracked with the
 * `haketilo_sanitized_onevent' property set on it.
 */
function sanitize_element_onevent(element) {
    if (element.haketilo_sanitized_onevent)
	return;

    element.haketilo_sanitized_onevent = true;

    for (const attribute_node of [...(element.attributes || [])]) {
	const attr = attribute_node.localName;
	const attr_lo = attr.toLowerCase();

	if (!attr_lo.startsWith("on") || !(attr_lo in element))
	    continue;

	/*
	 * Guard against redefined getter on DOM object property. This is a
	 * supplemental security measure since page's own scripts should be
	 * blocked and unable to redefine properties, anyway.
	 */
	if (Object.getOwnPropertyDescriptor(element.wrappedJSObject, attr)) {
	    console.error("Haketilo: Redefined property on a DOM object! The page might have bypassed our script blocking measures!");
	    continue;
	}

	element.wrappedJSObject[attr] = null;
	block_attribute(element, attr, attribute_node.namespaceURI,
			"javascript:void('blocked');");
    }
}
284

    
285
/* Run sanitize_element_onevent() on every descendant element of `root'. */
function sanitize_tree_onevent(root) {
    for (const element of root.querySelectorAll("*"))
	sanitize_element_onevent(element);
}
289
#ENDIF
290

    
291
/*
292
 * Sanitize elements on-the-fly and force <noscript> tags visible as they appear
293
 * using MutationObserver.
294
 *
295
 * Under Abrowser 97 it was observed that MutationObserver does not always work
296
 * as is should. When trying to observe nodes of an XMLDocument the behavior was
297
 * as if the "subtree" option to MutationObserver.observe() was ignored. To work
298
 * around this we avoid using the "subtree" option altogether and have the same
299
 * code work in all scenarios.
300
 */
301
function MOSanitizer(root, payload_present) {
302
    this.root            = root;
303
    this.payload_present = payload_present;
304

    
305
    this.recursively_sanitize(root);
306

    
307
    this.mo = new MutationObserver(ms => this.handle_mutations(ms));
308
}
309

    
310
MOSanitizer.prototype.observe = function() {
311
    this.mo.disconnect();
312

    
313
    let elem = this.root;
314
    while (elem && !elem.haketilo_trusted_node) {
315
	this.mo.observe(elem, {childList: true});
316
	elem = elem.lastElementChild;
317
    }
318
}
319

    
320
MOSanitizer.prototype.handle_mutations = function(mutations) {
321
    for (const mut of mutations) {
322
	for (const new_node of mut.addedNodes)
323
	    this.recursively_sanitize(new_node);
324
    }
325

    
326
    this.observe();
327
}
328

    
329
MOSanitizer.prototype.recursively_sanitize = function(elem) {
330
    const to_process = [elem];
331

    
332
    while (to_process.length > 0) {
333
	const current_elem = to_process.pop();
334

    
335
	if (current_elem.haketilo_trusted_node ||
336
	    current_elem.nodeType !== this.root.ELEMENT_NODE)
337
	    continue;
338

    
339
	to_process.push(...current_elem.children);
340

    
341
	sanitize_element_urls(current_elem);
342
#IF MOZILLA
343
	sanitize_element_onevent(current_elem);
344
#ENDIF
345
	if (!this.payload_present)
346
	    force_noscript_tag(current_elem);
347
    }
348
}
349

    
350
MOSanitizer.prototype.start = function() {
351
    this.recursively_sanitize(this.root);
352
    this.observe();
353
}
354

    
355
MOSanitizer.prototype.stop = function() {
356
    this.mo.disconnect();
357
}
358

    
359
#IF MOZILLA
360
/*
 * Normally, we block scripts with CSP. However, Mozilla does optimizations that
 * cause part of the DOM to be loaded when our content scripts get to run. Thus,
 * before the CSP rules we inject (for non-HTTP pages) become effective, we need
 * to somehow block the execution of `<script>'s and intrinsics that were
 * already there. Additionally, some browsers (IceCat 60) seem to have problems
 * applying this CSP to non-inline `<scripts>' in certain scenarios.
 *
 * This function is registered as a listener of the `beforescriptexecute' event
 * and cancels it.
 */
function prevent_script_execution(event) {
    event.preventDefault();
}
371
#ENDIF
372

    
373
/*
374
 * Here we block all scripts of a document which might be either an
375
 * HTMLDocument or an XMLDocument. Modifying an XML document might disrupt
376
 * Mozilla's XML preview. This is an unfortunate thing we have to accept for
377
 * now. XML documents *have to* be sanitized as well because they might
378
 * contain `<script>' tags (or on* attributes) with namespace declared as
379
 * "http://www.w3.org/1999/xhtml" or "http://www.w3.org/2000/svg" which allows
380
 * javascript execution.
381
 */
382
async function sanitize_document(doc, policy) {
383
    const root = doc.documentElement;
384
    const substitute_doc =
385
	  new DOMParser().parseFromString("<!DOCTYPE html>", "text/html");
386

    
387
#IF MOZILLA
388
    /*
389
     * Blocking of scripts that are in the DOM from the beginning. Needed for
390
     * Mozilla.
391
     */
392
    const listener_args = ["beforescriptexecute", prevent_script_execution];
393

    
394
    doc.addEventListener(...listener_args);
395
    substitute_doc.addEventListener(...listener_args);
396

    
397
    wait_loaded(doc).then(() => doc.removeEventListener(...listener_args));
398
#ENDIF
399

    
400
    /*
401
     * Ensure our CSP rules are employed from the beginning. This CSP injection
402
     * method is, when possible, going to be applied together with CSP rules
403
     * injected using webRequest.
404
     * Using elements namespaced as HTML makes this CSP injection also work for
405
     * non-HTML documents.
406
     */
407
    const source = `\
408
<!DOCTYPE html>
409
<html>
410
  <head>
411
    <meta http-equiv="Content-Security-Policy" content="${policy.csp}"/>
412
  </head>
413
  <body>
414
    Loading...
415
  </body>
416
</html>`;
417
    const temporary_html =
418
	  new DOMParser().parseFromString(source, "text/html").documentElement;
419

    
420
    /*
421
     * Root node gets hijacked now, to be re-attached after <head> is loaded
422
     * and sanitized.
423
     */
424
    root.replaceWith(temporary_html);
425
#IF MOZILLA
426
    /*
427
     * To be able to handle the onbeforescriptexecute event for scripts that
428
     * appear under detached document.
429
     */
430
    substitute_doc.documentElement.replaceWith(root);
431
#ENDIF
432

    
433
    const sanitizer = new MOSanitizer(root, !!policy.payload);
434
    sanitizer.start();
435
    wait_loaded(doc).then(() => sanitizer.stop());
436

    
437
    /*
438
     * When we don't inject payload, we neither block document's CSP `<meta>'
439
     * tags nor wait for `<head>' to be parsed.
440
     */
441
    if (policy.payload) {
442
	if (doc instanceof HTMLDocument)
443
	    await wait_for_head(doc, root);
444

    
445
	root.querySelectorAll("head meta")
446
	    .forEach(m => sanitize_meta(m, policy));
447
    }
448

    
449
    const scripts = [...root.getElementsByTagNameNS(html_ns, "script"),
450
		     ...root.getElementsByTagNameNS(svg_ns, "script")];
451
    scripts.forEach(s => sanitize_script(s, policy));
452
    temporary_html.replaceWith(root);
453
    scripts.forEach(s => desanitize_script(s, policy));
454
}
455

    
456
/*
 * Unregister all service workers registered for this page and reload it.
 *
 * Resolves normally when service workers are unsupported or none is
 * registered. When some registrations exist, the page is reloaded regardless
 * of whether unregistering succeeded. After a successful unregistration the
 * returned promise never settles. After a failed one it rejects with the
 * error.
 */
async function _disable_service_workers() {
    if (!navigator.serviceWorker)
	return;

    const registrations = await navigator.serviceWorker.getRegistrations();
    if (registrations.length === 0)
	return;

    console.warn("Haketilo: Service Workers detected on this page! Unregistering and reloading.");

    try {
	await Promise.all(registrations.map(r => r.unregister()));
    } finally {
	location.reload();
    }

    /* Never actually return! */
    return new Promise(() => 0);
}
475

    
476
/*
 * Trying to use service workers APIs might result in exceptions, for example
 * when in a non-HTML document. Because of this, we wrap the function that does
 * the actual work in a try {} block.
 *
 * Any exception is reported with console.warn() and not propagated.
 */
async function disable_service_workers() {
    try {
	await _disable_service_workers();
    } catch (e) {
	console.warn("Haketilo: Exception thrown during an attempt to detect and disable service workers.", e);
    }
}
488

    
489
/*
 * Entry point: enforce script blocking on the current document according to
 * `policy'. Nothing is done (and undefined is returned) when `policy.allow' is
 * truthy. Otherwise a promise is returned that resolves once the document has
 * been sanitized, the service workers have been dealt with and the document
 * has its content loaded.
 */
function enforce_blocking(policy) {
    if (policy.allow)
	return;

    return Promise.all([
	sanitize_document(document, policy),
	disable_service_workers(),
	wait_loaded(document)
    ]);
}
499
#EXPORT enforce_blocking
(3-3/4)