Project

General

Profile

Download (13.1 KB) Statistics
| Branch: | Tag: | Revision:

haketilo / content / policy_enforcing.js @ 33b6872c

1
/**
2
 * This file is part of Haketilo.
3
 *
4
 * Function: Enforcing script blocking rules on a given page, working from a
5
 *           content script.
6
 *
7
 * Copyright (C) 2021,2022 Wojtek Kosior
8
 * Copyright (C) 2021 jahoti
9
 *
10
 * This program is free software: you can redistribute it and/or modify
11
 * it under the terms of the GNU General Public License as published by
12
 * the Free Software Foundation, either version 3 of the License, or
13
 * (at your option) any later version.
14
 *
15
 * This program is distributed in the hope that it will be useful,
16
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18
 * GNU General Public License for more details.
19
 *
20
 * As additional permission under GNU GPL version 3 section 7, you
21
 * may distribute forms of that code without the copy of the GNU
22
 * GPL normally required by section 4, provided you include this
23
 * license notice and, in case of non-source distribution, a URL
24
 * through which recipients can access the Corresponding Source.
25
 * If you modify file(s) with this exception, you may extend this
26
 * exception to your version of the file(s), but you are not
27
 * obligated to do so. If you do not wish to do so, delete this
28
 * exception statement from your version.
29
 *
30
 * As a special exception to the GPL, any HTML file which merely
31
 * makes function calls to this code, and for that purpose
32
 * includes it by reference shall be deemed a separate work for
33
 * copyright law purposes. If you modify this code, you may extend
34
 * this exception to your version of the code, but you are not
35
 * obligated to do so. If you do not wish to do so, delete this
36
 * exception statement from your version.
37
 *
38
 * You should have received a copy of the GNU General Public License
39
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
40
 *
41
 * I, Wojtek Kosior, thereby promise not to sue for violation of this file's
42
 * license. Although I request that you do not make use of this code in a
43
 * proprietary program, I am not going to enforce this in court.
44
 */
45

    
46
#FROM common/misc.js IMPORT gen_nonce, csp_header_regex
47

    
48
/*
 * Keep a `content_loaded' flag directly on the document object. When this
 * script starts, only the "complete" ready state counts as already loaded.
 */
document.content_loaded = document.readyState === "complete";

/*
 * Return a promise for the moment `target' counts as loaded. It is resolved
 * right away when the target's `content_loaded' flag is set; otherwise it gets
 * resolved by a one-shot `DOMContentLoaded' listener registered on the target.
 */
const wait_loaded = target => {
    if (target.content_loaded)
        return Promise.resolve();

    return new Promise(resolve => {
        target.addEventListener("DOMContentLoaded", resolve, {once: true});
    });
};

/* Raise the flag as soon as the document itself is reported as loaded. */
wait_loaded(document).then(() => {
    document.content_loaded = true;
});
53

    
54
/*
55
 * In the case of HTML documents:
56
 * 1. When injecting some payload we need to sanitize <meta> CSP tags before
57
 *    they reach the document.
58
 * 2. Only <meta> tags inside <head> are considered valid by the browser and
59
 *    need to be considered.
60
 * 3. We want to detach <html> from document, wait until its <head> completes
61
 *    loading, sanitize it and re-attach <html>.
62
 * 4. We shall wait for anything to appear in or after <body> and take that as
63
 *    a sign <head> has finished loading.
64
 * 5. Otherwise, getting the `DOMContentLoaded' event on the document shall also
65
 *    be a sign that <head> is fully loaded.
66
 */
67

    
68
/*
 * Create a MutationObserver that watches the direct children of `DOM_element'
 * and start it immediately. Each reported change re-runs try_body_started()
 * for `waiting'. The running observer is returned so that the caller is able
 * to disconnect it later.
 */
function make_body_start_observer(DOM_element, waiting) {
    const on_children_changed = () => try_body_started(waiting);
    const child_list_observer = new MutationObserver(on_children_changed);

    child_list_observer.observe(DOM_element, {childList: true});

    return child_list_observer;
}
73

    
74
/*
 * Check whether parsing has moved past <head>. This is assumed to be the case
 * when the detached <html> has a <body> that either has a child or is followed
 * by a sibling, or when the document's current root element is followed by a
 * sibling.
 *
 * If so, waiting gets finished and `true' is returned. Otherwise nothing is
 * returned and, when a <body> already exists and fewer than 2 observers are
 * registered in `waiting', an additional observer is put on that <body>.
 */
function try_body_started(waiting) {
    const body = waiting.detached_html.querySelector("body");

    let head_is_complete = false;
    if (body)
        head_is_complete = Boolean(body.firstChild || body.nextSibling);
    /* Only look at the live document when <body> gave no answer. */
    if (!head_is_complete)
        head_is_complete = Boolean(waiting.doc.documentElement.nextSibling);

    if (head_is_complete) {
        finish_waiting(waiting);
        return true;
    }

    const body_needs_observer = body && waiting.observers.length < 2;
    if (body_needs_observer)
        waiting.observers.push(make_body_start_observer(body, waiting));
}
86

    
87
/*
 * Conclude a wait described by `waiting'. Only the first call has any effect:
 * it marks the object as finished, disconnects every observer stored in it and
 * schedules `waiting.callback' through a zero-delay timeout (so the callback
 * never runs synchronously from within this function).
 */
function finish_waiting(waiting) {
    if (!waiting.finished) {
        waiting.finished = true;

        for (const observer of waiting.observers)
            observer.disconnect();

        setTimeout(waiting.callback, 0);
    }
}
94

    
95
/*
 * Arrange for `callback' to be called once <head> of the detached root element
 * `detached_html' of document `doc' can be considered fully parsed.
 *
 * The check is performed once right away. If it does not succeed, the direct
 * children of `detached_html' start being observed and, as a fallback, waiting
 * is also finished when `doc' gets loaded.
 */
function _wait_for_head(doc, detached_html, callback) {
    const waiting = {doc, detached_html, callback, observers: []};

    if (try_body_started(waiting))
        return;

    /*
     * The call above might already have registered an observer on <body>.
     * Append to the list instead of overwriting it, so that this observer is
     * not forgotten (and left connected forever) and no duplicate of it gets
     * created later.
     */
    waiting.observers.push(make_body_start_observer(detached_html, waiting));

    wait_loaded(doc).then(() => finish_waiting(waiting));
}
105

    
106
/*
 * Promise-returning wrapper around _wait_for_head(). The promise's resolve
 * function is what gets passed as the callback.
 */
function wait_for_head(doc, detached_html) {
    const start_waiting =
          resolve => _wait_for_head(doc, detached_html, resolve);

    return new Promise(start_waiting);
}
109

    
110
const blocked_str = "blocked";

/*
 * Disable attribute `attr' of `node' by moving its value to an attribute named
 * `blocked-<attr>'. If that name is already taken, the existing value is first
 * moved to `blocked-blocked-<attr>' and so on, so no previously stored value
 * gets overwritten.
 *
 * When `ns' is a string, the namespace-aware *AttributeNS() methods are used
 * with it; otherwise the plain *Attribute() ones are.
 *
 * After the move the original attribute is removed. If `replace_with' is not
 * null, the attribute is then set anew to that value.
 */
function block_attribute(node, attr, ns=null, replace_with=null) {
    const [hasa, geta, seta, rema] = ["has", "get", "set", "remove"]
          .map(m => (n, ...args) => typeof ns === "string" ?
               n[`${m}AttributeNS`](ns, ...args) : n[`${m}Attribute`](...args));
    /*
     * Disabling attributes by prepending `blocked-' allows them to still be
     * relatively easily accessed in case they contain some useful data.
     *
     * Name parts have to be joined with "-" here. This way the names we probe
     * for are the very same names we later write to.
     */
    const construct_name = [attr];
    while (hasa(node, construct_name.join("-")))
        construct_name.unshift(blocked_str);

    /* Shift values starting from the longest (outermost) name. */
    while (construct_name.length > 1) {
        construct_name.shift();
        const name = construct_name.join("-");
        seta(node, `${blocked_str}-${name}`, geta(node, name));
    }

    rema(node, attr);
    if (replace_with !== null)
        seta(node, attr, replace_with);
}
134

    
135
/*
136
 * Used to disable `<script>'s and `<meta>'s that have not yet been added to
137
 * live DOM (doesn't work for those already added).
138
 */
139
/*
 * Block the `content' attribute of a <meta> element, but only when the
 * element's `http-equiv' value matches `csp_header_regex' and its content is
 * non-empty.
 */
function sanitize_meta(meta) {
    const is_csp_meta = csp_header_regex.test(meta.httpEquiv);

    if (!is_csp_meta || !meta.content)
        return;

    block_attribute(meta, "content");
}
143

    
144
/*
 * Remember the current value of script's `type' attribute (null when there is
 * none) in its `haketilo_blocked_type' property and switch the script's type
 * to "text/plain".
 */
function sanitize_script(script) {
    const declared_type = script.getAttribute("type");

    script.haketilo_blocked_type = declared_type;
    script.type = "text/plain";
}
148

    
149
/*
 * Executed after `<script>' has been connected to the DOM, when it is no longer
 * eligible for being executed by the browser.
 *
 * Restores the `type' attribute from the value kept in the script's
 * `haketilo_blocked_type' property and deletes that property. When the kept
 * value is null or undefined, the attribute gets removed instead. In that case
 * nothing is written to the attribute first, so it never holds a stringified
 * "null"/"undefined", not even momentarily.
 */
function desanitize_script(script) {
    const original_type = script.haketilo_blocked_type;

    if (original_type === null || original_type === undefined)
        script.removeAttribute("type");
    else
        script.setAttribute("type", original_type);

    delete script.haketilo_blocked_type;
}
161

    
162
/*
 * Blocking certain attributes that might allow 'javascript:' URLs. Some of
 * these are: <iframe>'s 'src' attributes (would normally execute js in URL upon
 * frame's load), <object>'s 'data' attribute (would also execute upon load) and
 * <a>'s 'href' attribute (would execute upon link click).
 */
const bad_url_reg = /^data:([^,;]*ml|unknown-content-type)|^javascript:/i;

/*
 * Block those `href', `src' and `data' attributes (in any namespace) of
 * `element' whose values are matched by `bad_url_reg'. Every element gets
 * processed at most once; this is tracked with its `haketilo_sanitized_urls'
 * property. Nodes without attributes (e.g. text nodes) are only marked.
 */
function sanitize_element_urls(element) {
    if (element.haketilo_sanitized_urls)
        return;

    element.haketilo_sanitized_urls = true;

    /*
     * URL parsers strip leading C0 control characters and spaces and drop ASCII
     * tabs and newlines from anywhere in the input. Do the same before testing,
     * so that values like " javascript:..." or "java\tscript:..." are caught by
     * the anchored regex as well.
     */
    const as_seen_by_url_parser = value =>
          value.replace(/^[\x00-\x20]+/, "").replace(/[\t\n\r]/g, "");

    const bad_attrs = [...(element.attributes || [])]
          .filter(attr => /^(href|src|data)$/i.test(attr.localName))
          .filter(attr => bad_url_reg.test(as_seen_by_url_parser(attr.value)));

    for (const attr of bad_attrs) {
        /*
         * Under some browsers (Mozilla) removing attributes doesn't stop their
         * javascript from executing, but replacing them does. For 'src' and
         * 'data' I chose to replace the attribute with a 'data:' URL and have
         * it replace bad <iframe>'s/<object>'s contents with a "blocked"
         * string. For 'href' (which appears on <a>'s) I chose to use a
         * 'javascript:' URL to avoid having the page reloaded upon a link
         * click.
         */
        const replacement_value = /^href$/i.test(attr.localName) ?
              "javascript:void('blocked');" : "data:text/plain,blocked";
        block_attribute(element, attr.localName, attr.namespaceURI,
                        replacement_value);
    }

    /*
     * Trial and error shows that under certain browsers additional element
     * removal and re-addition might be necessary to prevent execution of a
     * 'javascript:' URL (Parabola's Iceweasel 75 requires it for 'src' URL of
     * an <iframe>).
     */
    if (bad_attrs.length > 0) {
        const replacement_elem = document.createElement("a");
        element.replaceWith(replacement_elem);
        replacement_elem.replaceWith(element);
    }
}
210

    
211
/*
 * Run sanitize_element_urls() on every descendant of `root' that carries a
 * `href', `src' or `data' attribute.
 *
 * The `*|' namespace wildcard makes the selectors match attributes from any
 * namespace as well as those without one. A bare `[href]' only matches
 * attributes that are in no namespace and would skip e.g. SVG's `xlink:href'.
 */
function sanitize_tree_urls(root) {
    root.querySelectorAll("*[*|href], *[*|src], *[*|data]")
        .forEach(sanitize_element_urls);
}
215

    
216
#IF MOZILLA
217
/*
 * Neutralize intrinsic event handlers (on* attributes) of `element'. Every
 * element gets processed at most once; this is tracked with its
 * `haketilo_sanitized_onevent' property.
 *
 * An attribute is acted upon when its lowercased local name starts with "on"
 * and is also a property of the page's view of the element
 * (`wrappedJSObject'). The corresponding property gets set to null and the
 * attribute itself gets blocked.
 */
function sanitize_element_onevent(element) {
    if (element.haketilo_sanitized_onevent)
        return;

    element.haketilo_sanitized_onevent = true;

    for (const attribute_node of [...(element.attributes || [])]) {
        const attr = attribute_node.localName;
        const attr_lo = attr.toLowerCase();

        if (!/^on/.test(attr_lo) || !(attr_lo in element.wrappedJSObject))
            continue;

        /*
         * Guard against redefined getter on DOM object property. This is a
         * supplemental security measure since page's own scripts should be
         * blocked and unable to redefine properties, anyway.
         *
         * The lowercased name is used here and below, because that is the name
         * the property was looked up under a moment ago.
         */
        if (Object.getOwnPropertyDescriptor(element.wrappedJSObject, attr_lo)) {
            console.error("Haketilo: Redefined property on a DOM object! The page might have bypassed our script blocking measures!");
            continue;
        }
        element.wrappedJSObject[attr_lo] = null;
        /* The attribute itself is blocked under its actual name. */
        block_attribute(element, attr, attribute_node.namespaceURI,
                        "javascript:void('blocked');");
    }
}
242

    
243
/*
 * Run sanitize_element_onevent() on every element that is a descendant of
 * `root'.
 */
function sanitize_tree_onevent(root) {
    const descendants = root.querySelectorAll("*");

    descendants.forEach(sanitize_element_onevent);
}
247
#ENDIF
248

    
249
function start_mo_sanitizing(doc) {
250
    if (!doc.content_loaded) {
251
	function mutation_handler(mutation) {
252
	    mutation.addedNodes.forEach(sanitize_element_urls);
253
#IF MOZILLA
254
	    mutation.addedNodes.forEach(sanitize_element_onevent);
255
#ENDIF
256
	}
257
	const mo = new MutationObserver(ms => ms.forEach(mutation_handler));
258
	mo.observe(doc, {childList: true, subtree: true});
259
	wait_loaded(doc).then(() => mo.disconnect());
260
    }
261
}
262

    
263
#IF MOZILLA
264
/*
 * Normally, we block scripts with CSP. However, Mozilla does optimizations that
 * cause part of the DOM to be loaded when our content scripts get to run. Thus,
 * before the CSP rules we inject (for non-HTTP pages) become effective, we need
 * to somehow block the execution of `<script>'s and intrinsics that were
 * already there. Additionally, some browsers (IceCat 60) seem to have problems
 * applying this CSP to non-inline `<scripts>' in certain scenarios.
 *
 * This event handler cancels the event it receives, unless the event's target
 * is marked with a truthy `haketilo_payload' property.
 */
function prevent_script_execution(event) {
    const is_our_payload = Boolean(event.target.haketilo_payload);

    if (is_our_payload)
        return;

    event.preventDefault();
}
276
#ENDIF
277

    
278
/*
279
 * Here we block all scripts of a document which might be either an
280
 * HTMLDocument or an XMLDocument. Modifying an XML document might disrupt
281
 * Mozilla's XML preview. This is an unfortunate thing we have to accept for
282
 * now. XML documents *have to* be sanitized as well because they might
283
 * contain `<script>' tags (or on* attributes) with namespace declared as
284
 * "http://www.w3.org/1999/xhtml" or "http://www.w3.org/2000/svg" which allows
285
 * javascript execution.
286
 */
287
async function sanitize_document(doc, policy) {
288
#IF MOZILLA
289
    /*
290
     * Blocking of scripts that are in the DOM from the beginning. Needed for
291
     * Mozilla.
292
     */
293
    const listener_args = ["beforescriptexecute", prevent_script_execution];
294
    doc.addEventListener(...listener_args);
295
    wait_loaded(doc).then(() => doc.removeEventListener(...listener_args));
296

    
297
    sanitize_tree_urls(doc.documentElement);
298
    sanitize_tree_onevent(doc.documentElement);
299
#ENDIF
300

    
301
    /*
302
     * Ensure our CSP rules are employed from the beginning. This CSP injection
303
     * method is, when possible, going to be applied together with CSP rules
304
     * injected using webRequest.
305
     * Using elements namespaced as HTML makes this CSP injection also work for
306
     * non-HTML documents.
307
     */
308
    const source = `\
309
<!DOCTYPE html>
310
<html>
311
  <head>
312
    <meta http-equiv="Content-Security-Policy" content="${policy.csp}"/>
313
  </head>
314
  <body>
315
    Loading...
316
  </body>
317
</html>`;
318
    const temporary_html =
319
	  new DOMParser().parseFromString(source, "text/html").documentElement;
320

    
321
    /*
322
     * Root node gets hijacked now, to be re-attached after <head> is loaded
323
     * and sanitized.
324
     */
325
    const root = doc.documentElement;
326
    root.replaceWith(temporary_html);
327

    
328
    /*
329
     * When we don't inject payload, we neither block document's CSP `<meta>'
330
     * tags nor wait for `<head>' to be parsed.
331
     */
332
    if (policy.payload) {
333
	await wait_for_head(doc, root);
334

    
335
	root.querySelectorAll("head meta")
336
	    .forEach(m => sanitize_meta(m, policy));
337
    }
338

    
339
    sanitize_tree_urls(root);
340
    root.querySelectorAll("script").forEach(s => sanitize_script(s, policy));
341
    temporary_html.replaceWith(root);
342
    root.querySelectorAll("script").forEach(s => desanitize_script(s, policy));
343
#IF MOZILLA
344
    sanitize_tree_onevent(root);
345
#ENDIF
346

    
347
    start_mo_sanitizing(doc);
348
}
349

    
350
/*
 * Look for service workers registered for this page. When there are none (or
 * the API is absent), simply finish. Otherwise warn, try to unregister all of
 * them and reload the page no matter whether unregistering succeeded.
 */
async function _disable_service_workers() {
    const worker_container = navigator.serviceWorker;
    if (!worker_container)
        return;

    const registrations = await worker_container.getRegistrations();
    if (registrations.length === 0)
        return;

    console.warn("Haketilo: Service Workers detected on this page! Unregistering and reloading.");

    try {
        const unregistrations = registrations.map(r => r.unregister());
        await Promise.all(unregistrations);
    } finally {
        location.reload();
    }

    /* Never actually return! */
    return new Promise(() => 0);
}
369

    
370
/*
 * Trying to use service workers APIs might result in exceptions, for example
 * when in a non-HTML document. Because of this, we wrap the function that does
 * the actual work in a try {} block. A caught exception is only reported with
 * a console warning.
 */
async function disable_service_workers() {
    const failure_notice =
          "Haketilo: Exception thrown during an attempt to detect and disable service workers.";

    try {
        await _disable_service_workers();
    } catch (error) {
        console.warn(failure_notice, error);
    }
}
382

    
383
/*
 * Entry point of script blocking. Does nothing (and returns nothing) when
 * `policy.allow' is truthy. Otherwise starts document sanitizing and service
 * worker disabling and returns a promise that waits for both of them as well
 * as for the document to be loaded.
 */
function enforce_blocking(policy) {
    if (!policy.allow) {
        const tasks = [
            sanitize_document(document, policy),
            disable_service_workers(),
            wait_loaded(document)
        ];

        return Promise.all(tasks);
    }
}
393
#EXPORT enforce_blocking
(2-2/3)