1
|
/**
 * This file is part of Haketilo.
 *
 * Function: Enforcing script blocking rules on a given page, working from a
 * content script.
 *
 * Copyright (C) 2021,2022 Wojtek Kosior
 * Copyright (C) 2021 jahoti
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * As additional permission under GNU GPL version 3 section 7, you
 * may distribute forms of that code without the copy of the GNU
 * GPL normally required by section 4, provided you include this
 * license notice and, in case of non-source distribution, a URL
 * through which recipients can access the Corresponding Source.
 * If you modify file(s) with this exception, you may extend this
 * exception to your version of the file(s), but you are not
 * obligated to do so. If you do not wish to do so, delete this
 * exception statement from your version.
 *
 * As a special exception to the GPL, any HTML file which merely
 * makes function calls to this code, and for that purpose
 * includes it by reference shall be deemed a separate work for
 * copyright law purposes. If you modify this code, you may extend
 * this exception to your version of the code, but you are not
 * obligated to do so. If you do not wish to do so, delete this
 * exception statement from your version.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 *
 * I, Wojtek Kosior, thereby promise not to sue for violation of this file's
 * license. Although I request that you do not make use of this code in a
 * proprietary program, I am not going to enforce this in court.
 */
|
45
|
|
46
|
#FROM common/misc.js IMPORT gen_nonce, csp_header_regex
|
47
|
|
48
|
/*
 * Namespace URIs used when looking up script elements by namespace: XHTML and
 * SVG.
 */
const html_ns = "http://www.w3.org/1999/xhtml";
const svg_ns = "http://www.w3.org/2000/svg";
|
50
|
|
51
|
/*
 * Custom flag stored on the document object and consulted by wait_loaded(). It
 * starts out true only when the document is already in the "complete" ready
 * state.
 */
document.content_loaded = document.readyState === "complete";
|
52
|
/*
 * Return a promise that resolves once `e' has finished loading. If the
 * `content_loaded' flag is already set on `e', the promise is resolved
 * immediately. Otherwise it resolves (with the event object) on the first
 * `DOMContentLoaded' event dispatched on `e'.
 */
const wait_loaded = e => {
    if (e.content_loaded)
        return Promise.resolve();

    return new Promise(resolve => {
        e.addEventListener("DOMContentLoaded", resolve, {once: true});
    });
};
|
54
|
|
55
|
/* Mark the document as loaded as soon as wait_loaded() resolves for it. */
wait_loaded(document).then(() => document.content_loaded = true);
|
56
|
|
57
|
/*
 * In the case of HTML documents:
 * 1. When injecting some payload we need to sanitize <meta> CSP tags before
 *    they reach the document.
 * 2. Only <meta> tags inside <head> are considered valid by the browser and
 *    need to be considered.
 * 3. We want to detach <html> from document, wait until its <head> completes
 *    loading, sanitize it and re-attach <html>.
 * 4. We shall wait for anything to appear in or after <body> and take that as
 *    a sign <head> has finished loading.
 * 5. Otherwise, getting the `DOMContentLoaded' event on the document shall also
 *    be a sign that <head> is fully loaded.
 */
|
70
|
|
71
|
/*
 * Create a MutationObserver that re-runs try_body_started(waiting) every time
 * the list of direct children of `DOM_element' changes. The observer is
 * returned already attached so that the caller can disconnect it later.
 */
function make_body_start_observer(DOM_element, waiting) {
    const on_children_changed = () => try_body_started(waiting);
    const body_start_observer = new MutationObserver(on_children_changed);

    body_start_observer.observe(DOM_element, {childList: true});

    return body_start_observer;
}
|
76
|
|
77
|
/*
 * Check whether anything has appeared in or after <body> of the detached
 * <html> element, or after the document's current root element. If so, finish
 * the waiting and return true. Otherwise return undefined and, when <body>
 * already exists and fewer than 2 observers are registered, start observing
 * <body> as well.
 */
function try_body_started(waiting) {
    const body = waiting.detached_html.querySelector("body");

    const body_has_started = () =>
          Boolean(body && (body.firstChild || body.nextSibling));
    const something_follows_root = () =>
          Boolean(waiting.doc.documentElement.nextSibling);

    if (body_has_started() || something_follows_root()) {
        finish_waiting(waiting);
        return true;
    }

    if (!body || waiting.observers.length >= 2)
        return;

    waiting.observers.push(make_body_start_observer(body, waiting));
}
|
89
|
|
90
|
/*
 * Conclude waiting for <head>: disconnect all registered observers and
 * schedule the callback with a zero-delay timeout. Only the first call has any
 * effect; later calls are ignored thanks to the `finished' flag.
 */
function finish_waiting(waiting) {
    if (!waiting.finished) {
        waiting.finished = true;

        for (const registered_observer of waiting.observers)
            registered_observer.disconnect();

        setTimeout(waiting.callback, 0);
    }
}
|
97
|
|
98
|
/*
 * Callback-based implementation of wait_for_head(). Calls `callback' once
 * <head> of `detached_html' is considered loaded, i.e. when something appears
 * in or after <body> or when `doc' finishes loading, whichever comes first.
 */
function _wait_for_head(doc, detached_html, callback) {
    const waiting = {doc, detached_html, callback, observers: []};

    if (try_body_started(waiting))
        return;

    /*
     * The call above might have already registered an observer on <body>.
     * Append to the list instead of overwriting it, so that this observer is
     * also disconnected when waiting finishes and is not created again later.
     */
    waiting.observers.push(make_body_start_observer(detached_html, waiting));

    wait_loaded(doc).then(() => finish_waiting(waiting));
}
|
108
|
|
109
|
/*
 * Promise-returning wrapper around _wait_for_head(). The returned promise gets
 * resolved when _wait_for_head() invokes the callback it is given.
 */
function wait_for_head(doc, detached_html) {
    const executor = resolve => {
        _wait_for_head(doc, detached_html, resolve);
    };

    return new Promise(executor);
}
|
112
|
|
113
|
const blocked_str = "blocked";

/*
 * Disable attribute `attr' of `node' by moving its value to an attribute named
 * `blocked-<attr>'. If such an attribute already exists, its value is first
 * moved to `blocked-blocked-<attr>' and so on, so no earlier value is lost.
 *
 * When `ns' is a string, the namespace-aware *AttributeNS() methods are used
 * with that namespace; otherwise the plain *Attribute() methods are used.
 * When `replace_with' is not null, `attr' is afterwards set anew to that value.
 */
function block_attribute(node, attr, ns=null, replace_with=null) {
    const namespaced = typeof ns === "string";

    const call = (verb, ...args) => namespaced ?
          node[`${verb}AttributeNS`](ns, ...args) :
          node[`${verb}Attribute`](...args);

    /*
     * Disabling attributes by prepending `blocked-' allows them to still be
     * relatively easily accessed in case they contain some useful data.
     */
    const name_at = depth =>
          [...Array(depth).fill(blocked_str), attr].join("-");

    /* Find how many names in the `blocked-' chain are already taken. */
    let taken = 0;
    while (call("has", name_at(taken)))
        taken++;

    /* Shift every value one `blocked-' level deeper, deepest one first. */
    for (let depth = taken; depth > 0; depth--)
        call("set", name_at(depth), call("get", name_at(depth - 1)));

    call("remove", attr);

    if (replace_with !== null)
        call("set", attr, replace_with);
}
|
137
|
|
138
|
/*
 * Used to disable `<script>'s and `<meta>'s that have not yet been added to
 * live DOM (doesn't work for those already added).
 */
|
142
|
/*
 * Block the `content' attribute of a <meta> element when its `http-equiv'
 * value matches csp_header_regex and its content is non-empty. Other <meta>
 * elements are left untouched.
 */
function sanitize_meta(meta) {
    if (!csp_header_regex.test(meta.httpEquiv))
        return;

    if (!meta.content)
        return;

    block_attribute(meta, "content");
}
|
146
|
|
147
|
/*
 * Temporarily disable a script element by switching its type to "text/plain".
 * The previous value of the `type' attribute (null if there was none) is
 * remembered in the `haketilo_blocked_type' property of the element.
 */
function sanitize_script(script) {
    const original_type = script.getAttribute("type");

    script.haketilo_blocked_type = original_type;
    script.type = "text/plain";
}
|
151
|
|
152
|
/*
 * Executed after `<script>' has been connected to the DOM, when it is no longer
 * eligible for being executed by the browser.
 */
|
156
|
/*
 * Undo the type change of a script element: put back the value remembered in
 * its `haketilo_blocked_type' property, or drop the `type' attribute altogether
 * when no value was remembered. The helper property is deleted afterwards.
 */
function desanitize_script(script) {
    const remembered_type = script.haketilo_blocked_type;

    script.setAttribute("type", remembered_type);

    if (remembered_type === null || remembered_type === undefined)
        script.removeAttribute("type");

    delete script.haketilo_blocked_type;
}
|
164
|
|
165
|
/*
 * Blocking certain attributes that might allow 'javascript:' URLs. Some of
 * these are: <iframe>'s 'src' attributes (would normally execute js in URL upon
 * frame's load), <object>'s 'data' attribute (would also execute upon load) and
 * <a>'s 'href' attribute (would execute upon link click).
 */
|
171
|
const bad_url_reg = /^data:([^,;]*ml|unknown-content-type)|^javascript:/i;

/*
 * Block `href', `src' and `data' attributes of `element' whose values are
 * dangerous URLs (as determined by bad_url_reg). Each element is processed at
 * most once; this is tracked with the `haketilo_sanitized_urls' property.
 */
function sanitize_element_urls(element) {
    if (element.haketilo_sanitized_urls)
        return;

    element.haketilo_sanitized_urls = true;

    /*
     * Browsers strip leading C0 control characters and spaces and remove all
     * tabs and newlines from a URL before parsing it. Do the same before
     * matching, so that values like " javascript:..." or "java\nscript:..."
     * do not slip through.
     */
    const as_seen_by_url_parser = value =>
          value.replace(/[\t\n\r]/g, "").replace(/^[\u0000-\u0020]+/, "");

    const bad_attrs = [...(element.attributes || [])]
          .filter(attr => /^(href|src|data)$/i.test(attr.localName))
          .filter(attr => bad_url_reg.test(as_seen_by_url_parser(attr.value)));

    for (const attr of bad_attrs) {
        /*
         * Under some browsers (Mozilla) removing attributes doesn't stop their
         * javascript from executing, but replacing them does. For 'src' and
         * 'data' I chose to replace the attribute with a 'data:' URL and have
         * it replace bad <iframe>'s/<object>'s contents with a "blocked"
         * string. For 'href' (which appears on <a>'s) I chose to use a
         * 'javascript:' URL to avoid having the page reloaded upon a link
         * click.
         */
        const replacement_value = /^href$/i.test(attr.localName) ?
              "javascript:void('blocked');" : "data:text/plain,blocked";

        block_attribute(element, attr.localName, attr.namespaceURI,
                        replacement_value);
    }

    /*
     * Trial and error shows that under certain browsers additional element
     * removal and re-addition might be necessary to prevent execution of a
     * 'javascript:' URL (Parabola's Iceweasel 75 requires it for 'src' URL of
     * an <iframe>).
     */
    if (bad_attrs.length > 0) {
        const replacement_elem = document.createElement("a");

        /* Prevent this node from being processed by our observer. */
        replacement_elem.haketilo_trusted_node = true;

        element.replaceWith(replacement_elem);
        replacement_elem.replaceWith(element);
    }
}
|
217
|
|
218
|
/*
 * Run sanitize_element_urls() on every descendant of `root' that carries a
 * `href', `src' or `data' attribute.
 */
function sanitize_tree_urls(root) {
    const url_carriers = root.querySelectorAll("*[href], *[src], *[data]");

    url_carriers.forEach(sanitize_element_urls);
}
|
222
|
|
223
|
#IF MOZILLA
|
224
|
/*
 * Neutralize intrinsic event handlers (on* attributes) of `element'. For each
 * attribute whose lowercased name starts with "on" and names a property of the
 * element, the property is reset to null on the page's view of the element
 * (`wrappedJSObject') and the attribute itself gets blocked. Each element is
 * processed at most once; this is tracked with the
 * `haketilo_sanitized_onevent' property.
 */
function sanitize_element_onevent(element) {
    if (element.haketilo_sanitized_onevent)
        return;

    element.haketilo_sanitized_onevent = true;

    [...(element.attributes || [])].forEach(attribute_node => {
        const attr = attribute_node.localName;
        const attr_lo = attr.toLowerCase();

        const names_handler = /^on/.test(attr_lo) && attr_lo in element;
        if (!names_handler)
            return;

        /*
         * Guard against redefined getter on DOM object property. This is a
         * supplemental security measure since page's own scripts should be
         * blocked and unable to redefine properties, anyway.
         */
        const redefined =
              Object.getOwnPropertyDescriptor(element.wrappedJSObject, attr);
        if (redefined) {
            console.error("Haketilo: Redefined property on a DOM object! The page might have bypassed our script blocking measures!");
            return;
        }

        element.wrappedJSObject[attr] = null;
        block_attribute(element, attr, attribute_node.namespaceURI,
                        "javascript:void('blocked');");
    });
}
|
249
|
|
250
|
/* Run sanitize_element_onevent() on every descendant element of `root'. */
function sanitize_tree_onevent(root) {
    const all_descendants = root.querySelectorAll("*");

    all_descendants.forEach(sanitize_element_onevent);
}
|
254
|
#ENDIF
|
255
|
|
256
|
/*
 * Sanitize elements on-the-fly as they appear using MutationObserver.
 *
 * Under Abrowser 97 it was observed that MutationObserver does not always work
 * as it should. When trying to observe nodes of an XMLDocument the behavior was
 * as if the "subtree" option to MutationObserver.observe() was ignored. To work
 * around this we avoid using the "subtree" option altogether and have the same
 * code work in all scenarios.
 */
|
265
|
/*
 * Constructor. Remembers `root', sanitizes the tree under it right away and
 * prepares (but does not yet attach) a MutationObserver that forwards its
 * mutation records to handle_mutations().
 */
function MOSanitizer(root) {
    this.root = root;

    this.recursively_sanitize(root);

    const forward_mutations = mutations => this.handle_mutations(mutations);
    this.mo = new MutationObserver(forward_mutations);
}
|
272
|
|
273
|
/*
 * (Re-)attach the observer. Starting at the root, the chain of last element
 * children is followed and the child list of every element on it is observed.
 * The walk ends at the first node marked with `haketilo_trusted_node'.
 */
MOSanitizer.prototype.observe = function() {
    this.mo.disconnect();

    for (let elem = this.root;
         elem && !elem.haketilo_trusted_node;
         elem = elem.lastElementChild)
        this.mo.observe(elem, {childList: true});
};
|
282
|
|
283
|
/*
 * MutationObserver callback. Sanitizes every node added by the reported
 * mutations and then re-attaches the observer, since the set of elements that
 * need to be observed might have changed.
 */
MOSanitizer.prototype.handle_mutations = function(mutations) {
    for (const {addedNodes} of mutations) {
        for (const added_node of addedNodes)
            this.recursively_sanitize(added_node);
    }

    this.observe();
};
|
291
|
|
292
|
MOSanitizer.prototype.recursively_sanitize = function(elem) {
|
293
|
const to_process = [elem];
|
294
|
|
295
|
while (to_process.length > 0) {
|
296
|
const current_elem = to_process.pop();
|
297
|
|
298
|
if (current_elem.haketilo_trusted_node ||
|
299
|
current_elem.nodeType !== this.root.ELEMENT_NODE)
|
300
|
continue;
|
301
|
|
302
|
to_process.push(...current_elem.children);
|
303
|
|
304
|
sanitize_element_urls(current_elem);
|
305
|
#IF MOZILLA
|
306
|
sanitize_element_onevent(current_elem);
|
307
|
#ENDIF
|
308
|
}
|
309
|
}
|
310
|
|
311
|
/* Sanitize the whole tree under the root and begin observing it. */
MOSanitizer.prototype.start = function() {
    const {root} = this;

    this.recursively_sanitize(root);
    this.observe();
};
|
315
|
|
316
|
/* Stop reacting to DOM changes by disconnecting the mutation observer. */
MOSanitizer.prototype.stop = function() {
    const {mo} = this;

    mo.disconnect();
};
|
319
|
|
320
|
#IF MOZILLA
|
321
|
/*
 * Normally, we block scripts with CSP. However, Mozilla does optimizations that
 * cause part of the DOM to be loaded when our content scripts get to run. Thus,
 * before the CSP rules we inject (for non-HTTP pages) become effective, we need
 * to somehow block the execution of `<script>'s and intrinsics that were
 * already there. Additionally, some browsers (IceCat 60) seem to have problems
 * applying this CSP to non-inline `<scripts>' in certain scenarios.
 */
|
329
|
/*
 * Event listener that cancels the default action of the event it receives.
 * Meant to be registered for `beforescriptexecute' events.
 */
function prevent_script_execution(event) {
    const script_event = event;

    script_event.preventDefault();
}
|
332
|
#ENDIF
|
333
|
|
334
|
/*
 * Here we block all scripts of a document which might be either an
 * HTMLDocument or an XMLDocument. Modifying an XML document might disrupt
 * Mozilla's XML preview. This is an unfortunate thing we have to accept for
 * now. XML documents *have to* be sanitized as well because they might
 * contain `<script>' tags (or on* attributes) with namespace declared as
 * "http://www.w3.org/1999/xhtml" or "http://www.w3.org/2000/svg" which allows
 * javascript execution.
 */
|
343
|
async function sanitize_document(doc, policy) {
|
344
|
const root = doc.documentElement;
|
345
|
const substitute_doc =
|
346
|
new DOMParser().parseFromString("<!DOCTYPE html>", "text/html");
|
347
|
|
348
|
#IF MOZILLA
|
349
|
/*
|
350
|
* Blocking of scripts that are in the DOM from the beginning. Needed for
|
351
|
* Mozilla.
|
352
|
*/
|
353
|
const listener_args = ["beforescriptexecute", prevent_script_execution];
|
354
|
|
355
|
doc.addEventListener(...listener_args);
|
356
|
substitute_doc.addEventListener(...listener_args);
|
357
|
|
358
|
wait_loaded(doc).then(() => doc.removeEventListener(...listener_args));
|
359
|
#ENDIF
|
360
|
|
361
|
/*
|
362
|
* Ensure our CSP rules are employed from the beginning. This CSP injection
|
363
|
* method is, when possible, going to be applied together with CSP rules
|
364
|
* injected using webRequest.
|
365
|
* Using elements namespaced as HTML makes this CSP injection also work for
|
366
|
* non-HTML documents.
|
367
|
*/
|
368
|
const source = `\
|
369
|
<!DOCTYPE html>
|
370
|
<html>
|
371
|
<head>
|
372
|
<meta http-equiv="Content-Security-Policy" content="${policy.csp}"/>
|
373
|
</head>
|
374
|
<body>
|
375
|
Loading...
|
376
|
</body>
|
377
|
</html>`;
|
378
|
const temporary_html =
|
379
|
new DOMParser().parseFromString(source, "text/html").documentElement;
|
380
|
|
381
|
/*
|
382
|
* Root node gets hijacked now, to be re-attached after <head> is loaded
|
383
|
* and sanitized.
|
384
|
*/
|
385
|
root.replaceWith(temporary_html);
|
386
|
#IF MOZILLA
|
387
|
/*
|
388
|
* To be able to handle the onbeforescriptexecute event for scripts that
|
389
|
* appear under detached document.
|
390
|
*/
|
391
|
substitute_doc.documentElement.replaceWith(root);
|
392
|
#ENDIF
|
393
|
|
394
|
const sanitizer = new MOSanitizer(root);
|
395
|
sanitizer.start();
|
396
|
wait_loaded(doc).then(() => sanitizer.stop());
|
397
|
|
398
|
/*
|
399
|
* When we don't inject payload, we neither block document's CSP `<meta>'
|
400
|
* tags nor wait for `<head>' to be parsed.
|
401
|
*/
|
402
|
if (policy.payload) {
|
403
|
if (doc instanceof HTMLDocument)
|
404
|
await wait_for_head(doc, root);
|
405
|
|
406
|
root.querySelectorAll("head meta")
|
407
|
.forEach(m => sanitize_meta(m, policy));
|
408
|
}
|
409
|
|
410
|
const scripts = [...root.getElementsByTagNameNS(html_ns, "script"),
|
411
|
...root.getElementsByTagNameNS(svg_ns, "svg")];
|
412
|
scripts.forEach(s => sanitize_script(s, policy));
|
413
|
temporary_html.replaceWith(root);
|
414
|
scripts.forEach(s => desanitize_script(s, policy));
|
415
|
}
|
416
|
|
417
|
/*
 * Unregister all service workers registered for the current page and reload
 * it. Does nothing when the service workers API is unavailable or when there
 * are no registrations. Once a reload has been requested, the returned promise
 * never resolves.
 */
async function _disable_service_workers() {
    const workers_api = navigator.serviceWorker;
    if (!workers_api)
        return;

    const registrations = await workers_api.getRegistrations();
    if (registrations.length === 0)
        return;

    console.warn("Haketilo: Service Workers detected on this page! Unregistering and reloading.");

    const unregistrations =
          registrations.map(registration => registration.unregister());

    try {
        await Promise.all(unregistrations);
    } finally {
        /* Reload even if some worker could not be unregistered. */
        location.reload();
    }

    /* Never actually return! */
    const never_settled = new Promise(() => 0);
    return never_settled;
}
|
436
|
|
437
|
/*
 * Trying to use service workers APIs might result in exceptions, for example
 * when in a non-HTML document. Because of this, we wrap the function that does
 * the actual work in a try {} block.
 */
|
442
|
/*
 * Exception-safe wrapper around _disable_service_workers(). Any error thrown
 * by it is reported with console.warn() and otherwise ignored.
 */
async function disable_service_workers() {
    try {
        await _disable_service_workers();
    } catch (error) {
        console.warn("Haketilo: Exception thrown during an attempt to detect and disable service workers.", error);
    }
}
|
449
|
|
450
|
/*
 * Entry point: enforce script blocking on the current page according to
 * `policy'. When the policy allows scripts, nothing is done and undefined is
 * returned. Otherwise the returned promise resolves once the document has
 * been sanitized, service workers have been dealt with and the document has
 * loaded.
 */
function enforce_blocking(policy) {
    if (policy.allow)
        return;

    const document_sanitized = sanitize_document(document, policy);
    const workers_disabled = disable_service_workers();
    const document_loaded = wait_loaded(document);

    return Promise.all([document_sanitized, workers_disabled, document_loaded]);
}
|
460
|
#EXPORT enforce_blocking
|