Project

General

Profile

Download (9.51 KB) Statistics
| Branch: | Tag: | Revision:

haketilo / content / main.js @ 96068ada

1
/**
2
 * This file is part of Haketilo.
3
 *
4
 * Function: Main content script that runs in all frames.
5
 *
6
 * Copyright (C) 2021 Wojtek Kosior
7
 * Copyright (C) 2021 jahoti
8
 * Redistribution terms are gathered in the `copyright' file.
9
 */
10

    
11
/*
12
 * IMPORTS_START
13
 * IMPORT handle_page_actions
14
 * IMPORT gen_nonce
15
 * IMPORT is_privileged_url
16
 * IMPORT browser
17
 * IMPORT is_chrome
18
 * IMPORT is_mozilla
19
 * IMPORT start_activity_info_server
20
 * IMPORT make_csp_rule
21
 * IMPORT csp_header_regex
22
 * IMPORT report_settings
23
 * IMPORTS_END
24
 */
25

    
26
/* Remember on the document object itself whether it has finished loading. */
document.content_loaded = document.readyState === "complete";

/*
 * Return a promise that resolves right away when `e.content_loaded' is truthy
 * and otherwise upon the first `DOMContentLoaded' event dispatched on `e'.
 */
const wait_loaded = e => {
    if (e.content_loaded)
	return Promise.resolve();

    return new Promise(resolve => {
	e.addEventListener("DOMContentLoaded", resolve, {once: true});
    });
};

/* Flip the flag as soon as the document reports being loaded. */
wait_loaded(document).then(() => document.content_loaded = true);
31

    
32
/*
33
 * In the case of HTML documents:
34
 * 1. When injecting some payload we need to sanitize <meta> CSP tags before
35
 *    they reach the document.
36
 * 2. Only <meta> tags inside <head> are considered valid by the browser and
37
 *    need to be considered.
38
 * 3. We want to detach <html> from document, wait until its <head> completes
39
 *    loading, sanitize it and re-attach <html>.
40
 * 4. We shall wait for anything to appear in or after <body> and take that as
41
 *    a sign <head> has finished loading.
42
 * 5. Otherwise, getting the `DOMContentLoaded' event on the document shall also
43
 *    be a sign that <head> is fully loaded.
44
 */
45

    
46
/*
 * Create a MutationObserver that watches direct children of `DOM_element' and
 * re-runs try_body_started() for `waiting' on every reported change. The
 * already-started observer is returned so the caller can disconnect it later.
 */
function make_body_start_observer(DOM_element, waiting)
{
    const on_children_changed = () => try_body_started(waiting);
    const body_start_observer = new MutationObserver(on_children_changed);

    body_start_observer.observe(DOM_element, {childList: true});

    return body_start_observer;
}
52

    
53
/*
 * Check whether anything has appeared inside or after `<body>' of the detached
 * root, or after the document's current root element. If so, finish waiting
 * and return true. Otherwise return undefined, after making sure an existing
 * `<body>' is observed (as long as fewer than 2 observers are registered).
 */
function try_body_started(waiting)
{
    const body = waiting.detached_html.querySelector("body");
    const body_has_started = body && (body.firstChild || body.nextSibling);

    if (body_has_started || waiting.doc.documentElement.nextSibling) {
	finish_waiting(waiting);
	return true;
    }

    if (!body || waiting.observers.length >= 2)
	return;

    waiting.observers.push(make_body_start_observer(body, waiting));
}
66

    
67
/*
 * Conclude the wait described by `waiting': disconnect all of its observers
 * and schedule its callback through a zero-delay timeout. Only the first call
 * has any effect; later calls see the `finished' flag and do nothing.
 */
function finish_waiting(waiting)
{
    if (!waiting.finished) {
	waiting.finished = true;

	for (const observer of waiting.observers)
	    observer.disconnect();

	setTimeout(waiting.callback, 0);
    }
}
75

    
76
/*
 * Arrange for `callback' to be scheduled once `<head>' of `detached_html' can
 * be considered fully loaded, i.e. when try_body_started() succeeds or when
 * wait_loaded() resolves for `doc', whichever happens first.
 */
function _wait_for_head(doc, detached_html, callback)
{
    const waiting = {doc, detached_html, callback, observers: []};

    if (try_body_started(waiting))
	return;

    /*
     * The call above might have already registered an observer on an (empty)
     * `<body>'. Append to the array instead of replacing it, so that this
     * observer is not lost and still gets disconnected by finish_waiting().
     */
    waiting.observers.push(make_body_start_observer(detached_html, waiting));

    wait_loaded(doc).then(() => finish_waiting(waiting));
}
87

    
88
/*
 * Promise-returning wrapper around _wait_for_head(): the returned promise gets
 * resolved when _wait_for_head() invokes the callback it was given.
 */
function wait_for_head(doc, detached_html)
{
    const start_waiting = resolve => _wait_for_head(doc, detached_html, resolve);

    return new Promise(start_waiting);
}
92

    
93
const blocked_str = "blocked";

/*
 * Disable attribute `attr' of `node' by moving its value to an attribute named
 * `blocked-<attr>' and removing the original one. If `blocked-<attr>' (and
 * possibly `blocked-blocked-<attr>', etc.) already exists, each existing value
 * is first shifted one `blocked-' prefix further, so nothing is overwritten.
 *
 * When `ns' is a string, the namespace-aware attribute methods
 * (hasAttributeNS(), getAttributeNS(), ...) are used with `ns' as the
 * namespace. Otherwise the plain ones are used.
 */
function block_attribute(node, attr, ns=null)
{
    const [hasa, geta, seta, rema] = ["has", "get", "set", "remove"]
	  .map(m => (n, ...args) => typeof ns === "string" ?
	       n[`${m}AttributeNS`](ns, ...args) : n[`${m}Attribute`](...args));
    /*
     * Disabling attributes by prepending `blocked-' allows them to still be
     * relatively easily accessed in case they contain some useful data.
     */
    const construct_name = [attr];
    /*
     * Find the first free name in the sequence `attr', `blocked-attr',
     * `blocked-blocked-attr', ... The parts have to be joined with "-" so that
     * the names probed here are the same as the names written below.
     */
    while (hasa(node, construct_name.join("-")))
	construct_name.unshift(blocked_str);

    /* Shift the values, starting from the longest existing name. */
    while (construct_name.length > 1) {
	construct_name.shift();
	const name = construct_name.join("-");
	seta(node, `${blocked_str}-${name}`, geta(node, name));
    }

    rema(node, attr);
}
116

    
117
/*
118
 * Used to disable `<script>'s and `<meta>'s that have not yet been added to
119
 * live DOM (doesn't work for those already added).
120
 */
121
/*
 * Block the `content' attribute of `meta' when its `http-equiv' value matches
 * `csp_header_regex' and its content is non-empty.
 */
function sanitize_meta(meta)
{
    const is_csp_meta = csp_header_regex.test(meta.httpEquiv);

    if (is_csp_meta && meta.content)
	block_attribute(meta, "content");
}
126

    
127
/*
 * Remember the current value of the `type' attribute of `script' (null if
 * there is none) under `haketilo_blocked_type' and change the script's type to
 * "text/plain".
 */
function sanitize_script(script)
{
    const original_type = script.getAttribute("type");

    script.haketilo_blocked_type = original_type;
    script.type = "text/plain";
}
132

    
133
/*
134
 * Executed after `<script>' has been connected to the DOM, when it is no longer
135
 * eligible for being executed by the browser.
136
 */
137
/*
 * Undo sanitize_script(): restore the `type' attribute of `script' to the
 * value saved under `haketilo_blocked_type' and drop that helper property.
 * When no type was saved (null or undefined), the attribute is removed.
 */
function desanitize_script(script)
{
    const blocked_type = script.haketilo_blocked_type;

    /*
     * Decide first and touch the attribute only once. Calling setAttribute()
     * with null or undefined would write a bogus "null"/"undefined" type
     * before it gets removed.
     */
    if (blocked_type === null || blocked_type === undefined)
	script.removeAttribute("type");
    else
	script.setAttribute("type", blocked_type);

    delete script.haketilo_blocked_type;
}
146

    
147
const bad_url_reg = /^data:([^,;]*ml|unknown-content-type)/i;

/*
 * Block those `href', `src' and `data' attributes of `element' whose value is
 * a `data:' URL matched by `bad_url_reg'. Nodes without an `attributes'
 * collection are ignored.
 */
function sanitize_urls(element)
{
    /*
     * URL parsing discards leading C0 control and space characters as well as
     * all ASCII tabs and newlines. Do the same before matching, so that values
     * like " data:text/html,..." or "da\tta:..." do not slip through.
     */
    const normalize = value => value.replace(/[\t\n\r]/g, "")
	  .replace(/^[\u0000-\u0020]+/, "");

    for (const attr of [...element.attributes || []]
	       .filter(attr => /^(href|src|data)$/i.test(attr.localName))
	       .filter(attr => bad_url_reg.test(normalize(attr.value))))
	block_attribute(element, attr.localName, attr.namespaceURI);
}
155

    
156
/*
 * Run sanitize_urls() on all elements of `doc' that carry a `href', `src' or
 * `data' attribute. If `doc' is not yet flagged as loaded, additionally
 * sanitize every node reported as added by a MutationObserver, which stays
 * active until wait_loaded() resolves for `doc'.
 */
function start_data_urls_sanitizing(doc)
{
    for (const element of doc.querySelectorAll("*[href], *[src], *[data]"))
	sanitize_urls(element);

    if (doc.content_loaded)
	return;

    const observer = new MutationObserver(records => {
	for (const record of records)
	    record.addedNodes.forEach(node => sanitize_urls(node));
    });

    observer.observe(doc, {childList: true, subtree: true});
    wait_loaded(doc).then(() => observer.disconnect());
}
166

    
167
/*
168
 * Normally, we block scripts with CSP. However, Mozilla does optimizations that
169
 * cause part of the DOM to be loaded when our content scripts get to run. Thus,
170
 * before the CSP rules we inject (for non-HTTP pages) become effective, we need
171
 * to somehow block the execution of `<script>'s and intrinsics that were
172
 * already there. Additionally, some browsers (IceCat 60) seem to have problems
173
 * applying this CSP to non-inline `<scripts>' in certain scenarios.
174
 */
175
/*
 * Event handler that cancels `event' unless its target is marked with a
 * truthy `haketilo_payload' property.
 */
function prevent_script_execution(event)
{
    if (event.target.haketilo_payload)
	return;

    event.preventDefault();
}
180

    
181
/*
 * Register prevent_script_execution() for `beforescriptexecute' events on
 * `doc' and, for every element already present, reset to null each property
 * of its `wrappedJSObject' that is named after one of the element's `on*'
 * attributes and currently holds a truthy value.
 */
function mozilla_initial_block(doc)
{
    doc.addEventListener("beforescriptexecute", prevent_script_execution);

    for (const elem of doc.querySelectorAll("*")) {
	/* Collect the names first, clear the handlers afterwards. */
	const handler_names = [];

	for (const {localName} of [...elem.attributes]) {
	    if (/^on/.test(localName) && elem.wrappedJSObject[localName])
		handler_names.push(localName);
	}

	for (const handler_name of handler_names)
	    elem.wrappedJSObject[handler_name] = null;
    }
}
191

    
192
/*
193
 * Here we block all scripts of a document which might be either an
194
 * HTMLDocument or an XMLDocument. Modifying an XML document might disrupt
195
 * Mozilla's XML preview. This is an unfortunate thing we have to accept for
196
 * now. XML documents *have to* be sanitized as well because they might
197
 * contain `<script>' tags (or on* attributes) with namespace declared as
198
 * "http://www.w3.org/1999/xhtml" or "http://www.w3.org/2000/svg" which allows
199
 * javascript execution.
200
 */
201
/*
 * Block scripts of `doc' according to `policy'. The document's root element
 * is temporarily swapped for a placeholder carrying our CSP `<meta>' tag,
 * sanitized while detached and then put back in place.
 */
async function sanitize_document(doc, policy)
{
    /*
     * Scripts that are in the DOM from the very beginning have to be blocked
     * separately. Needed for Mozilla.
     */
    if (is_mozilla)
	mozilla_initial_block(doc);

    /*
     * Make our CSP rules effective from the beginning. When possible, this
     * injection method is going to be applied together with CSP rules
     * injected using webRequest.
     * Elements namespaced as HTML are used so that the injection works for
     * non-HTML documents as well.
     */
    const csp_rule = make_csp_rule(policy);
    const placeholder_markup =
	  `<html><head><meta http-equiv="Content-Security-Policy" ` +
	  `content="${csp_rule}"/></head><body>Loading...</body></html>`;
    const placeholder = new DOMParser()
	  .parseFromString(placeholder_markup, "text/html").documentElement;

    /*
     * The original root node is taken out of the document now. It gets
     * re-attached once its <head> is loaded and sanitized.
     */
    const original_root = doc.documentElement;
    original_root.replaceWith(placeholder);

    /*
     * Without a payload to inject we neither wait for `<head>' to be parsed
     * nor block the document's own CSP `<meta>' tags.
     */
    if (policy.has_payload) {
	await wait_for_head(doc, original_root);

	for (const meta of original_root.querySelectorAll("head meta"))
	    sanitize_meta(meta, policy);
    }

    /* Scripts are kept disabled only for the moment of re-attaching. */
    for (const script of original_root.querySelectorAll("script"))
	sanitize_script(script, policy);

    placeholder.replaceWith(original_root);

    for (const script of original_root.querySelectorAll("script"))
	desanitize_script(script, policy);

    start_data_urls_sanitizing(doc);
}
245

    
246
/*
 * Look for Service Workers registered for the current page. If there are any,
 * unregister them all and reload the page. In that case the returned promise
 * never settles. Returns without doing anything when the Service Workers API
 * is unavailable or no registrations exist.
 */
async function _disable_service_workers()
{
    const {serviceWorker} = navigator;

    if (!serviceWorker)
	return;

    const registrations = await serviceWorker.getRegistrations();

    if (registrations.length === 0)
	return;

    console.warn("Service Workers detected on this page! Unregistering and reloading.");

    try {
	const unregistering = registrations.map(r => r.unregister());
	await Promise.all(unregistering);
    } finally {
	/* Reload regardless of whether unregistering succeeded. */
	location.reload();
    }

    /* Never actually return! */
    return new Promise(() => 0);
}
266

    
267
/*
268
 * Trying to use service workers APIs might result in exceptions, for example
269
 * when in a non-HTML document. Because of this, we wrap the function that does
270
 * the actual work in a try {} block.
271
 */
272
/*
 * Safe wrapper around _disable_service_workers(): anything thrown by it is
 * logged at debug level instead of being propagated to the caller.
 */
async function disable_service_workers()
{
    try {
	await _disable_service_workers();
    } catch (error) {
	console.debug("Exception thrown during an attempt to detect and disable service workers.", error);
    }
}
280

    
281
/*
 * Synchronously obtain the policy to apply to `url'. A synchronous GET request
 * is made to the extension's "dummy" resource with `url' passed in the query
 * string. The policy is then read as URI-encoded JSON from the `settings'
 * query parameter of the URL the request ended up at.
 *
 * Returns the decoded policy object. On any failure (request error, response
 * URL of unexpected shape, malformed or non-object JSON) the error is logged
 * and the restrictive `{allow: false}' is returned, so that a broken response
 * can never abort the content script and leave the page unblocked.
 */
function synchronously_get_policy(url)
{
    const encoded_url = encodeURIComponent(url);
    const request_url = `${browser.runtime.getURL("dummy")}?url=${encoded_url}`;

    try {
	const xhttp = new XMLHttpRequest();
	xhttp.open("GET", request_url, false);
	xhttp.send();

	const match = /^[^?]*\?settings=(.*)$/.exec(xhttp.responseURL);
	if (match === null)
	    throw new Error(`Unexpected response URL: ${xhttp.responseURL}`);

	const policy = JSON.parse(decodeURIComponent(match[1]));
	/* Callers read and assign properties of the policy. */
	if (typeof policy !== "object" || policy === null)
	    throw new Error("Policy is not an object.");

	return policy;
    } catch(e) {
	console.error("Failure to synchronously fetch policy for url.", e);
	return {allow: false};
    }
}
298

    
299
/*
 * Entry point of the content script. Nothing is done on privileged pages. On
 * other pages the policy is fetched and reported, the document is sanitized
 * (unless the policy allows scripts) and page actions are started.
 */
if (!is_privileged_url(document.URL)) {
    const policy = synchronously_get_policy(document.URL);

    /* The `payload' property is only kept for HTML documents. */
    const is_html_document = document instanceof HTMLDocument;
    if (!is_html_document)
	delete policy.payload;

    console.debug("current policy", policy);

    report_settings(policy);

    policy.nonce = gen_nonce();

    /* Sanitizing and Service Workers removal are skipped when allowed. */
    const sanitized = policy.allow ?
	  Promise.resolve() : sanitize_document(document, policy);
    const workers_disabled = policy.allow ?
	  Promise.resolve() : disable_service_workers();
    const loaded = wait_loaded(document);

    const doc_ready = Promise.all([sanitized, workers_disabled, loaded]);

    handle_page_actions(policy, doc_ready);

    start_activity_info_server();
}
(2-2/4)