Project

General

Profile

Download (11.1 KB) Statistics
| Branch: | Tag: | Revision:

haketilo / content / main.js @ 2bd35bc4

1
/**
2
 * This file is part of Haketilo.
3
 *
4
 * Function: Main content script that runs in all frames.
5
 *
6
 * Copyright (C) 2021 Wojtek Kosior
7
 * Copyright (C) 2021 jahoti
8
 * Redistribution terms are gathered in the `copyright' file.
9
 */
10

    
11
/*
12
 * IMPORTS_START
13
 * IMPORT handle_page_actions
14
 * IMPORT extract_signed
15
 * IMPORT sign_data
16
 * IMPORT gen_nonce
17
 * IMPORT is_privileged_url
18
 * IMPORT is_chrome
19
 * IMPORT is_mozilla
20
 * IMPORT start_activity_info_server
21
 * IMPORT make_csp_rule
22
 * IMPORT csp_header_regex
23
 * IMPORTS_END
24
 */
25

    
26
/*
 * Remember on the document object itself whether its content has finished
 * loading, so that wait_loaded() can consult the flag later.
 */
document.content_loaded = document.readyState === "complete";

/*
 * Return a promise that resolves right away when `e.content_loaded' is truthy
 * and otherwise upon the next `DOMContentLoaded' event dispatched on `e'.
 */
const wait_loaded = e => {
    if (e.content_loaded)
        return Promise.resolve();

    return new Promise(resolve => {
        e.addEventListener("DOMContentLoaded", resolve, {once: true});
    });
};

wait_loaded(document).then(() => {
    document.content_loaded = true;
});
31

    
32
/*
 * Look through `cookie' for `haketilo-<signature>=<data>' entries and pick the
 * policy to use for the current document.
 *
 * Every entry accepted by extract_signed() has its signature recorded. Among
 * those, the policy returned is the one with the greatest signing time that is
 * not older than `min_time' and whose `url' equals document.URL.
 *
 * Returns `[policy, extracted_signatures]'; `policy' is null when no entry
 * qualifies.
 */
function extract_cookie_policy(cookie, min_time)
{
    const extracted_signatures = [];
    let newest_time = -1;
    let policy = null;

    for (const [, signature, signed_data] of
         cookie.matchAll(/haketilo-(\w*)=([^;]*)/g)) {
        const candidate = extract_signed(signature, signed_data);
        if (candidate.fail)
            continue;

        extracted_signatures.push(signature);

        /* Too old, or older than a policy we have already accepted. */
        if (candidate.time < Math.max(min_time, newest_time))
            continue;

        /* This should succeed - it's our self-produced valid JSON. */
        const candidate_policy =
              JSON.parse(decodeURIComponent(candidate.data));
        if (candidate_policy.url !== document.URL)
            continue;

        newest_time = candidate.time;
        policy = candidate_policy;
    }

    return [policy, extracted_signatures];
}
59

    
60
/*
 * Extract a signed policy smuggled in the fragment of `url'.
 *
 * The expected shape is `<base>#haketilo_<signature>_<data>[#<anchor>]'. When
 * the fragment does not have this shape or extract_signed() rejects it, the
 * result is `[null, url]' with `url' untouched.
 *
 * Otherwise the result is `[policy, original_url]' where `original_url' is
 * `url' with the payload removed. `policy' is null when the signature is older
 * than `min_time' or when the policy's `url' differs from `original_url'.
 */
function extract_url_policy(url, min_time)
{
    /* slice() rather than splice(): no need to mutate the match array. */
    const [base_url, payload, anchor] =
          /^([^#]*)#?([^#]*)(#?.*)$/.exec(url).slice(1, 4);

    const match = /^haketilo_([^_]+)_(.*)$/.exec(payload);
    if (!match)
        return [null, url];

    const result = extract_signed(...match.slice(1, 3));
    if (result.fail)
        return [null, url];

    const original_url = base_url + anchor;

    /*
     * An expired signature yields no policy. Return before touching the policy
     * object - dereferencing a null policy here used to throw a TypeError.
     */
    if (result.time < min_time)
        return [null, original_url];

    const policy = JSON.parse(decodeURIComponent(result.data));

    return [policy.url === original_url ? policy : null, original_url];
}
79

    
80
/*
 * Apply `policy' to a document by reloading it with the policy embedded in the
 * URL fragment. Nothing happens unless `policy.allow' is truthy.
 *
 * A fresh nonce is stored in `policy.nonce', the policy gets serialized and
 * signed with the current time and the result is inserted as
 * `#haketilo_<signed parts joined with "_">' in front of the original fragment
 * of `policy.url'.
 */
function employ_nonhttp_policy(policy)
{
    if (!policy.allow)
        return;

    policy.nonce = gen_nonce();

    const url_parts = /^([^#]*)(#?.*)$/.exec(policy.url);
    const base_url = url_parts[1];
    const target = url_parts[2];

    const encoded_policy = encodeURIComponent(JSON.stringify(policy));
    const signed_parts = sign_data(encoded_policy, new Date().getTime());
    const payload = "haketilo_" + signed_parts.join("_");

    location.href = `${base_url}#${payload}${target}`;
    location.reload();
}
94

    
95
/*
96
 * In the case of HTML documents:
97
 * 1. When injecting some payload we need to sanitize <meta> CSP tags before
98
 *    they reach the document.
99
 * 2. Only <meta> tags inside <head> are considered valid by the browser and
100
 *    need to be considered.
101
 * 3. We want to detach <html> from document, wait until its <head> completes
102
 *    loading, sanitize it and re-attach <html>.
103
 * 4. We shall wait for anything to appear in or after <body> and take that as
104
 *    a sign <head> has finished loading.
105
 * 5. Otherwise, getting the `DOMContentLoaded' event on the document shall also
106
 *    be a sign that <head> is fully loaded.
107
 */
108

    
109
/*
 * Create and return a MutationObserver that calls try_body_started(waiting)
 * each time the list of direct children of `DOM_element' changes.
 */
function make_body_start_observer(DOM_element, waiting)
{
    const on_children_changed = () => try_body_started(waiting);

    const observer = new MutationObserver(on_children_changed);
    observer.observe(DOM_element, {childList: true});

    return observer;
}
115

    
116
/*
 * Check whether anything has appeared in or after <body> of the detached
 * <html> element, or after the document's current root element. If so, finish
 * waiting and return true.
 *
 * Otherwise return nothing; when <body> already exists and fewer than 2
 * observers are registered, start observing <body> as well.
 */
function try_body_started(waiting)
{
    const body = waiting.detached_html.querySelector("body");

    const body_has_content = body && (body.firstChild || body.nextSibling);
    if (body_has_content || waiting.doc.documentElement.nextSibling) {
        finish_waiting(waiting);
        return true;
    }

    if (!body || waiting.observers.length >= 2)
        return;

    waiting.observers.push(make_body_start_observer(body, waiting));
}
129

    
130
/*
 * Conclude waiting exactly once: mark `waiting' as finished, disconnect all of
 * its observers and schedule `waiting.callback' through setTimeout(). Later
 * calls with the same object do nothing.
 */
function finish_waiting(waiting)
{
    if (waiting.finished)
        return;

    waiting.finished = true;

    for (const observer of waiting.observers)
        observer.disconnect();

    setTimeout(waiting.callback, 0);
}
138

    
139
/*
 * Arrange for `callback' to be called once <head> of `detached_html' can be
 * considered fully loaded, i.e. when try_body_started() reports success or
 * when wait_loaded(doc) resolves, whichever happens first.
 */
function _wait_for_head(doc, detached_html, callback)
{
    const waiting = {doc, detached_html, callback, observers: []};

    if (try_body_started(waiting))
        return;

    /*
     * try_body_started() might have already registered an observer on an
     * empty <body>. Append to the list instead of overwriting it, so that
     * finish_waiting() is able to disconnect that observer, too.
     */
    waiting.observers.push(make_body_start_observer(detached_html, waiting));

    wait_loaded(doc).then(() => finish_waiting(waiting));
}
150

    
151
/*
 * Promise-returning wrapper around _wait_for_head(): the returned promise gets
 * resolved when _wait_for_head() invokes its callback.
 */
function wait_for_head(doc, detached_html)
{
    const executor = resolve => _wait_for_head(doc, detached_html, resolve);

    return new Promise(executor);
}
155

    
156
const blocked_str = "blocked";

/*
 * Disable attribute `attr' of `node' by renaming it to `blocked-<attr>'.
 *
 * If `blocked-<attr>' (or `blocked-blocked-<attr>', etc.) is already present,
 * existing values are moved one `blocked-' level up first, so nothing gets
 * overwritten.
 *
 * When `ns' is a string, the *AttributeNS() variants of the DOM methods are
 * used with `ns' as the namespace. Otherwise the plain ones are used.
 */
function block_attribute(node, attr, ns=null)
{
    const [hasa, geta, seta, rema] = ["has", "get", "set", "remove"]
          .map(m => (n, ...args) => typeof ns === "string" ?
               n[`${m}AttributeNS`](ns, ...args) : n[`${m}Attribute`](...args));
    /*
     * Disabling attributes by prepending `blocked-' allows them to still be
     * relatively easily accessed in case they contain some useful data.
     *
     * Find out how many `blocked' prefixes are needed to reach a name that is
     * not in use. Name components have to be joined with "-" here, because
     * this is the form under which the attributes get written below.
     */
    const construct_name = [attr];
    while (hasa(node, construct_name.join("-")))
        construct_name.unshift(blocked_str);

    /* Move each value one level up, starting with the outermost name. */
    while (construct_name.length > 1) {
        construct_name.shift();
        const name = construct_name.join("-");
        seta(node, `${blocked_str}-${name}`, geta(node, name));
    }

    rema(node, attr);
}
179

    
180
/*
181
 * Used to disable `<script>'s and `<meta>'s that have not yet been added to
182
 * live DOM (doesn't work for those already added).
183
 */
184
function sanitize_meta(meta)
{
    /*
     * Only <meta> elements whose `http-equiv' matches csp_header_regex and
     * which carry non-empty `content' need to have that attribute blocked.
     */
    const declares_csp = csp_header_regex.test(meta.httpEquiv);
    if (!declares_csp || !meta.content)
        return;

    block_attribute(meta, "content");
}
189

    
190
/*
 * Stash the current value of script's `type' attribute (null if absent) in
 * its `haketilo_blocked_type' property and set the type to "text/plain".
 */
function sanitize_script(script)
{
    const original_type = script.getAttribute("type");

    script.haketilo_blocked_type = original_type;
    script.type = "text/plain";
}
195

    
196
/*
197
 * Executed after `<script>' has been connected to the DOM, when it is no longer
198
 * eligible for being executed by the browser.
199
 */
200
function desanitize_script(script)
{
    /* Value stashed in the `haketilo_blocked_type' property earlier. */
    const saved_type = script.haketilo_blocked_type;

    script.setAttribute("type", saved_type);

    /* No value was stashed - the `type' attribute shall not be there. */
    if (saved_type === null || saved_type === undefined)
        script.removeAttribute("type");

    delete script.haketilo_blocked_type;
}
209

    
210
const bad_url_reg = /^data:([^,;]*ml|unknown-content-type)/i;

/*
 * Block those `href', `src' and `data' attributes of `element' that hold a
 * `data:' URL with a media type ending in `ml' or equal to
 * `unknown-content-type'. Nodes without attributes are left alone.
 */
function sanitize_urls(element)
{
    /*
     * URL parsers strip leading C0 control and space characters and drop
     * ASCII tabs and newlines wherever they occur. Do the same before
     * matching, so that values like " data:text/html,..." or
     * "da\nta:text/html,..." cannot slip through.
     */
    const normalize = value => value.replace(/[\t\n\r]/g, "")
          .replace(/^[\u0000-\u0020]+/, "");

    for (const attr of [...element.attributes || []]
               .filter(attr => /^(href|src|data)$/i.test(attr.localName))
               .filter(attr => bad_url_reg.test(normalize(attr.value))))
        block_attribute(element, attr.localName, attr.namespaceURI);
}
218

    
219
/*
 * Sanitize URLs of all elements of `doc' that currently have a `href', `src'
 * or `data' attribute. Unless `doc.content_loaded' is already set, also keep
 * sanitizing nodes added anywhere in `doc' until wait_loaded(doc) resolves.
 */
function start_data_urls_sanitizing(doc)
{
    doc.querySelectorAll("*[href], *[src], *[data]").forEach(sanitize_urls);

    if (doc.content_loaded)
        return;

    const observer = new MutationObserver(records => {
        for (const record of records)
            record.addedNodes.forEach(sanitize_urls);
    });

    observer.observe(doc, {childList: true, subtree: true});
    wait_loaded(doc).then(() => observer.disconnect());
}
229

    
230
/*
231
 * Normally, we block scripts with CSP. However, Mozilla does optimizations that
232
 * cause part of the DOM to be loaded when our content scripts get to run. Thus,
233
 * before the CSP rules we inject (for non-HTTP pages) become effective, we need
234
 * to somehow block the execution of `<script>'s and intrinsics that were
235
 * already there. Additionally, some browsers (IceCat 60) seem to have problems
236
 * applying this CSP to non-inline `<script>'s in certain scenarios.
237
 */
238
function prevent_script_execution(event)
{
    /* Targets marked with `haketilo_payload' are allowed to run. */
    if (event.target.haketilo_payload)
        return;

    event.preventDefault();
}
243

    
244
/*
 * Block scripts that are in `doc' from the start: register
 * prevent_script_execution() for `beforescriptexecute' events and reset, via
 * `wrappedJSObject', every set handler property that corresponds to an
 * attribute whose name starts with "on".
 */
function mozilla_initial_block(doc)
{
    doc.addEventListener("beforescriptexecute", prevent_script_execution);

    for (const elem of doc.querySelectorAll("*")) {
        /* First collect the names, then clear the handlers. */
        const handler_names = [];

        for (const {localName} of [...elem.attributes]) {
            if (/^on/.test(localName) && elem.wrappedJSObject[localName])
                handler_names.push(localName);
        }

        for (const name of handler_names)
            elem.wrappedJSObject[name] = null;
    }
}
254

    
255
/*
256
 * Here we block all scripts of a document which might be either an
257
 * HTMLDocument or an XMLDocument. Modifying an XML document might disrupt
258
 * Mozilla's XML preview. This is an unfortunate thing we have to accept for
259
 * now. XML documents *have to* be sanitized as well because they might
260
 * contain `<script>' tags (or on* attributes) with namespace declared as
261
 * "http://www.w3.org/1999/xhtml" or "http://www.w3.org/2000/svg" which allows
262
 * javascript execution.
263
 */
264
async function sanitize_document(doc, policy)
{
    /*
     * Blocking of scripts that are in the DOM from the beginning. Needed for
     * Mozilla.
     */
    if (is_mozilla)
        mozilla_initial_block(doc);

    /*
     * Ensure our CSP rules are employed from the beginning. This CSP injection
     * method is, when possible, going to be applied together with CSP rules
     * injected using webRequest.
     * Using elements namespaced as HTML makes this CSP injection also work for
     * non-HTML documents.
     */
    const csp_rule = make_csp_rule(policy);
    const placeholder_markup =
          `<html><head><meta http-equiv="Content-Security-Policy" ` +
          `content="${csp_rule}"/></head><body>Loading...</body></html>`;
    const placeholder = new DOMParser()
          .parseFromString(placeholder_markup, "text/html").documentElement;

    /*
     * Root node gets hijacked now, to be re-attached after <head> is loaded
     * and sanitized.
     */
    const root = doc.documentElement;
    root.replaceWith(placeholder);

    /*
     * When we don't inject payload, we neither block document's CSP `<meta>'
     * tags nor wait for `<head>' to be parsed.
     */
    if (policy.has_payload) {
        await wait_for_head(doc, root);

        for (const meta of root.querySelectorAll("head meta"))
            sanitize_meta(meta);
    }

    /*
     * Scripts get their type neutralized while the root is being re-attached
     * and have it restored right afterwards.
     */
    for (const script of root.querySelectorAll("script"))
        sanitize_script(script);

    placeholder.replaceWith(root);

    for (const script of root.querySelectorAll("script"))
        desanitize_script(script);

    start_data_urls_sanitizing(doc);
}
308

    
309
/*
 * Unregister all Service Workers of the page, if there are any, and reload.
 *
 * Resolves right away when `navigator.serviceWorker' is unavailable or there
 * are no registrations. Otherwise a warning is logged, the registrations get
 * unregistered, the page is reloaded (even if unregistering failed) and the
 * returned promise stays pending forever.
 */
async function disable_service_workers()
{
    const service_worker = navigator.serviceWorker;
    if (!service_worker)
        return;

    const registrations = await service_worker.getRegistrations();
    if (registrations.length === 0)
        return;

    console.warn("Service Workers detected on this page! Unregistering and reloading");

    try {
        const unregistering = registrations.map(r => r.unregister());
        await Promise.all(unregistering);
    } finally {
        location.reload();
    }

    /* Never actually return! */
    return new Promise(() => 0);
}
329

    
330
/*
 * Entry point: for non-privileged URLs determine the policy for this document
 * (from cookies for HTTP(S) pages, from the URL fragment otherwise), apply
 * script blocking unless the policy allows scripts and hand control over to
 * handle_page_actions().
 */
if (!is_privileged_url(document.URL)) {
    let policy_received_callback = () => undefined;
    let policy;

    /* Signature valid for half an hour. */
    const min_time = new Date().getTime() - 1800 * 1000;

    if (/^https?:/.test(document.URL)) {
        const [cookie_policy, signatures] =
              extract_cookie_policy(document.cookie, min_time);
        policy = cookie_policy;

        /* Expire every policy cookie that carried a valid signature. */
        signatures.forEach(signature => {
            document.cookie = `haketilo-${signature}=; Max-Age=-1;`;
        });
    } else {
        const scheme = /^([^:]*)/.exec(document.URL)[1];
        const known_scheme = scheme === "file" || scheme === "ftp";

        if (!known_scheme)
            console.warn(`Unknown url scheme: \`${scheme}'!`);

        const [url_policy, original_url] =
              extract_url_policy(document.URL, min_time);
        policy = url_policy;

        /* Don't leave the policy payload in the visible URL. */
        history.replaceState(null, "", original_url);

        /*
         * Without a policy in the URL, the one received later gets employed
         * through employ_nonhttp_policy().
         */
        if (known_scheme && !policy)
            policy_received_callback = employ_nonhttp_policy;
    }

    if (!policy) {
        console.debug("Using fallback policy!");
        policy = {allow: false, nonce: gen_nonce()};
    }

    if (!(document instanceof HTMLDocument))
        policy.has_payload = false;

    console.debug("current policy", policy);

    /* Sanitizing happens only when the policy doesn't allow scripts. */
    const [document_sanitized, workers_disabled] = policy.allow ?
          [Promise.resolve(), Promise.resolve()] :
          [sanitize_document(document, policy), disable_service_workers()];

    const doc_ready = Promise.all([
        document_sanitized,
        workers_disabled,
        wait_loaded(document)
    ]);

    handle_page_actions(policy.nonce, policy_received_callback, doc_ready);

    start_activity_info_server();
}
(2-2/4)