Project

General

Profile

Download (12.9 KB) Statistics
| Branch: | Tag: | Revision:

haketilo / content / main.js @ 263d03d5

1
/**
2
 * This file is part of Haketilo.
3
 *
4
 * Function: Main content script that runs in all frames.
5
 *
6
 * Copyright (C) 2021 Wojtek Kosior
7
 * Copyright (C) 2021 jahoti
8
 *
9
 * This program is free software: you can redistribute it and/or modify
10
 * it under the terms of the GNU General Public License as published by
11
 * the Free Software Foundation, either version 3 of the License, or
12
 * (at your option) any later version.
13
 *
14
 * This program is distributed in the hope that it will be useful,
15
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17
 * GNU General Public License for more details.
18
 *
19
 * As additional permission under GNU GPL version 3 section 7, you
20
 * may distribute forms of that code without the copy of the GNU
21
 * GPL normally required by section 4, provided you include this
22
 * license notice and, in case of non-source distribution, a URL
23
 * through which recipients can access the Corresponding Source.
24
 * If you modify file(s) with this exception, you may extend this
25
 * exception to your version of the file(s), but you are not
26
 * obligated to do so. If you do not wish to do so, delete this
27
 * exception statement from your version.
28
 *
29
 * As a special exception to the GPL, any HTML file which merely
30
 * makes function calls to this code, and for that purpose
31
 * includes it by reference shall be deemed a separate work for
32
 * copyright law purposes. If you modify this code, you may extend
33
 * this exception to your version of the code, but you are not
34
 * obligated to do so. If you do not wish to do so, delete this
35
 * exception statement from your version.
36
 *
37
 * You should have received a copy of the GNU General Public License
38
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
39
 *
40
 * I, Wojtek Kosior, thereby promise not to sue for violation of this file's
41
 * license. Although I request that you do not make use this code in a
42
 * proprietary program, I am not going to enforce this in court.
43
 */
44

    
45
/*
46
 * IMPORTS_START
47
 * IMPORT handle_page_actions
48
 * IMPORT extract_signed
49
 * IMPORT sign_data
50
 * IMPORT gen_nonce
51
 * IMPORT is_privileged_url
52
 * IMPORT is_chrome
53
 * IMPORT is_mozilla
54
 * IMPORT start_activity_info_server
55
 * IMPORT make_csp_rule
56
 * IMPORT csp_header_regex
57
 * IMPORTS_END
58
 */
59

    
60
/*
 * `content_loaded' is an expando flag kept on the document so that other code
 * can synchronously tell whether loading has already ended. It starts out true
 * only when the document is already in the "complete" state.
 */
document.content_loaded = document.readyState === "complete";

/*
 * Return a promise that is already resolved when `e.content_loaded' is truthy
 * and otherwise resolves the first time `DOMContentLoaded' fires on `e'.
 */
const wait_loaded = e => {
    if (e.content_loaded)
        return Promise.resolve();

    return new Promise(resolve => {
        e.addEventListener("DOMContentLoaded", resolve, {once: true});
    });
};

/* Flip the flag as soon as the main document reports being loaded. */
wait_loaded(document).then(() => {
    document.content_loaded = true;
});
65

    
66
/*
 * Look through a cookie string for `haketilo-<signature>=<data>' entries and
 * pick the policy to use.
 *
 * Each entry is handed to extract_signed(); entries it reports as `fail' are
 * ignored. An entry wins when its `time' is not lower than `min_time' nor lower
 * than the time of the current winner, and its decoded policy carries
 * document.URL as `url'.
 *
 * Returns a 2-element array: the winning policy (null if there is none) and
 * the signatures of all entries that were extracted without failure.
 */
function extract_cookie_policy(cookie, min_time)
{
    const entry_regex = /haketilo-(\w*)=([^;]*)/g;
    const extracted_signatures = [];
    let newest_time = -1;
    let policy = null;

    for (const [, signature, signed_data] of cookie.matchAll(entry_regex)) {
        const extracted = extract_signed(signature, signed_data);
        if (extracted.fail)
            continue;

        extracted_signatures.push(signature);

        const too_old = extracted.time < Math.max(min_time, newest_time);
        if (too_old)
            continue;

        /* The data was signed by us, so it is expected to be valid JSON. */
        const candidate = JSON.parse(decodeURIComponent(extracted.data));
        if (candidate.url === document.URL) {
            newest_time = extracted.time;
            policy = candidate;
        }
    }

    return [policy, extracted_signatures];
}
93

    
94
/*
 * Extract a signed policy smuggled in the fragment of a URL of the form
 * `<base>#haketilo_<signature>_<data>[#<original fragment>]'.
 *
 * Returns a 2-element array [policy, url_to_use]:
 *  - when the fragment carries no payload, or extract_signed() reports `fail',
 *    the result is [null, url] with `url' untouched;
 *  - otherwise the second element is the URL with the payload stripped and
 *    the first element is the decoded policy, or null when the signature is
 *    older than `min_time' or the policy's `url' differs from the stripped
 *    URL.
 */
function extract_url_policy(url, min_time)
{
    /* slice() rather than splice(): there is no need to mutate the match. */
    const [base_url, payload, anchor] =
          /^([^#]*)#?([^#]*)(#?.*)$/.exec(url).slice(1, 4);

    const match = /^haketilo_([^_]+)_(.*)$/.exec(payload);
    if (!match)
        return [null, url];

    const result = extract_signed(...match.slice(1, 3));
    if (result.fail)
        return [null, url];

    const original_url = base_url + anchor;

    /*
     * An expired signature yields no policy. Return before looking at the
     * policy at all, so that nothing is dereferenced on a null value.
     */
    if (result.time < min_time)
        return [null, original_url];

    /* The data was signed by us, so it is expected to be valid JSON. */
    const policy = JSON.parse(decodeURIComponent(result.data));

    return [policy.url === original_url ? policy : null, original_url];
}
113

    
114
/*
 * For a policy that allows scripts: give it a fresh nonce, sign it, place it
 * in the fragment of `policy.url' (in front of the original fragment) and
 * navigate to the resulting address, reloading the page. Does nothing for a
 * policy whose `allow' is falsy.
 */
function employ_nonhttp_policy(policy)
{
    if (policy.allow) {
        policy.nonce = gen_nonce();

        const [, base_url, target] = /^([^#]*)(#?.*)$/.exec(policy.url);

        const signed_parts = sign_data(
            encodeURIComponent(JSON.stringify(policy)),
            new Date().getTime()
        );
        const payload = "haketilo_" + signed_parts.join("_");

        location.href = `${base_url}#${payload}${target}`;
        location.reload();
    }
}
128

    
129
/*
130
 * In the case of HTML documents:
131
 * 1. When injecting some payload we need to sanitize <meta> CSP tags before
132
 *    they reach the document.
133
 * 2. Only <meta> tags inside <head> are considered valid by the browser and
134
 *    need to be considered.
135
 * 3. We want to detach <html> from document, wait until its <head> completes
136
 *    loading, sanitize it and re-attach <html>.
137
 * 4. We shall wait for anything to appear in or after <body> and take that as
138
 *    a sign <head> has finished loading.
139
 * 5. Otherwise, getting the `DOMContentLoaded' event on the document shall also
140
 *    be a sign that <head> is fully loaded.
141
 */
142

    
143
/*
 * Create a MutationObserver that re-runs try_body_started() for `waiting'
 * whenever the direct children of `DOM_element' change. The observer is
 * returned so that the caller can disconnect it later.
 */
function make_body_start_observer(DOM_element, waiting)
{
    const on_children_changed = () => try_body_started(waiting);

    const child_observer = new MutationObserver(on_children_changed);
    child_observer.observe(DOM_element, {childList: true});

    return child_observer;
}
149

    
150
/*
 * Check whether something has appeared in or after `<body>' of the detached
 * root, or after the document's current root element. If so, finish the wait
 * and return true. Otherwise return undefined, after making sure `<body>'
 * (once it exists) is observed as well - as long as fewer than 2 observers
 * are registered.
 */
function try_body_started(waiting)
{
    const body = waiting.detached_html.querySelector("body");

    const started =
          Boolean(body && (body.firstChild || body.nextSibling)) ||
          Boolean(waiting.doc.documentElement.nextSibling);

    if (!started) {
        if (body && waiting.observers.length < 2) {
            const body_observer = make_body_start_observer(body, waiting);
            waiting.observers.push(body_observer);
        }
        return;
    }

    finish_waiting(waiting);
    return true;
}
163

    
164
/*
 * End the wait described by `waiting': mark it finished, disconnect all of its
 * observers and schedule its callback through a zero-delay timeout. Only the
 * first call has any effect.
 */
function finish_waiting(waiting)
{
    if (!waiting.finished) {
        waiting.finished = true;

        for (const registered of waiting.observers)
            registered.disconnect();

        setTimeout(waiting.callback, 0);
    }
}
172

    
173
/*
 * Call `callback' (through finish_waiting()) once `<head>' of the detached
 * root `detached_html' can be considered fully parsed: either something shows
 * up in/after `<body>' or after the document's root, or wait_loaded(doc)
 * resolves.
 */
function _wait_for_head(doc, detached_html, callback)
{
    const waiting = {doc, detached_html, callback, observers: []};

    if (try_body_started(waiting))
        return;

    /*
     * try_body_started() may already have registered an observer on `<body>'.
     * Append to the list instead of replacing it, so that finish_waiting()
     * gets to disconnect that observer too.
     */
    waiting.observers.push(make_body_start_observer(detached_html, waiting));

    wait_loaded(doc).then(() => finish_waiting(waiting));
}
184

    
185
/*
 * Promise-returning wrapper around _wait_for_head(): the returned promise
 * resolves when _wait_for_head() invokes its callback.
 */
function wait_for_head(doc, detached_html)
{
    const executor = resolve => _wait_for_head(doc, detached_html, resolve);

    return new Promise(executor);
}
189

    
190
const blocked_str = "blocked";

/*
 * Disable attribute `attr' of `node' by moving its value to `blocked-<attr>'
 * and removing the original attribute. When `ns' is a string, the
 * namespace-aware (`*AttributeNS') DOM methods are used with that namespace.
 *
 * Disabling attributes by prepending `blocked-' allows them to still be
 * relatively easily accessed in case they contain some useful data. If
 * `blocked-<attr>' is already taken, its value is first moved to
 * `blocked-blocked-<attr>' (and so on), so no value gets overwritten.
 */
function block_attribute(node, attr, ns=null)
{
    const with_ns = typeof ns === "string";
    const hasa = name => with_ns ?
          node.hasAttributeNS(ns, name) : node.hasAttribute(name);
    const geta = name => with_ns ?
          node.getAttributeNS(ns, name) : node.getAttribute(name);
    const seta = (name, value) => with_ns ?
          node.setAttributeNS(ns, name, value) : node.setAttribute(name, value);
    const rema = name => with_ns ?
          node.removeAttributeNS(ns, name) : node.removeAttribute(name);

    /*
     * Build the chain [free name, ..., `blocked-<attr>', `<attr>'], where every
     * name but the first is currently present on the node. The probed names
     * are exactly the names written below.
     */
    const names = [attr];
    while (hasa(names[0]))
        names.unshift(`${blocked_str}-${names[0]}`);

    /* Shift every value one step deeper, starting from the deepest one. */
    for (let i = 1; i < names.length; i++)
        seta(names[i - 1], geta(names[i]));

    rema(attr);
}
213

    
214
/*
215
 * Used to disable `<script>'s and `<meta>'s that have not yet been added to
216
 * live DOM (doesn't work for those already added).
217
 */
218
/*
 * Block the `content' attribute of a `<meta>' whose `http-equiv' matches
 * csp_header_regex and whose content is non-empty.
 */
function sanitize_meta(meta)
{
    const is_csp_meta = csp_header_regex.test(meta.httpEquiv);

    if (is_csp_meta && meta.content)
        block_attribute(meta, "content");
}
223

    
224
/*
 * Remember the script's `type' attribute (null when absent) in the
 * `haketilo_blocked_type' expando and switch its type to "text/plain".
 */
function sanitize_script(script)
{
    const original_type = script.getAttribute("type");

    Object.assign(script, {
        haketilo_blocked_type: original_type,
        type: "text/plain",
    });
}
229

    
230
/*
231
 * Executed after `<script>' has been connected to the DOM, when it is no longer
232
 * eligible for being executed by the browser.
233
 */
234
/*
 * Undo sanitize_script(): restore the `type' attribute saved in the
 * `haketilo_blocked_type' expando and drop the expando.
 *
 * When no type was saved (null or undefined) the attribute is simply removed.
 * It is never set in that case, because setAttribute() would stringify the
 * value and momentarily leave `type="null"' on the element.
 */
function desanitize_script(script)
{
    const original_type = script.haketilo_blocked_type;

    if (original_type === null || original_type === undefined)
        script.removeAttribute("type");
    else
        script.setAttribute("type", original_type);

    delete script.haketilo_blocked_type;
}
243

    
244
const bad_url_reg = /^data:([^,;]*ml|unknown-content-type)/i;

/*
 * Block `href', `src' and `data' attributes of `element' that hold a `data:'
 * URL matching bad_url_reg (media type ending in "ml", or
 * "unknown-content-type"). Nodes without attributes (e.g. text nodes) are
 * accepted and ignored.
 */
function sanitize_urls(element)
{
    /* Iterate over a snapshot: block_attribute() modifies the attributes. */
    for (const attr of [...element.attributes || []]) {
        if (!/^(href|src|data)$/i.test(attr.localName))
            continue;

        /*
         * URL parsers drop ASCII tab/newline characters and leading C0
         * control/space characters, so a value like " data:text/html,..."
         * still gets loaded as a data: URL. Check the value the way the
         * browser is going to see it.
         */
        const effective_url = attr.value
              .replace(/[\t\n\r]/g, "")
              .replace(/^[\x00-\x20]+/, "");

        if (bad_url_reg.test(effective_url))
            block_attribute(element, attr.localName, attr.namespaceURI);
    }
}
252

    
253
/*
 * Run sanitize_urls() on every element of `doc' that has a `href', `src' or
 * `data' attribute. If `doc.content_loaded' is not yet set, additionally do
 * the same for each node added to the document until wait_loaded(doc)
 * resolves.
 */
function start_data_urls_sanitizing(doc)
{
    doc.querySelectorAll("*[href], *[src], *[data]")
        .forEach(element => sanitize_urls(element));

    if (doc.content_loaded)
        return;

    const on_mutations = records => {
        for (const record of records)
            record.addedNodes.forEach(node => sanitize_urls(node));
    };

    const mo = new MutationObserver(on_mutations);
    mo.observe(doc, {childList: true, subtree: true});

    wait_loaded(doc).then(() => mo.disconnect());
}
263

    
264
/*
265
 * Normally, we block scripts with CSP. However, Mozilla does optimizations that
266
 * cause part of the DOM to be loaded when our content scripts get to run. Thus,
267
 * before the CSP rules we inject (for non-HTTP pages) become effective, we need
268
 * to somehow block the execution of `<script>'s and intrinsics that were
269
 * already there. Additionally, some browsers (IceCat 60) seem to have problems
270
 * applying this CSP to non-inline `<script>'s in certain scenarios.
271
 */
272
/*
 * `beforescriptexecute' handler: cancel the event unless its target is marked
 * with a truthy `haketilo_payload' property.
 */
function prevent_script_execution(event)
{
    const is_our_payload = Boolean(event.target.haketilo_payload);
    if (is_our_payload)
        return;

    event.preventDefault();
}
277

    
278
/*
 * Block scripts that are already present in `doc': cancel script execution
 * through the `beforescriptexecute' event and clear every intrinsic event
 * handler (property named like an `on*' attribute of the element) that is
 * currently set, going through the element's `wrappedJSObject'.
 */
function mozilla_initial_block(doc)
{
    doc.addEventListener("beforescriptexecute", prevent_script_execution);

    for (const elem of doc.querySelectorAll("*")) {
        const attr_names = Array.from(elem.attributes, attr => attr.localName);

        const active_handlers = attr_names.filter(
            name => name.startsWith("on") && elem.wrappedJSObject[name]
        );

        for (const name of active_handlers)
            elem.wrappedJSObject[name] = null;
    }
}
288

    
289
/*
290
 * Here we block all scripts of a document which might be either an
291
 * HTMLDocument or an XMLDocument. Modifying an XML document might disrupt
292
 * Mozilla's XML preview. This is an unfortunate thing we have to accept for
293
 * now. XML documents *have to* be sanitized as well because they might
294
 * contain `<script>' tags (or on* attributes) with namespace declared as
295
 * "http://www.w3.org/1999/xhtml" or "http://www.w3.org/2000/svg" which allows
296
 * javascript execution.
297
 */
298
/*
 * Block scripts of `doc' according to `policy'. The steps, in order:
 *  1. (Mozilla only) block what is already present in the DOM.
 *  2. Swap the document's root for a placeholder `<html>' carrying our CSP in
 *     a `<meta>' tag.
 *  3. If a payload is going to be injected, wait for the detached root's
 *     `<head>' and sanitize its `<meta>' tags.
 *  4. Sanitize the detached root's scripts, put the root back and restore
 *     the scripts' original types.
 *  5. Start sanitizing `data:' URLs.
 * Without a payload, all of the above happens synchronously within the call.
 */
async function sanitize_document(doc, policy)
{
    /* Needed for scripts that are in the DOM from the very beginning. */
    if (is_mozilla)
        mozilla_initial_block(doc);

    /*
     * The placeholder makes our CSP rules apply from the start. This injection
     * is, when possible, applied together with CSP rules injected using
     * webRequest. HTML-namespaced elements make it work for non-HTML
     * documents, too.
     */
    const csp_rule = make_csp_rule(policy);
    const placeholder_source = `<html><head><meta \
http-equiv="Content-Security-Policy" content="${csp_rule}"\
/></head><body>Loading...</body></html>`;
    const html = new DOMParser()
          .parseFromString(placeholder_source, "text/html").documentElement;

    /* Hijack the root; it returns once `<head>' is loaded and sanitized. */
    const root = doc.documentElement;
    root.replaceWith(html);

    /*
     * Document's own CSP `<meta>' tags only get in the way when we inject a
     * payload; otherwise we neither wait for `<head>' nor touch them.
     */
    if (policy.has_payload) {
        await wait_for_head(doc, root);

        const head_metas = root.querySelectorAll("head meta");
        head_metas.forEach(meta => sanitize_meta(meta, policy));
    }

    const for_each_script = action =>
          root.querySelectorAll("script")
          .forEach(script => action(script, policy));

    for_each_script(sanitize_script);
    html.replaceWith(root);
    for_each_script(desanitize_script);

    start_data_urls_sanitizing(doc);
}
342

    
343
/*
 * If the page has Service Workers registered, unregister all of them and
 * reload the page. In that case the returned promise never resolves (it
 * rejects if unregistering fails; the reload is requested regardless).
 * Resolves right away when Service Workers are unavailable or none is
 * registered.
 */
async function disable_service_workers()
{
    const {serviceWorker} = navigator;
    if (!serviceWorker)
        return;

    const registrations = await serviceWorker.getRegistrations();
    if (registrations.length === 0)
        return;

    console.warn("Service Workers detected on this page! Unregistering and reloading");

    try {
        const pending = registrations.map(registration => registration.unregister());
        await Promise.all(pending);
    } finally {
        location.reload();
    }

    /* The page is about to be reloaded, so never actually return! */
    return new Promise(() => 0);
}
363

    
364
/*
 * Entry point, skipped entirely on privileged URLs: find out the policy for
 * this document, block scripts unless the policy allows them and hand the
 * rest over to handle_page_actions().
 */
if (!is_privileged_url(document.URL)) {
    /* Signed policies older than half an hour are not honoured. */
    const signature_lifetime_ms = 1800 * 1000;
    const min_time = new Date().getTime() - signature_lifetime_ms;

    /* No-op unless a known non-HTTP page has no valid policy yet. */
    let policy_received_callback = () => undefined;
    let policy;

    const is_http = /^https?:/.test(document.URL);

    if (is_http) {
        /* HTTP(S) pages: the policy is looked for in signed cookies. */
        const [cookie_policy, signatures] =
              extract_cookie_policy(document.cookie, min_time);
        policy = cookie_policy;

        /* Expire every policy cookie that has been read. */
        signatures.forEach(signature => {
            document.cookie = `haketilo-${signature}=; Max-Age=-1;`;
        });
    } else {
        /* Other pages: the policy is looked for in the URL's fragment. */
        const [scheme] = /^([^:]*)/.exec(document.URL).slice(1);
        const known_scheme = scheme === "file" || scheme === "ftp";

        if (!known_scheme)
            console.warn(`Unknown url scheme: \`${scheme}'!`);

        const [url_policy, original_url] =
              extract_url_policy(document.URL, min_time);
        policy = url_policy;

        /* Show the address without our payload in it. */
        history.replaceState(null, "", original_url);

        if (known_scheme && !policy)
            policy_received_callback = employ_nonhttp_policy;
    }

    if (!policy) {
        console.debug("Using fallback policy!");
        policy = {allow: false, nonce: gen_nonce()};
    }

    /* Payloads are only considered for HTML documents. */
    if (!(document instanceof HTMLDocument))
        policy.has_payload = false;

    console.debug("current policy", policy);

    /* Script blocking is skipped altogether when the policy allows scripts. */
    const blocking_tasks = policy.allow ?
          [Promise.resolve(), Promise.resolve()] :
          [sanitize_document(document, policy), disable_service_workers()];
    const doc_ready = Promise.all([...blocking_tasks, wait_loaded(document)]);

    handle_page_actions(policy.nonce, policy_received_callback, doc_ready);

    start_activity_info_server();
}
(2-2/4)