Project

General

Profile

Download (11.1 KB) Statistics
| Branch: | Tag: | Revision:

haketilo / content / main.js @ d658cadf

1
/**
2
 * Hachette main content script run in all frames
3
 *
4
 * Copyright (C) 2021 Wojtek Kosior
5
 * Copyright (C) 2021 jahoti
6
 * Redistribution terms are gathered in the `copyright' file.
7
 */
8

    
9
/*
10
 * IMPORTS_START
11
 * IMPORT handle_page_actions
12
 * IMPORT extract_signed
13
 * IMPORT sign_data
14
 * IMPORT gen_nonce
15
 * IMPORT is_privileged_url
16
 * IMPORT is_chrome
17
 * IMPORT is_mozilla
18
 * IMPORT start_activity_info_server
19
 * IMPORT make_csp_rule
20
 * IMPORT csp_header_regex
21
 * IMPORTS_END
22
 */
23

    
24
/*
 * `content_loaded' is a custom flag we keep on the document object itself. It
 * starts out true only when the document already reports being completely
 * loaded.
 */
document.content_loaded = document.readyState === "complete";

/*
 * Return a promise that resolves right away when `e' is already flagged as
 * loaded and otherwise resolves upon the next "DOMContentLoaded" event
 * dispatched on `e'.
 */
const wait_loaded = e => {
    if (e.content_loaded)
	return Promise.resolve();

    return new Promise(resolve => {
	e.addEventListener("DOMContentLoaded", resolve, {once: true});
    });
};

/* Flip the flag as soon as the document finishes loading. */
wait_loaded(document).then(() => {
    document.content_loaded = true;
});
29

    
30
/*
 * Scan the cookie string `cookie' for `hachette-<signature>=<data>' entries
 * and pick a policy from among them.
 *
 * Every entry is checked with extract_signed(). Among entries that verify, the
 * one with the greatest `time' not lower than `min_time' whose decoded policy
 * has `url' equal to `document.URL' wins.
 *
 * Returns a 2-element array: the chosen policy object (or null when there is
 * none) and an array of signatures of all entries that verified (whether or
 * not they were chosen).
 */
function extract_cookie_policy(cookie, min_time)
{
    const cookie_regex = /hachette-(\w*)=([^;]*)/g;
    const verified_signatures = [];
    let newest_time = -1;
    let chosen_policy = null;

    for (const [, signature, data] of cookie.matchAll(cookie_regex)) {
	const signed = extract_signed(signature, data);
	if (signed.fail)
	    continue;

	verified_signatures.push(signature);

	/* Too old or older than what we already have. */
	if (signed.time < Math.max(min_time, newest_time))
	    continue;

	/* This should succeed - it's our self-produced valid JSON. */
	const candidate = JSON.parse(decodeURIComponent(signed.data));
	if (candidate.url === document.URL) {
	    newest_time = signed.time;
	    chosen_policy = candidate;
	}
    }

    return [chosen_policy, verified_signatures];
}
57

    
58
/*
 * Look for a signed policy smuggled in the fragment of `url' in the form
 * `<base>#hachette_<signature>_<data>[#<original fragment>]'.
 *
 * Returns a 2-element array:
 *   - the policy object, or null when no policy is present, the signature
 *     does not verify, the signature is older than `min_time' or the policy
 *     was issued for a different URL;
 *   - the URL with our payload stripped (the unmodified `url' when there was
 *     no verifiable payload in it).
 */
function extract_url_policy(url, min_time)
{
    const [base_url, payload, anchor] =
	  /^([^#]*)#?([^#]*)(#?.*)$/.exec(url).slice(1, 4);

    const match = /^hachette_([^_]+)_(.*)$/.exec(payload);
    if (!match)
	return [null, url];

    const result = extract_signed(...match.slice(1, 3));
    if (result.fail)
	return [null, url];

    const original_url = base_url + anchor;

    /*
     * An expired signature yields no policy. Bail out here instead of
     * dereferencing a null policy below.
     */
    if (result.time < min_time)
	return [null, original_url];

    /* This should succeed - it's our self-produced valid JSON. */
    const policy = JSON.parse(decodeURIComponent(result.data));

    return [policy.url === original_url ? policy : null, original_url];
}
77

    
78
/*
 * For a policy that allows scripts: give it a fresh nonce, sign it, embed it
 * in the fragment of the policy's URL (in front of the original fragment, if
 * any), navigate there and reload the page. Does nothing for a policy that
 * doesn't have `allow' set.
 */
function employ_nonhttp_policy(policy)
{
    if (policy.allow) {
	policy.nonce = gen_nonce();

	const [, url_without_fragment, fragment] =
	      /^([^#]*)(#?.*)$/.exec(policy.url);
	const serialized = encodeURIComponent(JSON.stringify(policy));
	const signed_parts = sign_data(serialized, Date.now());
	const payload = `hachette_${signed_parts.join("_")}`;

	location.href = `${url_without_fragment}#${payload}${fragment}`;
	location.reload();
    }
}
92

    
93
/*
94
 * In the case of HTML documents:
95
 * 1. When injecting some payload we need to sanitize <meta> CSP tags before
96
 *    they reach the document.
97
 * 2. Only <meta> tags inside <head> are considered valid by the browser and
98
 *    need to be considered.
99
 * 3. We want to detach <html> from document, wait until its <head> completes
100
 *    loading, sanitize it and re-attach <html>.
101
 * 4. We shall wait for anything to appear in or after <body> and take that as
102
 *    a sign <head> has finished loading.
103
 * 5. Otherwise, getting the `DOMContentLoaded' event on the document shall also
104
 *    be a sign that <head> is fully loaded.
105
 */
106

    
107
/*
 * Create and start a MutationObserver that re-runs try_body_started() for
 * `waiting' each time the list of direct children of `DOM_element' changes.
 * The started observer is returned so that the caller can disconnect it later.
 */
function make_body_start_observer(DOM_element, waiting)
{
    const on_children_changed = () => {
	try_body_started(waiting);
    };

    const child_list_observer = new MutationObserver(on_children_changed);
    child_list_observer.observe(DOM_element, {childList: true});

    return child_list_observer;
}
113

    
114
/*
 * Check whether there are signs that `<head>' of the detached root element has
 * been fully parsed: something appeared inside or right after `<body>', or
 * something follows the document's current root element.
 *
 * If so, finish the waiting and return true. Otherwise return nothing, but,
 * if `<body>' already exists and fewer than 2 observers are registered, start
 * observing `<body>' as well.
 */
function try_body_started(waiting)
{
    const body = waiting.detached_html.querySelector("body");
    const body_has_started = body && (body.firstChild || body.nextSibling);

    if (body_has_started || waiting.doc.documentElement.nextSibling) {
	finish_waiting(waiting);
	return true;
    }

    const should_observe_body = body && waiting.observers.length < 2;
    if (should_observe_body)
	waiting.observers.push(make_body_start_observer(body, waiting));
}
127

    
128
/*
 * Conclude the waiting described by `waiting': mark it finished, disconnect
 * all its observers and schedule its callback with a zero timeout. Calls
 * after the first one are no-ops.
 */
function finish_waiting(waiting)
{
    if (!waiting.finished) {
	waiting.finished = true;

	for (const observer of waiting.observers)
	    observer.disconnect();

	setTimeout(waiting.callback, 0);
    }
}
136

    
137
/*
 * Arrange for `callback' to be called once `<head>' of `detached_html' (the
 * root element that has been detached from `doc') can be considered fully
 * parsed. This is decided by try_body_started() or, at the latest, by `doc'
 * finishing loading.
 */
function _wait_for_head(doc, detached_html, callback)
{
    const waiting = {doc, detached_html, callback, observers: []};

    if (try_body_started(waiting))
	return;

    /*
     * The call above might have already registered an observer on `<body>'.
     * Append to the list instead of overwriting it, so that such an observer
     * still gets disconnected by finish_waiting() and is not registered a
     * second time.
     */
    waiting.observers.push(make_body_start_observer(detached_html, waiting));

    wait_loaded(doc).then(() => finish_waiting(waiting));
}
148

    
149
/*
 * Promise-returning wrapper around _wait_for_head(). The promise resolves
 * when _wait_for_head() invokes the callback it is given.
 */
function wait_for_head(doc, detached_html)
{
    return new Promise(resolve => {
	_wait_for_head(doc, detached_html, resolve);
    });
}
153

    
154
const blocked_str = "blocked";

/*
 * Disable attribute `attr' of `node' by moving its value to an attribute named
 * `blocked-<attr>'. If that name is already taken, the existing value is first
 * moved to `blocked-blocked-<attr>' and so on, so that no value gets lost.
 *
 * When `ns' is a string, the namespace-aware (`*AttributeNS') DOM methods are
 * used with `ns' as the namespace. Otherwise the plain ones are used.
 */
function block_attribute(node, attr, ns=null)
{
    const [hasa, geta, seta, rema] = ["has", "get", "set", "remove"]
	  .map(m => (n, ...args) => typeof ns === "string" ?
	       n[`${m}AttributeNS`](ns, ...args) : n[`${m}Attribute`](...args));
    /*
     * Disabling attributes by prepending `blocked-' allows them to still be
     * relatively easily accessed in case they contain some useful data.
     */

    /*
     * Find the first free name in the chain `attr', `blocked-attr',
     * `blocked-blocked-attr', ... Name parts have to be joined with "-" to
     * match the names actually created below.
     */
    const construct_name = [attr];
    while (hasa(node, construct_name.join("-")))
	construct_name.unshift(blocked_str);

    /* Shift every value one step up the chain, starting with the longest. */
    while (construct_name.length > 1) {
	construct_name.shift();
	const name = construct_name.join("-");
	seta(node, `${blocked_str}-${name}`, geta(node, name));
    }

    rema(node, attr);
}
177

    
178
/*
179
 * Used to disable `<script>'s and `<meta>'s that have not yet been added to
180
 * live DOM (doesn't work for those already added).
181
 */
182
/*
 * Block the `content' attribute of `meta' when its `http-equiv' matches
 * `csp_header_regex' and it actually has some content.
 */
function sanitize_meta(meta)
{
    const is_csp_meta = csp_header_regex.test(meta.httpEquiv);

    if (is_csp_meta && meta.content)
	block_attribute(meta, "content");
}
187

    
188
/*
 * Remember the current value of the `type' attribute of `script' (null when
 * absent) under its `hachette_blocked_type' property and switch the script's
 * type to "text/plain".
 */
function sanitize_script(script)
{
    const original_type = script.getAttribute("type");

    script.hachette_blocked_type = original_type;
    script.type = "text/plain";
}
193

    
194
/*
195
 * Executed after `<script>' has been connected to the DOM, when it is no longer
196
 * eligible for being executed by the browser.
197
 */
198
function desanitize_script(script)
{
    /*
     * Restore the `type' attribute value stored in `hachette_blocked_type'.
     * A stored null (or a missing property) means there should be no `type'
     * attribute at all. Decide this up front, so that the strings "null" or
     * "undefined" never get written to the attribute, not even temporarily.
     */
    const original_type = script.hachette_blocked_type;

    if (original_type === null || original_type === undefined)
	script.removeAttribute("type");
    else
	script.setAttribute("type", original_type);

    delete script.hachette_blocked_type;
}
207

    
208
const bad_url_reg = /^data:([^,;]*ml|unknown-content-type)/i;

/*
 * Block those `href', `src' and `data' attributes (in any namespace) of
 * `element' whose value is a `data:' URL matched by `bad_url_reg'. Nodes
 * without attributes (e.g. text nodes) are silently ignored.
 */
function sanitize_urls(element)
{
    /* Iterate over a copy - block_attribute() modifies the attribute list. */
    for (const attr of [...element.attributes || []]) {
	if (!/^(href|src|data)$/i.test(attr.localName))
	    continue;

	/*
	 * URL parsing removes tabs and newlines from anywhere in the string,
	 * as well as leading control characters and spaces. Do the same
	 * before matching, so that values like " data:text/html,..." or
	 * "da\tta:text/html,..." cannot slip through.
	 */
	const effective_url = attr.value
	      .replace(/[\t\n\r]/g, "")
	      .replace(/^[\u0000-\u0020]+/, "");

	if (bad_url_reg.test(effective_url))
	    block_attribute(element, attr.localName, attr.namespaceURI);
    }
}
216

    
217
/*
 * Run sanitize_urls() on all elements of `doc' that have a `href', `src' or
 * `data' attribute. If `doc' is not yet flagged as loaded, also run it on
 * every node added anywhere under `doc' until `doc' finishes loading.
 */
function start_data_urls_sanitizing(doc)
{
    for (const element of doc.querySelectorAll("*[href], *[src], *[data]"))
	sanitize_urls(element);

    if (doc.content_loaded)
	return;

    const on_mutations = records => {
	for (const record of records) {
	    for (const added_node of record.addedNodes)
		sanitize_urls(added_node);
	}
    };

    const observer = new MutationObserver(on_mutations);
    observer.observe(doc, {childList: true, subtree: true});

    wait_loaded(doc).then(() => observer.disconnect());
}
227

    
228
/*
229
 * Normally, we block scripts with CSP. However, Mozilla does optimizations that
230
 * cause part of the DOM to be loaded when our content scripts get to run. Thus,
231
 * before the CSP rules we inject (for non-HTTP pages) become effective, we need
232
 * to somehow block the execution of `<script>'s and intrinsics that were
233
 * already there. Additionally, some browsers (IceCat 60) seem to have problems
234
 * applying this CSP to non-inline `<script>'s in certain scenarios.
235
 */
236
/*
 * Event listener that cancels `event' unless its target is flagged with a
 * truthy `_hachette_payload' property.
 */
function prevent_script_execution(event)
{
    const {target} = event;

    if (target._hachette_payload)
	return;

    event.preventDefault();
}
241

    
242
/*
 * Block scripts that are already present in `doc': register
 * prevent_script_execution() for "beforescriptexecute" events and, for each
 * element, null out (through `wrappedJSObject') every currently set property
 * that is named like one of the element's `on*' attributes.
 */
function mozilla_initial_block(doc)
{
    doc.addEventListener("beforescriptexecute", prevent_script_execution);

    for (const elem of doc.querySelectorAll("*")) {
	/* First collect the names, then clear the handlers. */
	const handler_names = [];

	for (const {localName} of [...elem.attributes]) {
	    if (/^on/.test(localName) && elem.wrappedJSObject[localName])
		handler_names.push(localName);
	}

	for (const name of handler_names)
	    elem.wrappedJSObject[name] = null;
    }
}
252

    
253
/*
254
 * Here we block all scripts of a document which might be either an
255
 * HTMLDocument or an XMLDocument. Modifying an XML document might disrupt
256
 * Mozilla's XML preview. This is an unfortunate thing we have to accept for
257
 * now. XML documents *have to* be sanitized as well because they might
258
 * contain `<script>' tags (or on* attributes) with namespace declared as
259
 * "http://www.w3.org/1999/xhtml" or "http://www.w3.org/2000/svg" which allows
260
 * javascript execution.
261
 */
262
async function sanitize_document(doc, policy)
{
    /*
     * Scripts that are in the DOM from the very beginning need to be blocked
     * separately. Needed for Mozilla.
     */
    if (is_mozilla)
	mozilla_initial_block(doc);

    /*
     * Make our CSP rules effective from the beginning. Whenever possible, this
     * CSP injection method is going to be applied together with CSP rules
     * injected using webRequest.
     * Because HTML-namespaced elements are used, this CSP injection works for
     * non-HTML documents as well.
     */
    const placeholder_markup =
	  '<html><head><meta http-equiv="Content-Security-Policy" ' +
	  `content="${make_csp_rule(policy)}"/></head>` +
	  "<body>Loading...</body></html>";
    const placeholder_root = new DOMParser()
	  .parseFromString(placeholder_markup, "text/html").documentElement;

    /*
     * The real root node gets swapped out for the placeholder now. It is put
     * back once its <head> is loaded and sanitized.
     */
    const original_root = doc.documentElement;
    original_root.replaceWith(placeholder_root);

    /*
     * Without a payload to inject we neither block the document's own CSP
     * `<meta>' tags nor wait for `<head>' to be parsed.
     */
    if (policy.has_payload) {
	await wait_for_head(doc, original_root);

	for (const meta of original_root.querySelectorAll("head meta"))
	    sanitize_meta(meta, policy);
    }

    /*
     * Scripts are disabled only for the moment of re-attaching the root and
     * get their original types back right afterwards.
     */
    for (const script of original_root.querySelectorAll("script"))
	sanitize_script(script, policy);

    placeholder_root.replaceWith(original_root);

    for (const script of original_root.querySelectorAll("script"))
	desanitize_script(script, policy);

    start_data_urls_sanitizing(doc);
}
306

    
307
/*
 * Unregister all Service Workers registered for this page, if any. When some
 * were found, the page gets reloaded (also when unregistering fails) and the
 * returned promise never resolves. When there are none, or when
 * `navigator.serviceWorker' is unavailable, the function simply returns.
 */
async function disable_service_workers()
{
    const sw_container = navigator.serviceWorker;
    if (!sw_container)
	return;

    const registrations = await sw_container.getRegistrations();
    if (registrations.length === 0)
	return;

    console.warn("Service Workers detected on this page! Unregistering and reloading");

    try {
	const pending = registrations.map(registration =>
					  registration.unregister());
	await Promise.all(pending);
    } finally {
	location.reload();
    }

    /* The page is going away - give the caller a promise that never settles. */
    return new Promise(() => {});
}
327

    
328
/*
 * Main part of the script, skipped entirely on privileged pages: establish
 * the policy for this document, block scripts if the policy requires that and
 * hand control over to the page actions handler.
 */
if (!is_privileged_url(document.URL)) {
    /* Signature valid for half an hour. */
    const signature_lifetime_ms = 1800 * 1000;
    const min_time = Date.now() - signature_lifetime_ms;

    let policy;
    let on_policy_received = () => undefined;

    if (/^https?:/.test(document.URL)) {
	/* HTTP(S) pages get their policy through signed cookies. */
	const [cookie_policy, signatures] =
	      extract_cookie_policy(document.cookie, min_time);
	policy = cookie_policy;

	/* Expire the policy cookies that have been read. */
	signatures.forEach(signature => {
	    document.cookie = `hachette-${signature}=; Max-Age=-1;`;
	});
    } else {
	/* Other pages get their policy through the URL fragment. */
	const scheme = /^([^:]*)/.exec(document.URL)[1];
	const known_scheme = scheme === "file" || scheme === "ftp";

	if (!known_scheme)
	    console.warn(`Unknown url scheme: \`${scheme}'!`);

	const [url_policy, original_url] =
	      extract_url_policy(document.URL, min_time);
	policy = url_policy;

	/* Hide our payload (if any) from the address bar and the page. */
	history.replaceState(null, "", original_url);

	if (known_scheme && !policy)
	    on_policy_received = employ_nonhttp_policy;
    }

    if (!policy) {
	console.debug("Using fallback policy!");
	policy = {allow: false, nonce: gen_nonce()};
    }

    if (!(document instanceof HTMLDocument))
	policy.has_payload = false;

    console.debug("current policy", policy);

    /* Blocking tasks are only started when scripts are not allowed. */
    const blocking_tasks = policy.allow ?
	  [Promise.resolve(), Promise.resolve()] :
	  [sanitize_document(document, policy), disable_service_workers()];
    const doc_ready = Promise.all([...blocking_tasks, wait_loaded(document)]);

    handle_page_actions(policy.nonce, on_policy_received, doc_ready);

    start_activity_info_server();
}
(2-2/4)