Project

General

Profile

Download (10.5 KB) Statistics
| Branch: | Tag: | Revision:

haketilo / content / main.js @ e2d26bad

1
/**
2
 * Hachette main content script run in all frames
3
 *
4
 * Copyright (C) 2021 Wojtek Kosior
5
 * Copyright (C) 2021 jahoti
6
 * Redistribution terms are gathered in the `copyright' file.
7
 */
8

    
9
/*
10
 * IMPORTS_START
11
 * IMPORT handle_page_actions
12
 * IMPORT extract_signed
13
 * IMPORT sign_data
14
 * IMPORT gen_nonce
15
 * IMPORT is_privileged_url
16
 * IMPORT is_chrome
17
 * IMPORT is_mozilla
18
 * IMPORT start_activity_info_server
19
 * IMPORT csp_rule
20
 * IMPORT is_csp_header_name
21
 * IMPORT sanitize_csp_header
22
 * IMPORTS_END
23
 */
24

    
25
/*
 * `content_loaded' is a flag kept on the document object itself. It starts out
 * true only when the document is already in the "complete" ready state.
 */
document.content_loaded = document.readyState === "complete";

/*
 * Return a promise that resolves once `e' has loaded: right away when its
 * `content_loaded' flag is set, otherwise upon the next `DOMContentLoaded'
 * event dispatched on `e'.
 */
const wait_loaded = e => {
    if (e.content_loaded)
        return Promise.resolve();

    return new Promise(resolve => {
        e.addEventListener("DOMContentLoaded", resolve, {once: true});
    });
};

/* Raise the flag as soon as the document finishes loading. */
wait_loaded(document).then(() => {
    document.content_loaded = true;
});
30

    
31
/*
 * Look through the `cookie' string for `hachette-<signature>=<data>' entries
 * and pick the policy to use for the current document.
 *
 * Every entry accepted by extract_signed() has its signature remembered. Among
 * those, the chosen policy is the most recent one that is not older than
 * `min_time' and whose `url' equals document.URL.
 *
 * Returns a 2-element array: the chosen policy (or null) and the array of
 * signatures of all validly signed entries, in order of appearance.
 */
function extract_cookie_policy(cookie, min_time)
{
    const signatures_seen = [];
    let newest = {time: -1};
    let chosen_policy = null;

    const entries = cookie.matchAll(/hachette-(\w*)=([^;]*)/g);

    for (const [, signature, data] of entries) {
        const candidate = extract_signed(signature, data);
        if (candidate.fail)
            continue;

        signatures_seen.push(signature);

        /* Only entries at least as fresh as everything seen so far count. */
        const time_floor = Math.max(min_time, newest.time);
        if (candidate.time < time_floor)
            continue;

        /* The data is JSON we produced ourselves, so parsing should work. */
        const candidate_policy =
              JSON.parse(decodeURIComponent(candidate.data));

        if (candidate_policy.url === document.URL) {
            newest = candidate;
            chosen_policy = candidate_policy;
        }
    }

    return [chosen_policy, signatures_seen];
}
58

    
59
/*
 * Extract a policy smuggled in the fragment of `url', which is expected to
 * look like `<base>#hachette_<signature>_<data><#original-fragment>'.
 *
 * Returns a 2-element array:
 *   - the policy, or null when there is no payload, the signature is rejected
 *     by extract_signed(), the signature is older than `min_time' or the
 *     policy was issued for a different URL;
 *   - the URL with the payload stripped (or `url' itself when no validly
 *     signed payload was found).
 */
function extract_url_policy(url, min_time)
{
    /* The regex matches any string, so exec() never returns null here. */
    const [base_url, payload, anchor] =
          /^([^#]*)#?([^#]*)(#?.*)$/.exec(url).slice(1, 4);

    const match = /^hachette_([^_]+)_(.*)$/.exec(payload);
    if (!match)
        return [null, url];

    const result = extract_signed(...match.slice(1, 3));
    if (result.fail)
        return [null, url];

    const original_url = base_url + anchor;

    /*
     * An expired signature yields no policy. Return early instead of
     * dereferencing a null policy below.
     */
    if (result.time < min_time)
        return [null, original_url];

    /* The data is JSON we produced ourselves, so parsing should work. */
    const policy = JSON.parse(decodeURIComponent(result.data));

    return [policy.url === original_url ? policy : null, original_url];
}
78

    
79
/*
 * For a policy with `allow' set: give it a fresh nonce, sign its URI-encoded
 * JSON form and navigate to the policy's URL with the signed payload placed in
 * the fragment, ahead of the original fragment (if any). Then reload the page.
 * Policies without `allow' are ignored.
 */
function employ_nonhttp_policy(policy)
{
    if (policy.allow) {
        policy.nonce = gen_nonce();

        const url_parts = /^([^#]*)(#?.*)$/.exec(policy.url);
        const base_url = url_parts[1];
        const target = url_parts[2];

        const serialized = encodeURIComponent(JSON.stringify(policy));
        const signed = sign_data(serialized, new Date().getTime());
        const payload = "hachette_" + signed.join("_");

        location.href = `${base_url}#${payload}${target}`;
        location.reload();
    }
}
93

    
94
/*
95
 * In the case of HTML documents:
96
 * 1. When injecting some payload we need to sanitize <meta> CSP tags before
97
 *    they reach the document.
98
 * 2. Only <meta> tags inside <head> are considered valid by the browser and
99
 *    need to be considered.
100
 * 3. We want to detach <html> from document, wait until its <head> completes
101
 *    loading, sanitize it and re-attach <html>.
102
 * 4. We shall wait for anything to appear in or after <body> and take that as
103
 *    a sign <head> has finished loading.
104
 * 5. Otherwise, getting the `DOMContentLoaded' event on the document shall also
105
 *    be a sign that <head> is fully loaded.
106
 */
107

    
108
/*
 * Create a MutationObserver watching the direct children of `DOM_element'.
 * Each reported mutation re-runs try_body_started() on `waiting'. The observer
 * is returned so that the caller can disconnect it later.
 */
function make_body_start_observer(DOM_element, waiting)
{
    const on_mutation = () => try_body_started(waiting);
    const body_start_observer = new MutationObserver(on_mutation);

    body_start_observer.observe(DOM_element, {childList: true});

    return body_start_observer;
}
114

    
115
/*
 * Check whether parsing has progressed past <head>. That is the case when the
 * detached <html> holds a <body> with a child or a following sibling, or when
 * something follows the document's current root element.
 *
 * If so, finish the waiting and return true. Otherwise return undefined. When
 * a <body> already exists and fewer than 2 observers are registered, also
 * start observing that <body>.
 */
function try_body_started(waiting)
{
    const body = waiting.detached_html.querySelector("body");

    const body_has_started = body && (body.firstChild || body.nextSibling);

    if (body_has_started || waiting.doc.documentElement.nextSibling) {
        finish_waiting(waiting);
        return true;
    }

    if (!body || waiting.observers.length >= 2)
        return;

    waiting.observers.push(make_body_start_observer(body, waiting));
}
128

    
129
/*
 * Conclude the waiting exactly once: mark `waiting' as finished, disconnect
 * all of its observers and schedule its callback with a zero-delay timeout.
 * Further calls on the same object do nothing.
 */
function finish_waiting(waiting)
{
    if (!waiting.finished) {
        waiting.finished = true;

        for (const observer of waiting.observers)
            observer.disconnect();

        setTimeout(waiting.callback, 0);
    }
}
137

    
138
/*
 * Arrange for `callback' to be called once the <head> of `detached_html' (the
 * original root element, currently detached from `doc') can be considered
 * fully parsed. This happens either when try_body_started() reports it or when
 * `doc' finishes loading, whichever comes first.
 */
function _wait_for_head(doc, detached_html, callback)
{
    const waiting = {doc, detached_html, callback, observers: []};

    if (try_body_started(waiting))
        return;

    /*
     * try_body_started() may already have registered an observer on <body>.
     * Append to the list instead of replacing it, so that this observer is
     * later disconnected too and does not get created a second time.
     */
    waiting.observers.push(make_body_start_observer(detached_html, waiting));

    wait_loaded(doc).then(() => finish_waiting(waiting));
}
149

    
150
/*
 * Promise-returning wrapper around _wait_for_head(): the promise resolves when
 * _wait_for_head() invokes its callback.
 */
function wait_for_head(doc, detached_html)
{
    return new Promise(function (resolve) {
        _wait_for_head(doc, detached_html, resolve);
    });
}
154

    
155
const blocked_str = "blocked";

/*
 * Disable attribute `attr' of `node' by renaming it to `blocked-<attr>'. When
 * `ns' is a string, the namespace-aware attribute methods are used with it.
 *
 * If `blocked-<attr>' already exists, it is first moved to
 * `blocked-blocked-<attr>' (and so on), so no previously stored value is lost.
 * Nothing happens when `node' has no attribute `attr'.
 */
function block_attribute(node, attr, ns=null)
{
    const [hasa, geta, seta, rema] = ["has", "get", "set", "remove"]
          .map(m => (n, ...args) => typeof ns === "string" ?
               n[`${m}AttributeNS`](ns, ...args) : n[`${m}Attribute`](...args));

    /*
     * Disabling attributes by prepending `blocked-' allows them to still be
     * relatively easily accessed in case they contain some useful data.
     *
     * Build the chain of names [free name, ..., `blocked-<attr>', `<attr>'],
     * where every name but the first is currently present on the node.
     */
    const names = [attr];
    while (hasa(node, names[0]))
        names.unshift(`${blocked_str}-${names[0]}`);

    /* Shift the values by one name, starting from the outermost one. */
    for (let i = 0; i + 1 < names.length; i++)
        seta(node, names[i], geta(node, names[i + 1]));

    /* Copying alone disables nothing - the original has to go. */
    rema(node, attr);
}
176

    
177
/*
 * Block the `content' attribute of a <meta> element, provided the element has
 * non-empty content and its `http-equiv' is recognized by
 * is_csp_header_name(). The `policy' argument is currently unused.
 */
function sanitize_meta(meta, policy)
{
    if (!meta.content)
        return;

    if (is_csp_header_name(meta.httpEquiv || "", true))
        block_attribute(meta, "content");
}
186

    
187
/*
 * Disable a <script> that has not been added to the live DOM yet (this does
 * not work for scripts already added). The current value of the `type'
 * attribute (null when absent) is stashed in the `hachette_blocked_type'
 * property and the type is switched to "text/plain".
 */
function sanitize_script(script)
{
    const original_type = script.getAttribute("type");

    script.hachette_blocked_type = original_type;
    script.type = "text/plain";
}
196

    
197
/*
 * Executed after the script has been connected to the DOM, when it is no
 * longer eligible for being executed by the browser. Puts back the `type'
 * attribute value stashed by sanitize_script() (removing the attribute when
 * there was none) and drops the `hachette_blocked_type' property. The `policy'
 * argument is currently unused.
 */
function desanitize_script(script, policy)
{
    const stashed_type = script.hachette_blocked_type;

    script.setAttribute("type", stashed_type);

    /* A stashed null means the script had no `type' attribute at all. */
    if (stashed_type === null)
        script.removeAttribute("type");

    delete script.hachette_blocked_type;
}
210

    
211
/*
 * Matches `data:' URLs whose media type ends in "ml" (e.g. text/html,
 * image/svg+xml) or is "unknown-content-type".
 */
const bad_url_reg = /^data:([^,;]*ml|unknown-content-type)/i;

/*
 * Block every `href', `src' and `data' attribute of `element' whose value
 * matches bad_url_reg. Nodes without an `attributes' collection are ignored.
 */
function sanitize_urls(element)
{
    const snapshot = [...element.attributes || []];
    const offending = [];

    /* First decide what to block, only then start modifying the element. */
    for (const attribute of snapshot) {
        if (/^(href|src|data)$/i.test(attribute.localName) &&
            bad_url_reg.test(attribute.value))
            offending.push(attribute);
    }

    for (const attribute of offending)
        block_attribute(element, attribute.localName, attribute.namespaceURI);
}
219

    
220
/*
 * Run sanitize_urls() on all elements of `doc' that carry a `href', `src' or
 * `data' attribute. If the document has not finished loading, additionally
 * sanitize every node added anywhere in it until loading completes.
 */
function start_data_urls_sanitizing(doc)
{
    for (const element of doc.querySelectorAll("*[href], *[src], *[data]"))
        sanitize_urls(element);

    if (doc.content_loaded)
        return;

    const observer = new MutationObserver(records => {
        for (const record of records) {
            for (const node of record.addedNodes)
                sanitize_urls(node);
        }
    });

    observer.observe(doc, {childList: true, subtree: true});
    wait_loaded(doc).then(() => observer.disconnect());
}
230

    
231
/*
 * Normally, we block scripts with CSP. However, Mozilla does optimizations that
 * cause part of the DOM to be loaded when our content scripts get to run. Thus,
 * before the CSP rules we inject (for non-HTTP pages) become effective, we need
 * to somehow block the execution of `<script>'s and intrinsics that were
 * already there.
 *
 * This is done by cancelling `beforescriptexecute' events until the next
 * timeout tick and by nulling every already-set `on*' handler property that
 * corresponds to an `on*' attribute of an element present in `doc'.
 */
function mozilla_initial_block(doc)
{
    const prevent_execution = event => event.preventDefault();

    doc.addEventListener("beforescriptexecute", prevent_execution);
    setTimeout(() => {
        doc.removeEventListener("beforescriptexecute", prevent_execution);
    });

    /* Collect all live handlers first, clear them afterwards. */
    const intrinsic_handlers = [];

    for (const element of [...doc.all]) {
        for (const attribute of [...element.attributes]) {
            const name = attribute.localName;

            if (/^on/.test(name) && element.wrappedJSObject[name])
                intrinsic_handlers.push([element, name]);
        }
    }

    for (const [element, name] of intrinsic_handlers)
        element.wrappedJSObject[name] = null;
}
249

    
250
/*
 * Here we block all scripts of a document which might be either an
 * HTMLDocument or an XMLDocument. Modifying an XML document might disrupt
 * Mozilla's XML preview. This is an unfortunate thing we have to accept for
 * now. XML documents *have to* be sanitized as well because they might
 * contain `<script>' tags (or on* attributes) with namespace declared as
 * "http://www.w3.org/1999/xhtml" or "http://www.w3.org/2000/svg" which allows
 * javascript execution.
 *
 * `doc' is the document to sanitize and `policy' supplies the nonce put into
 * the injected CSP rule. The returned promise resolves once the original root
 * element has been re-attached and sanitizing of data URLs has been started.
 */
async function sanitize_document(doc, policy)
{
    /*
     * Blocking of scripts that are in the DOM from the beginning. Needed for
     * Mozilla.
     */
    if (is_mozilla)
        mozilla_initial_block(doc);

    /*
     * Ensure our CSP rules are employed from the beginning. This CSP injection
     * method is, when possible, going to be applied together with CSP rules
     * injected using webRequest.
     * Using elements namespaced as HTML makes this CSP injection also work for
     * non-HTML documents.
     */
    const html = new DOMParser().parseFromString(`<html><head><meta \
http-equiv="Content-Security-Policy" content="${csp_rule(policy.nonce)}"\
/></head><body>Loading...</body></html>`, "text/html").documentElement;

    /*
     * Root node gets hijacked now, to be re-attached after <head> is loaded
     * and sanitized.
     */
    const root = doc.documentElement;
    root.replaceWith(html);

    /*
     * For XML documents, we don't intend to inject payload, so we neither block
     * document's CSP `<meta>' tags nor wait for `<head>' to be parsed.
     * The check is made on `doc', the document actually being sanitized,
     * rather than on the global `document'.
     */
    if (doc instanceof HTMLDocument) {
        await wait_for_head(doc, root);

        root.querySelectorAll("head meta")
            .forEach(m => sanitize_meta(m, policy));
    }

    /*
     * Scripts are neutralized while the root is still detached and restored
     * right after re-attaching it, when they can no longer get executed.
     */
    root.querySelectorAll("script").forEach(s => sanitize_script(s, policy));
    html.replaceWith(root);
    root.querySelectorAll("script").forEach(s => desanitize_script(s, policy));

    start_data_urls_sanitizing(doc);
}
303

    
304
/*
 * Entry point. On non-privileged pages: work out the policy for this document
 * (from cookies on HTTP(S) pages, from the URL fragment elsewhere), sanitize
 * the document unless the policy allows scripts and hand control over to the
 * page actions handler.
 */
if (!is_privileged_url(document.URL)) {
    /* Signature valid for half an hour. */
    const half_hour_ms = 1800 * 1000;
    const min_time = new Date().getTime() - half_hour_ms;

    let policy;
    let policy_received_callback = () => undefined;

    if (/^https?:/.test(document.URL)) {
        const [cookie_policy, signatures] =
              extract_cookie_policy(document.cookie, min_time);
        policy = cookie_policy;

        /* Expire all validly signed policy cookies found. */
        signatures.forEach(signature => {
            document.cookie = `hachette-${signature}=; Max-Age=-1;`;
        });
    } else {
        const scheme = /^([^:]*)/.exec(document.URL)[1];
        const known_scheme = scheme === "file" || scheme === "ftp";

        if (!known_scheme)
            console.warn(`Unknown url scheme: \`${scheme}'!`);

        const [url_policy, original_url] =
              extract_url_policy(document.URL, min_time);
        policy = url_policy;

        /* Hide the policy payload (if any) from the address bar. */
        history.replaceState(null, "", original_url);

        if (known_scheme && !policy)
            policy_received_callback = employ_nonhttp_policy;
    }

    if (!policy) {
        console.debug("Using fallback policy!");
        policy = {allow: false, nonce: gen_nonce()};
    }

    console.debug("current policy", policy);

    const sanitized = policy.allow ?
          Promise.resolve() : sanitize_document(document, policy);
    const doc_ready = Promise.all([sanitized, wait_loaded(document)]);

    handle_page_actions(policy.nonce, policy_received_callback, doc_ready);

    start_activity_info_server();
}
(2-2/4)