Project

General

Profile

Download (9.96 KB) Statistics
| Branch: | Tag: | Revision:

haketilo / content / main.js @ 704f2da0

1
/**
2
 * Hachette main content script run in all frames
3
 *
4
 * Copyright (C) 2021 Wojtek Kosior
5
 * Copyright (C) 2021 jahoti
6
 * Redistribution terms are gathered in the `copyright' file.
7
 */
8

    
9
/*
10
 * IMPORTS_START
11
 * IMPORT handle_page_actions
12
 * IMPORT extract_signed
13
 * IMPORT sign_data
14
 * IMPORT gen_nonce
15
 * IMPORT is_privileged_url
16
 * IMPORT is_chrome
17
 * IMPORT is_mozilla
18
 * IMPORT start_activity_info_server
19
 * IMPORT csp_rule
20
 * IMPORT is_csp_header_name
21
 * IMPORT sanitize_csp_header
22
 * IMPORTS_END
23
 */
24

    
25
/*
 * Scan a cookie string for `hachette-<signature>=<data>' entries and pick the
 * policy to use for the current page.
 *
 * Every entry that passes extract_signed() has its signature collected.
 * Among those, the policy chosen is the last one that is neither older than
 * `min_time' nor older than the previously chosen one and whose `url' equals
 * document.URL.
 *
 * Returns [policy_or_null, array_of_collected_signatures].
 */
function extract_cookie_policy(cookie, min_time)
{
    const seen_signatures = [];
    let newest_time = -1;
    let chosen_policy = null;

    const entries = cookie.matchAll(/hachette-(\w*)=([^;]*)/g);

    for (const [, signature, signed_data] of entries) {
        const verified = extract_signed(signature, signed_data);
        if (verified.fail)
            continue;

        seen_signatures.push(signature);

        if (verified.time < Math.max(min_time, newest_time))
            continue;

        /* Parsing is expected to succeed - the JSON was produced by us. */
        const candidate = JSON.parse(decodeURIComponent(verified.data));

        if (candidate.url === document.URL) {
            newest_time = verified.time;
            chosen_policy = candidate;
        }
    }

    return [chosen_policy, seen_signatures];
}
52

    
53
/*
 * Extract a signed policy smuggled in the fragment of `url' as
 * `#hachette_<signature>_<data>' (optionally followed by the page's real
 * `#anchor').
 *
 * Returns [policy_or_null, url_to_restore]:
 *  - when no payload is present or extract_signed() reports failure, the url
 *    is returned unchanged;
 *  - otherwise the url with the payload stripped is returned, together with
 *    the policy if it is not older than `min_time' and its `url' matches the
 *    stripped url (null otherwise).
 */
function extract_url_policy(url, min_time)
{
    const [base_url, payload, anchor] =
          /^([^#]*)#?([^#]*)(#?.*)$/.exec(url).slice(1, 4);

    const match = /^hachette_([^_]+)_(.*)$/.exec(payload);
    if (!match)
        return [null, url];

    const result = extract_signed(...match.slice(1, 3));
    if (result.fail)
        return [null, url];

    const original_url = base_url + anchor;

    /*
     * A stale signature yields no policy. Return early so that we never
     * dereference a null policy below.
     */
    if (result.time < min_time)
        return [null, original_url];

    const policy = JSON.parse(decodeURIComponent(result.data));

    return [policy.url === original_url ? policy : null, original_url];
}
72

    
73
/*
 * For a policy that allows scripts: give it a fresh nonce, serialize and sign
 * it, embed the result in the fragment of the policy's url (before the
 * original anchor, if any) and navigate there, reloading the page.
 * Policies with a falsy `allow' are ignored. Note that `policy.nonce' gets
 * overwritten on the passed object.
 */
function employ_nonhttp_policy(policy)
{
    if (policy.allow) {
        policy.nonce = gen_nonce();

        const url_parts = /^([^#]*)(#?.*)$/.exec(policy.url);
        const base_url = url_parts[1];
        const target = url_parts[2];

        const serialized = encodeURIComponent(JSON.stringify(policy));
        const signed = sign_data(serialized, new Date().getTime());
        const payload = "hachette_" + signed.join("_");

        location.href = `${base_url}#${payload}${target}`;
        location.reload();
    }
}
87

    
88
/*
89
 * 1. When injecting some payload we need to sanitize <meta> CSP tags before
90
 *    they reach the document.
91
 * 2. Only <meta> tags inside <head> are considered valid by the browser and
92
 *    need to be considered.
93
 * 3. We want to detach <html> from document, wait until its <head> completes
94
 *    loading, sanitize it and re-attach <html>.
95
 * 4. Browsers are eager to add <meta>'s that appear after `</head>' but before
96
 *    `<body>'. Due to this behavior the `DOMContentLoaded' event is considered
97
 *    unreliable (although it could still work properly, it is just problematic
98
 *    to verify).
99
 * 5. We shall wait for anything to appear in or after <body> and take that as
100
 *    a sign <head> has _really_ finished loading.
101
 */
102

    
103
/*
 * Create and start a MutationObserver that re-runs try_body_started() for
 * `waiting' each time the direct children of `DOM_element' change.
 * The started observer is returned so the caller can disconnect it later.
 */
function make_body_start_observer(DOM_element, waiting)
{
    const recheck = () => try_body_started(waiting);
    const children_observer = new MutationObserver(recheck);

    children_observer.observe(DOM_element, {childList: true});

    return children_observer;
}
109

    
110
/*
 * Check whether anything has appeared in or after <body> of the detached
 * <html>, or after the document's current root element. If so, finish the
 * wait and return true.
 * Otherwise, if <body> already exists and fewer than 2 observers are
 * registered, start observing <body> as well. Nothing is returned in this
 * case.
 */
function try_body_started(waiting)
{
    const body = waiting.detached_html.querySelector("body");
    const body_has_content = body && (body.firstChild || body.nextSibling);

    if (body_has_content || waiting.doc.documentElement.nextSibling) {
        finish_waiting(waiting);
        return true;
    }

    if (body && waiting.observers.length < 2) {
        const body_observer = make_body_start_observer(body, waiting);
        waiting.observers.push(body_observer);
    }
}
123

    
124
/*
 * End the wait described by `waiting': disconnect all of its observers,
 * unregister its `DOMContentLoaded' listener and schedule its callback to
 * run asynchronously.
 */
function finish_waiting(waiting)
{
    for (const observer of waiting.observers)
        observer.disconnect();

    const {doc, loaded_cb, callback} = waiting;
    doc.removeEventListener("DOMContentLoaded", loaded_cb);

    setTimeout(callback, 0);
}
130

    
131
/*
 * Arrange for `callback' to be called once <head> of `detached_html' can be
 * considered fully loaded.
 *
 * For HTML documents this happens as soon as try_body_started() succeeds
 * (checked immediately and then on each change to the children of the
 * detached <html>), or on `DOMContentLoaded', whichever comes first.
 * For other documents only `DOMContentLoaded' is awaited.
 */
function _wait_for_head(doc, detached_html, callback)
{
    const waiting = {doc, detached_html, callback, observers: []};

    /*
     * For XML and SVG documents, instead of waiting for `<head>', we wait
     * for the entire document to finish loading.
     */
    if (doc instanceof HTMLDocument) {
        if (try_body_started(waiting))
            return;

        /*
         * try_body_started() may have already registered a <body> observer.
         * Append instead of overwriting the list, so that observer also gets
         * disconnected by finish_waiting().
         */
        waiting.observers.push(make_body_start_observer(detached_html,
                                                        waiting));
    }

    waiting.loaded_cb = () => finish_waiting(waiting);
    doc.addEventListener("DOMContentLoaded", waiting.loaded_cb);
}
149

    
150
/*
 * Promise-returning wrapper around _wait_for_head(). The promise resolves
 * when _wait_for_head() invokes its callback.
 */
function wait_for_head(doc, detached_html)
{
    const start_waiting = resolve => _wait_for_head(doc, detached_html, resolve);

    return new Promise(start_waiting);
}
154

    
155
const blocked_str = "blocked";

/*
 * Disable attribute `attr' of `node' by moving its value to
 * `blocked-<attr>'.
 *
 * Disabling attributes this way allows them to still be relatively easily
 * accessed in case they contain some useful data.
 *
 * If `blocked-<attr>' (or `blocked-blocked-<attr>', etc.) is already present,
 * the existing values are first shifted one level up so that nothing gets
 * overwritten. If `attr' is not present, the node is left unchanged.
 */
function block_attribute(node, attr)
{
    const prefix = `${blocked_str}-`;

    /* Find the first name in the `blocked-' chain that is not taken yet. */
    let free_name = attr;
    while (node.hasAttribute(free_name))
        free_name = prefix + free_name;

    /* Shift values up the chain, starting from the topmost one. */
    while (free_name !== attr) {
        const occupied_name = free_name.slice(prefix.length);
        node.setAttribute(free_name, node.getAttribute(occupied_name));
        free_name = occupied_name;
    }

    node.removeAttribute(attr);
}
175

    
176
/*
 * Neutralize a <meta> tag that carries a CSP-like header.
 *
 * Tags with empty content, or whose `http-equiv' is not accepted by
 * is_csp_header_name(name, true), are left alone. Otherwise the `content'
 * attribute gets blocked and, if is_csp_header_name(name, false) also accepts
 * the name, a sanitized value is put back in its place.
 * (The exact meaning of the boolean flag is defined by is_csp_header_name(),
 * which lives outside of this file.)
 */
function sanitize_meta(meta, policy)
{
    const header_name = meta.getAttribute("http-equiv");
    const header_value = meta.content;

    if (header_value && is_csp_header_name(header_name, true)) {
        block_attribute(meta, "content");

        if (is_csp_header_name(header_name, false)) {
            const sanitized =
                  sanitize_csp_header({value: header_value}, policy);
            meta.content = sanitized.value;
        }
    }
}
189

    
190
/*
 * Make `script' non-executable by switching its type to "text/plain". The
 * previous type is remembered in the `hachette_blocked_type' property so that
 * it can be restored later.
 */
function sanitize_script(script)
{
    const original_type = script.type;

    script.hachette_blocked_type = original_type;
    script.type = "text/plain";
}
195

    
196
/*
 * Undo sanitize_script(): restore the `type' attribute remembered in
 * `hachette_blocked_type' (dropping the attribute altogether if nothing was
 * remembered) and forget the remembered value.
 *
 * Executed after script has been connected to the DOM, when it is no longer
 * eligible for being executed by the browser.
 */
function desanitize_script(script, policy)
{
    const remembered_type = script.hachette_blocked_type;

    script.setAttribute("type", remembered_type);

    if (remembered_type === undefined)
        script.removeAttribute("type");

    delete script.hachette_blocked_type;
}
209

    
210
/*
 * Inject our CSP by briefly adding a `Content-Security-Policy' <meta> tag,
 * built with csp_rule() from the policy's nonce, to `head'.
 */
function apply_hachette_csp_rules(doc, head, policy)
{
    const csp_meta = doc.createElement("meta");

    csp_meta.setAttribute("http-equiv", "Content-Security-Policy");
    const rule = csp_rule(policy.nonce);
    csp_meta.setAttribute("content", rule);

    head.append(csp_meta);

    /* The CSP is in effect once appended, so the tag itself can go away. */
    csp_meta.remove();
}
219

    
220
/*
 * Block `href', `src' and `data' attributes of `element' that hold a `data:'
 * URL whose media type ends in `ml' (e.g. text/html, image/svg+xml) or is
 * `unknown-content-type'.
 *
 * This function is also used as a callback for nodes reported by a
 * MutationObserver. Those may be text or comment nodes, which have no
 * `attributes' - such nodes are silently ignored.
 */
function sanitize_urls(element)
{
    /* Copy the list - block_attribute() modifies it while we iterate. */
    for (const attribute of [...(element.attributes || [])]) {
        if (/^(href|src|data)$/i.test(attribute.localName) &&
            /^data:([^,;]*ml|unknown-content-type)/i.test(attribute.value))
            block_attribute(element, attribute.localName);
    }
}
228

    
229
/*
 * Run sanitize_urls() on every element of `doc' that currently has a `href',
 * `src' or `data' attribute and then keep running it on each node that gets
 * added anywhere in the document later on.
 */
function start_data_urls_sanitizing(doc)
{
    const present = doc.querySelectorAll("*[href], *[src], *[data]");
    present.forEach(sanitize_urls);

    const sanitize_added = record => record.addedNodes.forEach(sanitize_urls);
    const on_mutations = records => records.forEach(sanitize_added);

    const additions_observer = new MutationObserver(on_mutations);
    additions_observer.observe(doc, {childList: true, subtree: true});
}
236

    
237
/*
 * Block script-carrying attributes on `root_element' and all of its
 * descendants:
 *  - every intrinsic event handler attribute (name starting with `on'),
 *  - every attribute whose value is a `javascript:' URL.
 *
 * The 2 conditions are alternatives. Requiring both at once would leave an
 * ordinary handler like onclick="do_evil()" in place.
 * The root element is included as well - it is not matched by
 * querySelectorAll("*"), while e.g. `<svg onload=...>' is a root element.
 */
function apply_intrinsics_sanitizing(root_element)
{
    const carries_script = attribute =>
          /^on/i.test(attribute.localName) ||
          /^\s*javascript:/i.test(attribute.value);

    for (const elem of [root_element, ...root_element.querySelectorAll("*")]) {
        /* Copy the list - block_attribute() modifies it while we iterate. */
        [...elem.attributes]
            .filter(carries_script)
            .forEach(attribute => block_attribute(elem, attribute.localName));
    }
}
246

    
247
/*
 * Prevent scripts of `doc' from running and sanitize its contents according
 * to `policy'. The steps are:
 *  1. temporarily empty the <script>s that are already in the DOM,
 *  2. inject our CSP through a <meta> tag in the document's <head>,
 *  3. detach the root element, wait for its <head> to finish loading, then
 *     sanitize <meta> tags and scripts (and, for non-HTML documents, script
 *     carrying attributes) before re-attaching it,
 *  4. start blocking dangerous `data:' URLs.
 *
 * The returned promise resolves when all of the above has been done.
 */
async function sanitize_document(doc, policy)
{
    /*
     * Blocking of scripts that are in the DOM from the beginning. Needed for
     * Mozilla, harmless on Chromium.
     * Note that at least in SVG documents the `src' attr on `<script>'s seems
     * to be ignored by Firefox, so we don't need to sanitize it.
     */
    for (const script of doc.getElementsByTagName("script")) {
        const old_children = [...script.childNodes];
        script.innerHTML = "";
        setTimeout(() => old_children.forEach(c => script.append(c)), 0);
    }

    /*
     * Ensure our CSP rules are employed from the beginning. This CSP injection
     * method is, when possible, going to be applied together with CSP rules
     * injected using webRequest.
     * The <meta> tag has to land in the <head> that is actually part of the
     * document, so an existing <head> is used when there is one. Only when
     * there is none do we add a temporary one.
     * For non-HTML documents this is just a dummy operation of adding and
     * removing `head'.
     */
    const own_head = doc.head;
    const csp_head = own_head || doc.createElement("head");
    if (!own_head)
        doc.documentElement.prepend(csp_head);

    apply_hachette_csp_rules(doc, csp_head, policy);

    /* Proceed with DOM in its initial state. */
    if (!own_head)
        csp_head.remove();

    /*
     * <html> node gets hijacked now, to be re-attached after <head> is loaded
     * and sanitized.
     */
    const old_html = doc.documentElement;
    const new_html = doc.createElement("html");
    old_html.replaceWith(new_html);

    await wait_for_head(doc, old_html);

    for (const meta of old_html.querySelectorAll("head meta"))
        sanitize_meta(meta, policy);

    for (const script of old_html.querySelectorAll("script"))
        sanitize_script(script);

    if (!(doc instanceof HTMLDocument))
        apply_intrinsics_sanitizing(old_html);

    new_html.replaceWith(old_html);

    /* Scripts are connected again and can safely get their types back. */
    for (const script of old_html.querySelectorAll("script"))
        desanitize_script(script, policy);

    start_data_urls_sanitizing(doc);
}
303

    
304
/*
 * Entry point of the content script. On pages that are not privileged:
 *  1. try to obtain a signed policy - from cookies on http(s) pages, from the
 *     url fragment elsewhere - removing the traces it was smuggled in,
 *  2. fall back to a script-blocking policy when none was found,
 *  3. sanitize the document unless the policy allows scripts,
 *  4. hand over to page actions handling and start the activity info server.
 */
if (!is_privileged_url(document.URL)) {
    let policy_received_callback = () => undefined;
    let policy;

    /* Signature valid for half an hour. */
    const min_time = new Date().getTime() - 1800 * 1000;

    if (/^https?:/.test(document.URL)) {
        let signatures;
        [policy, signatures] = extract_cookie_policy(document.cookie, min_time);
        /* Expire the cookies we have just read. */
        for (const signature of signatures)
            document.cookie = `hachette-${signature}=; Max-Age=-1;`;
    } else {
        const scheme = /^([^:]*)/.exec(document.URL)[1];
        const known_scheme = ["file", "ftp"].includes(scheme);

        if (!known_scheme)
            console.warn(`Unknown url scheme: \`${scheme}'!`);

        let original_url;
        [policy, original_url] = extract_url_policy(document.URL, min_time);
        /* Hide the policy payload from the page's url. */
        history.replaceState(null, "", original_url);

        /*
         * No policy in the url yet - once one is received, it shall be
         * employed through a reload with the policy embedded in the url.
         */
        if (known_scheme && !policy)
            policy_received_callback = employ_nonhttp_policy;
    }

    if (!policy) {
        console.warn("Using fallback policy!");
        policy = {allow: false, nonce: gen_nonce()};
    }

    /* Resolves when the document is both sanitized (if needed) and loaded. */
    const doc_ready = Promise.all([
        policy.allow ? Promise.resolve() : sanitize_document(document, policy),
        new Promise(cb => document.addEventListener("DOMContentLoaded",
                                                    cb, {once: true}))
    ]);

    handle_page_actions(policy.nonce, policy_received_callback, doc_ready);

    start_activity_info_server();
}
(2-2/4)