1
|
/**
|
2
|
* Hachette main content script run in all frames
|
3
|
*
|
4
|
* Copyright (C) 2021 Wojtek Kosior
|
5
|
* Copyright (C) 2021 jahoti
|
6
|
* Redistribution terms are gathered in the `copyright' file.
|
7
|
*/
|
8
|
|
9
|
/*
|
10
|
* IMPORTS_START
|
11
|
* IMPORT handle_page_actions
|
12
|
* IMPORT extract_signed
|
13
|
* IMPORT sign_data
|
14
|
* IMPORT gen_nonce
|
15
|
* IMPORT is_privileged_url
|
16
|
* IMPORT mozilla_suppress_scripts
|
17
|
* IMPORT is_chrome
|
18
|
* IMPORT is_mozilla
|
19
|
* IMPORT start_activity_info_server
|
20
|
* IMPORT csp_rule
|
21
|
* IMPORT is_csp_header_name
|
22
|
* IMPORT sanitize_csp_header
|
23
|
* IMPORTS_END
|
24
|
*/
|
25
|
|
26
|
/*
 * Scan `cookie` for hachette-* entries, verify their signatures and pick
 * the newest policy (no older than `min_time`) that belongs to the current
 * document.  Returns a two-element array: the chosen policy (or null) and
 * the names of all validly-signed entries found, so the caller can clear
 * the corresponding cookies.
 */
function extract_cookie_policy(cookie, min_time)
{
    const extracted_signatures = [];
    let policy = null;
    let best_time = -1;

    for (const match of cookie.matchAll(/hachette-(\w*)=([^;]*)/g)) {
	const candidate = extract_signed(...match.slice(1, 3));
	if (candidate.fail)
	    continue;

	extracted_signatures.push(match[1]);

	/* Keep only the newest candidate that is not expired. */
	if (candidate.time < Math.max(min_time, best_time))
	    continue;

	/* This should succeed - it's our self-produced valid JSON. */
	const parsed = JSON.parse(decodeURIComponent(candidate.data));
	if (parsed.url !== document.URL)
	    continue;

	best_time = candidate.time;
	policy = parsed;
    }

    return [policy, extracted_signatures];
}
|
53
|
|
54
|
/*
 * Extract a signed Hachette policy smuggled in `url`'s fragment.  Returns a
 * two-element array: the policy (or null when absent, invalid, expired or
 * belonging to a different URL) and the original URL with the payload
 * stripped from its fragment.
 */
function extract_url_policy(url, min_time)
{
    const [base_url, payload, anchor] =
	  /^([^#]*)#?([^#]*)(#?.*)$/.exec(url).splice(1, 4);

    const match = /^hachette_([^_]+)_(.*)$/.exec(payload);
    if (!match)
	return [null, url];

    const result = extract_signed(...match.slice(1, 3));
    if (result.fail)
	return [null, url];

    const original_url = base_url + anchor;

    /*
     * Fix: reject expired signatures with an early return.  The original
     * set `policy` to null here and then unconditionally dereferenced
     * `policy.url` below, throwing a TypeError for expired payloads.
     */
    if (result.time < min_time)
	return [null, original_url];

    /* This should succeed - it's our self-produced valid JSON. */
    const policy = JSON.parse(decodeURIComponent(result.data));

    return [policy.url === original_url ? policy : null, original_url];
}
|
73
|
|
74
|
/*
 * Persist `policy` across a non-HTTP page load by serializing it (signed,
 * with a fresh nonce) into the URL fragment and reloading the page.
 * Policies that block scripts need no such persistence and are ignored.
 */
function employ_nonhttp_policy(policy)
{
    if (!policy.allow)
	return;

    policy.nonce = gen_nonce();

    const url_match = /^([^#]*)(#?.*)$/.exec(policy.url);
    const [base_url, target] = url_match.slice(1, 3);

    const encoded_policy = encodeURIComponent(JSON.stringify(policy));
    const signed_parts = sign_data(encoded_policy, new Date().getTime());
    const payload = "hachette_" + signed_parts.join("_");

    location.href = `${base_url}#${payload}${target}`;
    location.reload();
}
|
88
|
|
89
|
/*
 * 1. When injecting some payload we need to sanitize <meta> CSP tags before
 *    they reach the document.
 * 2. Only <meta> tags inside <head> are considered valid by the browser and
 *    hence only those need sanitizing.
 * 3. We want to detach <html> from document, wait until its <head> completes
 *    loading, sanitize it and re-attach <html>.
 * 4. Browsers are eager to add <meta>'s that appear after `</head>' but before
 *    `<body>'. Due to this behavior the `DOMContentLoaded' event is considered
 *    unreliable (although it could still work properly, it is just problematic
 *    to verify).
 * 5. We shall wait for anything to appear in or after <body> and take that as
 *    a sign <head> has _really_ finished loading.
 */
|
103
|
|
104
|
/*
 * Create and start a MutationObserver that re-checks (via try_body_started())
 * whether <body> content has appeared whenever direct children of
 * `DOM_element` change.  Returns the observer so it can later be disconnected.
 */
function make_body_start_observer(DOM_element, waiting)
{
    const recheck = () => try_body_started(waiting);
    const child_observer = new MutationObserver(recheck);
    child_observer.observe(DOM_element, {childList: true});
    return child_observer;
}
|
110
|
|
111
|
/*
 * Check whether loading has progressed into (or past) <body>, which we treat
 * as proof that <head> has really finished loading.  If so, finish waiting
 * and return true.  Otherwise, once an (empty) <body> exists, attach a second
 * observer to it so we notice its first child.
 */
function try_body_started(waiting)
{
    const body = waiting.detached_html.querySelector("body");
    const past_html = waiting.doc.documentElement.nextSibling;

    if (past_html || (body && (body.firstChild || body.nextSibling))) {
	finish_waiting(waiting);
	return true;
    }

    /* At most two observers: one on <html>, one on <body>. */
    if (body && waiting.observers.length < 2)
	waiting.observers.push(make_body_start_observer(body, waiting));
}
|
124
|
|
125
|
/*
 * Tear down all the machinery set up while waiting for <head>: disconnect
 * every observer, drop the `DOMContentLoaded' listener and schedule the
 * waiter's callback asynchronously.
 */
function finish_waiting(waiting)
{
    for (const observer of waiting.observers)
	observer.disconnect();

    waiting.doc.removeEventListener("DOMContentLoaded", waiting.loaded_cb);

    /* Run the callback from a clean call stack. */
    setTimeout(waiting.callback, 0);
}
|
131
|
|
132
|
/*
 * Arrange for `callback` to fire once the <head> of `detached_html`
 * (hijacked from `doc`) can be considered fully loaded.  Uses mutation
 * observers plus a `DOMContentLoaded' listener as a backstop.
 */
function _wait_for_head(doc, detached_html, callback)
{
    const waiting = {doc, detached_html, callback, observers: []};
    if (try_body_started(waiting))
	return;

    /*
     * Fix: push instead of reassigning the array.  try_body_started() may
     * already have registered a <body> observer in `waiting.observers`;
     * replacing the array would leak that observer (finish_waiting() could
     * then never disconnect it).
     */
    waiting.observers.push(make_body_start_observer(detached_html, waiting));
    waiting.loaded_cb = () => finish_waiting(waiting);
    doc.addEventListener("DOMContentLoaded", waiting.loaded_cb);
}
|
142
|
|
143
|
/*
 * Promise-returning wrapper around _wait_for_head(): resolves once the
 * <head> of `detached_html` has finished loading.
 */
function wait_for_head(doc, detached_html)
{
    const executor = resolve => _wait_for_head(doc, detached_html, resolve);
    return new Promise(executor);
}
|
147
|
|
148
|
/* Prefix used when renaming neutralized attributes. */
const blocked_str = "blocked";

/*
 * Neutralize attribute `attr` of `node` by moving it (and any previously
 * blocked variants) under a "blocked-"-prefixed name instead of erasing it.
 * Disabling attributes this way allows them to still be relatively easily
 * accessed in case they contain some useful data.
 */
function block_attribute(node, attr)
{
    /*
     * Prepend `blocked` until the concatenated name no longer clashes with
     * an existing attribute.
     * NOTE(review): the clash check joins parts without a dash while the
     * names written below contain one ("blocked-...") - looks inconsistent;
     * verify the original intent before changing it.
     */
    const name_parts = [attr];
    while (node.hasAttribute(name_parts.join("")))
	name_parts.unshift(blocked_str);

    /* Re-save each clashing attribute, longest name first. */
    for (name_parts.shift(); name_parts.length > 0; name_parts.shift()) {
	const name = name_parts.join("");
	node.setAttribute(`${blocked_str}-${name}`, node.getAttribute(name));
    }

    node.removeAttribute(attr);
}
|
168
|
|
169
|
/*
 * Neutralize a <meta> tag that carries a CSP header: stash its `content'
 * attribute away with block_attribute() and, when the header kind permits,
 * put back a version sanitized against `policy`.
 */
function sanitize_meta(meta, policy)
{
    const header_name = meta.getAttribute("http-equiv");
    const value = meta.content;

    /* Nothing to do for non-CSP <meta>'s or ones with empty content. */
    if (!value || !is_csp_header_name(header_name, true))
	return;

    block_attribute(meta, "content");

    if (is_csp_header_name(header_name, false))
	meta.content = sanitize_csp_header({value}, policy).value;
}
|
182
|
|
183
|
/*
 * Stop a <script> from being executed by giving it a non-executable type,
 * remembering the original type so desanitize_script() can restore it.
 */
function sanitize_script(script)
{
    const original_type = script.type;
    script.type = "text/plain";
    script.hachette_blocked_type = original_type;
}
|
188
|
|
189
|
/*
 * Executed after script has been connected to the DOM, when it is no longer
 * eligible for being executed by the browser.  Restores the type saved by
 * sanitize_script() (removing the attribute entirely when there was none)
 * and drops the bookkeeping property.
 */
function desanitize_script(script, policy)
{
    const saved_type = script.hachette_blocked_type;
    delete script.hachette_blocked_type;

    script.setAttribute("type", saved_type);
    if (saved_type === undefined)
	script.removeAttribute("type");
}
|
202
|
|
203
|
/*
 * Inject our nonce-based CSP rule into `doc` through a transient
 * `http-equiv' <meta> element in <head>.
 */
function apply_hachette_csp_rules(doc, policy)
{
    const csp_meta = doc.createElement("meta");
    csp_meta.setAttribute("http-equiv", "Content-Security-Policy");
    csp_meta.setAttribute("content", csp_rule(policy.nonce));
    doc.head.append(csp_meta);

    /* CSP is already in effect, we can remove the <meta> now. */
    csp_meta.remove();
}
|
212
|
|
213
|
/*
 * Keep `doc` from executing its own scripts under `policy`: inject our CSP
 * rule, hold <html> off the document until its <head> has fully loaded, then
 * neutralize <meta> CSP tags and <script> elements before re-attaching it.
 * The exact order of operations below is deliberate - it races the HTML
 * parser - so change it only with great care.
 */
async function sanitize_document(doc, policy)
{
    /*
     * Ensure our CSP rules are employed from the beginning. This CSP injection
     * method is, when possible, going to be applied together with CSP rules
     * injected using webRequest.
     */
    /* <meta> injection needs a <head>; create a temporary one if missing. */
    const has_own_head = doc.head;
    if (!has_own_head)
	doc.documentElement.prepend(doc.createElement("head"));

    apply_hachette_csp_rules(doc, policy);

    /* Probably not needed, but...: proceed with DOM in its initial state. */
    if (!has_own_head)
	doc.head.remove();

    /*
     * <html> node gets hijacked now, to be re-attached after <head> is loaded
     * and sanitized.  The parser keeps appending to the detached old_html.
     */
    const old_html = doc.documentElement;
    const new_html = doc.createElement("html");
    old_html.replaceWith(new_html);

    await wait_for_head(doc, old_html);

    /* Defuse CSP <meta>'s before they can reach the live document. */
    for (const meta of old_html.querySelectorAll("head meta"))
	sanitize_meta(meta, policy);

    /* Make every script non-executable before re-attachment... */
    for (const script of old_html.querySelectorAll("script"))
	sanitize_script(script, policy);

    new_html.replaceWith(old_html);

    /* ...and restore their types once the browser can no longer run them. */
    for (const script of old_html.querySelectorAll("script"))
	desanitize_script(script, policy);
}
|
251
|
|
252
|
/*
 * Main entry point, run in every non-privileged frame: recover the policy
 * smuggled to this page load (via cookie for http(s) pages, via the URL
 * fragment otherwise), fall back to a blocking policy when none is found,
 * sanitize the document accordingly and start the page-action machinery.
 */
if (!is_privileged_url(document.URL)) {
    /* For known non-HTTP schemes this gets replaced below. */
    let policy_received_callback = () => undefined;
    let policy;

    /* Signature valid for half an hour. */
    const min_time = new Date().getTime() - 1800 * 1000;

    if (/^https?:/.test(document.URL)) {
	let signatures;
	[policy, signatures] = extract_cookie_policy(document.cookie, min_time);
	/* The smuggling cookies served their purpose - clear them. */
	for (const signature of signatures)
	    document.cookie = `hachette-${signature}=; Max-Age=-1;`;
    } else {
	const scheme = /^([^:]*)/.exec(document.URL)[1];
	const known_scheme = ["file", "ftp"].includes(scheme);

	if (!known_scheme)
	    console.warn(`Unknown url scheme: \`${scheme}'!`);

	let original_url;
	[policy, original_url] = extract_url_policy(document.URL, min_time);
	/* Hide the policy payload from the address bar and the page. */
	history.replaceState(null, "", original_url);

	if (known_scheme && !policy)
	    policy_received_callback = employ_nonhttp_policy;
    }

    if (!policy) {
	console.warn("Using fallback policy!");
	policy = {allow: false, nonce: gen_nonce()};
    }

    /*
     * Resolves once both sanitization (when scripts are blocked) and
     * `DOMContentLoaded' have completed.
     * Fix: call Promise.resolve() - the original passed the bare function
     * reference, which Promise.all only accepted by accident (non-thenables
     * are wrapped as already-resolved values).
     */
    const doc_ready = Promise.all([
	policy.allow ? Promise.resolve() : sanitize_document(document, policy),
	new Promise(cb => document.addEventListener("DOMContentLoaded",
						    cb, {once: true}))
    ]);

    handle_page_actions(policy.nonce, policy_received_callback, doc_ready);

    start_activity_info_server();
}
|