22 |
22 |
* IMPORTS_END
|
23 |
23 |
*/
|
24 |
24 |
|
|
25 |
document.content_loaded = document.readyState === "complete";
|
|
26 |
const wait_loaded = e => e.content_loaded ? Promise.resolve() :
|
|
27 |
new Promise(c => e.addEventListener("DOMContentLoaded", c, {once: true}));
|
|
28 |
|
|
29 |
wait_loaded(document).then(() => document.content_loaded = true);
|
|
30 |
|
25 |
31 |
function extract_cookie_policy(cookie, min_time)
|
26 |
32 |
{
|
27 |
33 |
let best_result = {time: -1};
|
... | ... | |
86 |
92 |
}
|
87 |
93 |
|
88 |
94 |
/*
|
|
95 |
* In the case of HTML documents:
|
89 |
96 |
* 1. When injecting some payload we need to sanitize <meta> CSP tags before
|
90 |
97 |
* they reach the document.
|
91 |
98 |
* 2. Only <meta> tags inside <head> are considered valid by the browser and
|
92 |
99 |
* need to be considered.
|
93 |
100 |
* 3. We want to detach <html> from document, wait until its <head> completes
|
94 |
101 |
* loading, sanitize it and re-attach <html>.
|
95 |
|
* 4. Browsers are eager to add <meta>'s that appear after `</head>' but before
|
96 |
|
* `<body>'. Due to this behavior the `DOMContentLoaded' event is considered
|
97 |
|
* unreliable (although it could still work properly, it is just problematic
|
98 |
|
* to verify).
|
99 |
|
* 5. We shall wait for anything to appear in or after <body> and take that as
|
100 |
|
* a sign <head> has _really_ finished loading.
|
|
102 |
* 4. We shall wait for anything to appear in or after <body> and take that as
|
|
103 |
* a sign <head> has finished loading.
|
|
104 |
* 5. Otherwise, getting the `DOMContentLoaded' event on the document shall also
|
|
105 |
* be a sign that <head> is fully loaded.
|
101 |
106 |
*/
|
102 |
107 |
|
103 |
108 |
function make_body_start_observer(DOM_element, waiting)
|
... | ... | |
123 |
128 |
|
124 |
129 |
function finish_waiting(waiting)
|
125 |
130 |
{
|
|
131 |
if (waiting.finished)
|
|
132 |
return;
|
|
133 |
waiting.finished = true;
|
126 |
134 |
waiting.observers.forEach(observer => observer.disconnect());
|
127 |
|
waiting.doc.removeEventListener("DOMContentLoaded", waiting.loaded_cb);
|
128 |
135 |
setTimeout(waiting.callback, 0);
|
129 |
136 |
}
|
130 |
137 |
|
... | ... | |
132 |
139 |
{
|
133 |
140 |
const waiting = {doc, detached_html, callback, observers: []};
|
134 |
141 |
|
135 |
|
/*
|
136 |
|
* For XML and SVG documents, instead of waiting for `<head>', we wait
|
137 |
|
* for the entire document to finish loading.
|
138 |
|
*/
|
139 |
|
if (doc instanceof HTMLDocument) {
|
140 |
|
if (try_body_started(waiting))
|
141 |
|
return;
|
|
142 |
if (try_body_started(waiting))
|
|
143 |
return;
|
142 |
144 |
|
143 |
|
waiting.observers = [make_body_start_observer(detached_html, waiting)];
|
144 |
|
}
|
|
145 |
waiting.observers = [make_body_start_observer(detached_html, waiting)];
|
145 |
146 |
|
146 |
|
waiting.loaded_cb = () => finish_waiting(waiting);
|
147 |
|
doc.addEventListener("DOMContentLoaded", waiting.loaded_cb);
|
|
147 |
wait_loaded(doc).then(() => finish_waiting(waiting));
|
148 |
148 |
}
|
149 |
149 |
|
150 |
150 |
function wait_for_head(doc, detached_html)
|
... | ... | |
154 |
154 |
|
155 |
155 |
const blocked_str = "blocked";
|
156 |
156 |
|
157 |
|
function block_attribute(node, attr)
|
|
157 |
function block_attribute(node, attr, ns=null)
|
158 |
158 |
{
|
|
159 |
const [hasa, geta, seta, rema] = ["has", "get", "set", "remove"]
|
|
160 |
.map(m => (n, ...args) => typeof ns === "string" ?
|
|
161 |
n[`${m}AttributeNS`](ns, ...args) : n[`${m}Attribute`](...args));
|
159 |
162 |
/*
|
160 |
|
* Disabling attributes this way allows them to still be relatively
|
161 |
|
* easily accessed in case they contain some useful data.
|
|
163 |
* Disabling attributes by prepending `blocked-' allows them to still be
|
|
164 |
* relatively easily accessed in case they contain some useful data.
|
162 |
165 |
*/
|
163 |
166 |
const construct_name = [attr];
|
164 |
|
while (node.hasAttribute(construct_name.join("")))
|
|
167 |
while (hasa(node, construct_name.join("")))
|
165 |
168 |
construct_name.unshift(blocked_str);
|
166 |
169 |
|
167 |
170 |
while (construct_name.length > 1) {
|
168 |
171 |
construct_name.shift();
|
169 |
172 |
const name = construct_name.join("");
|
170 |
|
node.setAttribute(`${blocked_str}-${name}`, node.getAttribute(name));
|
|
173 |
seta(node, `${blocked_str}-${name}`, geta(node, name));
|
171 |
174 |
}
|
172 |
|
|
173 |
|
node.removeAttribute(attr);
|
174 |
175 |
}
|
175 |
176 |
|
176 |
177 |
function sanitize_meta(meta, policy)
|
177 |
178 |
{
|
178 |
|
const http_equiv = meta.getAttribute("http-equiv");
|
179 |
|
const value = meta.content;
|
|
179 |
const value = meta.content || "";
|
180 |
180 |
|
181 |
|
if (!value || !is_csp_header_name(http_equiv, true))
|
|
181 |
if (!value || !is_csp_header_name(meta.httpEquiv || "", true))
|
182 |
182 |
return;
|
183 |
183 |
|
184 |
184 |
block_attribute(meta, "content");
|
185 |
|
|
186 |
|
if (is_csp_header_name(http_equiv, false))
|
187 |
|
meta.content = sanitize_csp_header({value}, policy).value;
|
188 |
185 |
}
|
189 |
186 |
|
|
187 |
/*
|
|
188 |
* Used to disable <script> that has not yet been added to live DOM (doesn't
|
|
189 |
* work for those already added).
|
|
190 |
*/
|
190 |
191 |
function sanitize_script(script)
|
191 |
192 |
{
|
192 |
|
script.hachette_blocked_type = script.type;
|
|
193 |
script.hachette_blocked_type = script.getAttribute("type");
|
193 |
194 |
script.type = "text/plain";
|
194 |
195 |
}
|
195 |
196 |
|
... | ... | |
201 |
202 |
{
|
202 |
203 |
script.setAttribute("type", script.hachette_blocked_type);
|
203 |
204 |
|
204 |
|
if (script.hachette_blocked_type === undefined)
|
|
205 |
if (script.hachette_blocked_type === null)
|
205 |
206 |
script.removeAttribute("type");
|
206 |
207 |
|
207 |
208 |
delete script.hachette_blocked_type;
|
208 |
209 |
}
|
209 |
210 |
|
210 |
|
function apply_hachette_csp_rules(doc, head, policy)
|
211 |
|
{
|
212 |
|
const meta = doc.createElement("meta");
|
213 |
|
meta.setAttribute("http-equiv", "Content-Security-Policy");
|
214 |
|
meta.setAttribute("content", csp_rule(policy.nonce));
|
215 |
|
head.append(meta);
|
216 |
|
/* CSP is already in effect, we can remove the <meta> now. */
|
217 |
|
meta.remove();
|
218 |
|
}
|
219 |
|
|
|
211 |
const bad_url_reg = /^data:([^,;]*ml|unknown-content-type)/i;
|
220 |
212 |
function sanitize_urls(element)
|
221 |
213 |
{
|
222 |
|
for (const attribute of [...element.attributes]) {
|
223 |
|
if (/^(href|src|data)$/i.test(attribute.localName) &&
|
224 |
|
/^data:([^,;]*ml|unknown-content-type)/i.test(attribute.value))
|
225 |
|
block_attribute(element, attribute.localName);
|
226 |
|
}
|
|
214 |
for (const attr of [...element.attributes || []]
|
|
215 |
.filter(attr => /^(href|src|data)$/i.test(attr.localName))
|
|
216 |
.filter(attr => bad_url_reg.test(attr.value)))
|
|
217 |
block_attribute(element, attr.localName, attr.namespaceURI);
|
227 |
218 |
}
|
228 |
219 |
|
229 |
220 |
function start_data_urls_sanitizing(doc)
|
230 |
221 |
{
|
231 |
222 |
doc.querySelectorAll("*[href], *[src], *[data]").forEach(sanitize_urls);
|
232 |
|
const mutation_handler = m => m.addedNodes.forEach(sanitize_urls);
|
233 |
|
const mo = new MutationObserver(ms => ms.forEach(mutation_handler));
|
234 |
|
mo.observe(doc, {childList: true, subtree: true});
|
|
223 |
if (!doc.content_loaded) {
|
|
224 |
const mutation_handler = m => m.addedNodes.forEach(sanitize_urls);
|
|
225 |
const mo = new MutationObserver(ms => ms.forEach(mutation_handler));
|
|
226 |
mo.observe(doc, {childList: true, subtree: true});
|
|
227 |
wait_loaded(doc).then(() => mo.disconnect());
|
|
228 |
}
|
235 |
229 |
}
|
236 |
230 |
|
237 |
|
function apply_intrinsics_sanitizing(root_element)
|
|
231 |
/*
|
|
232 |
* Normally, we block scripts with CSP. However, Mozilla does optimizations that
|
|
233 |
* cause part of the DOM to be already loaded when our content scripts run. Thus,
|
|
234 |
* before the CSP rules we inject (for non-HTTP pages) become effective, we need
|
|
235 |
* to somehow block the execution of `<script>'s and intrinsics that were
|
|
236 |
* already there.
|
|
237 |
*/
|
|
238 |
function mozilla_initial_block(doc)
|
238 |
239 |
{
|
239 |
|
for (const subelem of root_element.querySelectorAll("*")) {
|
240 |
|
[...subelem.attributes]
|
241 |
|
.filter(a => /^on/i.test(a.localName))
|
242 |
|
.filter(a => /^javascript:/i.test(a.value))
|
243 |
|
.forEach(a => block_attribute(subelem, a.localName));
|
244 |
|
}
|
|
240 |
const blocker = e => e.preventDefault();
|
|
241 |
doc.addEventListener("beforescriptexecute", blocker);
|
|
242 |
setTimeout(() => doc.removeEventListener("beforescriptexecute", blocker));
|
|
243 |
|
|
244 |
[...doc.all].flatMap(ele => [...ele.attributes].map(attr => [ele, attr]))
|
|
245 |
.map(([ele, attr]) => [ele, attr.localName])
|
|
246 |
.filter(([ele, attr]) => /^on/.test(attr) && ele.wrappedJSObject[attr])
|
|
247 |
.forEach(([ele, attr]) => ele.wrappedJSObject[attr] = null);
|
245 |
248 |
}
|
246 |
249 |
|
|
250 |
/*
|
|
251 |
* Here we block all scripts of a document which might be either an
|
|
252 |
* HTMLDocument or an XMLDocument. Modifying an XML document might disrupt
|
|
253 |
* Mozilla's XML preview. This is an unfortunate thing we have to accept for
|
|
254 |
* now. XML documents *have to* be sanitized as well because they might
|
|
255 |
* contain `<script>' tags (or on* attributes) with namespace declared as
|
|
256 |
* "http://www.w3.org/1999/xhtml" or "http://www.w3.org/2000/svg" which allows
|
|
257 |
* javascript execution.
|
|
258 |
*/
|
247 |
259 |
async function sanitize_document(doc, policy)
|
248 |
260 |
{
|
249 |
261 |
/*
|
250 |
262 |
* Blocking of scripts that are in the DOM from the beginning. Needed for
|
251 |
|
* Mozilla, harmless on Chromium.
|
252 |
|
* Note that at least in SVG documents the `src' attr on `<script>'s seems
|
253 |
|
* to be ignored by Firefox, so we don't need to sanitize it.
|
|
263 |
* Mozilla.
|
254 |
264 |
*/
|
255 |
|
for (const script of document.getElementsByTagName("script")) {
|
256 |
|
const old_children = [...script.childNodes];
|
257 |
|
script.innerHTML = "";
|
258 |
|
setTimeout(() => old_children.forEach(c => script.append(c)), 0);
|
259 |
|
}
|
|
265 |
if (is_mozilla)
|
|
266 |
mozilla_initial_block(doc);
|
260 |
267 |
|
261 |
268 |
/*
|
262 |
269 |
* Ensure our CSP rules are employed from the beginning. This CSP injection
|
263 |
270 |
* method is, when possible, going to be applied together with CSP rules
|
264 |
271 |
* injected using webRequest.
|
265 |
|
* For non-HTML documents this is just a dummy operation of adding and
|
266 |
|
* removing `head'.
|
|
272 |
* Using elements namespaced as HTML makes this CSP injection also work for
|
|
273 |
* non-HTML documents.
|
267 |
274 |
*/
|
268 |
|
let added_head = doc.createElement("head");
|
269 |
|
if (!doc.head)
|
270 |
|
doc.documentElement.prepend(added_head);
|
271 |
|
|
272 |
|
apply_hachette_csp_rules(doc, added_head, policy);
|
273 |
|
|
274 |
|
/* Proceed with DOM in its initial state. */
|
275 |
|
added_head.remove();
|
|
275 |
const html = new DOMParser().parseFromString(`<html><head><meta \
|
|
276 |
http-equiv="Content-Security-Policy" content="${csp_rule(policy.nonce)}"\
|
|
277 |
/></head><body>Loading...</body></html>`, "text/html").documentElement;
|
276 |
278 |
|
277 |
279 |
/*
|
278 |
|
* <html> node gets hijacked now, to be re-attached after <head> is loaded
|
|
280 |
* Root node gets hijacked now, to be re-attached after <head> is loaded
|
279 |
281 |
* and sanitized.
|
280 |
282 |
*/
|
281 |
|
const old_html = doc.documentElement;
|
282 |
|
const new_html = doc.createElement("html");
|
283 |
|
old_html.replaceWith(new_html);
|
|
283 |
const root = doc.documentElement;
|
|
284 |
root.replaceWith(html);
|
284 |
285 |
|
285 |
|
await wait_for_head(doc, old_html);
|
286 |
|
|
287 |
|
for (const meta of old_html.querySelectorAll("head meta"))
|
288 |
|
sanitize_meta(meta, policy);
|
289 |
|
|
290 |
|
for (const script of old_html.querySelectorAll("script"))
|
291 |
|
sanitize_script(script, policy);
|
292 |
|
|
293 |
|
if (!(doc instanceof HTMLDocument))
|
294 |
|
apply_intrinsics_sanitizing(old_html);
|
|
286 |
/*
|
|
287 |
* For XML documents, we don't intend to inject payload, so we neither block
|
|
288 |
* document's CSP `<meta>' tags nor wait for `<head>' to be parsed.
|
|
289 |
*/
|
|
290 |
if (document instanceof HTMLDocument) {
|
|
291 |
await wait_for_head(doc, root);
|
295 |
292 |
|
296 |
|
new_html.replaceWith(old_html);
|
|
293 |
root.querySelectorAll("head meta")
|
|
294 |
.forEach(m => sanitize_meta(m, policy));
|
|
295 |
}
|
297 |
296 |
|
298 |
|
for (const script of old_html.querySelectorAll("script"))
|
299 |
|
desanitize_script(script, policy);
|
|
297 |
root.querySelectorAll("script").forEach(s => sanitize_script(s, policy));
|
|
298 |
html.replaceWith(root);
|
|
299 |
root.querySelectorAll("script").forEach(s => desanitize_script(s, policy));
|
300 |
300 |
|
301 |
301 |
start_data_urls_sanitizing(doc);
|
302 |
302 |
}
|
... | ... | |
329 |
329 |
}
|
330 |
330 |
|
331 |
331 |
if (!policy) {
|
332 |
|
console.warn("Using fallback policy!");
|
|
332 |
console.debug("Using fallback policy!");
|
333 |
333 |
policy = {allow: false, nonce: gen_nonce()};
|
334 |
334 |
}
|
335 |
335 |
|
|
336 |
console.debug("current policy", policy);
|
|
337 |
|
336 |
338 |
const doc_ready = Promise.all([
|
337 |
|
policy.allow ? Promise.resolve : sanitize_document(document, policy),
|
338 |
|
new Promise(cb => document.addEventListener("DOMContentLoaded",
|
339 |
|
cb, {once: true}))
|
|
339 |
policy.allow ? Promise.resolve() : sanitize_document(document, policy),
|
|
340 |
wait_loaded(document)
|
340 |
341 |
]);
|
341 |
342 |
|
342 |
343 |
handle_page_actions(policy.nonce, policy_received_callback, doc_ready);
|
Fix sanitizing of non-HTML XMLDocument's