Revision e2d26bad
Added by koszko almost 2 years ago
content/main.js | ||
---|---|---|
22 | 22 |
* IMPORTS_END |
23 | 23 |
*/ |
24 | 24 |
|
25 |
document.content_loaded = document.readyState === "complete"; |
|
26 |
const wait_loaded = e => e.content_loaded ? Promise.resolve() : |
|
27 |
new Promise(c => e.addEventListener("DOMContentLoaded", c, {once: true})); |
|
28 |
|
|
29 |
wait_loaded(document).then(() => document.content_loaded = true); |
|
30 |
|
|
25 | 31 |
function extract_cookie_policy(cookie, min_time) |
26 | 32 |
{ |
27 | 33 |
let best_result = {time: -1}; |
... | ... | |
86 | 92 |
} |
87 | 93 |
|
88 | 94 |
/* |
95 |
* In the case of HTML documents: |
|
89 | 96 |
* 1. When injecting some payload we need to sanitize <meta> CSP tags before |
90 | 97 |
* they reach the document. |
91 | 98 |
* 2. Only <meta> tags inside <head> are considered valid by the browser and |
92 | 99 |
* need to be considered. |
93 | 100 |
* 3. We want to detach <html> from document, wait until its <head> completes |
94 | 101 |
* loading, sanitize it and re-attach <html>. |
95 |
* 4. Browsers are eager to add <meta>'s that appear after `</head>' but before |
|
96 |
* `<body>'. Due to this behavior the `DOMContentLoaded' event is considered |
|
97 |
* unreliable (although it could still work properly, it is just problematic |
|
98 |
* to verify). |
|
99 |
* 5. We shall wait for anything to appear in or after <body> and take that as |
|
100 |
* a sign <head> has _really_ finished loading. |
|
102 |
* 4. We shall wait for anything to appear in or after <body> and take that as |
|
103 |
* a sign <head> has finished loading. |
|
104 |
* 5. Otherwise, getting the `DOMContentLoaded' event on the document shall also |
|
105 |
* be a sign that <head> is fully loaded. |
|
101 | 106 |
*/ |
102 | 107 |
|
103 | 108 |
function make_body_start_observer(DOM_element, waiting) |
... | ... | |
123 | 128 |
|
124 | 129 |
function finish_waiting(waiting) |
125 | 130 |
{ |
131 |
if (waiting.finished) |
|
132 |
return; |
|
133 |
waiting.finished = true; |
|
126 | 134 |
waiting.observers.forEach(observer => observer.disconnect()); |
127 |
waiting.doc.removeEventListener("DOMContentLoaded", waiting.loaded_cb); |
|
128 | 135 |
setTimeout(waiting.callback, 0); |
129 | 136 |
} |
130 | 137 |
|
... | ... | |
132 | 139 |
{ |
133 | 140 |
const waiting = {doc, detached_html, callback, observers: []}; |
134 | 141 |
|
135 |
/* |
|
136 |
* For XML and SVG documents, instead of waiting for `<head>', we wait |
|
137 |
* for the entire document to finish loading. |
|
138 |
*/ |
|
139 |
if (doc instanceof HTMLDocument) { |
|
140 |
if (try_body_started(waiting)) |
|
141 |
return; |
|
142 |
if (try_body_started(waiting)) |
|
143 |
return; |
|
142 | 144 |
|
143 |
waiting.observers = [make_body_start_observer(detached_html, waiting)]; |
|
144 |
} |
|
145 |
waiting.observers = [make_body_start_observer(detached_html, waiting)]; |
|
145 | 146 |
|
146 |
waiting.loaded_cb = () => finish_waiting(waiting); |
|
147 |
doc.addEventListener("DOMContentLoaded", waiting.loaded_cb); |
|
147 |
wait_loaded(doc).then(() => finish_waiting(waiting)); |
|
148 | 148 |
} |
149 | 149 |
|
150 | 150 |
function wait_for_head(doc, detached_html) |
... | ... | |
154 | 154 |
|
155 | 155 |
const blocked_str = "blocked"; |
156 | 156 |
|
157 |
function block_attribute(node, attr) |
|
157 |
function block_attribute(node, attr, ns=null)
|
|
158 | 158 |
{ |
159 |
const [hasa, geta, seta, rema] = ["has", "get", "set", "remove"] |
|
160 |
.map(m => (n, ...args) => typeof ns === "string" ? |
|
161 |
n[`${m}AttributeNS`](ns, ...args) : n[`${m}Attribute`](...args)); |
|
159 | 162 |
/* |
160 |
* Disabling attributes this way allows them to still be relatively
|
|
161 |
* easily accessed in case they contain some useful data. |
|
163 |
* Disabling attributes by prepending `blocked-' allows them to still be
|
|
164 |
* relatively easily accessed in case they contain some useful data.
|
|
162 | 165 |
*/ |
163 | 166 |
const construct_name = [attr]; |
164 |
while (node.hasAttribute(construct_name.join("")))
|
|
167 |
while (hasa(node, construct_name.join("")))
|
|
165 | 168 |
construct_name.unshift(blocked_str); |
166 | 169 |
|
167 | 170 |
while (construct_name.length > 1) { |
168 | 171 |
construct_name.shift(); |
169 | 172 |
const name = construct_name.join(""); |
170 |
node.setAttribute(`${blocked_str}-${name}`, node.getAttribute(name));
|
|
173 |
seta(node, `${blocked_str}-${name}`, geta(node, name));
|
|
171 | 174 |
} |
172 |
|
|
173 |
node.removeAttribute(attr); |
|
174 | 175 |
} |
175 | 176 |
|
176 | 177 |
function sanitize_meta(meta, policy) |
177 | 178 |
{ |
178 |
const http_equiv = meta.getAttribute("http-equiv"); |
|
179 |
const value = meta.content; |
|
179 |
const value = meta.content || ""; |
|
180 | 180 |
|
181 |
if (!value || !is_csp_header_name(http_equiv, true))
|
|
181 |
if (!value || !is_csp_header_name(meta.httpEquiv || "", true))
|
|
182 | 182 |
return; |
183 | 183 |
|
184 | 184 |
block_attribute(meta, "content"); |
185 |
|
|
186 |
if (is_csp_header_name(http_equiv, false)) |
|
187 |
meta.content = sanitize_csp_header({value}, policy).value; |
|
188 | 185 |
} |
189 | 186 |
|
187 |
/* |
|
188 |
* Used to disable <script> that has not yet been added to live DOM (doesn't |
|
189 |
* work for those already added). |
|
190 |
*/ |
|
190 | 191 |
function sanitize_script(script) |
191 | 192 |
{ |
192 |
script.hachette_blocked_type = script.type;
|
|
193 |
script.hachette_blocked_type = script.getAttribute("type");
|
|
193 | 194 |
script.type = "text/plain"; |
194 | 195 |
} |
195 | 196 |
|
... | ... | |
201 | 202 |
{ |
202 | 203 |
script.setAttribute("type", script.hachette_blocked_type); |
203 | 204 |
|
204 |
if (script.hachette_blocked_type === undefined)
|
|
205 |
if (script.hachette_blocked_type === null)
|
|
205 | 206 |
script.removeAttribute("type"); |
206 | 207 |
|
207 | 208 |
delete script.hachette_blocked_type; |
208 | 209 |
} |
209 | 210 |
|
210 |
function apply_hachette_csp_rules(doc, head, policy) |
|
211 |
{ |
|
212 |
const meta = doc.createElement("meta"); |
|
213 |
meta.setAttribute("http-equiv", "Content-Security-Policy"); |
|
214 |
meta.setAttribute("content", csp_rule(policy.nonce)); |
|
215 |
head.append(meta); |
|
216 |
/* CSP is already in effect, we can remove the <meta> now. */ |
|
217 |
meta.remove(); |
|
218 |
} |
|
219 |
|
|
211 |
const bad_url_reg = /^data:([^,;]*ml|unknown-content-type)/i; |
|
220 | 212 |
function sanitize_urls(element) |
221 | 213 |
{ |
222 |
for (const attribute of [...element.attributes]) { |
|
223 |
if (/^(href|src|data)$/i.test(attribute.localName) && |
|
224 |
/^data:([^,;]*ml|unknown-content-type)/i.test(attribute.value)) |
|
225 |
block_attribute(element, attribute.localName); |
|
226 |
} |
|
214 |
for (const attr of [...element.attributes || []] |
|
215 |
.filter(attr => /^(href|src|data)$/i.test(attr.localName)) |
|
216 |
.filter(attr => bad_url_reg.test(attr.value))) |
|
217 |
block_attribute(element, attr.localName, attr.namespaceURI); |
|
227 | 218 |
} |
228 | 219 |
|
229 | 220 |
function start_data_urls_sanitizing(doc) |
230 | 221 |
{ |
231 | 222 |
doc.querySelectorAll("*[href], *[src], *[data]").forEach(sanitize_urls); |
232 |
const mutation_handler = m => m.addedNodes.forEach(sanitize_urls); |
|
233 |
const mo = new MutationObserver(ms => ms.forEach(mutation_handler)); |
|
234 |
mo.observe(doc, {childList: true, subtree: true}); |
|
223 |
if (!doc.content_loaded) { |
|
224 |
const mutation_handler = m => m.addedNodes.forEach(sanitize_urls); |
|
225 |
const mo = new MutationObserver(ms => ms.forEach(mutation_handler)); |
|
226 |
mo.observe(doc, {childList: true, subtree: true}); |
|
227 |
wait_loaded(doc).then(() => mo.disconnect()); |
|
228 |
} |
|
235 | 229 |
} |
236 | 230 |
|
237 |
function apply_intrinsics_sanitizing(root_element) |
|
231 |
/* |
|
232 |
* Normally, we block scripts with CSP. However, Mozilla does optimizations that |
|
233 |
* cause part of the DOM to be loaded when our content scripts get to run. Thus, |
|
234 |
* before the CSP rules we inject (for non-HTTP pages) become effective, we need |
|
235 |
* to somehow block the execution of `<script>'s and intrinsics that were |
|
236 |
* already there. |
|
237 |
*/ |
|
238 |
function mozilla_initial_block(doc) |
|
238 | 239 |
{ |
239 |
for (const subelem of root_element.querySelectorAll("*")) { |
|
240 |
[...subelem.attributes] |
|
241 |
.filter(a => /^on/i.test(a.localName)) |
|
242 |
.filter(a => /^javascript:/i.test(a.value)) |
|
243 |
.forEach(a => block_attribute(subelem, a.localName)); |
|
244 |
} |
|
240 |
const blocker = e => e.preventDefault(); |
|
241 |
doc.addEventListener("beforescriptexecute", blocker); |
|
242 |
setTimeout(() => doc.removeEventListener("beforescriptexecute", blocker)); |
|
243 |
|
|
244 |
[...doc.all].flatMap(ele => [...ele.attributes].map(attr => [ele, attr])) |
|
245 |
.map(([ele, attr]) => [ele, attr.localName]) |
|
246 |
.filter(([ele, attr]) => /^on/.test(attr) && ele.wrappedJSObject[attr]) |
|
247 |
.forEach(([ele, attr]) => ele.wrappedJSObject[attr] = null); |
|
245 | 248 |
} |
246 | 249 |
|
250 |
/* |
|
251 |
* Here we block all scripts of a document which might be either an |
|
252 |
* HTMLDocument or an XMLDocument. Modifying an XML document might disrupt |
|
253 |
* Mozilla's XML preview. This is an unfortunate thing we have to accept for |
|
254 |
* now. XML documents *have to* be sanitized as well because they might |
|
255 |
* contain `<script>' tags (or on* attributes) with namespace declared as |
|
256 |
* "http://www.w3.org/1999/xhtml" or "http://www.w3.org/2000/svg" which allows |
|
257 |
* javascript execution. |
|
258 |
*/ |
|
247 | 259 |
async function sanitize_document(doc, policy) |
248 | 260 |
{ |
249 | 261 |
/* |
250 | 262 |
* Blocking of scripts that are in the DOM from the beginning. Needed for |
251 |
* Mozilla, harmless on Chromium. |
|
252 |
* Note that at least in SVG documents the `src' attr on `<script>'s seems |
|
253 |
* to be ignored by Firefox, so we don't need to sanitize it. |
|
263 |
* Mozilla. |
|
254 | 264 |
*/ |
255 |
for (const script of document.getElementsByTagName("script")) { |
|
256 |
const old_children = [...script.childNodes]; |
|
257 |
script.innerHTML = ""; |
|
258 |
setTimeout(() => old_children.forEach(c => script.append(c)), 0); |
|
259 |
} |
|
265 |
if (is_mozilla) |
|
266 |
mozilla_initial_block(doc); |
|
260 | 267 |
|
261 | 268 |
/* |
262 | 269 |
* Ensure our CSP rules are employed from the beginning. This CSP injection |
263 | 270 |
* method is, when possible, going to be applied together with CSP rules |
264 | 271 |
* injected using webRequest. |
265 |
* For non-HTML documents this is just a dummy operation of adding and
|
|
266 |
* removing `head'.
|
|
272 |
* Using elements namespaced as HTML makes this CSP injection also work for
|
|
273 |
* non-HTML documents.
|
|
267 | 274 |
*/ |
268 |
let added_head = doc.createElement("head"); |
|
269 |
if (!doc.head) |
|
270 |
doc.documentElement.prepend(added_head); |
|
271 |
|
|
272 |
apply_hachette_csp_rules(doc, added_head, policy); |
|
273 |
|
|
274 |
/* Proceed with DOM in its initial state. */ |
|
275 |
added_head.remove(); |
|
275 |
const html = new DOMParser().parseFromString(`<html><head><meta \ |
|
276 |
http-equiv="Content-Security-Policy" content="${csp_rule(policy.nonce)}"\ |
|
277 |
/></head><body>Loading...</body></html>`, "text/html").documentElement; |
|
276 | 278 |
|
277 | 279 |
/* |
278 |
* <html> node gets hijacked now, to be re-attached after <head> is loaded
|
|
280 |
* Root node gets hijacked now, to be re-attached after <head> is loaded
|
|
279 | 281 |
* and sanitized. |
280 | 282 |
*/ |
281 |
const old_html = doc.documentElement; |
|
282 |
const new_html = doc.createElement("html"); |
|
283 |
old_html.replaceWith(new_html); |
|
283 |
const root = doc.documentElement; |
|
284 |
root.replaceWith(html); |
|
284 | 285 |
|
285 |
await wait_for_head(doc, old_html); |
|
286 |
|
|
287 |
for (const meta of old_html.querySelectorAll("head meta")) |
|
288 |
sanitize_meta(meta, policy); |
|
289 |
|
|
290 |
for (const script of old_html.querySelectorAll("script")) |
|
291 |
sanitize_script(script, policy); |
|
292 |
|
|
293 |
if (!(doc instanceof HTMLDocument)) |
|
294 |
apply_intrinsics_sanitizing(old_html); |
|
286 |
/* |
|
287 |
* For XML documents, we don't intend to inject payload, so we neither block |
|
288 |
* document's CSP `<meta>' tags nor wait for `<head>' to be parsed. |
|
289 |
*/ |
|
290 |
if (document instanceof HTMLDocument) { |
|
291 |
await wait_for_head(doc, root); |
|
295 | 292 |
|
296 |
new_html.replaceWith(old_html); |
|
293 |
root.querySelectorAll("head meta") |
|
294 |
.forEach(m => sanitize_meta(m, policy)); |
|
295 |
} |
|
297 | 296 |
|
298 |
for (const script of old_html.querySelectorAll("script")) |
|
299 |
desanitize_script(script, policy); |
|
297 |
root.querySelectorAll("script").forEach(s => sanitize_script(s, policy)); |
|
298 |
html.replaceWith(root); |
|
299 |
root.querySelectorAll("script").forEach(s => desanitize_script(s, policy)); |
|
300 | 300 |
|
301 | 301 |
start_data_urls_sanitizing(doc); |
302 | 302 |
} |
... | ... | |
329 | 329 |
} |
330 | 330 |
|
331 | 331 |
if (!policy) { |
332 |
console.warn("Using fallback policy!");
|
|
332 |
console.debug("Using fallback policy!");
|
|
333 | 333 |
policy = {allow: false, nonce: gen_nonce()}; |
334 | 334 |
} |
335 | 335 |
|
336 |
console.debug("current policy", policy); |
|
337 |
|
|
336 | 338 |
const doc_ready = Promise.all([ |
337 |
policy.allow ? Promise.resolve : sanitize_document(document, policy), |
|
338 |
new Promise(cb => document.addEventListener("DOMContentLoaded", |
|
339 |
cb, {once: true})) |
|
339 |
policy.allow ? Promise.resolve() : sanitize_document(document, policy), |
|
340 |
wait_loaded(document) |
|
340 | 341 |
]); |
341 | 342 |
|
342 | 343 |
handle_page_actions(policy.nonce, policy_received_callback, doc_ready); |
Also available in: Unified diff
Fix sanitizing of non-HTML XMLDocument's