/content/sanitize_document.js - Diff - Haketilo - Hydrilla issue tracker

« Previous | Next »

Revision 6b53d6c8

Added by koszko about 2 years ago

ID 6b53d6c840140fc5df6d7638808b978d96502a35
Parent d09b7ee1
Child 2875397f, 44958e6a

use StreamFilter under Mozilla to prevent csp tags from blocking our injected scripts

         node.removeAttribute(attr);
+    }
     function sanitize_script(script, policy)
     function sanitize_script(script, data)
+    {
         if (policy.allow)
         if (script.getAttribute("data-hachette-deleteme") === data.policy.nonce) {
     	script.remove();
     	script.hachette_deleted = true;
     	script.hachette_ignore = true;
+        }
         if (data.policy.allow)
     	return;
         block_attribute(script, "type");
         script.setAttribute("type", "application/json");
+    }
     function inject_csp(head, policy)
     function inject_csp(head, data)
+    {
         if (policy.allow)
         if (data.policy.allow)
     	return;
         const meta = document.createElement("meta");
         meta.setAttribute("http-equiv", "Content-Security-Policy");
         meta.setAttribute("content", csp_rule(policy.nonce));
         meta.setAttribute("content", csp_rule(data.policy.nonce));
         meta.hachette_ignore = true;
         head.prepend(meta);
         data.new_added.unshift([meta, head]);
+    }
     function sanitize_http_equiv_csp_rule(meta, policy)
     function sanitize_http_equiv_csp_rule(meta, data)
+    {
         const http_equiv = meta.getAttribute("http-equiv");
         const value = meta.content;
         if (!is_csp_header_name(http_equiv, !policy.allow))
         if (!value || !is_csp_header_name(http_equiv, !data.policy.allow))
     	return;
         if (policy.allow || is_csp_header_name(http_equiv, false)) {
     	let value = meta.getAttribute("content");
     	block_attribute(meta, "content");
     	if (value) {
     	    value = sanitize_csp_header({value}, policy).value;
     	    meta.setAttribute("content", value);
+    	}
     	return;
+        }
         block_attribute(meta, "content");
         block_attribute(meta, "http-equiv");
         if (data.policy.allow || is_csp_header_name(http_equiv, false))
     	meta.content = sanitize_csp_header({value}, data.policy).value;
+    }
     function sanitize_node(node, policy)
     function sanitize_node(node, data)
+    {
         if (node.tagName === "SCRIPT")
     	sanitize_script(node, policy);
     	sanitize_script(node, data);
         if (node.tagName === "HEAD")
     	inject_csp(node, policy);
     	inject_csp(node, data);
         if (node.tagName === "META")
     	sanitize_http_equiv_csp_rule(node, policy);
     	sanitize_http_equiv_csp_rule(node, data);
         if (!data.policy.allow)
     	sanitize_attributes(node, data);
+    }
         if (!policy.allow)
     	sanitize_attributes(node, policy);
     /*
      * Instead of calling writer directly with multiple small chunks of reconstructed
      * HTML code, we utilize `setTimeout()' to only have it called once,
      * asynchronously.
      */
     function do_write_callback(data)
+    {
         data.writer(data.chunks.join(""));
         data.chunks = [];
         if (data.finished && data.finisher)
     	data.finisher();
+    }
     function do_write(chunk, data)
+    {
         data.chunks.push(chunk);
         clearTimeout(data.write_timeout);
         data.write_timeout = setTimeout(() => do_write_callback(data), 0);
+    }
     const serializer = new XMLSerializer();
     function start_node(node, data)
     function start_serializing_node(node, data)
+    {
         node.hachette_started = true;
         if (!data.writer)
     	return;
         node.hachette_started = true;
         const clone = node.cloneNode(false);
         clone.textContent = data.uniq;
         data.writer(data.uniq_reg.exec(clone.outerHTML)[1]);
         do_write(data.uniq_reg.exec(clone.outerHTML)[1], data);
+    }
     function finish_node(node, data)
     function finish_serializing_node(node, data)
+    {
         const nodes_to_process = [node];
-...
         while (nodes_to_process.length > 0) {
     	const node = nodes_to_process.pop();
     	node.remove();
     	node.hachette_ignore = true;
     	if (!data.writer)
     	    continue;
     	if (node.hachette_started) {
     	    node.textContent = data.uniq;
     	    data.writer(data.uniq_reg.exec(node.outerHTML)[2]);
     	    do_write(data.uniq_reg.exec(node.outerHTML)[2], data);
     	    continue;
+    	}
     	do_write(node.outerHTML || serializer.serializeToString(node), data);
+        }
+    }
     function process_initial_nodes(node, data)
+    {
         if (data.processed_initial_nodes)
     	return;
         data.processed_initial_nodes = true;
         start_serializing_node(data.html_root, data);
         const new_added = [];
         const nodes_to_process = [data.html_root];
         let i = 0;
         while (nodes_to_process.length > 0) {
     	let current = nodes_to_process.shift();
     	if (current.firstChild) {
     	    if (current.firstChild === node)
     		break;
     	    nodes_to_process.unshift(current.firstChild, current);
     	    new_added.push([current.firstChild, current]);
     	    continue;
+    	}
     	data.writer(node.outerHTML || serializer.serializeToString(node));
     	while (current && !current.nextSibling)
     	    current = nodes_to_process.shift();
     	if (!current || current.nextSibling === node)
     	    break;
     	nodes_to_process.unshift(current.nextSibling);
     	new_added.push([current.nextSibling, nodes_to_process[1]]);
+        }
         data.new_added.unshift(...new_added);
+    }
     /*
      * Important! Due to some weirdness node.parentElement is not always correct
      * under Chromium. Track node relations manually.
      * in MutationRecords under Chromium. Track node relations manually.
      */
     function handle_added_node(node, true_parent, data)
+    {
         if (node.hachette_ignore || true_parent.hachette_ignore)
     	return;
         /*
          * Functions we call here might cause new nodes to be injected or found
          * that require processing before the one we got in function argument.
          * We rely on those functions putting the node(s) they create/find at the
          * very beginning of the `new_added' queue and (for created nodes) setting
          * their `hachette_ignore' property, based on which their MutationRecord
          * will not be processed. A function can also mark a node already in the
          * `new_added' queue as not eligible for processing by setting its
          * `hachette_deleted' property.
          */
         if (!true_parent.hachette_started)
     	start_node(true_parent, data)
         process_initial_nodes(node, data);
         sanitize_node(node, data.policy);
         data.new_added.push([node, true_parent]);
         if (data.node_eater)
     	data.node_eater(node, true_parent);
         while (data.new_added.length > 0) {
     	[node, true_parent] = data.new_added.shift();
         finish_node(true_parent.hachette_last_added, data);
     	if (true_parent.hachette_deleted)
     	    node.hachette_deleted = true;
     	if (node.hachette_deleted)
     	    continue;
     	if (!true_parent.hachette_started)
     	    start_serializing_node(true_parent, data)
     	if (!node.hachette_ignore)
     	    sanitize_node(node, data);
     	if (node.hachette_deleted)
     	    continue;
     	if (data.node_eater)
     	    data.node_eater(node, true_parent);
         true_parent.hachette_last_added = node;
     	finish_serializing_node(true_parent.hachette_last_added, data);
     	true_parent.hachette_last_added = node;
+        }
+    }
     function handle_mutation(mutations, data)
-...
          * node.parentElement. The former is the correct one.
          */
         for (const mutation of mutations) {
     	for (const node of mutation.addedNodes)
     	for (const node of mutation.addedNodes) {
     	    /* Check for nodes added by ourselves. */
     	    if (mutation.target.hachette_ignore)
     		node.hachette_ignore = true;
     	    if (node.hachette_ignore)
     		continue;
     	    handle_added_node(node, mutation.target, data);
+    	}
+        }
+    }
     function finish_processing(data)
+    {
         process_initial_nodes(undefined, data);
         /*
          * The `finisher' callback should be called, if provided. Normally our
          * function that performs the last write does it after seeing `finished'
          * set to `true'. If, however, there's no `writer' callback and hence no
          * writes to perform, we need to take care of calling `finisher' here.
          */
         data.finished = true;
         handle_mutation(data.observer.takeRecords(), data);
         finish_node(data.html_element, data);
         data.observer.disconnect();
         /*
          * Additional whitespace that was after `</body>' gets appended to body.
          * Although it's a minor issue, it is not what we want. There's no way to
          * tell exactly what part of that whitespace was after `</body>' and what
          * was before, so we just replace it with a single newline which looks good
          * when printed.
          */
         const body = data.html_root.lastChild;
         const text = body && body.tagName === "BODY" && body.lastChild;
         if (text && text.nodeName === "#text") {
     	const new_content = /^([\S\s]*\S)?\s*$/.exec(text.textContent)[1] || "";
     	text.textContent = new_content + "\n";
+        }
         finish_serializing_node(data.html_root, data);
         if (!data.writer && data.finisher)
     	setTimeout(data.finisher, 0);
+    }
     function modify_on_the_fly(html_element, policy, consumers)
     /*
      * This function sanitizes `html_root' according to `policy'. It is capable of
      * working on an HTML document that is being written to, sanitizing new nodes
      * as they appear.
+     *
      * `consumers' object may contain 3 optional callback functions: `writer',
      * `node_eater' and `finisher'. The first one, if present, is called with chunks
      * of reconstructed HTML code. The second one, if present, gets called for every
      * added node with 2 arguments: that node and its parent. The third one is
      * called at the end, after all processing has been done.
+     *
      * `modify_on_the_fly()' returns a callback that should be called (with no
      * arguments) once the document of html_root has finished being written to.
      * Unfortunately, due to the specific behavior of a document that has had
      * its documentElement replaced
      */
     function modify_on_the_fly(html_root, policy, consumers)
+    {
         const uniq = gen_nonce();
         const uniq_reg = new RegExp(`^(.*)${uniq}(.*)$`);
         const data = {policy, html_element, uniq, uniq_reg, ...consumers};
         start_node(data.html_element, data);
         const uniq_reg = new RegExp(`^([\\s\\S]*)${uniq}([\\s\\S]*)$`);
         const data = {policy, html_root, uniq, uniq_reg, chunks: [], new_added: []};
         Object.assign(data, consumers);
         var observer = new MutationObserver(m => handle_mutation(m, data));
         observer.observe(data.html_element, {
         observer.observe(data.html_root, {
          	attributes: true,
     	childList: true,
     	subtree: true

Also available in: Unified diff