/content/sanitize_document.js - Haketilo - Hydrilla issue tracker

Download (9.16 KB) Statistics

haketilo / content / sanitize_document.js @ 6b53d6c8

       /**
        * Hachette: modify an HTML document as it loads and reconstruct HTML code from it
        *
        * Copyright (C) 2021 Wojtek Kosior
        * Redistribution terms are gathered in the `copyright' file.
        */
       /*
        * IMPORTS_START
        * IMPORT gen_nonce
        * IMPORT csp_rule
        * IMPORT is_csp_header_name
        * IMPORT sanitize_csp_header
        * IMPORT sanitize_attributes
        * IMPORTS_END
        */
       /*
        * Functions that sanitize elements. The script blocking measures are, when
        * possible, going to be applied together with CSP rules injected using
        * webRequest.
        */
       /* Prefix prepended (followed by `-') to the names of disabled attributes. */
       const blocked = "blocked";

       /*
        * Disable attribute `attr' of `node' by moving its value to an attribute
        * called `blocked-<attr>' and removing the original.
        *
        * If `blocked-<attr>' (or `blocked-blocked-<attr>', etc.) is already present,
        * each of those gets shifted one `blocked-' level deeper first, so that no
        * previously stored value is overwritten.
        *
        * `node' has to provide hasAttribute(), getAttribute(), setAttribute() and
        * removeAttribute(). Nothing is returned; when `attr' is not present on `node',
        * the call leaves the node's attributes unchanged.
        */
       function block_attribute(node, attr)
       {
           /*
            * Disabling attributes this way allows them to still be relatively
            * easily accessed in case they contain some useful data.
            */
           const construct_name = [attr];

           /*
            * Find the first free name in the chain `attr', `blocked-attr',
            * `blocked-blocked-attr', ... The parts are joined with `-' so that the
            * names checked here are the same ones that get written below.
            */
           while (node.hasAttribute(construct_name.join("-")))
               construct_name.unshift(blocked);

           /* Shift the values, starting with the most deeply blocked one. */
           while (construct_name.length > 1) {
               construct_name.shift();
               const name = construct_name.join("-");
               node.setAttribute(`${blocked}-${name}`, node.getAttribute(name));
           }

           node.removeAttribute(attr);
       }
       /*
        * Sanitize a <script> element according to `data.policy'.
        *
        * A script whose `data-hachette-deleteme' attribute equals `data.policy.nonce'
        * is removed from the document and marked with `hachette_deleted' and
        * `hachette_ignore'.
        *
        * Unless the policy allows scripts, the script's `type' attribute is blocked
        * (see block_attribute()) and replaced with "application/json".
        */
       function sanitize_script(script, data)
       {
           const delete_mark = script.getAttribute("data-hachette-deleteme");

           if (delete_mark === data.policy.nonce) {
               script.remove();
               script.hachette_deleted = true;
               script.hachette_ignore = true;
           }

           if (data.policy.allow)
               return;

           block_attribute(script, "type");
           script.setAttribute("type", "application/json");
       }
       /*
        * Unless the policy allows scripts, prepend to `head' a
        * <meta http-equiv="Content-Security-Policy"> element whose content is
        * `csp_rule(data.policy.nonce)'.
        *
        * The created element gets its `hachette_ignore' property set and is put at
        * the very beginning of the `data.new_added' queue, paired with `head' as its
        * parent.
        */
       function inject_csp(head, data)
       {
           if (data.policy.allow)
               return;

           const meta = document.createElement("meta");
           meta.setAttribute("http-equiv", "Content-Security-Policy");
           meta.setAttribute("content", csp_rule(data.policy.nonce));
           /* This node is ours - it must not get sanitized. */
           meta.hachette_ignore = true;

           head.prepend(meta);
           data.new_added.unshift([meta, head]);
       }
       /*
        * Sanitize a <meta> element that carries a CSP rule in its `http-equiv' and
        * `content' attributes.
        *
        * Elements with empty `content' or with `http-equiv' not recognized by
        * is_csp_header_name() are left untouched. Otherwise the `content' attribute
        * is blocked (see block_attribute()) and, if the policy allows scripts or the
        * name is also recognized with the second argument set to `false', a new
        * `content' is set to the value returned by sanitize_csp_header().
        *
        * NOTE(review): the second argument of is_csp_header_name() presumably tells
        * whether "report-only" header names should be matched as well - confirm
        * against its definition.
        */
       function sanitize_http_equiv_csp_rule(meta, data)
       {
           const http_equiv = meta.getAttribute("http-equiv");
           /* Has to be read before the attribute gets blocked below. */
           const value = meta.content;

           if (!value || !is_csp_header_name(http_equiv, !data.policy.allow))
               return;

           block_attribute(meta, "content");

           if (data.policy.allow || is_csp_header_name(http_equiv, false))
               meta.content = sanitize_csp_header({value}, data.policy).value;
       }
       /*
        * Apply to `node' the sanitizing function matching its tag name (<script>,
        * <head> and <meta> get special treatment) and then, unless the policy allows
        * scripts, sanitize its attributes with sanitize_attributes().
        *
        * Nodes without a `tagName' (e.g. text nodes) only go through the attribute
        * step.
        */
       function sanitize_node(node, data)
       {
           switch (node.tagName) {
           case "SCRIPT":
               sanitize_script(node, data);
               break;
           case "HEAD":
               inject_csp(node, data);
               break;
           case "META":
               sanitize_http_equiv_csp_rule(node, data);
               break;
           }

           if (!data.policy.allow)
               sanitize_attributes(node, data);
       }
       /*
        * Instead of calling writer directly with multiple small chunks of
        * reconstructed HTML code, we utilize `setTimeout()' to only have it called
        * once, asynchronously.
        */
       /*
        * Pass all chunks gathered in `data.chunks' to `data.writer' as a single
        * string and empty the chunk list. If processing has already been marked as
        * finished (`data.finished') and a `finisher' callback was provided, call it
        * afterwards.
        */
       function do_write_callback(data)
       {
           data.writer(data.chunks.join(""));
           data.chunks = [];

           if (data.finished && data.finisher)
               data.finisher();
       }
       /*
        * Queue `chunk' for writing. The actual write is performed by
        * do_write_callback(), scheduled with a zero timeout; any previously
        * scheduled write gets cancelled first, so that chunks queued in a row end up
        * being written together.
        */
       function do_write(chunk, data)
       {
           data.chunks.push(chunk);

           /* On the first call `write_timeout' is not set yet, which is harmless. */
           clearTimeout(data.write_timeout);
           data.write_timeout = setTimeout(() => do_write_callback(data), 0);
       }
       /*
        * Serializer shared by all calls; finish_serializing_node() falls back to it
        * for nodes whose `outerHTML' is missing or empty.
        */
       const serializer = new XMLSerializer();
       /*
        * Mark `node' as started (`hachette_started') and, if a `writer' callback is
        * present, queue for writing the part of node's HTML code that precedes its
        * contents.
        *
        * That part is obtained by putting the unique string `data.uniq' inside
        * a shallow clone of the node and taking whatever `data.uniq_reg' matches
        * before that string in the clone's `outerHTML'.
        */
       function start_serializing_node(node, data)
       {
           node.hachette_started = true;

           if (!data.writer)
               return;

           /* Shallow clone - the real children are not to be touched. */
           const clone = node.cloneNode(false);
           clone.textContent = data.uniq;

           const [, opening] = data.uniq_reg.exec(clone.outerHTML);
           do_write(opening, data);
       }
       /*
        * Finish serializing `node' together with the chain of nodes reachable from
        * it through `hachette_last_added' (node's last added child, that child's
        * last added child, etc.). A falsy `node' makes this a no-op.
        *
        * The nodes are processed starting with the most deeply nested one. Each gets
        * removed from the document and marked with `hachette_ignore'. If a `writer'
        * callback is present, then for a node that has been started only the part
        * of its HTML code following its contents is queued for writing, while a node
        * that has not been started is queued as a whole.
        */
       function finish_serializing_node(node, data)
       {
           /* Collect the chain: node, its last added child, and so on. */
           const nodes_to_process = [];
           for (let link = node; link; link = link.hachette_last_added)
               nodes_to_process.push(link);

           while (nodes_to_process.length > 0) {
               const current = nodes_to_process.pop();
               current.remove();
               current.hachette_ignore = true;

               if (!data.writer)
                   continue;

               if (current.hachette_started) {
                   /* The opening part got written earlier - write the rest. */
                   current.textContent = data.uniq;
                   do_write(data.uniq_reg.exec(current.outerHTML)[2], data);
                   continue;
               }

               /* Nodes lacking (non-empty) `outerHTML' go through the serializer. */
               do_write(current.outerHTML || serializer.serializeToString(current),
                        data);
           }
       }
       /*
        * On the first call only: start serializing `data.html_root' and put at the
        * beginning of the `data.new_added' queue [node, parent] pairs for the nodes
        * that are already present under `data.html_root'. The tree is walked in
        * document order and the walk stops right before `node' is reached (pass
        * `undefined' to have the entire tree walked).
        *
        * Subsequent calls return immediately.
        */
       function process_initial_nodes(node, data)
       {
           if (data.processed_initial_nodes)
               return;

           data.processed_initial_nodes = true;

           start_serializing_node(data.html_root, data);

           const new_added = [];
           /*
            * Works as a stack with its top at index 0: the node being visited is
            * followed by its ancestors.
            */
           const nodes_to_process = [data.html_root];

           while (nodes_to_process.length > 0) {
               let current = nodes_to_process.shift();

               if (current.firstChild) {
                   if (current.firstChild === node)
                       break;

                   /* Descend into the first child. */
                   nodes_to_process.unshift(current.firstChild, current);
                   new_added.push([current.firstChild, current]);
                   continue;
               }

               /* Climb up until something with a next sibling is found. */
               while (current && !current.nextSibling)
                   current = nodes_to_process.shift();

               if (!current || current.nextSibling === node)
                   break;

               /* After unshift() the sibling's parent sits at index 1. */
               nodes_to_process.unshift(current.nextSibling);
               new_added.push([current.nextSibling, nodes_to_process[1]]);
           }

           data.new_added.unshift(...new_added);
       }
       /*
        * Important! Due to some weirdness node.parentElement is not always correct
        * in MutationRecords under Chromium. Track node relations manually.
        */
       /*
        * Process a node newly added to the document, together with any nodes that
        * end up in the `data.new_added' queue in the meantime.
        *
        * `true_parent' is the parent as reported by the MutationRecord; it is used
        * instead of the node's own parent reference.
        */
       function handle_added_node(node, true_parent, data)
       {
           /*
            * Functions we call here might cause new nodes to be injected or found
            * that require processing before the one we got in function argument.
            * We rely on those functions putting the node(s) they create/find at the
            * very beginning of the `new_added' queue and (for created nodes) setting
            * their `hachette_ignore' property, based on which their MutationRecord
            * will not be processed. A function can also mark a node already in the
            * `new_added' queue as not eligible for processing by setting its
            * `hachette_deleted' property.
            */
           process_initial_nodes(node, data);

           data.new_added.push([node, true_parent]);

           while (data.new_added.length > 0) {
               const [current, parent] = data.new_added.shift();

               /* Everything under a deleted node is considered deleted, too. */
               if (parent.hachette_deleted)
                   current.hachette_deleted = true;
               if (current.hachette_deleted)
                   continue;

               if (!parent.hachette_started)
                   start_serializing_node(parent, data);

               if (!current.hachette_ignore)
                   sanitize_node(current, data);

               /* Sanitizing might have resulted in the node getting deleted. */
               if (current.hachette_deleted)
                   continue;

               if (data.node_eater)
                   data.node_eater(current, parent);

               /* The previous sibling is complete now that a new one appeared. */
               finish_serializing_node(parent.hachette_last_added, data);
               parent.hachette_last_added = current;
           }
       }
       /*
        * MutationObserver callback body: pass every node added in `mutations' to
        * handle_added_node(), skipping nodes marked with `hachette_ignore'. A node
        * added under an ignored target gets marked as ignored as well.
        */
       function handle_mutation(mutations, data)
       {
           /*
            * Chromium: for an unknown reason mutation.target is not always the same as
            * node.parentElement. The former is the correct one.
            */
           for (const {target, addedNodes} of mutations) {
               for (const node of addedNodes) {
                   /* Check for nodes added by ourselves. */
                   if (target.hachette_ignore)
                       node.hachette_ignore = true;
                   if (node.hachette_ignore)
                       continue;

                   handle_added_node(node, target, data);
               }
           }
       }
       /*
        * Complete the processing once the document has finished being written to:
        * handle nodes and mutation records that have not been processed yet,
        * disconnect the observer, tidy up whitespace at the end of <body> and finish
        * serializing `data.html_root'.
        */
       function finish_processing(data)
       {
           /* In case no node addition was ever reported. */
           process_initial_nodes(undefined, data);

           /* Has to be set before the last writes get scheduled. */
           data.finished = true;
           handle_mutation(data.observer.takeRecords(), data);
           data.observer.disconnect();

           /*
            * Additional whitespace that was after `</body>' gets appended to body.
            * Although it's a minor issue, it is not what we want. There's no way to
            * tell exactly what part of that whitespace was after `</body>' and what
            * was before, so we just replace it with a single newline which looks good
            * when printed.
            */
           const body = data.html_root.lastChild;
           const text = body && body.tagName === "BODY" && body.lastChild;
           if (text && text.nodeName === "#text") {
               const new_content = /^([\S\s]*\S)?\s*$/.exec(text.textContent)[1] || "";
               text.textContent = new_content + "\n";
           }

           finish_serializing_node(data.html_root, data);

           /*
            * The `finisher' callback should be called, if provided. Normally our
            * function that performs the last write does it after seeing `finished'
            * set to `true'. If, however, there's no `writer' callback and hence no
            * writes to perform, we need to take care of calling `finisher' here.
            */
           if (!data.writer && data.finisher)
               setTimeout(data.finisher, 0);
       }
       /*
        * This function sanitizes `html_root' according to `policy'. It is capable of
        * working on an HTML document that is being written to, sanitizing new nodes
        * as they appear.
        *
        * `consumers' object may contain 3 optional callback functions: `writer',
        * `node_eater' and `finisher'. The first one, if present, is called with chunks
        * of reconstructed HTML code. The second one, if present, gets called for every
        * added node with 2 arguments: that node and its parent. The third one is
        * called at the end, after all processing has been done.
        *
        * `modify_on_the_fly()' returns a callback that should be called (with no
        * arguments) once the document of html_root has finished being written to.
        * Unfortunately, the caller has to do this explicitly - apparently due to the
        * specific behavior of a document that has had its documentElement replaced.
        */
       /*
        * Start sanitizing `html_root' according to `policy': set up the shared state
        * object (extended with the callbacks from `consumers', if given) and
        * a MutationObserver that watches the subtree of `html_root'.
        *
        * Returns a function that takes no arguments and calls finish_processing()
        * on that state.
        */
       function modify_on_the_fly(html_root, policy, consumers)
       {
           /*
            * `uniq' is a marker string that gets put inside a node so that the HTML
            * code before and after node's contents can be told apart by `uniq_reg'.
            * NOTE(review): `uniq' is interpolated into the regex unescaped, so
            * gen_nonce() is assumed to never return regex metacharacters - confirm.
            */
           const uniq = gen_nonce();
           const uniq_reg = new RegExp(`^([\\s\\S]*)${uniq}([\\s\\S]*)$`);

           const data = {policy, html_root, uniq, uniq_reg, chunks: [], new_added: []};
           Object.assign(data, consumers);

           const observer = new MutationObserver(m => handle_mutation(m, data));
           observer.observe(data.html_root, {
               attributes: true,
               childList: true,
               subtree: true
           });

           data.observer = observer;

           return () => finish_processing(data);
       }
       /*
        * EXPORTS_START
        * EXPORT modify_on_the_fly
        * EXPORTS_END
        */

« Previous
1
…
4
5
6
Next »

(6-6/6)