/ - Diff - Haketilo - Hydrilla issue tracker

« Previous | Next »

Revision 6b53d6c8

Added by koszko about 2 years ago

ID 6b53d6c840140fc5df6d7638808b978d96502a35
Parent d09b7ee1
Child 2875397f, 44958e6a

use StreamFilter under Mozilla to prevent csp tags from blocking our injected scripts

      * IMPORT get_storage
      * IMPORT start_storage_server
      * IMPORT start_page_actions_server
      * IMPORT start_policy_injector
      * IMPORT browser
      * IMPORT is_privileged_url
      * IMPORT query_best
      * IMPORT gen_nonce
      * IMPORT inject_csp_headers
      * IMPORT apply_stream_filter
      * IMPORT is_chrome
      * IMPORTS_END
      */
     start_storage_server();
     start_page_actions_server();
     start_policy_injector();
     async function init_ext(install_details)
+    {
         console.log("details:", install_details);
         if (install_details.reason != "install")
     	return;
-...
     browser.runtime.onInstalled.addListener(init_ext);
     console.log("hello, hachette");
     let storage;
     /*
      * Listener for `webRequest.onHeadersReceived'. Builds the policy object
      * ({allow, url, nonce}) for the requested URL, passes the response headers
      * through `inject_csp_headers()' and, unless the response is an attachment
      * or a redirect, through `apply_stream_filter()'.
      *
      * Returns nothing for privileged URLs, otherwise an object holding the
      * resulting `responseHeaders'.
      */
     function on_headers_received(details)
     {
         const url = details.url;
         if (is_privileged_url(url))
             return;

         const [, settings] = query_best(storage, url);
         const allow = !!(settings && settings.allow);
         const nonce = gen_nonce();
         const policy = {allow, url, nonce};

         let headers = details.responseHeaders;

         /*
          * Responses served as attachments are not stream-filtered. The
          * parameter part is optional, so a bare `attachment' value counts too.
          */
         let skip = headers.some(
             header =>
                 header.name.toLowerCase().trim() === "content-disposition" &&
                 /^\s*attachment\s*(;.*)?$/i.test(header.value)
         );

         headers = inject_csp_headers(details, headers, policy);

         /* Responses with 3xx status are not stream-filtered either. */
         skip = skip || (details.statusCode >= 300 && details.statusCode < 400);

         /* Check for API availability. */
         if (!skip && browser.webRequest.filterResponseData)
             headers = apply_stream_filter(details, headers, policy);

         return {responseHeaders: headers};
     }
     /*
      * Loads the storage object into the file-level `storage' variable and then
      * registers `on_headers_received()' as a blocking `onHeadersReceived'
      * listener for main_frame and sub_frame requests to all URLs.
      */
     async function start_webRequest_operations()
     {
         storage = await get_storage();

         const extra_opts = ["blocking", "responseHeaders"];
         /* `extraHeaders' is only requested when `is_chrome' is set. */
         if (is_chrome)
             extra_opts.push("extraHeaders");

         browser.webRequest.onHeadersReceived.addListener(
             on_headers_received,
             {urls: ["<all_urls>"], types: ["main_frame", "sub_frame"]},
             extra_opts
         );
     }
     start_webRequest_operations();

     /*
      * IMPORTS_START
      * IMPORT get_storage
      * IMPORT browser
      * IMPORT is_chrome
      * IMPORT gen_nonce
      * IMPORT is_privileged_url
      * IMPORT sign_data
      * IMPORT extract_signed
      * IMPORT query_best
      * IMPORT sanitize_csp_header
      * IMPORT csp_rule
      * IMPORT is_csp_header_name
      * IMPORTS_END
      */
     var storage;
     function headers_inject(details)
     function inject_csp_headers(details, headers, policy)
+    {
         const url = details.url;
         if (is_privileged_url(url))
     	return;
         const [pattern, settings] = query_best(storage, url);
         const allow = !!(settings && settings.allow);
         const nonce = gen_nonce();
         let orig_csp_headers;
         let old_signature;
         let hachette_header;
         let headers = details.responseHeaders;
         for (const header of headers.filter(h => h.name === "x-hachette")) {
     	const match = /^([^%])(%.*)$/.exec(header.value);
-...
     	/* Confirmed- it's the originals, smuggled in! */
     	orig_csp_headers = old_data.csp_headers;
     	old_signature = old_data.policy_signature;
     	old_signature = old_data.policy_sig;
     	hachette_header = header;
     	break;
-...
     	headers.filter(h => is_csp_header_name(h.name));
         /* When blocking remove report-only CSP headers that snitch on us. */
         headers = headers.filter(h => !is_csp_header_name(h.name, !allow));
         headers = headers.filter(h => !is_csp_header_name(h.name, !policy.allow));
         if (old_signature)
     	headers = headers.filter(h => h.name.search(old_signature) === -1);
         const policy_object = {allow, nonce, url};
         const sanitizer = h => sanitize_csp_header(h, policy_object);
         const sanitizer = h => sanitize_csp_header(h, policy);
         headers.push(...orig_csp_headers.map(sanitizer));
         const policy = encodeURIComponent(JSON.stringify(policy_object));
         const policy_signature = sign_data(policy, new Date());
         const policy_str = encodeURIComponent(JSON.stringify(policy));
         const policy_sig = sign_data(policy_str, new Date());
         const later_30sec = new Date(new Date().getTime() + 30000).toGMTString();
         headers.push({
     	name: "Set-Cookie",
     	value: `hachette-${policy_signature}=${policy}; Expires=${later_30sec};`
     	value: `hachette-${policy_sig}=${policy_str}; Expires=${later_30sec};`
         });
         /*
-...
          * These are signed with a time of 0, as it's not clear there is a limit on
          * how long Firefox might retain headers in the cache.
          */
         let hachette_data = {csp_headers: orig_csp_headers, policy_signature, url};
         let hachette_data = {csp_headers: orig_csp_headers, policy_sig, url};
         hachette_data = encodeURIComponent(JSON.stringify(hachette_data));
         hachette_header.value = sign_data(hachette_data, 0) + hachette_data;
         /* To ensure there is a CSP header if required */
         if (!allow)
     	headers.push({name: "content-security-policy", value: csp_rule(nonce)});
         if (!policy.allow)
     	headers.push({
     	    name: "content-security-policy",
     	    value: csp_rule(policy.nonce)
     	});
         return {responseHeaders: headers};
+    }
     async function start_policy_injector()
+    {
         storage = await get_storage();
         let extra_opts = ["blocking", "responseHeaders"];
         if (is_chrome)
     	extra_opts.push("extraHeaders");
         browser.webRequest.onHeadersReceived.addListener(
     	headers_inject,
+    	{
     	    urls: ["<all_urls>"],
     	    types: ["main_frame", "sub_frame"]
     	},
     	extra_opts
         );
         return headers;
+    }
     /*
      * EXPORTS_START
      * EXPORT start_policy_injector
      * EXPORT inject_csp_headers
      * EXPORTS_END
      */

     /**
      * Hachette modifying a web page using the StreamFilter API
+     *
      * Copyright (C) 2018 Giorgio Maone <giorgio@maone.net>
      * Copyright (C) 2021 Wojtek Kosior
      * Redistribution terms are gathered in the `copyright' file.
+     *
      * Derived from `bg/ResponseProcessor.js' and `bg/ResponseMetaData.js'
      * in LibreJS.
      */
     /*
      * IMPORTS_START
      * IMPORT browser
      * IMPORTS_END
      */
     /*
      * Returns `charset' if a TextDecoder can be constructed for that label,
      * `undefined' otherwise.
      */
     function validate_encoding(charset)
     {
         try {
             /* The label has to be passed, or nothing gets validated at all. */
             new TextDecoder(charset);
             return charset;
         } catch(e) {
             /* The constructor throws for labels it does not support. */
             return undefined;
         }
     }
     /*
      * Tells whether `header' (an object with a `name' property) is a
      * Content-Type header. The name is compared case-insensitively, with
      * surrounding whitespace ignored.
      */
     function is_content_type_header(header)
     {
         return header.name.toLowerCase().trim() === "content-type";
     }
     const charset_reg = /;\s*charset\s*=\s*([\w-]+)/i;
     /*
      * Builds a properties object from the Content-Type headers in `headers':
      * `detected_charset' is set to the first declared charset accepted by
      * `validate_encoding()', and `html' is set to `true' if any Content-Type
      * value mentions "html". Either property may be absent from the result.
      */
     function properties_from_headers(headers)
     {
         const properties = {};

         for (const header of headers.filter(is_content_type_header)) {
             /* `match' is null when the header declares no charset. */
             const match = charset_reg.exec(header.value);
             if (match && !properties.detected_charset &&
                 validate_encoding(match[1]))
                 properties.detected_charset = match[1];

             if (/html/i.test(header.value))
                 properties.html = true;
         }

         return properties;
     }
     const UTF8_BOM = [0xef, 0xbb, 0xbf];
     const BOMs = [
         [UTF8_BOM, "utf-8"],
         [[0xfe, 0xff], "utf-16be"],
         [[0xff, 0xfe], "utf-16le"]
     ];
     /*
      * Returns the charset name paired in `BOMs' with the byte order mark that
      * `data' starts with, or an empty string if `data' starts with none of
      * them.
      */
     function charset_from_BOM(data)
     {
         for (const [BOM, charset] of BOMs) {
             /* `every()' stops at the first byte that differs. */
             if (BOM.every((byte, i) => byte === data[i]))
                 return charset;
         }

         return "";
     }
     const charset_attrs =
           ['charset', 'http-equiv="content-type"', 'content*="charset"'];
     const charset_meta_selector =
           charset_attrs.map(a => `head>meta[${a}]`).join(", ");
     /*
      * Looks through the elements of `doc' matched by `charset_meta_selector'
      * and returns the first charset accepted by `validate_encoding()'. For
      * each element the `charset' attribute is tried first, then a
      * `charset=...' part of the `content' attribute. Returns `undefined' when
      * nothing usable is found.
      */
     function charset_from_meta_tags(doc)
     {
         for (const meta of doc.querySelectorAll(charset_meta_selector)) {
             const maybe_charset = meta.getAttribute("charset");
             if (maybe_charset && validate_encoding(maybe_charset))
                 return maybe_charset;

             const match = charset_reg.exec(meta.getAttribute("content"));
             if (match && validate_encoding(match[1]))
                 return match[1];
         }

         return undefined;
     }
     /*
      * Creates the TextDecoder to use for a response body whose first chunk is
      * `data' (a Uint8Array). The charset is taken, in order of preference,
      * from: a byte order mark at the start of `data', then
      * `properties.detected_charset', then `<meta>' tags found in the chunk
      * (HTML only), and finally "latin1" as the fallback.
      *
      * As a side effect `properties.html' is updated to tell whether the
      * content looks like HTML.
      */
     function create_decoder(properties, data)
     {
         let charset = charset_from_BOM(data) || properties.detected_charset;

         if (!charset && data.indexOf(0) !== -1) {
             console.debug("Warning: zeroes in bytestream, probable cached encoding mismatch. Trying to decode it as UTF-16.",
                           properties);
             return new TextDecoder("utf-16be");
         }

         /*
          * TODO: I recall there is some standard saying how early in the doc the
          * charset has to be specified. We could process just this part of data.
          */
         const text = new TextDecoder("latin1").decode(data, {stream: true});

         properties.html = properties.html || /html/i.test(text);

         /*
          * Only sniff `<meta>' tags when neither a BOM nor the HTTP headers
          * gave us a charset. Sniffing unconditionally would throw away an
          * already known charset.
          */
         if (!charset && properties.html) {
             const tmp_doc = new DOMParser().parseFromString(text, "text/html");
             charset = charset_from_meta_tags(tmp_doc);
         }

         return new TextDecoder(charset || "latin1");
     }
     /*
      * Tells whether `bytes' (a Uint8Array) end in the middle of a multi-byte
      * UTF-8 sequence, i.e. whether the last character is still missing some
      * of its continuation bytes.
      */
     function ends_with_partial_utf8(bytes)
     {
         const max_back = Math.min(3, bytes.length);

         for (let back = 1; back <= max_back; back++) {
             const byte = bytes[bytes.length - back];

             /* Continuation byte (10xxxxxx), the lead byte is further back. */
             if ((byte & 0xc0) === 0x80)
                 continue;

             let seq_len = 1;
             if ((byte & 0xe0) === 0xc0)
                 seq_len = 2;
             else if ((byte & 0xf0) === 0xe0)
                 seq_len = 3;
             else if ((byte & 0xf8) === 0xf0)
                 seq_len = 4;

             return seq_len > back;
         }

         return false;
     }

     /*
      * `ondata' handler of the response stream filter. Decodes the chunk in
      * `event.data' with the decoder stored in `properties' (created from the
      * first chunk), re-encodes it with a TextEncoder and writes it to
      * `properties.filter'. The first chunk is additionally prefixed with a
      * UTF-8 BOM and gets a dummy `<script>' injected right after the doctype
      * declaration (or at the very beginning if there is none).
      *
      * If the input is UTF-8 already, the filter gets disconnected as soon as
      * a chunk ends on a character boundary, so that remaining data passes
      * through untouched.
      */
     function filter_data(properties, event)
     {
         const data = new Uint8Array(event.data);
         const first_chunk = !properties.decoder;

         if (first_chunk) {
             properties.decoder = create_decoder(properties, data);
             properties.encoder = new TextEncoder();
             /* Force UTF-8, this is the only encoding we can produce. */
             properties.filter.write(new Uint8Array(UTF8_BOM));
         }

         /*
          * Streaming mode makes the decoder hold back a character that is
          * split between two chunks instead of emitting a replacement
          * character for each half.
          */
         let decoded = properties.decoder.decode(data, {stream: true});

         if (first_chunk) {
             /*
              * HAX! Our content scripts that execute at `document_start' will
              * always run before the first script in the document, but under
              * Mozilla some `<meta>' tags might already be loaded at that
              * point. Here we inject a dummy `<script>' at the beginning
              * (before any `<meta>' tags) that will force `document_start' to
              * happen earlier. This way our content scripts will be able to
              * sanitize `http-equiv' tags with CSP rules that would otherwise
              * stop our injected scripts from executing.
              */
             const dummy_script =
                   `<script data-hachette-deleteme="${properties.policy.nonce}" nonce="${properties.policy.nonce}">null</script>`;
             const doctype_decl = /^(\s*<!doctype[^<>"']*>)?/i.exec(decoded)[0];
             decoded = doctype_decl + dummy_script +
                 decoded.substring(doctype_decl.length);
         }

         properties.filter.write(properties.encoder.encode(decoded));

         /*
          * Disconnecting while the decoder still holds the first bytes of a
          * character would lose those bytes, hence the boundary check.
          */
         if (properties.decoder.encoding === "utf-8" &&
             !ends_with_partial_utf8(data))
             properties.filter.disconnect();
     }
     /*
      * Attaches a response data filter to the request described by `details',
      * unless `policy.allow' is set. Incoming chunks are handed to
      * `filter_data()' and the filter is closed once the response stops.
      *
      * Returns `headers' as received, they are not modified here.
      */
     function apply_stream_filter(details, headers, policy)
     {
         if (policy.allow)
             return headers;

         const properties = properties_from_headers(headers);
         properties.policy = policy;

         properties.filter =
             browser.webRequest.filterResponseData(details.requestId);

         properties.filter.ondata = event => filter_data(properties, event);
         properties.filter.onstop = () => properties.filter.close();

         /*
          * In the future we might consider modifying the headers that specify
          * encoding. For now we are not yet doing it, though. However, we
          * prepend the data with UTF-8 BOM which should be enough.
          */
         return headers;
     }
     /*
      * EXPORTS_START
      * EXPORT apply_stream_filter
      * EXPORTS_END
      */

         handle_page_actions(policy.nonce);
         if (!policy.allow && is_mozilla)
     	addEventListener('beforescriptexecute', mozilla_suppress_scripts, true);
         if (!policy.allow && is_chrome) {
         if (!policy.allow) {
     	const old_html = document.documentElement;
     	const new_html = document.createElement("html");
     	old_html.replaceWith(new_html);

         node.removeAttribute(attr);
+    }
     function sanitize_script(script, policy)
     function sanitize_script(script, data)
+    {
         if (policy.allow)
         if (script.getAttribute("data-hachette-deleteme") === data.policy.nonce) {
     	script.remove();
     	script.hachette_deleted = true;
     	script.hachette_ignore = true;
+        }
         if (data.policy.allow)
     	return;
         block_attribute(script, "type");
         script.setAttribute("type", "application/json");
+    }
     function inject_csp(head, policy)
     function inject_csp(head, data)
+    {
         if (policy.allow)
         if (data.policy.allow)
     	return;
         const meta = document.createElement("meta");
         meta.setAttribute("http-equiv", "Content-Security-Policy");
         meta.setAttribute("content", csp_rule(policy.nonce));
         meta.setAttribute("content", csp_rule(data.policy.nonce));
         meta.hachette_ignore = true;
         head.prepend(meta);
         data.new_added.unshift([meta, head]);
+    }
     function sanitize_http_equiv_csp_rule(meta, policy)
     function sanitize_http_equiv_csp_rule(meta, data)
+    {
         const http_equiv = meta.getAttribute("http-equiv");
         const value = meta.content;
         if (!is_csp_header_name(http_equiv, !policy.allow))
         if (!value || !is_csp_header_name(http_equiv, !data.policy.allow))
     	return;
         if (policy.allow || is_csp_header_name(http_equiv, false)) {
     	let value = meta.getAttribute("content");
     	block_attribute(meta, "content");
     	if (value) {
     	    value = sanitize_csp_header({value}, policy).value;
     	    meta.setAttribute("content", value);
+    	}
     	return;
+        }
         block_attribute(meta, "content");
         block_attribute(meta, "http-equiv");
         if (data.policy.allow || is_csp_header_name(http_equiv, false))
     	meta.content = sanitize_csp_header({value}, data.policy).value;
+    }
     function sanitize_node(node, policy)
     function sanitize_node(node, data)
+    {
         if (node.tagName === "SCRIPT")
     	sanitize_script(node, policy);
     	sanitize_script(node, data);
         if (node.tagName === "HEAD")
     	inject_csp(node, policy);
     	inject_csp(node, data);
         if (node.tagName === "META")
     	sanitize_http_equiv_csp_rule(node, policy);
     	sanitize_http_equiv_csp_rule(node, data);
         if (!data.policy.allow)
     	sanitize_attributes(node, data);
+    }
         if (!policy.allow)
     	sanitize_attributes(node, policy);
     /*
      * Instead of calling writer directly with multiple small chunks of reconstructed
      * HTML code, we utilize `setTimeout()' to only have it called once,
      * asynchronously.
      */
     /*
      * Passes all chunks gathered in `data.chunks', joined into one string, to
      * `data.writer' and empties the queue. If `data.finished' is set and a
      * `data.finisher' callback exists, the callback is called afterwards.
      */
     function do_write_callback(data)
     {
         data.writer(data.chunks.join(""));
         data.chunks = [];

         if (data.finished && data.finisher)
             data.finisher();
     }
     /*
      * Queues `chunk' in `data.chunks' and (re)schedules a single asynchronous
      * `do_write_callback()' call. A previously scheduled call is cancelled, so
      * chunks queued in one go end up being written together.
      */
     function do_write(chunk, data)
     {
         data.chunks.push(chunk);
         clearTimeout(data.write_timeout);
         data.write_timeout = setTimeout(() => do_write_callback(data), 0);
     }
     const serializer = new XMLSerializer();
     function start_node(node, data)
     function start_serializing_node(node, data)
+    {
         node.hachette_started = true;
         if (!data.writer)
     	return;
         node.hachette_started = true;
         const clone = node.cloneNode(false);
         clone.textContent = data.uniq;
         data.writer(data.uniq_reg.exec(clone.outerHTML)[1]);
         do_write(data.uniq_reg.exec(clone.outerHTML)[1], data);
+    }
     function finish_node(node, data)
     function finish_serializing_node(node, data)
+    {
         const nodes_to_process = [node];
-...
         while (nodes_to_process.length > 0) {
     	const node = nodes_to_process.pop();
     	node.remove();
     	node.hachette_ignore = true;
     	if (!data.writer)
     	    continue;
     	if (node.hachette_started) {
     	    node.textContent = data.uniq;
     	    data.writer(data.uniq_reg.exec(node.outerHTML)[2]);
     	    do_write(data.uniq_reg.exec(node.outerHTML)[2], data);
     	    continue;
+    	}
     	do_write(node.outerHTML || serializer.serializeToString(node), data);
+        }
+    }
     /*
      * Runs at most once per `data' object (guarded by
      * `data.processed_initial_nodes'). Starts serializing `data.html_root' and
      * walks the tree below it in document order, stopping when `node' would
      * be visited next (or when the tree is exhausted, e.g. if `node' is
      * undefined). Every node visited on the way is put, together with its
      * parent, at the beginning of the `data.new_added' queue.
      *
      * Nothing is written out directly here. Queued nodes are left for the
      * code that processes the `new_added' queue.
      */
     function process_initial_nodes(node, data)
     {
         if (data.processed_initial_nodes)
             return;

         data.processed_initial_nodes = true;

         start_serializing_node(data.html_root, data);

         const new_added = [];
         /*
          * Holds the node to visit next, followed by its ancestors (innermost
          * first).
          */
         const nodes_to_process = [data.html_root];

         while (nodes_to_process.length > 0) {
             let current = nodes_to_process.shift();

             if (current.firstChild) {
                 if (current.firstChild === node)
                     break;
                 nodes_to_process.unshift(current.firstChild, current);
                 new_added.push([current.firstChild, current]);
                 continue;
             }

             /* Climb up until something with a next sibling is found. */
             while (current && !current.nextSibling)
                 current = nodes_to_process.shift();

             if (!current || current.nextSibling === node)
                 break;

             nodes_to_process.unshift(current.nextSibling);
             /* After the unshift, index 1 holds the sibling's parent. */
             new_added.push([current.nextSibling, nodes_to_process[1]]);
         }

         data.new_added.unshift(...new_added);
     }
     /*
      * Important! Due to some weirdness node.parentElement is not alway correct
      * under Chromium. Track node relations manually.
      * in MutationRecords under Chromium. Track node relations manually.
      */
     function handle_added_node(node, true_parent, data)
+    {
         if (node.hachette_ignore || true_parent.hachette_ignore)
     	return;
         /*
          * Functions we call here might cause new nodes to be injected or found
          * that require processing before the one we got in function argument.
          * We rely on those functions putting the node(s) they create/find at the
          * very beginning of the `new_added' queue and (for created nodes) setting
          * their `hachette_ignore' property, based on which their MutationRecord
          * will not be processed. A function can also mark a node already in the
          * `new_added' queue as not eligible for processing by setting its
          * `hachette_deleted' property.
          */
         if (!true_parent.hachette_started)
     	start_node(true_parent, data)
         process_initial_nodes(node, data);
         sanitize_node(node, data.policy);
         data.new_added.push([node, true_parent]);
         if (data.node_eater)
     	data.node_eater(node, true_parent);
         while (data.new_added.length > 0) {
     	[node, true_parent] = data.new_added.shift();
         finish_node(true_parent.hachette_last_added, data);
     	if (true_parent.hachette_deleted)
     	    node.hachette_deleted = true;
     	if (node.hachette_deleted)
     	    continue;
     	if (!true_parent.hachette_started)
     	    start_serializing_node(true_parent, data)
     	if (!node.hachette_ignore)
     	    sanitize_node(node, data);
     	if (node.hachette_deleted)
     	    continue;
     	if (data.node_eater)
     	    data.node_eater(node, true_parent);
         true_parent.hachette_last_added = node;
     	finish_serializing_node(true_parent.hachette_last_added, data);
     	true_parent.hachette_last_added = node;
+        }
+    }
     function handle_mutation(mutations, data)
-...
          * node.parentElement. The former is the correct one.
          */
         for (const mutation of mutations) {
     	for (const node of mutation.addedNodes)
     	for (const node of mutation.addedNodes) {
     	    /* Check for nodes added by ourselves. */
     	    if (mutation.target.hachette_ignore)
     		node.hachette_ignore = true;
     	    if (node.hachette_ignore)
     		continue;
     	    handle_added_node(node, mutation.target, data);
+    	}
+        }
+    }
     function finish_processing(data)
+    {
         process_initial_nodes(undefined, data);
         /*
          * The `finisher' callback should be called, if provided. Normally our
          * function that performs the last write does it after seeing `finished'
          * set to `true'. If, however, there's no `writer' callback and hence no
          * writes to perform, we need to take care of calling `finisher' here.
          */
         data.finished = true;
         handle_mutation(data.observer.takeRecords(), data);
         finish_node(data.html_element, data);
         data.observer.disconnect();
         /*
          * Additional whitespace that was after `</body>' gets appended to body.
          * Although it's a minor issue, it is not what we want. There's no way to
          * tell exactly what part of that whitespace was after `</body>' and what
          * was before, so we just replace it with a single newline which looks good
          * when printed.
          */
         const body = data.html_root.lastChild;
         const text = body && body.tagName === "BODY" && body.lastChild;
         if (text && text.nodeName === "#text") {
     	const new_content = /^([\S\s]*\S)?\s*$/.exec(text.textContent)[1] || "";
     	text.textContent = new_content + "\n";
+        }
         finish_serializing_node(data.html_root, data);
         if (!data.writer && data.finisher)
     	setTimeout(data.finisher, 0);
+    }
     function modify_on_the_fly(html_element, policy, consumers)
     /*
      * This function sanitizes `html_root' according to `policy'. It is capable of
      * working on an HTML document that is being written to, sanitizing new nodes
      * as they appear.
+     *
      * `consumers' object may contain 3 optional callback functions: `writer',
      * `node_eater' and `finisher'. The first one, if present, is called with chunks
      * of reconstructed HTML code. The second one, if present, gets called for every
      * added node with 2 arguments: that node and its parent. The third one is
      * called at the end, after all processing has been done.
+     *
      * `modify_on_the_fly()' returns a callback that should be called (with no
      * arguments) once the document of html_root has finished being written to.
      * Unfortunately, due to specifics behavior of document that has had its
      * documentElement replaced
      */
     function modify_on_the_fly(html_root, policy, consumers)
+    {
         const uniq = gen_nonce();
         const uniq_reg = new RegExp(`^(.*)${uniq}(.*)$`);
         const data = {policy, html_element, uniq, uniq_reg, ...consumers};
         start_node(data.html_element, data);
         const uniq_reg = new RegExp(`^([\\s\\S]*)${uniq}([\\s\\S]*)$`);
         const data = {policy, html_root, uniq, uniq_reg, chunks: [], new_added: []};
         Object.assign(data, consumers);
         var observer = new MutationObserver(m => handle_mutation(m, data));
         observer.observe(data.html_element, {
         observer.observe(data.html_root, {
          	attributes: true,
     	childList: true,
     	subtree: true

 jahoti <jahoti@tilde.team>
     License: GPL-3+-javascript or Alicense-1.0
     Files: background/stream_filter.js
     Copyright: 2018 Giorgio Maone <giorgio@maone.net>
 Wojtek Kosior <koszko@koszko.org>
     License: GPL-3+-javascript or Alicense-1.0, and GPL-3+
     Comment: Code by Wojtek is dual-licensed under GPL-3+-javascript and
      Alicense-1.0. Giorgio's code is under GPL-3+.
     Files: *.html README.txt copyright
     Copyright: 2021 Wojtek Kosior <koszko@koszko.org>
     License: GPL-3+ or Alicense-1.0 or CC-BY-SA-4.0

Also available in: Unified diff

Project

General

Profile

Haketilo

Revision 6b53d6c8

Added by koszko about 2 years ago