Revision 6b53d6c8
Added by koszko about 2 years ago
| background/main.js | ||
|---|---|---|
| 11 | 11 |
* IMPORT get_storage |
| 12 | 12 |
* IMPORT start_storage_server |
| 13 | 13 |
* IMPORT start_page_actions_server |
| 14 |
* IMPORT start_policy_injector |
|
| 15 | 14 |
* IMPORT browser |
| 15 |
* IMPORT is_privileged_url |
|
| 16 |
* IMPORT query_best |
|
| 17 |
* IMPORT gen_nonce |
|
| 18 |
* IMPORT inject_csp_headers |
|
| 19 |
* IMPORT apply_stream_filter |
|
| 20 |
* IMPORT is_chrome |
|
| 16 | 21 |
* IMPORTS_END |
| 17 | 22 |
*/ |
| 18 | 23 |
|
| 19 | 24 |
start_storage_server(); |
| 20 | 25 |
start_page_actions_server(); |
| 21 |
start_policy_injector(); |
|
| 22 | 26 |
|
| 23 | 27 |
async function init_ext(install_details) |
| 24 | 28 |
{
|
| 25 |
console.log("details:", install_details);
|
|
| 26 | 29 |
if (install_details.reason != "install") |
| 27 | 30 |
return; |
| 28 | 31 |
|
| ... | ... | |
| 44 | 47 |
|
| 45 | 48 |
browser.runtime.onInstalled.addListener(init_ext); |
| 46 | 49 |
|
| 47 |
console.log("hello, hachette");
|
|
| 50 |
|
|
| 51 |
let storage; |
|
| 52 |
|
|
| 53 |
/*
 * Listener for `webRequest.onHeadersReceived'. Computes the policy for the
 * requested URL, lets inject_csp_headers() rewrite the response headers and,
 * where applicable, attaches a stream filter to the response body.
 *
 * Returns nothing for privileged URLs; otherwise returns an object with the
 * (possibly modified) `responseHeaders'.
 */
function on_headers_received(details)
{
    const url = details.url;
    if (is_privileged_url(url))
        return;

    /* Only the settings part of the query result is needed here. */
    const [, settings] = query_best(storage, url);
    const allow = !!(settings && settings.allow);
    const nonce = gen_nonce();
    const policy = {allow, url, nonce};

    let headers = details.responseHeaders;

    /*
     * Responses served as attachments are not stream-filtered. The parameter
     * list after `attachment' is optional, so a bare `attachment' value has to
     * match as well.
     */
    let skip = headers.some(
        header => header.name.toLowerCase().trim() === "content-disposition" &&
            /^\s*attachment\s*(;.*)?$/i.test(header.value)
    );

    headers = inject_csp_headers(details, headers, policy);

    /* Responses with a 3xx status code are not stream-filtered either. */
    skip = skip || (details.statusCode >= 300 && details.statusCode < 400);
    if (!skip) {
        /* Check for API availability. */
        if (browser.webRequest.filterResponseData)
            headers = apply_stream_filter(details, headers, policy);
    }

    return {responseHeaders: headers};
}
|
| 83 |
|
|
| 84 |
/*
 * Load the storage object and register on_headers_received() as a listener
 * for main frame and subframe responses of all URLs.
 */
async function start_webRequest_operations()
{
    storage = await get_storage();

    const request_filter = {
        urls: ["<all_urls>"],
        types: ["main_frame", "sub_frame"]
    };

    /* Under Chrome the additional "extraHeaders" option is requested. */
    const extra_opts = is_chrome ?
        ["blocking", "responseHeaders", "extraHeaders"] :
        ["blocking", "responseHeaders"];

    browser.webRequest.onHeadersReceived.addListener(
        on_headers_received,
        request_filter,
        extra_opts
    );
}
|
| 98 |
|
|
| 99 |
start_webRequest_operations(); |
|
| background/policy_injector.js | ||
|---|---|---|
| 8 | 8 |
|
| 9 | 9 |
/* |
| 10 | 10 |
* IMPORTS_START |
| 11 |
* IMPORT get_storage |
|
| 12 |
* IMPORT browser |
|
| 13 |
* IMPORT is_chrome |
|
| 14 |
* IMPORT gen_nonce |
|
| 15 |
* IMPORT is_privileged_url |
|
| 16 | 11 |
* IMPORT sign_data |
| 17 | 12 |
* IMPORT extract_signed |
| 18 |
* IMPORT query_best |
|
| 19 | 13 |
* IMPORT sanitize_csp_header |
| 20 | 14 |
* IMPORT csp_rule |
| 21 | 15 |
* IMPORT is_csp_header_name |
| 22 | 16 |
* IMPORTS_END |
| 23 | 17 |
*/ |
| 24 | 18 |
|
| 25 |
var storage; |
|
| 26 |
|
|
| 27 |
function headers_inject(details) |
|
| 19 |
function inject_csp_headers(details, headers, policy) |
|
| 28 | 20 |
{
|
| 29 | 21 |
const url = details.url; |
| 30 |
if (is_privileged_url(url)) |
|
| 31 |
return; |
|
| 32 |
|
|
| 33 |
const [pattern, settings] = query_best(storage, url); |
|
| 34 |
const allow = !!(settings && settings.allow); |
|
| 35 |
const nonce = gen_nonce(); |
|
| 36 | 22 |
|
| 37 | 23 |
let orig_csp_headers; |
| 38 | 24 |
let old_signature; |
| 39 | 25 |
let hachette_header; |
| 40 |
let headers = details.responseHeaders; |
|
| 41 | 26 |
|
| 42 | 27 |
for (const header of headers.filter(h => h.name === "x-hachette")) {
|
| 43 | 28 |
const match = /^([^%])(%.*)$/.exec(header.value); |
| ... | ... | |
| 50 | 35 |
|
| 51 | 36 |
/* Confirmed- it's the originals, smuggled in! */ |
| 52 | 37 |
orig_csp_headers = old_data.csp_headers; |
| 53 |
old_signature = old_data.policy_signature;
|
|
| 38 |
old_signature = old_data.policy_sig; |
|
| 54 | 39 |
|
| 55 | 40 |
hachette_header = header; |
| 56 | 41 |
break; |
| ... | ... | |
| 65 | 50 |
headers.filter(h => is_csp_header_name(h.name)); |
| 66 | 51 |
|
| 67 | 52 |
/* When blocking remove report-only CSP headers that snitch on us. */ |
| 68 |
headers = headers.filter(h => !is_csp_header_name(h.name, !allow)); |
|
| 53 |
headers = headers.filter(h => !is_csp_header_name(h.name, !policy.allow));
|
|
| 69 | 54 |
|
| 70 | 55 |
if (old_signature) |
| 71 | 56 |
headers = headers.filter(h => h.name.search(old_signature) === -1); |
| 72 | 57 |
|
| 73 |
const policy_object = {allow, nonce, url};
|
|
| 74 |
const sanitizer = h => sanitize_csp_header(h, policy_object); |
|
| 58 |
const sanitizer = h => sanitize_csp_header(h, policy); |
|
| 75 | 59 |
headers.push(...orig_csp_headers.map(sanitizer)); |
| 76 | 60 |
|
| 77 |
const policy = encodeURIComponent(JSON.stringify(policy_object));
|
|
| 78 |
const policy_signature = sign_data(policy, new Date());
|
|
| 61 |
const policy_str = encodeURIComponent(JSON.stringify(policy));
|
|
| 62 |
const policy_sig = sign_data(policy_str, new Date());
|
|
| 79 | 63 |
const later_30sec = new Date(new Date().getTime() + 30000).toGMTString(); |
| 80 | 64 |
headers.push({
|
| 81 | 65 |
name: "Set-Cookie", |
| 82 |
value: `hachette-${policy_signature}=${policy}; Expires=${later_30sec};`
|
|
| 66 |
value: `hachette-${policy_sig}=${policy_str}; Expires=${later_30sec};`
|
|
| 83 | 67 |
}); |
| 84 | 68 |
|
| 85 | 69 |
/* |
| ... | ... | |
| 87 | 71 |
* These are signed with a time of 0, as it's not clear there is a limit on |
| 88 | 72 |
* how long Firefox might retain headers in the cache. |
| 89 | 73 |
*/ |
| 90 |
let hachette_data = {csp_headers: orig_csp_headers, policy_signature, url};
|
|
| 74 |
let hachette_data = {csp_headers: orig_csp_headers, policy_sig, url};
|
|
| 91 | 75 |
hachette_data = encodeURIComponent(JSON.stringify(hachette_data)); |
| 92 | 76 |
hachette_header.value = sign_data(hachette_data, 0) + hachette_data; |
| 93 | 77 |
|
| 94 | 78 |
/* To ensure there is a CSP header if required */ |
| 95 |
if (!allow) |
|
| 96 |
headers.push({name: "content-security-policy", value: csp_rule(nonce)});
|
|
| 79 |
if (!policy.allow) |
|
| 80 |
headers.push({
|
|
| 81 |
name: "content-security-policy", |
|
| 82 |
value: csp_rule(policy.nonce) |
|
| 83 |
}); |
|
| 97 | 84 |
|
| 98 |
return {responseHeaders: headers};
|
|
| 99 |
} |
|
| 100 |
|
|
| 101 |
async function start_policy_injector() |
|
| 102 |
{
|
|
| 103 |
storage = await get_storage(); |
|
| 104 |
|
|
| 105 |
let extra_opts = ["blocking", "responseHeaders"]; |
|
| 106 |
if (is_chrome) |
|
| 107 |
extra_opts.push("extraHeaders");
|
|
| 108 |
|
|
| 109 |
browser.webRequest.onHeadersReceived.addListener( |
|
| 110 |
headers_inject, |
|
| 111 |
{
|
|
| 112 |
urls: ["<all_urls>"], |
|
| 113 |
types: ["main_frame", "sub_frame"] |
|
| 114 |
}, |
|
| 115 |
extra_opts |
|
| 116 |
); |
|
| 85 |
return headers; |
|
| 117 | 86 |
} |
| 118 | 87 |
|
| 119 | 88 |
/* |
| 120 | 89 |
* EXPORTS_START |
| 121 |
* EXPORT start_policy_injector
|
|
| 90 |
* EXPORT inject_csp_headers
|
|
| 122 | 91 |
* EXPORTS_END |
| 123 | 92 |
*/ |
| background/stream_filter.js | ||
|---|---|---|
| 1 |
/** |
|
| 2 |
* Hachette modifying a web page using the StreamFilter API |
|
| 3 |
* |
|
| 4 |
* Copyright (C) 2018 Giorgio Maone <giorgio@maone.net> |
|
| 5 |
* Copyright (C) 2021 Wojtek Kosior |
|
| 6 |
* Redistribution terms are gathered in the `copyright' file. |
|
| 7 |
* |
|
| 8 |
* Derived from `bg/ResponseProcessor.js' and `bg/ResponseMetaData.js' |
|
| 9 |
* in LibreJS. |
|
| 10 |
*/ |
|
| 11 |
|
|
| 12 |
/* |
|
| 13 |
* IMPORTS_START |
|
| 14 |
* IMPORT browser |
|
| 15 |
* IMPORTS_END |
|
| 16 |
*/ |
|
| 17 |
|
|
| 18 |
/*
 * Return `charset' unchanged if TextDecoder accepts it as an encoding label,
 * `undefined' otherwise.
 */
function validate_encoding(charset)
{
    try {
        /* The constructor throws for labels it does not recognize. */
        new TextDecoder(charset);
        return charset;
    } catch(e) {
        return undefined;
    }
}
|
| 27 |
|
|
| 28 |
/*
 * Tell whether `header' (an object with a `name' property) is a Content-Type
 * header. The comparison ignores case and surrounding whitespace.
 */
function is_content_type_header(header)
{
    return header.name.toLowerCase().trim() === "content-type";
}
|
| 32 |
|
|
| 33 |
const charset_reg = /;\s*charset\s*=\s*([\w-]+)/i; |
|
| 34 |
|
|
| 35 |
/*
 * Build a properties object from the Content-Type headers in `headers'.
 * `detected_charset' is set to the first charset parameter that passes
 * validate_encoding(); `html' is set to true if any Content-Type value
 * mentions "html".
 */
function properties_from_headers(headers)
{
    const properties = {};

    for (const header of headers.filter(is_content_type_header)) {
        /* `match' is null when the header carries no charset parameter. */
        const match = charset_reg.exec(header.value);
        if (match && !properties.detected_charset &&
            validate_encoding(match[1]))
            properties.detected_charset = match[1];

        if (/html/i.test(header.value))
            properties.html = true;
    }

    return properties;
}
|
| 50 |
|
|
| 51 |
const UTF8_BOM = [0xef, 0xbb, 0xbf]; |
|
| 52 |
const BOMs = [ |
|
| 53 |
[UTF8_BOM, "utf-8"], |
|
| 54 |
[[0xfe, 0xff], "utf-16be"], |
|
| 55 |
[[0xff, 0xfe], "utf-16le"] |
|
| 56 |
]; |
|
| 57 |
|
|
| 58 |
/*
 * Return the charset name associated with the byte order mark that `data'
 * starts with, or an empty string when none of the known BOMs matches.
 */
function charset_from_BOM(data)
{
    const starts_with = BOM => BOM.every((byte, i) => byte === data[i]);
    const found = BOMs.find(([BOM]) => starts_with(BOM));

    return found ? found[1] : "";
}
|
| 67 |
|
|
| 68 |
const charset_attrs = |
|
| 69 |
['charset', 'http-equiv="content-type"', 'content*="charset"']; |
|
| 70 |
const charset_meta_selector = |
|
| 71 |
charset_attrs.map(a => `head>meta[${a}]`).join(", ");
|
|
| 72 |
|
|
| 73 |
/*
 * Look through the `<meta>' tags of `doc' selected by `charset_meta_selector'
 * and return the first charset that passes validate_encoding(). For each tag
 * the `charset' attribute is tried first, then a charset parameter inside the
 * `content' attribute. Returns `undefined' when nothing usable is found.
 */
function charset_from_meta_tags(doc)
{
    for (const meta of doc.querySelectorAll(charset_meta_selector)) {
        const from_attr = meta.getAttribute("charset");
        if (from_attr && validate_encoding(from_attr))
            return from_attr;

        const found = charset_reg.exec(meta.getAttribute("content"));
        if (!found)
            continue;

        const from_content = found[1];
        if (validate_encoding(from_content))
            return from_content;
    }

    return undefined;
}
|
| 87 |
|
|
| 88 |
/*
 * Choose a TextDecoder for a response body based on its first chunk `data'
 * (a Uint8Array) and on `properties' obtained from the response headers.
 *
 * A charset indicated by a byte order mark or by the headers takes
 * precedence. Content is only sniffed when neither of those gives a usable
 * charset. May set `properties.html' as a side effect of sniffing.
 */
function create_decoder(properties, data)
{
    const charset = charset_from_BOM(data) || properties.detected_charset;
    if (charset) {
        try {
            return new TextDecoder(charset);
        } catch(e) {
            /* Label not supported by TextDecoder, fall back to sniffing. */
            console.debug("Unsupported charset, sniffing in content instead.",
                          charset);
        }
    }

    if (data.indexOf(0) !== -1) {
        console.debug("Warning: zeroes in bytestream, probable cached encoding mismatch. Trying to decode it as UTF-16.",
                      properties);
        return new TextDecoder("utf-16be");
    }

    /* Missing HTTP charset, sniffing in content... */
    /*
     * TODO: I recall there is some standard saying how early in the doc the
     * charset has to be specified. We could process just this part of data.
     */
    const text = new TextDecoder("latin1").decode(data, {stream: true});
    properties.html = properties.html || /html/i.test(text);

    let sniffed;
    if (properties.html) {
        const tmp_doc = new DOMParser().parseFromString(text, "text/html");
        sniffed = charset_from_meta_tags(tmp_doc);
    }

    return new TextDecoder(sniffed || "latin1");
}
|
| 112 |
|
|
| 113 |
/*
 * Handle one chunk of response data delivered to the stream filter stored in
 * `properties.filter'. The chunk is decoded, re-encoded as UTF-8 and written
 * back to the filter. On the first chunk a decoder is chosen, a UTF-8 BOM is
 * emitted and a dummy `<script>' is injected after the optional doctype.
 */
function filter_data(properties, event)
{
    const data = new Uint8Array(event.data);
    let first_chunk = false;
    if (!properties.decoder) {
        first_chunk = true;
        properties.decoder = create_decoder(properties, data);
        properties.encoder = new TextEncoder();
        /* Force UTF-8, this is the only encoding we can produce. */
        properties.filter.write(new Uint8Array(UTF8_BOM));
    }

    /*
     * Decode in streaming mode so that a multi-byte sequence split between 2
     * chunks is carried over to the next call instead of being turned into
     * replacement characters.
     */
    let decoded = properties.decoder.decode(data, {stream: true});

    if (first_chunk) {
        /*
         * HAX! Our content scripts that execute at `document_start' will always
         * run before the first script in the document, but under Mozilla some
         * `<meta>' tags might already be loaded at that point. Here we inject a
         * dummy `<script>' at the beginning (before any `<meta>' tags) that
         * will force `document_start' to happen earlier. This way our content
         * scripts will be able to sanitize `http-equiv' tags with CSP rules
         * that would otherwise stop our injected scripts from executing.
         */
        const dummy_script =
              `<script data-hachette-deleteme="${properties.policy.nonce}" nonce="${properties.policy.nonce}">null</script>`;
        const doctype_decl = /^(\s*<!doctype[^<>"']*>)?/i.exec(decoded)[0];
        decoded = doctype_decl + dummy_script +
            decoded.substring(doctype_decl.length);
    }

    properties.filter.write(properties.encoder.encode(decoded));

    /*
     * Data that is already UTF-8 needs no re-encoding, so the filter is
     * disconnected once the first chunk has been written.
     */
    if (properties.decoder.encoding === "utf-8")
        properties.filter.disconnect();
}
|
| 149 |
|
|
| 150 |
/*
 * Attach a StreamFilter to the response identified by `details.requestId'
 * unless `policy.allow' is set. Data passing through the filter is handled by
 * filter_data(). `headers' are returned unmodified in either case.
 */
function apply_stream_filter(details, headers, policy)
{
    if (!policy.allow) {
        const properties = properties_from_headers(headers);
        properties.policy = policy;

        const filter =
              browser.webRequest.filterResponseData(details.requestId);
        properties.filter = filter;

        filter.ondata = event => filter_data(properties, event);
        filter.onstop = () => properties.filter.close();
    }

    /*
     * In the future we might consider modifying the headers that specify
     * encoding. For now we are not yet doing it, though. However, we
     * prepend the data with UTF-8 BOM which should be enough.
     */
    return headers;
}
|
| 171 |
|
|
| 172 |
/* |
|
| 173 |
* EXPORTS_START |
|
| 174 |
* EXPORT apply_stream_filter |
|
| 175 |
* EXPORTS_END |
|
| 176 |
*/ |
|
| content/main.js | ||
|---|---|---|
| 47 | 47 |
|
| 48 | 48 |
handle_page_actions(policy.nonce); |
| 49 | 49 |
|
| 50 |
if (!policy.allow && is_mozilla) |
|
| 51 |
addEventListener('beforescriptexecute', mozilla_suppress_scripts, true);
|
|
| 52 |
|
|
| 53 |
if (!policy.allow && is_chrome) {
|
|
| 50 |
if (!policy.allow) {
|
|
| 54 | 51 |
const old_html = document.documentElement; |
| 55 | 52 |
const new_html = document.createElement("html");
|
| 56 | 53 |
old_html.replaceWith(new_html); |
| content/sanitize_document.js | ||
|---|---|---|
| 43 | 43 |
node.removeAttribute(attr); |
| 44 | 44 |
} |
| 45 | 45 |
|
| 46 |
function sanitize_script(script, policy)
|
|
| 46 |
function sanitize_script(script, data)
|
|
| 47 | 47 |
{
|
| 48 |
if (policy.allow) |
|
| 48 |
if (script.getAttribute("data-hachette-deleteme") === data.policy.nonce) {
|
|
| 49 |
script.remove(); |
|
| 50 |
script.hachette_deleted = true; |
|
| 51 |
script.hachette_ignore = true; |
|
| 52 |
} |
|
| 53 |
|
|
| 54 |
if (data.policy.allow) |
|
| 49 | 55 |
return; |
| 50 | 56 |
|
| 51 | 57 |
block_attribute(script, "type"); |
| 52 | 58 |
script.setAttribute("type", "application/json");
|
| 53 | 59 |
} |
| 54 | 60 |
|
| 55 |
function inject_csp(head, policy)
|
|
| 61 |
function inject_csp(head, data)
|
|
| 56 | 62 |
{
|
| 57 |
if (policy.allow) |
|
| 63 |
if (data.policy.allow)
|
|
| 58 | 64 |
return; |
| 59 | 65 |
|
| 60 | 66 |
const meta = document.createElement("meta");
|
| 61 | 67 |
meta.setAttribute("http-equiv", "Content-Security-Policy");
|
| 62 |
meta.setAttribute("content", csp_rule(policy.nonce));
|
|
| 68 |
meta.setAttribute("content", csp_rule(data.policy.nonce));
|
|
| 63 | 69 |
meta.hachette_ignore = true; |
| 64 | 70 |
head.prepend(meta); |
| 71 |
|
|
| 72 |
data.new_added.unshift([meta, head]); |
|
| 65 | 73 |
} |
| 66 | 74 |
|
| 67 |
function sanitize_http_equiv_csp_rule(meta, policy)
|
|
| 75 |
function sanitize_http_equiv_csp_rule(meta, data)
|
|
| 68 | 76 |
{
|
| 69 | 77 |
const http_equiv = meta.getAttribute("http-equiv");
|
| 78 |
const value = meta.content; |
|
| 70 | 79 |
|
| 71 |
if (!is_csp_header_name(http_equiv, !policy.allow))
|
|
| 80 |
if (!value || !is_csp_header_name(http_equiv, !data.policy.allow))
|
|
| 72 | 81 |
return; |
| 73 | 82 |
|
| 74 |
if (policy.allow || is_csp_header_name(http_equiv, false)) {
|
|
| 75 |
let value = meta.getAttribute("content");
|
|
| 76 |
block_attribute(meta, "content"); |
|
| 77 |
if (value) {
|
|
| 78 |
value = sanitize_csp_header({value}, policy).value;
|
|
| 79 |
meta.setAttribute("content", value);
|
|
| 80 |
} |
|
| 81 |
return; |
|
| 82 |
} |
|
| 83 |
block_attribute(meta, "content"); |
|
| 83 | 84 |
|
| 84 |
block_attribute(meta, "http-equiv"); |
|
| 85 |
if (data.policy.allow || is_csp_header_name(http_equiv, false)) |
|
| 86 |
meta.content = sanitize_csp_header({value}, data.policy).value;
|
|
| 85 | 87 |
} |
| 86 | 88 |
|
| 87 |
function sanitize_node(node, policy)
|
|
| 89 |
function sanitize_node(node, data)
|
|
| 88 | 90 |
{
|
| 89 | 91 |
if (node.tagName === "SCRIPT") |
| 90 |
sanitize_script(node, policy);
|
|
| 92 |
sanitize_script(node, data);
|
|
| 91 | 93 |
|
| 92 | 94 |
if (node.tagName === "HEAD") |
| 93 |
inject_csp(node, policy);
|
|
| 95 |
inject_csp(node, data);
|
|
| 94 | 96 |
|
| 95 | 97 |
if (node.tagName === "META") |
| 96 |
sanitize_http_equiv_csp_rule(node, policy); |
|
| 98 |
sanitize_http_equiv_csp_rule(node, data); |
|
| 99 |
|
|
| 100 |
if (!data.policy.allow) |
|
| 101 |
sanitize_attributes(node, data); |
|
| 102 |
} |
|
| 97 | 103 |
|
| 98 |
if (!policy.allow) |
|
| 99 |
sanitize_attributes(node, policy); |
|
| 104 |
/* |
|
| 105 |
* Instead of calling writer directly with multiple small chunks of reconstructed
|
| 106 |
* HTML code, we utilize `setTimeout()' to only have it called once, |
|
| 107 |
* asynchronously. |
|
| 108 |
*/ |
|
| 109 |
/*
 * Pass all chunks gathered in `data.chunks' to `data.writer' as a single
 * string and empty the queue. When processing has been marked as finished,
 * also call `data.finisher', if present.
 */
function do_write_callback(data)
{
    const html = data.chunks.join("");
    data.writer(html);
    data.chunks = [];

    if (!data.finished || !data.finisher)
        return;

    data.finisher();
}
|
| 117 |
|
|
| 118 |
/*
 * Queue `chunk' in `data.chunks' and (re)schedule a single asynchronous call
 * to do_write_callback(), replacing any call scheduled earlier.
 */
function do_write(chunk, data)
{
    const flush = () => do_write_callback(data);

    data.chunks.push(chunk);
    clearTimeout(data.write_timeout);
    data.write_timeout = setTimeout(flush, 0);
}
| 101 | 124 |
|
| 102 | 125 |
const serializer = new XMLSerializer(); |
| 103 | 126 |
|
| 104 |
function start_node(node, data) |
|
| 127 |
function start_serializing_node(node, data)
|
|
| 105 | 128 |
{
|
| 129 |
node.hachette_started = true; |
|
| 130 |
|
|
| 106 | 131 |
if (!data.writer) |
| 107 | 132 |
return; |
| 108 | 133 |
|
| 109 |
node.hachette_started = true; |
|
| 110 | 134 |
const clone = node.cloneNode(false); |
| 111 | 135 |
clone.textContent = data.uniq; |
| 112 |
data.writer(data.uniq_reg.exec(clone.outerHTML)[1]);
|
|
| 136 |
do_write(data.uniq_reg.exec(clone.outerHTML)[1], data);
|
|
| 113 | 137 |
} |
| 114 | 138 |
|
| 115 |
function finish_node(node, data) |
|
| 139 |
function finish_serializing_node(node, data)
|
|
| 116 | 140 |
{
|
| 117 | 141 |
const nodes_to_process = [node]; |
| 118 | 142 |
|
| ... | ... | |
| 127 | 151 |
while (nodes_to_process.length > 0) {
|
| 128 | 152 |
const node = nodes_to_process.pop(); |
| 129 | 153 |
node.remove(); |
| 154 |
node.hachette_ignore = true; |
|
| 130 | 155 |
|
| 131 | 156 |
if (!data.writer) |
| 132 | 157 |
continue; |
| 133 | 158 |
|
| 134 | 159 |
if (node.hachette_started) {
|
| 135 | 160 |
node.textContent = data.uniq; |
| 136 |
data.writer(data.uniq_reg.exec(node.outerHTML)[2]); |
|
| 161 |
do_write(data.uniq_reg.exec(node.outerHTML)[2], data); |
|
| 162 |
continue; |
|
| 163 |
} |
|
| 164 |
|
|
| 165 |
do_write(node.outerHTML || serializer.serializeToString(node), data); |
|
| 166 |
} |
|
| 167 |
} |
|
| 168 |
|
|
| 169 |
function process_initial_nodes(node, data) |
|
| 170 |
{
|
|
| 171 |
if (data.processed_initial_nodes) |
|
| 172 |
return; |
|
| 173 |
|
|
| 174 |
data.processed_initial_nodes = true; |
|
| 175 |
|
|
| 176 |
start_serializing_node(data.html_root, data); |
|
| 177 |
|
|
| 178 |
const new_added = []; |
|
| 179 |
const nodes_to_process = [data.html_root]; |
|
| 180 |
|
|
| 181 |
let i = 0; |
|
| 182 |
while (nodes_to_process.length > 0) {
|
|
| 183 |
let current = nodes_to_process.shift(); |
|
| 184 |
|
|
| 185 |
if (current.firstChild) {
|
|
| 186 |
if (current.firstChild === node) |
|
| 187 |
break; |
|
| 188 |
nodes_to_process.unshift(current.firstChild, current); |
|
| 189 |
new_added.push([current.firstChild, current]); |
|
| 137 | 190 |
continue; |
| 138 | 191 |
} |
| 139 | 192 |
|
| 140 |
data.writer(node.outerHTML || serializer.serializeToString(node)); |
|
| 193 |
while (current && !current.nextSibling) |
|
| 194 |
current = nodes_to_process.shift(); |
|
| 195 |
|
|
| 196 |
if (!current || current.nextSibling === node) |
|
| 197 |
break; |
|
| 198 |
|
|
| 199 |
nodes_to_process.unshift(current.nextSibling); |
|
| 200 |
new_added.push([current.nextSibling, nodes_to_process[1]]); |
|
| 141 | 201 |
} |
| 202 |
|
|
| 203 |
data.new_added.unshift(...new_added); |
|
| 142 | 204 |
} |
| 143 | 205 |
|
| 144 | 206 |
/* |
| 145 | 207 |
* Important! Due to some weirdness node.parentElement is not always correct |
| 146 |
* under Chromium. Track node relations manually. |
|
| 208 |
* in MutationRecords under Chromium. Track node relations manually.
|
|
| 147 | 209 |
*/ |
| 148 | 210 |
function handle_added_node(node, true_parent, data) |
| 149 | 211 |
{
|
| 150 |
if (node.hachette_ignore || true_parent.hachette_ignore) |
|
| 151 |
return; |
|
| 212 |
/* |
|
| 213 |
* Functions we call here might cause new nodes to be injected or found |
|
| 214 |
* that require processing before the one we got in function argument. |
|
| 215 |
* We rely on those functions putting the node(s) they create/find at the |
|
| 216 |
* very beginning of the `new_added' queue and (for created nodes) setting |
|
| 217 |
* their `hachette_ignore' property, based on which their MutationRecord |
|
| 218 |
* will not be processed. A function can also mark a node already in the |
|
| 219 |
* `new_added' queue as not eligible for processing by setting its |
|
| 220 |
* `hachette_deleted' property. |
|
| 221 |
*/ |
|
| 152 | 222 |
|
| 153 |
if (!true_parent.hachette_started) |
|
| 154 |
start_node(true_parent, data) |
|
| 223 |
process_initial_nodes(node, data); |
|
| 155 | 224 |
|
| 156 |
sanitize_node(node, data.policy);
|
|
| 225 |
data.new_added.push([node, true_parent]);
|
|
| 157 | 226 |
|
| 158 |
if (data.node_eater)
|
|
| 159 |
data.node_eater(node, true_parent);
|
|
| 227 |
while (data.new_added.length > 0) {
|
|
| 228 |
[node, true_parent] = data.new_added.shift();
|
|
| 160 | 229 |
|
| 161 |
finish_node(true_parent.hachette_last_added, data); |
|
| 230 |
if (true_parent.hachette_deleted) |
|
| 231 |
node.hachette_deleted = true; |
|
| 232 |
if (node.hachette_deleted) |
|
| 233 |
continue; |
|
| 234 |
|
|
| 235 |
if (!true_parent.hachette_started) |
|
| 236 |
start_serializing_node(true_parent, data) |
|
| 237 |
|
|
| 238 |
if (!node.hachette_ignore) |
|
| 239 |
sanitize_node(node, data); |
|
| 240 |
|
|
| 241 |
if (node.hachette_deleted) |
|
| 242 |
continue; |
|
| 243 |
|
|
| 244 |
if (data.node_eater) |
|
| 245 |
data.node_eater(node, true_parent); |
|
| 162 | 246 |
|
| 163 |
true_parent.hachette_last_added = node; |
|
| 247 |
finish_serializing_node(true_parent.hachette_last_added, data); |
|
| 248 |
|
|
| 249 |
true_parent.hachette_last_added = node; |
|
| 250 |
} |
|
| 164 | 251 |
} |
| 165 | 252 |
|
| 166 | 253 |
function handle_mutation(mutations, data) |
| ... | ... | |
| 170 | 257 |
* node.parentElement. The former is the correct one. |
| 171 | 258 |
*/ |
| 172 | 259 |
for (const mutation of mutations) {
|
| 173 |
for (const node of mutation.addedNodes) |
|
| 260 |
for (const node of mutation.addedNodes) {
|
|
| 261 |
/* Check for nodes added by ourselves. */ |
|
| 262 |
if (mutation.target.hachette_ignore) |
|
| 263 |
node.hachette_ignore = true; |
|
| 264 |
if (node.hachette_ignore) |
|
| 265 |
continue; |
|
| 266 |
|
|
| 174 | 267 |
handle_added_node(node, mutation.target, data); |
| 268 |
} |
|
| 175 | 269 |
} |
| 176 | 270 |
} |
| 177 | 271 |
|
| 178 | 272 |
function finish_processing(data) |
| 179 | 273 |
{
|
| 274 |
process_initial_nodes(undefined, data); |
|
| 275 |
|
|
| 276 |
/* |
|
| 277 |
* The `finisher' callback should be called, if provided. Normally our |
|
| 278 |
* function that performs the last write does it after seeing `finished' |
|
| 279 |
* set to `true'. If, however, there's no `writer' callback and hence no |
|
| 280 |
* writes to perform, we need to take care of calling `finisher' here. |
|
| 281 |
*/ |
|
| 282 |
data.finished = true; |
|
| 180 | 283 |
handle_mutation(data.observer.takeRecords(), data); |
| 181 |
finish_node(data.html_element, data); |
|
| 182 | 284 |
data.observer.disconnect(); |
| 285 |
|
|
| 286 |
/* |
|
| 287 |
* Additional whitespace that was after `</body>' gets appended to body. |
|
| 288 |
* Although it's a minor issue, it is not what we want. There's no way to |
|
| 289 |
* tell exactly what part of that whitespace was after `</body>' and what |
|
| 290 |
* was before, so we just replace it with a single newline which looks good |
|
| 291 |
* when printed. |
|
| 292 |
*/ |
|
| 293 |
const body = data.html_root.lastChild; |
|
| 294 |
const text = body && body.tagName === "BODY" && body.lastChild; |
|
| 295 |
if (text && text.nodeName === "#text") {
|
|
| 296 |
const new_content = /^([\S\s]*\S)?\s*$/.exec(text.textContent)[1] || ""; |
|
| 297 |
text.textContent = new_content + "\n"; |
|
| 298 |
} |
|
| 299 |
|
|
| 300 |
finish_serializing_node(data.html_root, data); |
|
| 301 |
if (!data.writer && data.finisher) |
|
| 302 |
setTimeout(data.finisher, 0); |
|
| 183 | 303 |
} |
| 184 | 304 |
|
| 185 |
function modify_on_the_fly(html_element, policy, consumers) |
|
| 305 |
/* |
|
| 306 |
* This function sanitizes `html_root' according to `policy'. It is capable of |
|
| 307 |
* working on an HTML document that is being written to, sanitizing new nodes |
|
| 308 |
* as they appear. |
|
| 309 |
* |
|
| 310 |
* `consumers' object may contain 3 optional callback functions: `writer', |
|
| 311 |
* `node_eater' and `finisher'. The first one, if present, is called with chunks |
|
| 312 |
* of reconstructed HTML code. The second one, if present, gets called for every |
|
| 313 |
* added node with 2 arguments: that node and its parent. The third one is |
|
| 314 |
* called at the end, after all processing has been done. |
|
| 315 |
* |
|
| 316 |
* `modify_on_the_fly()' returns a callback that should be called (with no |
|
| 317 |
* arguments) once the document of html_root has finished being written to. |
|
| 318 |
* Unfortunately, due to specific behavior of a document that has had its |
|
| 319 |
* documentElement replaced |
|
| 320 |
*/ |
|
| 321 |
function modify_on_the_fly(html_root, policy, consumers) |
|
| 186 | 322 |
{
|
| 187 | 323 |
const uniq = gen_nonce(); |
| 188 |
const uniq_reg = new RegExp(`^(.*)${uniq}(.*)$`);
|
|
| 189 |
const data = {policy, html_element, uniq, uniq_reg, ...consumers};
|
|
| 190 |
|
|
| 191 |
start_node(data.html_element, data); |
|
| 324 |
const uniq_reg = new RegExp(`^([\\s\\S]*)${uniq}([\\s\\S]*)$`);
|
|
| 325 |
const data = {policy, html_root, uniq, uniq_reg, chunks: [], new_added: []};
|
|
| 326 |
Object.assign(data, consumers); |
|
| 192 | 327 |
|
| 193 | 328 |
var observer = new MutationObserver(m => handle_mutation(m, data)); |
| 194 |
observer.observe(data.html_element, {
|
|
| 329 |
observer.observe(data.html_root, {
|
|
| 195 | 330 |
attributes: true, |
| 196 | 331 |
childList: true, |
| 197 | 332 |
subtree: true |
| copyright | ||
|---|---|---|
| 20 | 20 |
2021 jahoti <jahoti@tilde.team> |
| 21 | 21 |
License: GPL-3+-javascript or Alicense-1.0 |
| 22 | 22 |
|
| 23 |
Files: background/stream_filter.js |
|
| 24 |
Copyright: 2018 Giorgio Maone <giorgio@maone.net> |
|
| 25 |
2021 Wojtek Kosior <koszko@koszko.org> |
|
| 26 |
License: GPL-3+-javascript or Alicense-1.0, and GPL-3+ |
|
| 27 |
Comment: Code by Wojtek is dual-licensed under GPL-3+-javascript and |
|
| 28 |
Alicense-1.0. Giorgio's code is under GPL-3+. |
|
| 29 |
|
|
| 23 | 30 |
Files: *.html README.txt copyright |
| 24 | 31 |
Copyright: 2021 Wojtek Kosior <koszko@koszko.org> |
| 25 | 32 |
License: GPL-3+ or Alicense-1.0 or CC-BY-SA-4.0 |
Also available in: Unified diff
use StreamFilter under Mozilla to prevent csp tags from blocking our injected scripts