Revision 6b53d6c8
Added by koszko about 2 years ago
background/main.js | ||
---|---|---|
11 | 11 |
* IMPORT get_storage |
12 | 12 |
* IMPORT start_storage_server |
13 | 13 |
* IMPORT start_page_actions_server |
14 |
* IMPORT start_policy_injector |
|
15 | 14 |
* IMPORT browser |
15 |
* IMPORT is_privileged_url |
|
16 |
* IMPORT query_best |
|
17 |
* IMPORT gen_nonce |
|
18 |
* IMPORT inject_csp_headers |
|
19 |
* IMPORT apply_stream_filter |
|
20 |
* IMPORT is_chrome |
|
16 | 21 |
* IMPORTS_END |
17 | 22 |
*/ |
18 | 23 |
|
19 | 24 |
start_storage_server(); |
20 | 25 |
start_page_actions_server(); |
21 |
start_policy_injector(); |
|
22 | 26 |
|
23 | 27 |
async function init_ext(install_details) |
24 | 28 |
{ |
25 |
console.log("details:", install_details); |
|
26 | 29 |
if (install_details.reason != "install") |
27 | 30 |
return; |
28 | 31 |
|
... | ... | |
44 | 47 |
|
45 | 48 |
browser.runtime.onInstalled.addListener(init_ext); |
46 | 49 |
|
47 |
console.log("hello, hachette"); |
|
50 |
|
|
51 |
let storage; |
|
52 |
|
|
53 |
/*
 * Blocking `onHeadersReceived' listener. Builds the policy object for the
 * requested URL, passes the response headers through `inject_csp_headers()'
 * and, unless the response is a download or a redirect, through
 * `apply_stream_filter()' as well.
 *
 * Returns `undefined' (headers left untouched) for privileged URLs and
 * `{responseHeaders}' with the processed header list otherwise.
 */
function on_headers_received(details)
{
    const url = details.url;
    if (is_privileged_url(url))
        return;

    /* The first element of the returned pair is not needed here. */
    const [, settings] = query_best(storage, url);
    const allow = !!(settings && settings.allow);
    const nonce = gen_nonce();
    const policy = {allow, url, nonce};

    let headers = details.responseHeaders;

    /*
     * Responses meant to be saved to disk must not have their body modified.
     * Parameters after `attachment' are optional, so a bare
     * `Content-Disposition: attachment' has to be recognized as well.
     */
    const is_attachment = headers.some(
        header => header.name.toLowerCase().trim() === "content-disposition" &&
            /^\s*attachment\s*(;.*)?$/i.test(header.value)
    );

    headers = inject_csp_headers(details, headers, policy);

    /* Redirect responses are not filtered either. */
    const is_redirect = details.statusCode >= 300 && details.statusCode < 400;

    if (!is_attachment && !is_redirect) {
        /* Check for API availability. */
        if (browser.webRequest.filterResponseData)
            headers = apply_stream_filter(details, headers, policy);
    }

    return {responseHeaders: headers};
}
|
83 |
|
|
84 |
/*
 * Fetch the storage object into the module-level `storage' variable and then
 * register `on_headers_received()' as a blocking listener for responses of
 * top-level documents and frames.
 */
async function start_webRequest_operations()
{
    storage = await get_storage();

    /* "extraHeaders" gets requested in addition when `is_chrome' is set. */
    const listener_opts = is_chrome ?
          ["blocking", "responseHeaders", "extraHeaders"] :
          ["blocking", "responseHeaders"];

    const request_filter = {
        urls: ["<all_urls>"],
        types: ["main_frame", "sub_frame"]
    };

    browser.webRequest.onHeadersReceived.addListener(
        on_headers_received,
        request_filter,
        listener_opts
    );
}
|
98 |
|
|
99 |
start_webRequest_operations(); |
background/policy_injector.js | ||
---|---|---|
8 | 8 |
|
9 | 9 |
/* |
10 | 10 |
* IMPORTS_START |
11 |
* IMPORT get_storage |
|
12 |
* IMPORT browser |
|
13 |
* IMPORT is_chrome |
|
14 |
* IMPORT gen_nonce |
|
15 |
* IMPORT is_privileged_url |
|
16 | 11 |
* IMPORT sign_data |
17 | 12 |
* IMPORT extract_signed |
18 |
* IMPORT query_best |
|
19 | 13 |
* IMPORT sanitize_csp_header |
20 | 14 |
* IMPORT csp_rule |
21 | 15 |
* IMPORT is_csp_header_name |
22 | 16 |
* IMPORTS_END |
23 | 17 |
*/ |
24 | 18 |
|
25 |
var storage; |
|
26 |
|
|
27 |
function headers_inject(details) |
|
19 |
function inject_csp_headers(details, headers, policy) |
|
28 | 20 |
{ |
29 | 21 |
const url = details.url; |
30 |
if (is_privileged_url(url)) |
|
31 |
return; |
|
32 |
|
|
33 |
const [pattern, settings] = query_best(storage, url); |
|
34 |
const allow = !!(settings && settings.allow); |
|
35 |
const nonce = gen_nonce(); |
|
36 | 22 |
|
37 | 23 |
let orig_csp_headers; |
38 | 24 |
let old_signature; |
39 | 25 |
let hachette_header; |
40 |
let headers = details.responseHeaders; |
|
41 | 26 |
|
42 | 27 |
for (const header of headers.filter(h => h.name === "x-hachette")) { |
43 | 28 |
const match = /^([^%])(%.*)$/.exec(header.value); |
... | ... | |
50 | 35 |
|
51 | 36 |
/* Confirmed- it's the originals, smuggled in! */ |
52 | 37 |
orig_csp_headers = old_data.csp_headers; |
53 |
old_signature = old_data.policy_signature;
|
|
38 |
old_signature = old_data.policy_sig; |
|
54 | 39 |
|
55 | 40 |
hachette_header = header; |
56 | 41 |
break; |
... | ... | |
65 | 50 |
headers.filter(h => is_csp_header_name(h.name)); |
66 | 51 |
|
67 | 52 |
/* When blocking remove report-only CSP headers that snitch on us. */ |
68 |
headers = headers.filter(h => !is_csp_header_name(h.name, !allow)); |
|
53 |
headers = headers.filter(h => !is_csp_header_name(h.name, !policy.allow));
|
|
69 | 54 |
|
70 | 55 |
if (old_signature) |
71 | 56 |
headers = headers.filter(h => h.name.search(old_signature) === -1); |
72 | 57 |
|
73 |
const policy_object = {allow, nonce, url}; |
|
74 |
const sanitizer = h => sanitize_csp_header(h, policy_object); |
|
58 |
const sanitizer = h => sanitize_csp_header(h, policy); |
|
75 | 59 |
headers.push(...orig_csp_headers.map(sanitizer)); |
76 | 60 |
|
77 |
const policy = encodeURIComponent(JSON.stringify(policy_object));
|
|
78 |
const policy_signature = sign_data(policy, new Date());
|
|
61 |
const policy_str = encodeURIComponent(JSON.stringify(policy));
|
|
62 |
const policy_sig = sign_data(policy_str, new Date());
|
|
79 | 63 |
const later_30sec = new Date(new Date().getTime() + 30000).toGMTString(); |
80 | 64 |
headers.push({ |
81 | 65 |
name: "Set-Cookie", |
82 |
value: `hachette-${policy_signature}=${policy}; Expires=${later_30sec};`
|
|
66 |
value: `hachette-${policy_sig}=${policy_str}; Expires=${later_30sec};`
|
|
83 | 67 |
}); |
84 | 68 |
|
85 | 69 |
/* |
... | ... | |
87 | 71 |
* These are signed with a time of 0, as it's not clear there is a limit on |
88 | 72 |
* how long Firefox might retain headers in the cache. |
89 | 73 |
*/ |
90 |
let hachette_data = {csp_headers: orig_csp_headers, policy_signature, url};
|
|
74 |
let hachette_data = {csp_headers: orig_csp_headers, policy_sig, url}; |
|
91 | 75 |
hachette_data = encodeURIComponent(JSON.stringify(hachette_data)); |
92 | 76 |
hachette_header.value = sign_data(hachette_data, 0) + hachette_data; |
93 | 77 |
|
94 | 78 |
/* To ensure there is a CSP header if required */ |
95 |
if (!allow) |
|
96 |
headers.push({name: "content-security-policy", value: csp_rule(nonce)}); |
|
79 |
if (!policy.allow) |
|
80 |
headers.push({ |
|
81 |
name: "content-security-policy", |
|
82 |
value: csp_rule(policy.nonce) |
|
83 |
}); |
|
97 | 84 |
|
98 |
return {responseHeaders: headers}; |
|
99 |
} |
|
100 |
|
|
101 |
async function start_policy_injector() |
|
102 |
{ |
|
103 |
storage = await get_storage(); |
|
104 |
|
|
105 |
let extra_opts = ["blocking", "responseHeaders"]; |
|
106 |
if (is_chrome) |
|
107 |
extra_opts.push("extraHeaders"); |
|
108 |
|
|
109 |
browser.webRequest.onHeadersReceived.addListener( |
|
110 |
headers_inject, |
|
111 |
{ |
|
112 |
urls: ["<all_urls>"], |
|
113 |
types: ["main_frame", "sub_frame"] |
|
114 |
}, |
|
115 |
extra_opts |
|
116 |
); |
|
85 |
return headers; |
|
117 | 86 |
} |
118 | 87 |
|
119 | 88 |
/* |
120 | 89 |
* EXPORTS_START |
121 |
* EXPORT start_policy_injector
|
|
90 |
* EXPORT inject_csp_headers
|
|
122 | 91 |
* EXPORTS_END |
123 | 92 |
*/ |
background/stream_filter.js | ||
---|---|---|
1 |
/** |
|
2 |
* Hachette modifying a web page using the StreamFilter API |
|
3 |
* |
|
4 |
* Copyright (C) 2018 Giorgio Maone <giorgio@maone.net> |
|
5 |
* Copyright (C) 2021 Wojtek Kosior |
|
6 |
* Redistribution terms are gathered in the `copyright' file. |
|
7 |
* |
|
8 |
* Derived from `bg/ResponseProcessor.js' and `bg/ResponseMetaData.js' |
|
9 |
* in LibreJS. |
|
10 |
*/ |
|
11 |
|
|
12 |
/* |
|
13 |
* IMPORTS_START |
|
14 |
* IMPORT browser |
|
15 |
* IMPORTS_END |
|
16 |
*/ |
|
17 |
|
|
18 |
/*
 * Check whether `charset' is an encoding label that TextDecoder accepts.
 * Returns `charset' unchanged if it is and `undefined' otherwise.
 */
function validate_encoding(charset)
{
    try {
        /* The constructor throws a RangeError for unknown labels. */
        new TextDecoder(charset);
        return charset;
    } catch(e) {
        return undefined;
    }
}
|
27 |
|
|
28 |
/*
 * Predicate telling whether `header' (an object with a `name' property) is a
 * Content-Type header. The name is compared case-insensitively, ignoring
 * surrounding whitespace.
 */
function is_content_type_header(header)
{
    return header.name.toLowerCase().trim() === "content-type";
}
|
32 |
|
|
33 |
const charset_reg = /;\s*charset\s*=\s*([\w-]+)/i; |
|
34 |
|
|
35 |
/*
 * Gather information about the response from its Content-Type header(s).
 *
 * Returns an object that may have these properties set:
 *   - `detected_charset': the first charset parameter found that passes
 *     `validate_encoding()',
 *   - `html': true if any Content-Type value mentions "html".
 */
function properties_from_headers(headers)
{
    const properties = {};

    for (const header of headers.filter(is_content_type_header)) {
        /* `match' is null when the header carries no charset parameter. */
        const match = charset_reg.exec(header.value);
        if (match && !properties.detected_charset &&
            validate_encoding(match[1]))
            properties.detected_charset = match[1];

        if (/html/i.test(header.value))
            properties.html = true;
    }

    return properties;
}
|
50 |
|
|
51 |
const UTF8_BOM = [0xef, 0xbb, 0xbf]; |
|
52 |
const BOMs = [ |
|
53 |
[UTF8_BOM, "utf-8"], |
|
54 |
[[0xfe, 0xff], "utf-16be"], |
|
55 |
[[0xff, 0xfe], "utf-16le"] |
|
56 |
]; |
|
57 |
|
|
58 |
/*
 * Look for a byte order mark at the beginning of `data' (an indexable
 * sequence of bytes). Returns the name of the charset whose entry in `BOMs'
 * matches or an empty string if none does.
 */
function charset_from_BOM(data)
{
    const detected = BOMs.find(
        ([signature]) => signature.every((byte, idx) => data[idx] === byte)
    );

    return detected ? detected[1] : "";
}
|
67 |
|
|
68 |
const charset_attrs = |
|
69 |
['charset', 'http-equiv="content-type"', 'content*="charset"']; |
|
70 |
const charset_meta_selector = |
|
71 |
charset_attrs.map(a => `head>meta[${a}]`).join(", "); |
|
72 |
|
|
73 |
/*
 * Search the `<meta>' tags of `doc' selected by `charset_meta_selector' for a
 * charset declaration. For each tag the `charset' attribute is tried first
 * and a charset parameter inside the `content' attribute second. Returns the
 * first value accepted by `validate_encoding()' or `undefined'.
 */
function charset_from_meta_tags(doc)
{
    for (const meta of doc.querySelectorAll(charset_meta_selector)) {
        const content_match = charset_reg.exec(meta.getAttribute("content"));
        const candidates = [
            meta.getAttribute("charset"),
            content_match && content_match[1]
        ];

        const usable = candidates.find(c => c && validate_encoding(c));
        if (usable)
            return usable;
    }

    return undefined;
}
|
87 |
|
|
88 |
/*
 * Choose a TextDecoder for a response based on its first chunk `data'.
 *
 * Sources of the charset, in order of precedence: a byte order mark in
 * `data', `properties.detected_charset' and, only if both are missing,
 * `<meta>' tags found in the chunk. When nothing is found "latin1" is used.
 *
 * As a side effect `properties.html' gets set if the chunk mentions "html".
 */
function create_decoder(properties, data)
{
    let charset = charset_from_BOM(data) || properties.detected_charset;
    if (!charset && data.indexOf(0) !== -1) {
        console.debug("Warning: zeroes in bytestream, probable cached encoding mismatch. Trying to decode it as UTF-16.",
                      properties);
        return new TextDecoder("utf-16be");
    }

    /*
     * TODO: I recall there is some standard saying how early in the doc the
     * charset has to be specified. We could process just this part of data.
     */
    const text = new TextDecoder("latin1").decode(data, {stream: true});
    properties.html = properties.html || /html/i.test(text);

    /*
     * Missing HTTP charset, sniffing in content... A charset already known
     * from the BOM or the headers must not be overridden (or erased) by what
     * is found, or not found, in `<meta>' tags.
     */
    if (!charset && properties.html) {
        const tmp_doc = new DOMParser().parseFromString(text, "text/html");
        charset = charset_from_meta_tags(tmp_doc);
    }

    return new TextDecoder(charset || "latin1");
}
|
112 |
|
|
113 |
/*
 * `ondata' handler of the response StreamFilter. Decodes the received chunk,
 * injects a dummy `<script>' into the first one and writes the result to the
 * filter re-encoded as UTF-8.
 *
 * `properties' carries the per-request state: `filter', `policy' and, once the
 * first chunk has been seen, `decoder' and `encoder'. `event.data' holds the
 * raw bytes of the chunk.
 */
function filter_data(properties, event)
{
    const data = new Uint8Array(event.data);
    let first_chunk = false;
    if (!properties.decoder) {
        first_chunk = true;
        properties.decoder = create_decoder(properties, data);
        properties.encoder = new TextEncoder();
        /* Force UTF-8, this is the only encoding we can produce. */
        properties.filter.write(new Uint8Array(UTF8_BOM));
    }

    /*
     * For UTF-8 input we disconnect right after this chunk, so there will be
     * no later call that could complete a character cut at the chunk's end.
     * For any other encoding we keep transcoding all chunks and hence have to
     * decode in streaming mode. Otherwise a multi-byte character split
     * between 2 chunks would get replaced with U+FFFD.
     */
    const keep_filtering = properties.decoder.encoding !== "utf-8";
    let decoded = properties.decoder.decode(data, {stream: keep_filtering});

    if (first_chunk) {
        /*
         * HAX! Our content scripts that execute at `document_start' will always
         * run before the first script in the document, but under Mozilla some
         * `<meta>' tags might already be loaded at that point. Here we inject a
         * dummy `<script>' at the beginning (before any `<meta>' tags) that
         * will force `document_start' to happen earlier. This way our content
         * scripts will be able to sanitize `http-equiv' tags with CSP rules
         * that would otherwise stop our injected scripts from executing.
         */
        const dummy_script =
              `<script data-hachette-deleteme="${properties.policy.nonce}" nonce="${properties.policy.nonce}">null</script>`;
        /* The group is optional, so this regex matches any input. */
        const doctype_decl = /^(\s*<!doctype[^<>"']*>)?/i.exec(decoded)[0];
        decoded = doctype_decl + dummy_script +
            decoded.substring(doctype_decl.length);
    }

    properties.filter.write(properties.encoder.encode(decoded));

    /* The rest of a UTF-8 response can be passed through unmodified. */
    if (!keep_filtering)
        properties.filter.disconnect();
}
|
149 |
|
|
150 |
/*
 * Attach a StreamFilter to the response of request `details' so that
 * `filter_data()' gets to process its body. Nothing is attached when
 * `policy.allow' is set.
 *
 * Always returns `headers' as received.
 */
function apply_stream_filter(details, headers, policy)
{
    if (!policy.allow) {
        const state = properties_from_headers(headers);
        state.policy = policy;

        const stream = browser.webRequest.filterResponseData(details.requestId);
        state.filter = stream;

        stream.ondata = chunk_event => filter_data(state, chunk_event);
        stream.onstop = () => state.filter.close();
    }

    /*
     * In the future we might consider modifying the headers that specify
     * encoding. For now we are not yet doing it, though. However, we
     * prepend the data with UTF-8 BOM which should be enough.
     */
    return headers;
}
|
171 |
|
|
172 |
/* |
|
173 |
* EXPORTS_START |
|
174 |
* EXPORT apply_stream_filter |
|
175 |
* EXPORTS_END |
|
176 |
*/ |
content/main.js | ||
---|---|---|
47 | 47 |
|
48 | 48 |
handle_page_actions(policy.nonce); |
49 | 49 |
|
50 |
if (!policy.allow && is_mozilla) |
|
51 |
addEventListener('beforescriptexecute', mozilla_suppress_scripts, true); |
|
52 |
|
|
53 |
if (!policy.allow && is_chrome) { |
|
50 |
if (!policy.allow) { |
|
54 | 51 |
const old_html = document.documentElement; |
55 | 52 |
const new_html = document.createElement("html"); |
56 | 53 |
old_html.replaceWith(new_html); |
content/sanitize_document.js | ||
---|---|---|
43 | 43 |
node.removeAttribute(attr); |
44 | 44 |
} |
45 | 45 |
|
46 |
function sanitize_script(script, policy)
|
|
46 |
function sanitize_script(script, data)
|
|
47 | 47 |
{ |
48 |
if (policy.allow) |
|
48 |
if (script.getAttribute("data-hachette-deleteme") === data.policy.nonce) { |
|
49 |
script.remove(); |
|
50 |
script.hachette_deleted = true; |
|
51 |
script.hachette_ignore = true; |
|
52 |
} |
|
53 |
|
|
54 |
if (data.policy.allow) |
|
49 | 55 |
return; |
50 | 56 |
|
51 | 57 |
block_attribute(script, "type"); |
52 | 58 |
script.setAttribute("type", "application/json"); |
53 | 59 |
} |
54 | 60 |
|
55 |
function inject_csp(head, policy)
|
|
61 |
function inject_csp(head, data)
|
|
56 | 62 |
{ |
57 |
if (policy.allow) |
|
63 |
if (data.policy.allow)
|
|
58 | 64 |
return; |
59 | 65 |
|
60 | 66 |
const meta = document.createElement("meta"); |
61 | 67 |
meta.setAttribute("http-equiv", "Content-Security-Policy"); |
62 |
meta.setAttribute("content", csp_rule(policy.nonce)); |
|
68 |
meta.setAttribute("content", csp_rule(data.policy.nonce));
|
|
63 | 69 |
meta.hachette_ignore = true; |
64 | 70 |
head.prepend(meta); |
71 |
|
|
72 |
data.new_added.unshift([meta, head]); |
|
65 | 73 |
} |
66 | 74 |
|
67 |
function sanitize_http_equiv_csp_rule(meta, policy)
|
|
75 |
function sanitize_http_equiv_csp_rule(meta, data)
|
|
68 | 76 |
{ |
69 | 77 |
const http_equiv = meta.getAttribute("http-equiv"); |
78 |
const value = meta.content; |
|
70 | 79 |
|
71 |
if (!is_csp_header_name(http_equiv, !policy.allow))
|
|
80 |
if (!value || !is_csp_header_name(http_equiv, !data.policy.allow))
|
|
72 | 81 |
return; |
73 | 82 |
|
74 |
if (policy.allow || is_csp_header_name(http_equiv, false)) { |
|
75 |
let value = meta.getAttribute("content"); |
|
76 |
block_attribute(meta, "content"); |
|
77 |
if (value) { |
|
78 |
value = sanitize_csp_header({value}, policy).value; |
|
79 |
meta.setAttribute("content", value); |
|
80 |
} |
|
81 |
return; |
|
82 |
} |
|
83 |
block_attribute(meta, "content"); |
|
83 | 84 |
|
84 |
block_attribute(meta, "http-equiv"); |
|
85 |
if (data.policy.allow || is_csp_header_name(http_equiv, false)) |
|
86 |
meta.content = sanitize_csp_header({value}, data.policy).value; |
|
85 | 87 |
} |
86 | 88 |
|
87 |
function sanitize_node(node, policy)
|
|
89 |
function sanitize_node(node, data)
|
|
88 | 90 |
{ |
89 | 91 |
if (node.tagName === "SCRIPT") |
90 |
sanitize_script(node, policy);
|
|
92 |
sanitize_script(node, data);
|
|
91 | 93 |
|
92 | 94 |
if (node.tagName === "HEAD") |
93 |
inject_csp(node, policy);
|
|
95 |
inject_csp(node, data);
|
|
94 | 96 |
|
95 | 97 |
if (node.tagName === "META") |
96 |
sanitize_http_equiv_csp_rule(node, policy); |
|
98 |
sanitize_http_equiv_csp_rule(node, data); |
|
99 |
|
|
100 |
if (!data.policy.allow) |
|
101 |
sanitize_attributes(node, data); |
|
102 |
} |
|
97 | 103 |
|
98 |
if (!policy.allow) |
|
99 |
sanitize_attributes(node, policy); |
|
104 |
/* |
|
105 |
* Instead of calling writer directly with multiple small chunks of reconstructed |
|
106 |
* HTML code, we utilize `setTimeout()' to only have it called once, |
|
107 |
* asynchronously. |
|
108 |
*/ |
|
109 |
function do_write_callback(data) |
|
110 |
{ |
|
111 |
data.writer(data.chunks.join("")); |
|
112 |
data.chunks = []; |
|
113 |
|
|
114 |
if (data.finished && data.finisher) |
|
115 |
data.finisher(); |
|
116 |
} |
|
117 |
|
|
118 |
function do_write(chunk, data) |
|
119 |
{ |
|
120 |
data.chunks.push(chunk); |
|
121 |
clearTimeout(data.write_timeout); |
|
122 |
data.write_timeout = setTimeout(() => do_write_callback(data), 0); |
|
100 | 123 |
} |
101 | 124 |
|
102 | 125 |
const serializer = new XMLSerializer(); |
103 | 126 |
|
104 |
function start_node(node, data) |
|
127 |
function start_serializing_node(node, data)
|
|
105 | 128 |
{ |
129 |
node.hachette_started = true; |
|
130 |
|
|
106 | 131 |
if (!data.writer) |
107 | 132 |
return; |
108 | 133 |
|
109 |
node.hachette_started = true; |
|
110 | 134 |
const clone = node.cloneNode(false); |
111 | 135 |
clone.textContent = data.uniq; |
112 |
data.writer(data.uniq_reg.exec(clone.outerHTML)[1]);
|
|
136 |
do_write(data.uniq_reg.exec(clone.outerHTML)[1], data);
|
|
113 | 137 |
} |
114 | 138 |
|
115 |
function finish_node(node, data) |
|
139 |
function finish_serializing_node(node, data)
|
|
116 | 140 |
{ |
117 | 141 |
const nodes_to_process = [node]; |
118 | 142 |
|
... | ... | |
127 | 151 |
while (nodes_to_process.length > 0) { |
128 | 152 |
const node = nodes_to_process.pop(); |
129 | 153 |
node.remove(); |
154 |
node.hachette_ignore = true; |
|
130 | 155 |
|
131 | 156 |
if (!data.writer) |
132 | 157 |
continue; |
133 | 158 |
|
134 | 159 |
if (node.hachette_started) { |
135 | 160 |
node.textContent = data.uniq; |
136 |
data.writer(data.uniq_reg.exec(node.outerHTML)[2]); |
|
161 |
do_write(data.uniq_reg.exec(node.outerHTML)[2], data); |
|
162 |
continue; |
|
163 |
} |
|
164 |
|
|
165 |
do_write(node.outerHTML || serializer.serializeToString(node), data); |
|
166 |
} |
|
167 |
} |
|
168 |
|
|
169 |
function process_initial_nodes(node, data) |
|
170 |
{ |
|
171 |
if (data.processed_initial_nodes) |
|
172 |
return; |
|
173 |
|
|
174 |
data.processed_initial_nodes = true; |
|
175 |
|
|
176 |
start_serializing_node(data.html_root, data); |
|
177 |
|
|
178 |
const new_added = []; |
|
179 |
const nodes_to_process = [data.html_root]; |
|
180 |
|
|
181 |
let i = 0; |
|
182 |
while (nodes_to_process.length > 0) { |
|
183 |
let current = nodes_to_process.shift(); |
|
184 |
|
|
185 |
if (current.firstChild) { |
|
186 |
if (current.firstChild === node) |
|
187 |
break; |
|
188 |
nodes_to_process.unshift(current.firstChild, current); |
|
189 |
new_added.push([current.firstChild, current]); |
|
137 | 190 |
continue; |
138 | 191 |
} |
139 | 192 |
|
140 |
data.writer(node.outerHTML || serializer.serializeToString(node)); |
|
193 |
while (current && !current.nextSibling) |
|
194 |
current = nodes_to_process.shift(); |
|
195 |
|
|
196 |
if (!current || current.nextSibling === node) |
|
197 |
break; |
|
198 |
|
|
199 |
nodes_to_process.unshift(current.nextSibling); |
|
200 |
new_added.push([current.nextSibling, nodes_to_process[1]]); |
|
141 | 201 |
} |
202 |
|
|
203 |
data.new_added.unshift(...new_added); |
|
142 | 204 |
} |
143 | 205 |
|
144 | 206 |
/* |
145 | 207 |
* Important! Due to some weirdness node.parentElement is not always correct
146 |
* under Chromium. Track node relations manually. |
|
208 |
* in MutationRecords under Chromium. Track node relations manually.
|
|
147 | 209 |
*/ |
148 | 210 |
function handle_added_node(node, true_parent, data) |
149 | 211 |
{ |
150 |
if (node.hachette_ignore || true_parent.hachette_ignore) |
|
151 |
return; |
|
212 |
/* |
|
213 |
* Functions we call here might cause new nodes to be injected or found |
|
214 |
* that require processing before the one we got in function argument. |
|
215 |
* We rely on those functions putting the node(s) they create/find at the |
|
216 |
* very beginning of the `new_added' queue and (for created nodes) setting |
|
217 |
* their `hachette_ignore' property, based on which their MutationRecord |
|
218 |
* will not be processed. A function can also mark a node already in the |
|
219 |
* `new_added' queue as not eligible for processing by setting its |
|
220 |
* `hachette_deleted' property. |
|
221 |
*/ |
|
152 | 222 |
|
153 |
if (!true_parent.hachette_started) |
|
154 |
start_node(true_parent, data) |
|
223 |
process_initial_nodes(node, data); |
|
155 | 224 |
|
156 |
sanitize_node(node, data.policy);
|
|
225 |
data.new_added.push([node, true_parent]);
|
|
157 | 226 |
|
158 |
if (data.node_eater)
|
|
159 |
data.node_eater(node, true_parent);
|
|
227 |
while (data.new_added.length > 0) {
|
|
228 |
[node, true_parent] = data.new_added.shift();
|
|
160 | 229 |
|
161 |
finish_node(true_parent.hachette_last_added, data); |
|
230 |
if (true_parent.hachette_deleted) |
|
231 |
node.hachette_deleted = true; |
|
232 |
if (node.hachette_deleted) |
|
233 |
continue; |
|
234 |
|
|
235 |
if (!true_parent.hachette_started) |
|
236 |
start_serializing_node(true_parent, data) |
|
237 |
|
|
238 |
if (!node.hachette_ignore) |
|
239 |
sanitize_node(node, data); |
|
240 |
|
|
241 |
if (node.hachette_deleted) |
|
242 |
continue; |
|
243 |
|
|
244 |
if (data.node_eater) |
|
245 |
data.node_eater(node, true_parent); |
|
162 | 246 |
|
163 |
true_parent.hachette_last_added = node; |
|
247 |
finish_serializing_node(true_parent.hachette_last_added, data); |
|
248 |
|
|
249 |
true_parent.hachette_last_added = node; |
|
250 |
} |
|
164 | 251 |
} |
165 | 252 |
|
166 | 253 |
function handle_mutation(mutations, data) |
... | ... | |
170 | 257 |
* node.parentElement. The former is the correct one. |
171 | 258 |
*/ |
172 | 259 |
for (const mutation of mutations) { |
173 |
for (const node of mutation.addedNodes) |
|
260 |
for (const node of mutation.addedNodes) { |
|
261 |
/* Check for nodes added by ourselves. */ |
|
262 |
if (mutation.target.hachette_ignore) |
|
263 |
node.hachette_ignore = true; |
|
264 |
if (node.hachette_ignore) |
|
265 |
continue; |
|
266 |
|
|
174 | 267 |
handle_added_node(node, mutation.target, data); |
268 |
} |
|
175 | 269 |
} |
176 | 270 |
} |
177 | 271 |
|
178 | 272 |
function finish_processing(data) |
179 | 273 |
{ |
274 |
process_initial_nodes(undefined, data); |
|
275 |
|
|
276 |
/* |
|
277 |
* The `finisher' callback should be called, if provided. Normally our |
|
278 |
* function that performs the last write does it after seeing `finished' |
|
279 |
* set to `true'. If, however, there's no `writer' callback and hence no |
|
280 |
* writes to perform, we need to take care of calling `finisher' here. |
|
281 |
*/ |
|
282 |
data.finished = true; |
|
180 | 283 |
handle_mutation(data.observer.takeRecords(), data); |
181 |
finish_node(data.html_element, data); |
|
182 | 284 |
data.observer.disconnect(); |
285 |
|
|
286 |
/* |
|
287 |
* Additional whitespace that was after `</body>' gets appended to body. |
|
288 |
* Although it's a minor issue, it is not what we want. There's no way to |
|
289 |
* tell exactly what part of that whitespace was after `</body>' and what |
|
290 |
* was before, so we just replace it with a single newline which looks good |
|
291 |
* when printed. |
|
292 |
*/ |
|
293 |
const body = data.html_root.lastChild; |
|
294 |
const text = body && body.tagName === "BODY" && body.lastChild; |
|
295 |
if (text && text.nodeName === "#text") { |
|
296 |
const new_content = /^([\S\s]*\S)?\s*$/.exec(text.textContent)[1] || ""; |
|
297 |
text.textContent = new_content + "\n"; |
|
298 |
} |
|
299 |
|
|
300 |
finish_serializing_node(data.html_root, data); |
|
301 |
if (!data.writer && data.finisher) |
|
302 |
setTimeout(data.finisher, 0); |
|
183 | 303 |
} |
184 | 304 |
|
185 |
function modify_on_the_fly(html_element, policy, consumers) |
|
305 |
/* |
|
306 |
* This function sanitizes `html_root' according to `policy'. It is capable of |
|
307 |
* working on an HTML document that is being written to, sanitizing new nodes |
|
308 |
* as they appear. |
|
309 |
* |
|
310 |
* `consumers' object may contain 3 optional callback functions: `writer', |
|
311 |
* `node_eater' and `finisher'. The first one, if present, is called with chunks |
|
312 |
* of reconstructed HTML code. The second one, if present, gets called for every |
|
313 |
* added node with 2 arguments: that node and its parent. The third one is |
|
314 |
* called at the end, after all processing has been done. |
|
315 |
* |
|
316 |
* `modify_on_the_fly()' returns a callback that should be called (with no |
|
317 |
* arguments) once the document of html_root has finished being written to. |
|
318 |
* Unfortunately, due to the specific behavior of a document that has had its |
|
319 |
* documentElement replaced |
|
320 |
*/ |
|
321 |
function modify_on_the_fly(html_root, policy, consumers) |
|
186 | 322 |
{ |
187 | 323 |
const uniq = gen_nonce(); |
188 |
const uniq_reg = new RegExp(`^(.*)${uniq}(.*)$`); |
|
189 |
const data = {policy, html_element, uniq, uniq_reg, ...consumers}; |
|
190 |
|
|
191 |
start_node(data.html_element, data); |
|
324 |
const uniq_reg = new RegExp(`^([\\s\\S]*)${uniq}([\\s\\S]*)$`); |
|
325 |
const data = {policy, html_root, uniq, uniq_reg, chunks: [], new_added: []}; |
|
326 |
Object.assign(data, consumers); |
|
192 | 327 |
|
193 | 328 |
var observer = new MutationObserver(m => handle_mutation(m, data)); |
194 |
observer.observe(data.html_element, {
|
|
329 |
observer.observe(data.html_root, {
|
|
195 | 330 |
attributes: true, |
196 | 331 |
childList: true, |
197 | 332 |
subtree: true |
copyright | ||
---|---|---|
20 | 20 |
2021 jahoti <jahoti@tilde.team> |
21 | 21 |
License: GPL-3+-javascript or Alicense-1.0 |
22 | 22 |
|
23 |
Files: background/stream_filter.js |
|
24 |
Copyright: 2018 Giorgio Maone <giorgio@maone.net> |
|
25 |
2021 Wojtek Kosior <koszko@koszko.org> |
|
26 |
License: GPL-3+-javascript or Alicense-1.0, and GPL-3+ |
|
27 |
Comment: Code by Wojtek is dual-licensed under GPL-3+-javascript and |
|
28 |
Alicense-1.0. Giorgio's code is under GPL-3+. |
|
29 |
|
|
23 | 30 |
Files: *.html README.txt copyright |
24 | 31 |
Copyright: 2021 Wojtek Kosior <koszko@koszko.org> |
25 | 32 |
License: GPL-3+ or Alicense-1.0 or CC-BY-SA-4.0 |
Also available in: Unified diff
use StreamFilter under Mozilla to prevent csp tags from blocking our injected scripts