Revision 6b53d6c8
Added by koszko about 2 years ago
content/sanitize_document.js | ||
---|---|---|
43 | 43 |
node.removeAttribute(attr); |
44 | 44 |
} |
45 | 45 |
|
46 |
function sanitize_script(script, policy)
|
|
46 |
function sanitize_script(script, data)
|
|
47 | 47 |
{ |
48 |
if (policy.allow) |
|
48 |
if (script.getAttribute("data-hachette-deleteme") === data.policy.nonce) { |
|
49 |
script.remove(); |
|
50 |
script.hachette_deleted = true; |
|
51 |
script.hachette_ignore = true; |
|
52 |
} |
|
53 |
|
|
54 |
if (data.policy.allow) |
|
49 | 55 |
return; |
50 | 56 |
|
51 | 57 |
block_attribute(script, "type"); |
52 | 58 |
script.setAttribute("type", "application/json"); |
53 | 59 |
} |
54 | 60 |
|
55 |
function inject_csp(head, policy)
|
|
61 |
function inject_csp(head, data)
|
|
56 | 62 |
{ |
57 |
if (policy.allow) |
|
63 |
if (data.policy.allow)
|
|
58 | 64 |
return; |
59 | 65 |
|
60 | 66 |
const meta = document.createElement("meta"); |
61 | 67 |
meta.setAttribute("http-equiv", "Content-Security-Policy"); |
62 |
meta.setAttribute("content", csp_rule(policy.nonce)); |
|
68 |
meta.setAttribute("content", csp_rule(data.policy.nonce));
|
|
63 | 69 |
meta.hachette_ignore = true; |
64 | 70 |
head.prepend(meta); |
71 |
|
|
72 |
data.new_added.unshift([meta, head]); |
|
65 | 73 |
} |
66 | 74 |
|
67 |
function sanitize_http_equiv_csp_rule(meta, policy)
|
|
75 |
function sanitize_http_equiv_csp_rule(meta, data)
|
|
68 | 76 |
{ |
69 | 77 |
const http_equiv = meta.getAttribute("http-equiv"); |
78 |
const value = meta.content; |
|
70 | 79 |
|
71 |
if (!is_csp_header_name(http_equiv, !policy.allow))
|
|
80 |
if (!value || !is_csp_header_name(http_equiv, !data.policy.allow))
|
|
72 | 81 |
return; |
73 | 82 |
|
74 |
if (policy.allow || is_csp_header_name(http_equiv, false)) { |
|
75 |
let value = meta.getAttribute("content"); |
|
76 |
block_attribute(meta, "content"); |
|
77 |
if (value) { |
|
78 |
value = sanitize_csp_header({value}, policy).value; |
|
79 |
meta.setAttribute("content", value); |
|
80 |
} |
|
81 |
return; |
|
82 |
} |
|
83 |
block_attribute(meta, "content"); |
|
83 | 84 |
|
84 |
block_attribute(meta, "http-equiv"); |
|
85 |
if (data.policy.allow || is_csp_header_name(http_equiv, false)) |
|
86 |
meta.content = sanitize_csp_header({value}, data.policy).value; |
|
85 | 87 |
} |
86 | 88 |
|
87 |
function sanitize_node(node, policy)
|
|
89 |
function sanitize_node(node, data)
|
|
88 | 90 |
{ |
89 | 91 |
if (node.tagName === "SCRIPT") |
90 |
sanitize_script(node, policy);
|
|
92 |
sanitize_script(node, data);
|
|
91 | 93 |
|
92 | 94 |
if (node.tagName === "HEAD") |
93 |
inject_csp(node, policy);
|
|
95 |
inject_csp(node, data);
|
|
94 | 96 |
|
95 | 97 |
if (node.tagName === "META") |
96 |
sanitize_http_equiv_csp_rule(node, policy); |
|
98 |
sanitize_http_equiv_csp_rule(node, data); |
|
99 |
|
|
100 |
if (!data.policy.allow) |
|
101 |
sanitize_attributes(node, data); |
|
102 |
} |
|
97 | 103 |
|
98 |
if (!policy.allow) |
|
99 |
sanitize_attributes(node, policy); |
|
104 |
/* |
|
105 |
* Instead of calling writer directly with multiple small chunks of reconstructed |
|
106 |
* HTML code, we utilize `setTimeout()' to only have it called once, |
|
107 |
* asynchronously. |
|
108 |
*/ |
|
109 |
function do_write_callback(data) |
|
110 |
{ |
|
111 |
data.writer(data.chunks.join("")); |
|
112 |
data.chunks = []; |
|
113 |
|
|
114 |
if (data.finished && data.finisher) |
|
115 |
data.finisher(); |
|
116 |
} |
|
117 |
|
|
118 |
function do_write(chunk, data) |
|
119 |
{ |
|
120 |
data.chunks.push(chunk); |
|
121 |
clearTimeout(data.write_timeout); |
|
122 |
data.write_timeout = setTimeout(() => do_write_callback(data), 0); |
|
100 | 123 |
} |
101 | 124 |
|
102 | 125 |
const serializer = new XMLSerializer(); |
103 | 126 |
|
104 |
function start_node(node, data) |
|
127 |
function start_serializing_node(node, data)
|
|
105 | 128 |
{ |
129 |
node.hachette_started = true; |
|
130 |
|
|
106 | 131 |
if (!data.writer) |
107 | 132 |
return; |
108 | 133 |
|
109 |
node.hachette_started = true; |
|
110 | 134 |
const clone = node.cloneNode(false); |
111 | 135 |
clone.textContent = data.uniq; |
112 |
data.writer(data.uniq_reg.exec(clone.outerHTML)[1]);
|
|
136 |
do_write(data.uniq_reg.exec(clone.outerHTML)[1], data);
|
|
113 | 137 |
} |
114 | 138 |
|
115 |
function finish_node(node, data) |
|
139 |
function finish_serializing_node(node, data)
|
|
116 | 140 |
{ |
117 | 141 |
const nodes_to_process = [node]; |
118 | 142 |
|
... | ... | |
127 | 151 |
while (nodes_to_process.length > 0) { |
128 | 152 |
const node = nodes_to_process.pop(); |
129 | 153 |
node.remove(); |
154 |
node.hachette_ignore = true; |
|
130 | 155 |
|
131 | 156 |
if (!data.writer) |
132 | 157 |
continue; |
133 | 158 |
|
134 | 159 |
if (node.hachette_started) { |
135 | 160 |
node.textContent = data.uniq; |
136 |
data.writer(data.uniq_reg.exec(node.outerHTML)[2]); |
|
161 |
do_write(data.uniq_reg.exec(node.outerHTML)[2], data); |
|
162 |
continue; |
|
163 |
} |
|
164 |
|
|
165 |
do_write(node.outerHTML || serializer.serializeToString(node), data); |
|
166 |
} |
|
167 |
} |
|
168 |
|
|
169 |
function process_initial_nodes(node, data) |
|
170 |
{ |
|
171 |
if (data.processed_initial_nodes) |
|
172 |
return; |
|
173 |
|
|
174 |
data.processed_initial_nodes = true; |
|
175 |
|
|
176 |
start_serializing_node(data.html_root, data); |
|
177 |
|
|
178 |
const new_added = []; |
|
179 |
const nodes_to_process = [data.html_root]; |
|
180 |
|
|
181 |
let i = 0; |
|
182 |
while (nodes_to_process.length > 0) { |
|
183 |
let current = nodes_to_process.shift(); |
|
184 |
|
|
185 |
if (current.firstChild) { |
|
186 |
if (current.firstChild === node) |
|
187 |
break; |
|
188 |
nodes_to_process.unshift(current.firstChild, current); |
|
189 |
new_added.push([current.firstChild, current]); |
|
137 | 190 |
continue; |
138 | 191 |
} |
139 | 192 |
|
140 |
data.writer(node.outerHTML || serializer.serializeToString(node)); |
|
193 |
while (current && !current.nextSibling) |
|
194 |
current = nodes_to_process.shift(); |
|
195 |
|
|
196 |
if (!current || current.nextSibling === node) |
|
197 |
break; |
|
198 |
|
|
199 |
nodes_to_process.unshift(current.nextSibling); |
|
200 |
new_added.push([current.nextSibling, nodes_to_process[1]]); |
|
141 | 201 |
} |
202 |
|
|
203 |
data.new_added.unshift(...new_added); |
|
142 | 204 |
} |
143 | 205 |
|
144 | 206 |
/* |
145 | 207 |
* Important! Due to some weirdness node.parentElement is not always correct |
146 |
* under Chromium. Track node relations manually. |
|
208 |
* in MutationRecords under Chromium. Track node relations manually.
|
|
147 | 209 |
*/ |
148 | 210 |
function handle_added_node(node, true_parent, data) |
149 | 211 |
{ |
150 |
if (node.hachette_ignore || true_parent.hachette_ignore) |
|
151 |
return; |
|
212 |
/* |
|
213 |
* Functions we call here might cause new nodes to be injected or found |
|
214 |
* that require processing before the one we got in function argument. |
|
215 |
* We rely on those functions putting the node(s) they create/find at the |
|
216 |
* very beginning of the `new_added' queue and (for created nodes) setting |
|
217 |
* their `hachette_ignore' property, based on which their MutationRecord |
|
218 |
* will not be processed. A function can also mark a node already in the |
|
219 |
* `new_added' queue as not eligible for processing by setting its |
|
220 |
* `hachette_deleted' property. |
|
221 |
*/ |
|
152 | 222 |
|
153 |
if (!true_parent.hachette_started) |
|
154 |
start_node(true_parent, data) |
|
223 |
process_initial_nodes(node, data); |
|
155 | 224 |
|
156 |
sanitize_node(node, data.policy);
|
|
225 |
data.new_added.push([node, true_parent]);
|
|
157 | 226 |
|
158 |
if (data.node_eater)
|
|
159 |
data.node_eater(node, true_parent);
|
|
227 |
while (data.new_added.length > 0) {
|
|
228 |
[node, true_parent] = data.new_added.shift();
|
|
160 | 229 |
|
161 |
finish_node(true_parent.hachette_last_added, data); |
|
230 |
if (true_parent.hachette_deleted) |
|
231 |
node.hachette_deleted = true; |
|
232 |
if (node.hachette_deleted) |
|
233 |
continue; |
|
234 |
|
|
235 |
if (!true_parent.hachette_started) |
|
236 |
start_serializing_node(true_parent, data) |
|
237 |
|
|
238 |
if (!node.hachette_ignore) |
|
239 |
sanitize_node(node, data); |
|
240 |
|
|
241 |
if (node.hachette_deleted) |
|
242 |
continue; |
|
243 |
|
|
244 |
if (data.node_eater) |
|
245 |
data.node_eater(node, true_parent); |
|
162 | 246 |
|
163 |
true_parent.hachette_last_added = node; |
|
247 |
finish_serializing_node(true_parent.hachette_last_added, data); |
|
248 |
|
|
249 |
true_parent.hachette_last_added = node; |
|
250 |
} |
|
164 | 251 |
} |
165 | 252 |
|
166 | 253 |
function handle_mutation(mutations, data) |
... | ... | |
170 | 257 |
* node.parentElement. The former is the correct one. |
171 | 258 |
*/ |
172 | 259 |
for (const mutation of mutations) { |
173 |
for (const node of mutation.addedNodes) |
|
260 |
for (const node of mutation.addedNodes) { |
|
261 |
/* Check for nodes added by ourselves. */ |
|
262 |
if (mutation.target.hachette_ignore) |
|
263 |
node.hachette_ignore = true; |
|
264 |
if (node.hachette_ignore) |
|
265 |
continue; |
|
266 |
|
|
174 | 267 |
handle_added_node(node, mutation.target, data); |
268 |
} |
|
175 | 269 |
} |
176 | 270 |
} |
177 | 271 |
|
178 | 272 |
function finish_processing(data) |
179 | 273 |
{ |
274 |
process_initial_nodes(undefined, data); |
|
275 |
|
|
276 |
/* |
|
277 |
* The `finisher' callback should be called, if provided. Normally our |
|
278 |
* function that performs the last write does it after seeing `finished' |
|
279 |
* set to `true'. If, however, there's no `writer' callback and hence no |
|
280 |
* writes to perform, we need to take care of calling `finisher' here. |
|
281 |
*/ |
|
282 |
data.finished = true; |
|
180 | 283 |
handle_mutation(data.observer.takeRecords(), data); |
181 |
finish_node(data.html_element, data); |
|
182 | 284 |
data.observer.disconnect(); |
285 |
|
|
286 |
/* |
|
287 |
* Additional whitespace that was after `</body>' gets appended to body. |
|
288 |
* Although it's a minor issue, it is not what we want. There's no way to |
|
289 |
* tell exactly what part of that whitespace was after `</body>' and what |
|
290 |
* was before, so we just replace it with a single newline which looks good |
|
291 |
* when printed. |
|
292 |
*/ |
|
293 |
const body = data.html_root.lastChild; |
|
294 |
const text = body && body.tagName === "BODY" && body.lastChild; |
|
295 |
if (text && text.nodeName === "#text") { |
|
296 |
const new_content = /^([\S\s]*\S)?\s*$/.exec(text.textContent)[1] || ""; |
|
297 |
text.textContent = new_content + "\n"; |
|
298 |
} |
|
299 |
|
|
300 |
finish_serializing_node(data.html_root, data); |
|
301 |
if (!data.writer && data.finisher) |
|
302 |
setTimeout(data.finisher, 0); |
|
183 | 303 |
} |
184 | 304 |
|
185 |
function modify_on_the_fly(html_element, policy, consumers) |
|
305 |
/* |
|
306 |
* This function sanitizes `html_root' according to `policy'. It is capable of |
|
307 |
* working on an HTML document that is being written to, sanitizing new nodes |
|
308 |
* as they appear. |
|
309 |
* |
|
310 |
* `consumers' object may contain 3 optional callback functions: `writer', |
|
311 |
* `node_eater' and `finisher'. The first one, if present, is called with chunks |
|
312 |
* of reconstructed HTML code. The second one, if present, gets called for every |
|
313 |
* added node with 2 arguments: that node and its parent. The third one is |
|
314 |
* called at the end, after all processing has been done. |
|
315 |
* |
|
316 |
* `modify_on_the_fly()' returns a callback that should be called (with no |
|
317 |
* arguments) once the document of html_root has finished being written to. |
|
318 |
* Unfortunately, due to the specific behavior of a document that has had its |
|
319 |
* documentElement replaced |
|
320 |
*/ |
|
321 |
function modify_on_the_fly(html_root, policy, consumers) |
|
186 | 322 |
{ |
187 | 323 |
const uniq = gen_nonce(); |
188 |
const uniq_reg = new RegExp(`^(.*)${uniq}(.*)$`); |
|
189 |
const data = {policy, html_element, uniq, uniq_reg, ...consumers}; |
|
190 |
|
|
191 |
start_node(data.html_element, data); |
|
324 |
const uniq_reg = new RegExp(`^([\\s\\S]*)${uniq}([\\s\\S]*)$`); |
|
325 |
const data = {policy, html_root, uniq, uniq_reg, chunks: [], new_added: []}; |
|
326 |
Object.assign(data, consumers); |
|
192 | 327 |
|
193 | 328 |
var observer = new MutationObserver(m => handle_mutation(m, data)); |
194 |
observer.observe(data.html_element, {
|
|
329 |
observer.observe(data.html_root, {
|
|
195 | 330 |
attributes: true, |
196 | 331 |
childList: true, |
197 | 332 |
subtree: true |
Also available in: Unified diff
use StreamFilter under Mozilla to prevent csp tags from blocking our injected scripts