Project

General

Profile

« Previous | Next » 

Revision 6b53d6c8

Added by koszko about 2 years ago

use StreamFilter under Mozilla to prevent csp tags from blocking our injected scripts

View differences:

content/sanitize_document.js
43 43
    node.removeAttribute(attr);
44 44
}
45 45

  
46
function sanitize_script(script, policy)
46
function sanitize_script(script, data)
47 47
{
48
    if (policy.allow)
48
    if (script.getAttribute("data-hachette-deleteme") === data.policy.nonce) {
49
	script.remove();
50
	script.hachette_deleted = true;
51
	script.hachette_ignore = true;
52
    }
53

  
54
    if (data.policy.allow)
49 55
	return;
50 56

  
51 57
    block_attribute(script, "type");
52 58
    script.setAttribute("type", "application/json");
53 59
}
54 60

  
55
function inject_csp(head, policy)
61
function inject_csp(head, data)
56 62
{
57
    if (policy.allow)
63
    if (data.policy.allow)
58 64
	return;
59 65

  
60 66
    const meta = document.createElement("meta");
61 67
    meta.setAttribute("http-equiv", "Content-Security-Policy");
62
    meta.setAttribute("content", csp_rule(policy.nonce));
68
    meta.setAttribute("content", csp_rule(data.policy.nonce));
63 69
    meta.hachette_ignore = true;
64 70
    head.prepend(meta);
71

  
72
    data.new_added.unshift([meta, head]);
65 73
}
66 74

  
67
function sanitize_http_equiv_csp_rule(meta, policy)
75
function sanitize_http_equiv_csp_rule(meta, data)
68 76
{
69 77
    const http_equiv = meta.getAttribute("http-equiv");
78
    const value = meta.content;
70 79

  
71
    if (!is_csp_header_name(http_equiv, !policy.allow))
80
    if (!value || !is_csp_header_name(http_equiv, !data.policy.allow))
72 81
	return;
73 82

  
74
    if (policy.allow || is_csp_header_name(http_equiv, false)) {
75
	let value = meta.getAttribute("content");
76
	block_attribute(meta, "content");
77
	if (value) {
78
	    value = sanitize_csp_header({value}, policy).value;
79
	    meta.setAttribute("content", value);
80
	}
81
	return;
82
    }
83
    block_attribute(meta, "content");
83 84

  
84
    block_attribute(meta, "http-equiv");
85
    if (data.policy.allow || is_csp_header_name(http_equiv, false))
86
	meta.content = sanitize_csp_header({value}, data.policy).value;
85 87
}
86 88

  
87
function sanitize_node(node, policy)
89
function sanitize_node(node, data)
88 90
{
89 91
    if (node.tagName === "SCRIPT")
90
	sanitize_script(node, policy);
92
	sanitize_script(node, data);
91 93

  
92 94
    if (node.tagName === "HEAD")
93
	inject_csp(node, policy);
95
	inject_csp(node, data);
94 96

  
95 97
    if (node.tagName === "META")
96
	sanitize_http_equiv_csp_rule(node, policy);
98
	sanitize_http_equiv_csp_rule(node, data);
99

  
100
    if (!data.policy.allow)
101
	sanitize_attributes(node, data);
102
}
97 103

  
98
    if (!policy.allow)
99
	sanitize_attributes(node, policy);
104
/*
105
 * Instead of calling writer directly with multiple small chunks of reconstructed
106
 * HTML code, we utilize `setTimeout()' to only have it called once,
107
 * asynchronously.
108
 */
109
function do_write_callback(data)
110
{
111
    data.writer(data.chunks.join(""));
112
    data.chunks = [];
113

  
114
    if (data.finished && data.finisher)
115
	data.finisher();
116
}
117

  
118
function do_write(chunk, data)
119
{
120
    data.chunks.push(chunk);
121
    clearTimeout(data.write_timeout);
122
    data.write_timeout = setTimeout(() => do_write_callback(data), 0);
100 123
}
101 124

  
102 125
const serializer = new XMLSerializer();
103 126

  
104
function start_node(node, data)
127
function start_serializing_node(node, data)
105 128
{
129
    node.hachette_started = true;
130

  
106 131
    if (!data.writer)
107 132
	return;
108 133

  
109
    node.hachette_started = true;
110 134
    const clone = node.cloneNode(false);
111 135
    clone.textContent = data.uniq;
112
    data.writer(data.uniq_reg.exec(clone.outerHTML)[1]);
136
    do_write(data.uniq_reg.exec(clone.outerHTML)[1], data);
113 137
}
114 138

  
115
function finish_node(node, data)
139
function finish_serializing_node(node, data)
116 140
{
117 141
    const nodes_to_process = [node];
118 142

  
......
127 151
    while (nodes_to_process.length > 0) {
128 152
	const node = nodes_to_process.pop();
129 153
	node.remove();
154
	node.hachette_ignore = true;
130 155

  
131 156
	if (!data.writer)
132 157
	    continue;
133 158

  
134 159
	if (node.hachette_started) {
135 160
	    node.textContent = data.uniq;
136
	    data.writer(data.uniq_reg.exec(node.outerHTML)[2]);
161
	    do_write(data.uniq_reg.exec(node.outerHTML)[2], data);
162
	    continue;
163
	}
164

  
165
	do_write(node.outerHTML || serializer.serializeToString(node), data);
166
    }
167
}
168

  
169
function process_initial_nodes(node, data)
170
{
171
    if (data.processed_initial_nodes)
172
	return;
173

  
174
    data.processed_initial_nodes = true;
175

  
176
    start_serializing_node(data.html_root, data);
177

  
178
    const new_added = [];
179
    const nodes_to_process = [data.html_root];
180

  
181
    let i = 0;
182
    while (nodes_to_process.length > 0) {
183
	let current = nodes_to_process.shift();
184

  
185
	if (current.firstChild) {
186
	    if (current.firstChild === node)
187
		break;
188
	    nodes_to_process.unshift(current.firstChild, current);
189
	    new_added.push([current.firstChild, current]);
137 190
	    continue;
138 191
	}
139 192

  
140
	data.writer(node.outerHTML || serializer.serializeToString(node));
193
	while (current && !current.nextSibling)
194
	    current = nodes_to_process.shift();
195

  
196
	if (!current || current.nextSibling === node)
197
	    break;
198

  
199
	nodes_to_process.unshift(current.nextSibling);
200
	new_added.push([current.nextSibling, nodes_to_process[1]]);
141 201
    }
202

  
203
    data.new_added.unshift(...new_added);
142 204
}
143 205

  
144 206
/*
145 207
 * Important! Due to some weirdness node.parentElement is not always correct
146
 * under Chromium. Track node relations manually.
208
 * in MutationRecords under Chromium. Track node relations manually.
147 209
 */
148 210
function handle_added_node(node, true_parent, data)
149 211
{
150
    if (node.hachette_ignore || true_parent.hachette_ignore)
151
	return;
212
    /*
213
     * Functions we call here might cause new nodes to be injected or found
214
     * that require processing before the one we got in function argument.
215
     * We rely on those functions putting the node(s) they create/find at the
216
     * very beginning of the `new_added' queue and (for created nodes) setting
217
     * their `hachette_ignore' property, based on which their MutationRecord
218
     * will not be processed. A function can also mark a node already in the
219
     * `new_added' queue as not eligible for processing by setting its
220
     * `hachette_deleted' property.
221
     */
152 222

  
153
    if (!true_parent.hachette_started)
154
	start_node(true_parent, data)
223
    process_initial_nodes(node, data);
155 224

  
156
    sanitize_node(node, data.policy);
225
    data.new_added.push([node, true_parent]);
157 226

  
158
    if (data.node_eater)
159
	data.node_eater(node, true_parent);
227
    while (data.new_added.length > 0) {
228
	[node, true_parent] = data.new_added.shift();
160 229

  
161
    finish_node(true_parent.hachette_last_added, data);
230
	if (true_parent.hachette_deleted)
231
	    node.hachette_deleted = true;
232
	if (node.hachette_deleted)
233
	    continue;
234

  
235
	if (!true_parent.hachette_started)
236
	    start_serializing_node(true_parent, data)
237

  
238
	if (!node.hachette_ignore)
239
	    sanitize_node(node, data);
240

  
241
	if (node.hachette_deleted)
242
	    continue;
243

  
244
	if (data.node_eater)
245
	    data.node_eater(node, true_parent);
162 246

  
163
    true_parent.hachette_last_added = node;
247
	finish_serializing_node(true_parent.hachette_last_added, data);
248

  
249
	true_parent.hachette_last_added = node;
250
    }
164 251
}
165 252

  
166 253
function handle_mutation(mutations, data)
......
170 257
     * node.parentElement. The former is the correct one.
171 258
     */
172 259
    for (const mutation of mutations) {
173
	for (const node of mutation.addedNodes)
260
	for (const node of mutation.addedNodes) {
261
	    /* Check for nodes added by ourselves. */
262
	    if (mutation.target.hachette_ignore)
263
		node.hachette_ignore = true;
264
	    if (node.hachette_ignore)
265
		continue;
266

  
174 267
	    handle_added_node(node, mutation.target, data);
268
	}
175 269
    }
176 270
}
177 271

  
178 272
function finish_processing(data)
179 273
{
274
    process_initial_nodes(undefined, data);
275

  
276
    /*
277
     * The `finisher' callback should be called, if provided. Normally our
278
     * function that performs the last write does it after seeing `finished'
279
     * set to `true'. If, however, there's no `writer' callback and hence no
280
     * writes to perform, we need to take care of calling `finisher' here.
281
     */
282
    data.finished = true;
180 283
    handle_mutation(data.observer.takeRecords(), data);
181
    finish_node(data.html_element, data);
182 284
    data.observer.disconnect();
285

  
286
    /*
287
     * Additional whitespace that was after `</body>' gets appended to body.
288
     * Although it's a minor issue, it is not what we want. There's no way to
289
     * tell exactly what part of that whitespace was after `</body>' and what
290
     * was before, so we just replace it with a single newline which looks good
291
     * when printed.
292
     */
293
    const body = data.html_root.lastChild;
294
    const text = body && body.tagName === "BODY" && body.lastChild;
295
    if (text && text.nodeName === "#text") {
296
	const new_content = /^([\S\s]*\S)?\s*$/.exec(text.textContent)[1] || "";
297
	text.textContent = new_content + "\n";
298
    }
299

  
300
    finish_serializing_node(data.html_root, data);
301
    if (!data.writer && data.finisher)
302
	setTimeout(data.finisher, 0);
183 303
}
184 304

  
185
function modify_on_the_fly(html_element, policy, consumers)
305
/*
306
 * This function sanitizes `html_root' according to `policy'. It is capable of
307
 * working on an HTML document that is being written to, sanitizing new nodes
308
 * as they appear.
309
 *
310
 * `consumers' object may contain 3 optional callback functions: `writer',
311
 * `node_eater' and `finisher'. The first one, if present, is called with chunks
312
 * of reconstructed HTML code. The second one, if present, gets called for every
313
 * added node with 2 arguments: that node and its parent. The third one is
314
 * called at the end, after all processing has been done.
315
 *
316
 * `modify_on_the_fly()' returns a callback that should be called (with no
317
 * arguments) once the document of html_root has finished being written to.
318
 * Unfortunately, due to the specific behavior of a document that has had its
319
 * documentElement replaced
320
 */
321
function modify_on_the_fly(html_root, policy, consumers)
186 322
{
187 323
    const uniq = gen_nonce();
188
    const uniq_reg = new RegExp(`^(.*)${uniq}(.*)$`);
189
    const data = {policy, html_element, uniq, uniq_reg, ...consumers};
190

  
191
    start_node(data.html_element, data);
324
    const uniq_reg = new RegExp(`^([\\s\\S]*)${uniq}([\\s\\S]*)$`);
325
    const data = {policy, html_root, uniq, uniq_reg, chunks: [], new_added: []};
326
    Object.assign(data, consumers);
192 327

  
193 328
    var observer = new MutationObserver(m => handle_mutation(m, data));
194
    observer.observe(data.html_element, {
329
    observer.observe(data.html_root, {
195 330
     	attributes: true,
196 331
	childList: true,
197 332
	subtree: true

Also available in: Unified diff