Revision 6b53d6c8
Added by koszko about 2 years ago
| content/sanitize_document.js | ||
|---|---|---|
| 43 | 43 |
node.removeAttribute(attr); |
| 44 | 44 |
} |
| 45 | 45 |
|
| 46 |
function sanitize_script(script, policy)
|
|
| 46 |
function sanitize_script(script, data)
|
|
| 47 | 47 |
{
|
| 48 |
if (policy.allow) |
|
| 48 |
if (script.getAttribute("data-hachette-deleteme") === data.policy.nonce) {
|
|
| 49 |
script.remove(); |
|
| 50 |
script.hachette_deleted = true; |
|
| 51 |
script.hachette_ignore = true; |
|
| 52 |
} |
|
| 53 |
|
|
| 54 |
if (data.policy.allow) |
|
| 49 | 55 |
return; |
| 50 | 56 |
|
| 51 | 57 |
block_attribute(script, "type"); |
| 52 | 58 |
script.setAttribute("type", "application/json");
|
| 53 | 59 |
} |
| 54 | 60 |
|
| 55 |
function inject_csp(head, policy)
|
|
| 61 |
function inject_csp(head, data)
|
|
| 56 | 62 |
{
|
| 57 |
if (policy.allow) |
|
| 63 |
if (data.policy.allow)
|
|
| 58 | 64 |
return; |
| 59 | 65 |
|
| 60 | 66 |
const meta = document.createElement("meta");
|
| 61 | 67 |
meta.setAttribute("http-equiv", "Content-Security-Policy");
|
| 62 |
meta.setAttribute("content", csp_rule(policy.nonce));
|
|
| 68 |
meta.setAttribute("content", csp_rule(data.policy.nonce));
|
|
| 63 | 69 |
meta.hachette_ignore = true; |
| 64 | 70 |
head.prepend(meta); |
| 71 |
|
|
| 72 |
data.new_added.unshift([meta, head]); |
|
| 65 | 73 |
} |
| 66 | 74 |
|
| 67 |
function sanitize_http_equiv_csp_rule(meta, policy)
|
|
| 75 |
function sanitize_http_equiv_csp_rule(meta, data)
|
|
| 68 | 76 |
{
|
| 69 | 77 |
const http_equiv = meta.getAttribute("http-equiv");
|
| 78 |
const value = meta.content; |
|
| 70 | 79 |
|
| 71 |
if (!is_csp_header_name(http_equiv, !policy.allow))
|
|
| 80 |
if (!value || !is_csp_header_name(http_equiv, !data.policy.allow))
|
|
| 72 | 81 |
return; |
| 73 | 82 |
|
| 74 |
if (policy.allow || is_csp_header_name(http_equiv, false)) {
|
|
| 75 |
let value = meta.getAttribute("content");
|
|
| 76 |
block_attribute(meta, "content"); |
|
| 77 |
if (value) {
|
|
| 78 |
value = sanitize_csp_header({value}, policy).value;
|
|
| 79 |
meta.setAttribute("content", value);
|
|
| 80 |
} |
|
| 81 |
return; |
|
| 82 |
} |
|
| 83 |
block_attribute(meta, "content"); |
|
| 83 | 84 |
|
| 84 |
block_attribute(meta, "http-equiv"); |
|
| 85 |
if (data.policy.allow || is_csp_header_name(http_equiv, false)) |
|
| 86 |
meta.content = sanitize_csp_header({value}, data.policy).value;
|
|
| 85 | 87 |
} |
| 86 | 88 |
|
| 87 |
function sanitize_node(node, policy)
|
|
| 89 |
function sanitize_node(node, data)
|
|
| 88 | 90 |
{
|
| 89 | 91 |
if (node.tagName === "SCRIPT") |
| 90 |
sanitize_script(node, policy);
|
|
| 92 |
sanitize_script(node, data);
|
|
| 91 | 93 |
|
| 92 | 94 |
if (node.tagName === "HEAD") |
| 93 |
inject_csp(node, policy);
|
|
| 95 |
inject_csp(node, data);
|
|
| 94 | 96 |
|
| 95 | 97 |
if (node.tagName === "META") |
| 96 |
sanitize_http_equiv_csp_rule(node, policy); |
|
| 98 |
sanitize_http_equiv_csp_rule(node, data); |
|
| 99 |
|
|
| 100 |
if (!data.policy.allow) |
|
| 101 |
sanitize_attributes(node, data); |
|
| 102 |
} |
|
| 97 | 103 |
|
| 98 |
if (!policy.allow) |
|
| 99 |
sanitize_attributes(node, policy); |
|
| 104 |
/* |
|
| 105 |
* Instead of calling writer directly with multiple small chunks of reconstructed |
|
| 106 |
* HTML code, we utilize `setTimeout()' to only have it called once, |
|
| 107 |
* asynchronously. |
|
| 108 |
*/ |
|
| 109 |
function do_write_callback(data) |
|
| 110 |
{
|
|
| 111 |
data.writer(data.chunks.join(""));
|
|
| 112 |
data.chunks = []; |
|
| 113 |
|
|
| 114 |
if (data.finished && data.finisher) |
|
| 115 |
data.finisher(); |
|
| 116 |
} |
|
| 117 |
|
|
| 118 |
function do_write(chunk, data) |
|
| 119 |
{
|
|
| 120 |
data.chunks.push(chunk); |
|
| 121 |
clearTimeout(data.write_timeout); |
|
| 122 |
data.write_timeout = setTimeout(() => do_write_callback(data), 0); |
|
| 100 | 123 |
} |
| 101 | 124 |
|
| 102 | 125 |
const serializer = new XMLSerializer(); |
| 103 | 126 |
|
| 104 |
function start_node(node, data) |
|
| 127 |
function start_serializing_node(node, data)
|
|
| 105 | 128 |
{
|
| 129 |
node.hachette_started = true; |
|
| 130 |
|
|
| 106 | 131 |
if (!data.writer) |
| 107 | 132 |
return; |
| 108 | 133 |
|
| 109 |
node.hachette_started = true; |
|
| 110 | 134 |
const clone = node.cloneNode(false); |
| 111 | 135 |
clone.textContent = data.uniq; |
| 112 |
data.writer(data.uniq_reg.exec(clone.outerHTML)[1]);
|
|
| 136 |
do_write(data.uniq_reg.exec(clone.outerHTML)[1], data);
|
|
| 113 | 137 |
} |
| 114 | 138 |
|
| 115 |
function finish_node(node, data) |
|
| 139 |
function finish_serializing_node(node, data)
|
|
| 116 | 140 |
{
|
| 117 | 141 |
const nodes_to_process = [node]; |
| 118 | 142 |
|
| ... | ... | |
| 127 | 151 |
while (nodes_to_process.length > 0) {
|
| 128 | 152 |
const node = nodes_to_process.pop(); |
| 129 | 153 |
node.remove(); |
| 154 |
node.hachette_ignore = true; |
|
| 130 | 155 |
|
| 131 | 156 |
if (!data.writer) |
| 132 | 157 |
continue; |
| 133 | 158 |
|
| 134 | 159 |
if (node.hachette_started) {
|
| 135 | 160 |
node.textContent = data.uniq; |
| 136 |
data.writer(data.uniq_reg.exec(node.outerHTML)[2]); |
|
| 161 |
do_write(data.uniq_reg.exec(node.outerHTML)[2], data); |
|
| 162 |
continue; |
|
| 163 |
} |
|
| 164 |
|
|
| 165 |
do_write(node.outerHTML || serializer.serializeToString(node), data); |
|
| 166 |
} |
|
| 167 |
} |
|
| 168 |
|
|
| 169 |
function process_initial_nodes(node, data) |
|
| 170 |
{
|
|
| 171 |
if (data.processed_initial_nodes) |
|
| 172 |
return; |
|
| 173 |
|
|
| 174 |
data.processed_initial_nodes = true; |
|
| 175 |
|
|
| 176 |
start_serializing_node(data.html_root, data); |
|
| 177 |
|
|
| 178 |
const new_added = []; |
|
| 179 |
const nodes_to_process = [data.html_root]; |
|
| 180 |
|
|
| 181 |
let i = 0; |
|
| 182 |
while (nodes_to_process.length > 0) {
|
|
| 183 |
let current = nodes_to_process.shift(); |
|
| 184 |
|
|
| 185 |
if (current.firstChild) {
|
|
| 186 |
if (current.firstChild === node) |
|
| 187 |
break; |
|
| 188 |
nodes_to_process.unshift(current.firstChild, current); |
|
| 189 |
new_added.push([current.firstChild, current]); |
|
| 137 | 190 |
continue; |
| 138 | 191 |
} |
| 139 | 192 |
|
| 140 |
data.writer(node.outerHTML || serializer.serializeToString(node)); |
|
| 193 |
while (current && !current.nextSibling) |
|
| 194 |
current = nodes_to_process.shift(); |
|
| 195 |
|
|
| 196 |
if (!current || current.nextSibling === node) |
|
| 197 |
break; |
|
| 198 |
|
|
| 199 |
nodes_to_process.unshift(current.nextSibling); |
|
| 200 |
new_added.push([current.nextSibling, nodes_to_process[1]]); |
|
| 141 | 201 |
} |
| 202 |
|
|
| 203 |
data.new_added.unshift(...new_added); |
|
| 142 | 204 |
} |
| 143 | 205 |
|
| 144 | 206 |
/* |
| 145 | 207 |
* Important! Due to some weirdness node.parentElement is not always correct
| 146 |
* under Chromium. Track node relations manually. |
|
| 208 |
* in MutationRecords under Chromium. Track node relations manually.
|
|
| 147 | 209 |
*/ |
| 148 | 210 |
function handle_added_node(node, true_parent, data) |
| 149 | 211 |
{
|
| 150 |
if (node.hachette_ignore || true_parent.hachette_ignore) |
|
| 151 |
return; |
|
| 212 |
/* |
|
| 213 |
* Functions we call here might cause new nodes to be injected or found |
|
| 214 |
* that require processing before the one we got in function argument. |
|
| 215 |
* We rely on those functions putting the node(s) they create/find at the |
|
| 216 |
* very beginning of the `new_added' queue and (for created nodes) setting |
|
| 217 |
* their `hachette_ignore' property, based on which their MutationRecord |
|
| 218 |
* will not be processed. A function can also mark a node already in the |
|
| 219 |
* `new_added' queue as not eligible for processing by setting its |
|
| 220 |
* `hachette_deleted' property. |
|
| 221 |
*/ |
|
| 152 | 222 |
|
| 153 |
if (!true_parent.hachette_started) |
|
| 154 |
start_node(true_parent, data) |
|
| 223 |
process_initial_nodes(node, data); |
|
| 155 | 224 |
|
| 156 |
sanitize_node(node, data.policy);
|
|
| 225 |
data.new_added.push([node, true_parent]);
|
|
| 157 | 226 |
|
| 158 |
if (data.node_eater)
|
|
| 159 |
data.node_eater(node, true_parent);
|
|
| 227 |
while (data.new_added.length > 0) {
|
|
| 228 |
[node, true_parent] = data.new_added.shift();
|
|
| 160 | 229 |
|
| 161 |
finish_node(true_parent.hachette_last_added, data); |
|
| 230 |
if (true_parent.hachette_deleted) |
|
| 231 |
node.hachette_deleted = true; |
|
| 232 |
if (node.hachette_deleted) |
|
| 233 |
continue; |
|
| 234 |
|
|
| 235 |
if (!true_parent.hachette_started) |
|
| 236 |
start_serializing_node(true_parent, data) |
|
| 237 |
|
|
| 238 |
if (!node.hachette_ignore) |
|
| 239 |
sanitize_node(node, data); |
|
| 240 |
|
|
| 241 |
if (node.hachette_deleted) |
|
| 242 |
continue; |
|
| 243 |
|
|
| 244 |
if (data.node_eater) |
|
| 245 |
data.node_eater(node, true_parent); |
|
| 162 | 246 |
|
| 163 |
true_parent.hachette_last_added = node; |
|
| 247 |
finish_serializing_node(true_parent.hachette_last_added, data); |
|
| 248 |
|
|
| 249 |
true_parent.hachette_last_added = node; |
|
| 250 |
} |
|
| 164 | 251 |
} |
| 165 | 252 |
|
| 166 | 253 |
function handle_mutation(mutations, data) |
| ... | ... | |
| 170 | 257 |
* node.parentElement. The former is the correct one. |
| 171 | 258 |
*/ |
| 172 | 259 |
for (const mutation of mutations) {
|
| 173 |
for (const node of mutation.addedNodes) |
|
| 260 |
for (const node of mutation.addedNodes) {
|
|
| 261 |
/* Check for nodes added by ourselves. */ |
|
| 262 |
if (mutation.target.hachette_ignore) |
|
| 263 |
node.hachette_ignore = true; |
|
| 264 |
if (node.hachette_ignore) |
|
| 265 |
continue; |
|
| 266 |
|
|
| 174 | 267 |
handle_added_node(node, mutation.target, data); |
| 268 |
} |
|
| 175 | 269 |
} |
| 176 | 270 |
} |
| 177 | 271 |
|
| 178 | 272 |
function finish_processing(data) |
| 179 | 273 |
{
|
| 274 |
process_initial_nodes(undefined, data); |
|
| 275 |
|
|
| 276 |
/* |
|
| 277 |
* The `finisher' callback should be called, if provided. Normally our |
|
| 278 |
* function that performs the last write does it after seeing `finished' |
|
| 279 |
* set to `true'. If, however, there's no `writer' callback and hence no |
|
| 280 |
* writes to perform, we need to take care of calling `finisher' here. |
|
| 281 |
*/ |
|
| 282 |
data.finished = true; |
|
| 180 | 283 |
handle_mutation(data.observer.takeRecords(), data); |
| 181 |
finish_node(data.html_element, data); |
|
| 182 | 284 |
data.observer.disconnect(); |
| 285 |
|
|
| 286 |
/* |
|
| 287 |
* Additional whitespace that was after `</body>' gets appended to body. |
|
| 288 |
* Although it's a minor issue, it is not what we want. There's no way to |
|
| 289 |
* tell exactly what part of that whitespace was after `</body>' and what |
|
| 290 |
* was before, so we just replace it with a single newline which looks good |
|
| 291 |
* when printed. |
|
| 292 |
*/ |
|
| 293 |
const body = data.html_root.lastChild; |
|
| 294 |
const text = body && body.tagName === "BODY" && body.lastChild; |
|
| 295 |
if (text && text.nodeName === "#text") {
|
|
| 296 |
const new_content = /^([\S\s]*\S)?\s*$/.exec(text.textContent)[1] || ""; |
|
| 297 |
text.textContent = new_content + "\n"; |
|
| 298 |
} |
|
| 299 |
|
|
| 300 |
finish_serializing_node(data.html_root, data); |
|
| 301 |
if (!data.writer && data.finisher) |
|
| 302 |
setTimeout(data.finisher, 0); |
|
| 183 | 303 |
} |
| 184 | 304 |
|
| 185 |
function modify_on_the_fly(html_element, policy, consumers) |
|
| 305 |
/* |
|
| 306 |
* This function sanitizes `html_root' according to `policy'. It is capable of |
|
| 307 |
* working on an HTML document that is being written to, sanitizing new nodes |
|
| 308 |
* as they appear. |
|
| 309 |
* |
|
| 310 |
* `consumers' object may contain 3 optional callback functions: `writer', |
|
| 311 |
* `node_eater' and `finisher'. The first one, if present, is called with chunks |
|
| 312 |
* of reconstructed HTML code. The second one, if present, gets called for every |
|
| 313 |
* added node with 2 arguments: that node and its parent. The third one is |
|
| 314 |
* called at the end, after all processing has been done. |
|
| 315 |
* |
|
| 316 |
* `modify_on_the_fly()' returns a callback that should be called (with no |
|
| 317 |
* arguments) once the document of html_root has finished being written to. |
|
| 318 |
* Unfortunately, due to the specific behavior of a document that has had its |
|
| 319 |
* documentElement replaced |
|
| 320 |
*/ |
|
| 321 |
function modify_on_the_fly(html_root, policy, consumers) |
|
| 186 | 322 |
{
|
| 187 | 323 |
const uniq = gen_nonce(); |
| 188 |
const uniq_reg = new RegExp(`^(.*)${uniq}(.*)$`);
|
|
| 189 |
const data = {policy, html_element, uniq, uniq_reg, ...consumers};
|
|
| 190 |
|
|
| 191 |
start_node(data.html_element, data); |
|
| 324 |
const uniq_reg = new RegExp(`^([\\s\\S]*)${uniq}([\\s\\S]*)$`);
|
|
| 325 |
const data = {policy, html_root, uniq, uniq_reg, chunks: [], new_added: []};
|
|
| 326 |
Object.assign(data, consumers); |
|
| 192 | 327 |
|
| 193 | 328 |
var observer = new MutationObserver(m => handle_mutation(m, data)); |
| 194 |
observer.observe(data.html_element, {
|
|
| 329 |
observer.observe(data.html_root, {
|
|
| 195 | 330 |
attributes: true, |
| 196 | 331 |
childList: true, |
| 197 | 332 |
subtree: true |
Also available in: Unified diff
use StreamFilter under Mozilla to prevent csp tags from blocking our injected scripts