Project

General

Profile

Download (4.71 KB) Statistics
| Branch: | Tag: | Revision:

haketilo / content / sanitize_document.js @ d09b7ee1

1
/**
2
 * Hachette modify HTML document as it loads and reconstruct HTML code from it
3
 *
4
 * Copyright (C) 2021 Wojtek Kosior
5
 * Redistribution terms are gathered in the `copyright' file.
6
 */
7

    
8
/*
9
 * IMPORTS_START
10
 * IMPORT gen_nonce
11
 * IMPORT csp_rule
12
 * IMPORT is_csp_header_name
13
 * IMPORT sanitize_csp_header
14
 * IMPORT sanitize_attributes
15
 * IMPORTS_END
16
 */
17

    
18
/*
19
 * Functions that sanitize elements. The script blocking measures are, when
20
 * possible, going to be applied together with CSP rules injected using
21
 * webRequest.
22
 */
23

    
24
const blocked = "blocked";

/**
 * Disable attribute `attr` of `node` by renaming it to `blocked-<attr>`.
 *
 * If `blocked-<attr>` (or a longer `blocked-...-<attr>` chain) is already
 * present, every existing link is first moved one `blocked-` prefix further,
 * so previously stored values are kept instead of being overwritten.
 *
 * @param {Element} node - object offering has/get/set/removeAttribute
 * @param {string} attr - name of the attribute to disable
 */
function block_attribute(node, attr)
{
    /*
     * Disabling attributes this way allows them to still be relatively
     * easily accessed in case they contain some useful data.
     */

    /*
     * Look for the first unused name in the sequence attr, blocked-attr,
     * blocked-blocked-attr, ... The parts have to be joined with "-" so that
     * the probed names are the same names setAttribute() writes below.
     */
    const construct_name = [attr];
    while (node.hasAttribute(construct_name.join("-")))
        construct_name.unshift(blocked);

    /* Shift values along the chain, starting from its far (free) end. */
    while (construct_name.length > 1) {
        construct_name.shift();
        const name = construct_name.join("-");
        node.setAttribute(`${blocked}-${name}`, node.getAttribute(name));
    }

    node.removeAttribute(attr);
}
45

    
46
/**
 * Neutralize a script element unless the policy allows scripts.
 *
 * The element's current `type` attribute is disabled through
 * block_attribute() and a `type` of "application/json" is set instead.
 *
 * @param {Element} script - the script element to process
 * @param {object} policy - object whose `allow` property is consulted
 */
function sanitize_script(script, policy)
{
    if (!policy.allow) {
        block_attribute(script, "type");
        script.setAttribute("type", "application/json");
    }
}
54

    
55
/**
 * Prepend a Content-Security-Policy <meta> element to `head` unless the
 * policy allows scripts.
 *
 * The rule text comes from csp_rule(policy.nonce). The created element is
 * flagged with `hachette_ignore`, which handle_added_node() checks in order
 * to skip nodes.
 *
 * @param {Element} head - element the <meta> gets prepended to
 * @param {object} policy - object providing `allow` and `nonce`
 */
function inject_csp(head, policy)
{
    if (!policy.allow) {
        const csp_meta = document.createElement("meta");

        csp_meta.setAttribute("http-equiv", "Content-Security-Policy");
        csp_meta.setAttribute("content", csp_rule(policy.nonce));
        csp_meta.hachette_ignore = true;

        head.prepend(csp_meta);
    }
}
66

    
67
/**
 * Process a <meta> element whose `http-equiv` names a CSP header.
 *
 * Nothing happens when is_csp_header_name(http-equiv, !policy.allow) is
 * false. Otherwise, when scripts are allowed or the name also passes
 * is_csp_header_name(http-equiv, false), the `content` attribute is disabled
 * with block_attribute() and, if it had a non-empty value, re-set to the
 * value returned by sanitize_csp_header(). In the remaining case the
 * `http-equiv` attribute itself is disabled.
 *
 * @param {Element} meta - the <meta> element to inspect
 * @param {object} policy - object providing `allow`; also handed on to
 *                          sanitize_csp_header()
 */
function sanitize_http_equiv_csp_rule(meta, policy)
{
    const header_name = meta.getAttribute("http-equiv");

    if (!is_csp_header_name(header_name, !policy.allow))
        return;

    const rewrite_content =
          policy.allow || is_csp_header_name(header_name, false);

    if (!rewrite_content) {
        block_attribute(meta, "http-equiv");
        return;
    }

    /* Read the value before block_attribute() removes the attribute. */
    const original_rule = meta.getAttribute("content");
    block_attribute(meta, "content");

    if (original_rule) {
        const sanitized = sanitize_csp_header({value: original_rule}, policy);
        meta.setAttribute("content", sanitized.value);
    }
}
86

    
87
/**
 * Apply the tag-specific sanitizer to a freshly added node and, when scripts
 * are not allowed, pass the node to sanitize_attributes() as well.
 *
 * @param {Node} node - node to sanitize; dispatch is based on its `tagName`
 * @param {object} policy - policy object handed to every sanitizer
 */
function sanitize_node(node, policy)
{
    switch (node.tagName) {
    case "SCRIPT":
        sanitize_script(node, policy);
        break;
    case "HEAD":
        inject_csp(node, policy);
        break;
    case "META":
        sanitize_http_equiv_csp_rule(node, policy);
        break;
    }

    if (!policy.allow)
        sanitize_attributes(node, policy);
}
101

    
102
/*
 * Shared XMLSerializer instance. finish_node() falls back to it for nodes
 * whose `outerHTML` is empty or missing.
 */
const serializer = new XMLSerializer();
103

    
104
/**
 * Emit the opening part of `node`'s HTML (everything that precedes its
 * content) through `data.writer` and mark the node as started.
 *
 * The opening part is found by serializing a shallow clone whose content is
 * the marker `data.uniq` and taking the text in front of the marker. Does
 * nothing when no writer is set.
 *
 * @param {Element} node - element whose opening tag should be written
 * @param {object} data - processing state; `writer`, `uniq` and `uniq_reg`
 *                        are used here
 */
function start_node(node, data)
{
    if (!data.writer)
        return;

    node.hachette_started = true;

    const clone = node.cloneNode(false);
    clone.textContent = data.uniq;
    const html = clone.outerHTML;

    const match = data.uniq_reg.exec(html);
    if (match) {
        data.writer(match[1]);
        return;
    }

    /*
     * The regex may fail to match, e.g. when an attribute value contains a
     * line terminator that `.' does not match. Split on the marker by hand
     * instead of crashing on a null match. If the marker is missing from the
     * serialization, write the serialization as is.
     */
    const marker_at = html.lastIndexOf(String(data.uniq));
    data.writer(marker_at < 0 ? html : html.slice(0, marker_at));
}
114

    
115
/**
 * Finish `node` together with the chain of nodes reachable from it through
 * `hachette_last_added`, deepest node first.
 *
 * Every node of the chain gets removed with remove(). If `data.writer` is
 * set, a node marked `hachette_started` has the closing part of its HTML
 * (everything after its content) written, while any other node is written
 * out whole. A falsy `node` makes this a no-op.
 *
 * @param {Node} [node] - first node of the chain to finish
 * @param {object} data - processing state; `writer`, `uniq` and `uniq_reg`
 *                        are used here
 */
function finish_node(node, data)
{
    /* Collect node, its last added child, that child's last added child... */
    const chain = [];
    for (let current = node; current; current = current.hachette_last_added)
        chain.push(current);

    while (chain.length > 0) {
        const finished = chain.pop();
        finished.remove();

        if (!data.writer)
            continue;

        if (!finished.hachette_started) {
            data.writer(finished.outerHTML ||
                        serializer.serializeToString(finished));
            continue;
        }

        /* Opening part was written already; only emit what follows content. */
        finished.textContent = data.uniq;
        const html = finished.outerHTML;

        const match = data.uniq_reg.exec(html);
        if (match) {
            data.writer(match[2]);
            continue;
        }

        /*
         * The regex may fail to match, e.g. when an attribute value contains
         * a line terminator that `.' does not match. Split on the marker by
         * hand instead of crashing on a null match.
         */
        const marker = String(data.uniq);
        const marker_at = html.lastIndexOf(marker);
        data.writer(marker_at < 0 ? "" : html.slice(marker_at + marker.length));
    }
}
143

    
144
/*
145
 * Important! Due to some weirdness node.parentElement is not always correct
146
 * under Chromium. Track node relations manually.
147
 */
148
/**
 * Process a single node that was added under `true_parent`.
 *
 * Skipped entirely when either node carries `hachette_ignore`. Otherwise the
 * parent is started if it has not been yet, the node is sanitized, handed to
 * the optional `data.node_eater` callback, the parent's previously added
 * node is finished and the new node is recorded as the parent's
 * `hachette_last_added`.
 *
 * @param {Node} node - the added node
 * @param {Node} true_parent - parent as reported by the mutation record
 * @param {object} data - processing state; `policy` and `node_eater` are
 *                        used here, the rest is passed on
 */
function handle_added_node(node, true_parent, data)
{
    const ignored = node.hachette_ignore || true_parent.hachette_ignore;
    if (ignored)
        return;

    if (!true_parent.hachette_started)
        start_node(true_parent, data);

    sanitize_node(node, data.policy);

    if (data.node_eater)
        data.node_eater(node, true_parent);

    /* The previously added node is complete now that a new one arrived. */
    const previous = true_parent.hachette_last_added;
    finish_node(previous, data);

    true_parent.hachette_last_added = node;
}
165

    
166
/**
 * Feed every node added by the given mutation records to
 * handle_added_node().
 *
 * @param {Iterable} mutations - mutation records; `addedNodes` and `target`
 *                               of each one are used
 * @param {object} data - processing state passed through unchanged
 */
function handle_mutation(mutations, data)
{
    /*
     * Chromium: for an unknown reason mutation.target is not always the same as
     * node.parentElement. The former is the correct one.
     */
    for (const {addedNodes, target} of mutations) {
        for (const added of addedNodes)
            handle_added_node(added, target, data);
    }
}
177

    
178
/**
 * Wrap up on-the-fly processing: handle mutation records still queued in the
 * observer, finish the root element and disconnect the observer.
 *
 * @param {object} data - processing state; `observer` and `html_element`
 *                        are used here
 */
function finish_processing(data)
{
    const {observer} = data;

    const pending = observer.takeRecords();
    handle_mutation(pending, data);

    finish_node(data.html_element, data);

    observer.disconnect();
}
184

    
185
/**
 * Start sanitizing `html_element`'s subtree as nodes get added to it.
 *
 * A MutationObserver is installed on the element and every added node goes
 * through handle_added_node(). The properties of `consumers` are copied into
 * the shared state object; of these, `writer` and `node_eater` are the ones
 * read by the functions in this file.
 *
 * @param {Element} html_element - root element to observe
 * @param {object} policy - policy object handed to the sanitizers
 * @param {object} consumers - optional callbacks merged into the state
 * @returns {function(): void} function that finishes processing and
 *                             disconnects the observer
 */
function modify_on_the_fly(html_element, policy, consumers)
{
    const uniq = gen_nonce();
    /*
     * Marker regex used to split a serialized element into its opening and
     * closing parts. The dotAll flag is required because serialized attribute
     * values may contain line terminators, which a plain `.' does not match.
     * NOTE(review): assumes gen_nonce() yields no regex metacharacters.
     */
    const uniq_reg = new RegExp(`^(.*)${uniq}(.*)$`, "s");
    const data = {policy, html_element, uniq, uniq_reg, ...consumers};

    start_node(data.html_element, data);

    const observer = new MutationObserver(m => handle_mutation(m, data));
    observer.observe(data.html_element, {
        attributes: true,
        childList: true,
        subtree: true
    });

    data.observer = observer;

    return () => finish_processing(data);
}
204

    
205
/*
206
 * EXPORTS_START
207
 * EXPORT modify_on_the_fly
208
 * EXPORTS_END
209
 */
(6-6/6)