1
|
/**
|
2
|
* Hachette modify HTML document as it loads and reconstruct HTML code from it
|
3
|
*
|
4
|
* Copyright (C) 2021 Wojtek Kosior
|
5
|
* Redistribution terms are gathered in the `copyright' file.
|
6
|
*/
|
7
|
|
8
|
/*
|
9
|
* IMPORTS_START
|
10
|
* IMPORT gen_nonce
|
11
|
* IMPORT csp_rule
|
12
|
* IMPORT is_csp_header_name
|
13
|
* IMPORT sanitize_csp_header
|
14
|
* IMPORT sanitize_attributes
|
15
|
* IMPORTS_END
|
16
|
*/
|
17
|
|
18
|
/*
|
19
|
* Functions that sanitize elements. The script blocking measures are, when
|
20
|
* possible, going to be applied together with CSP rules injected using
|
21
|
* webRequest.
|
22
|
*/
|
23
|
|
24
|
/*
 * Marker string used by block_attribute() to build the names of disabled
 * attributes (a disabled `type' attribute is stored as `blocked-type').
 */
const blocked = "blocked";
|
25
|
|
26
|
/*
 * Disable attribute `attr' of `node' by moving its value to an attribute
 * named `blocked-<attr>' and removing the original.
 *
 * If `blocked-<attr>' (or `blocked-blocked-<attr>', ...) is already present,
 * the existing values are first shifted one level deeper so that nothing gets
 * overwritten. When `node' has no `attr' attribute, nothing is changed.
 *
 * node - object offering hasAttribute/getAttribute/setAttribute/
 *        removeAttribute (e.g. an Element)
 * attr - name of the attribute to disable
 */
function block_attribute(node, attr)
{
    /*
     * Disabling attributes this way allows them to still be relatively
     * easily accessed in case they contain some useful data.
     */
    const prefix = `${blocked}-`;

    /*
     * Walk the chain attr, blocked-attr, blocked-blocked-attr, ... until the
     * first name that is not yet taken. The probe uses the very same names
     * that are written below.
     */
    const names = [attr];
    while (node.hasAttribute(names[names.length - 1]))
        names.push(prefix + names[names.length - 1]);

    /* Shift values one level deeper, starting from the deepest one. */
    for (let i = names.length - 1; i > 0; i--)
        node.setAttribute(names[i], node.getAttribute(names[i - 1]));

    node.removeAttribute(attr);
}
|
45
|
|
46
|
/*
 * Neutralize a <script> element unless the policy allows scripts: the
 * original `type' attribute is disabled through block_attribute() and
 * replaced with "application/json".
 *
 * script - the script element to process
 * policy - object whose truthy `allow' property means "leave untouched"
 */
function sanitize_script(script, policy)
{
    if (!policy.allow) {
        block_attribute(script, "type");
        script.setAttribute("type", "application/json");
    }
}
|
54
|
|
55
|
/*
 * Unless the policy allows scripts, create a
 * <meta http-equiv="Content-Security-Policy"> element carrying the rule
 * returned by csp_rule(policy.nonce) and prepend it to `head'.
 *
 * The element is flagged with `hachette_ignore' so that handle_added_node()
 * skips it.
 */
function inject_csp(head, policy)
{
    if (!policy.allow) {
        const csp_meta = document.createElement("meta");

        csp_meta.setAttribute("http-equiv", "Content-Security-Policy");
        const rule = csp_rule(policy.nonce);
        csp_meta.setAttribute("content", rule);
        csp_meta.hachette_ignore = true;

        head.prepend(csp_meta);
    }
}
|
66
|
|
67
|
/*
 * Process a <meta> element whose `http-equiv' may name a CSP header.
 *
 * Nothing is done when is_csp_header_name(http_equiv, !policy.allow) is
 * false. Otherwise, if the policy allows scripts or the name also passes
 * is_csp_header_name(http_equiv, false), the `content' attribute is disabled
 * and, when it was non-empty, re-set to its sanitize_csp_header() result. In
 * the remaining case the `http-equiv' attribute itself is disabled.
 *
 * NOTE(review): the exact meaning of is_csp_header_name()'s second argument
 * is defined outside of this file - confirm against its definition.
 */
function sanitize_http_equiv_csp_rule(meta, policy)
{
    const header_name = meta.getAttribute("http-equiv");

    if (!is_csp_header_name(header_name, !policy.allow))
        return;

    const content_is_sanitizable =
        policy.allow || is_csp_header_name(header_name, false);

    if (!content_is_sanitizable) {
        block_attribute(meta, "http-equiv");
        return;
    }

    const original_content = meta.getAttribute("content");
    block_attribute(meta, "content");

    if (original_content) {
        const sanitized = sanitize_csp_header({value: original_content}, policy);
        meta.setAttribute("content", sanitized.value);
    }
}
|
86
|
|
87
|
/*
 * Apply the sanitizing step matching the node's tag name (SCRIPT, HEAD or
 * META) and then, unless the policy allows scripts, sanitize the node's
 * attributes with sanitize_attributes().
 */
function sanitize_node(node, policy)
{
    switch (node.tagName) {
    case "SCRIPT":
        sanitize_script(node, policy);
        break;
    case "HEAD":
        inject_csp(node, policy);
        break;
    case "META":
        sanitize_http_equiv_csp_rule(node, policy);
        break;
    }

    if (policy.allow)
        return;

    sanitize_attributes(node, policy);
}
|
101
|
|
102
|
/*
 * Shared serializer; finish_node() falls back to it for nodes whose
 * `outerHTML' is empty or missing.
 */
const serializer = new XMLSerializer();
|
103
|
|
104
|
/*
 * Emit the opening part of `node' through data.writer and mark the node as
 * started.
 *
 * A shallow clone of the node gets data.uniq as its only text; the part of
 * the clone's outerHTML that precedes data.uniq (first capture group of
 * data.uniq_reg) is what gets written. Does nothing when no writer was
 * supplied.
 */
function start_node(node, data)
{
    if (data.writer) {
        node.hachette_started = true;

        const shell = node.cloneNode(false);
        shell.textContent = data.uniq;

        const match = data.uniq_reg.exec(shell.outerHTML);
        data.writer(match[1]);
    }
}
|
114
|
|
115
|
/*
 * Finish `node' together with the whole chain of nodes reachable from it
 * through `hachette_last_added' links.
 *
 * The chain is processed from its deepest member up to `node' itself. Each
 * member is removed from the document and, if data.writer is present, either
 *   - its closing part is written (for nodes already opened by start_node(),
 *     i.e. what follows data.uniq in its outerHTML), or
 *   - its whole serialization is written (outerHTML, or the XMLSerializer
 *     output when outerHTML is empty or missing).
 *
 * A falsy `node' results in no work at all.
 */
function finish_node(node, data)
{
    /* Collect the chain first; the nodes get modified while being written. */
    const chain = [];
    for (let current = node; current; current = current.hachette_last_added)
        chain.push(current);

    for (const finished of chain.reverse()) {
        finished.remove();

        if (!data.writer)
            continue;

        if (!finished.hachette_started) {
            data.writer(finished.outerHTML ||
                        serializer.serializeToString(finished));
            continue;
        }

        finished.textContent = data.uniq;
        data.writer(data.uniq_reg.exec(finished.outerHTML)[2]);
    }
}
|
143
|
|
144
|
/*
|
145
|
* Important! Due to some weirdness node.parentElement is not always correct
|
146
|
* under Chromium. Track node relations manually.
|
147
|
*/
|
148
|
/*
 * Process a node reported as added under `true_parent'.
 *
 * Nodes (or parents) flagged with `hachette_ignore' are skipped. Otherwise
 * the parent is started if it has not been yet, the node is sanitized,
 * optionally handed to data.node_eater, and the node previously added to the
 * same parent is finished. The new node is then recorded as the parent's
 * `hachette_last_added'.
 */
function handle_added_node(node, true_parent, data)
{
    const ignored = node.hachette_ignore || true_parent.hachette_ignore;

    if (!ignored) {
        if (!true_parent.hachette_started)
            start_node(true_parent, data);

        sanitize_node(node, data.policy);

        if (data.node_eater)
            data.node_eater(node, true_parent);

        /* The previous sibling is complete now that a new one has arrived. */
        finish_node(true_parent.hachette_last_added, data);
        true_parent.hachette_last_added = node;
    }
}
|
165
|
|
166
|
/*
 * Feed every node added by the given mutation records to
 * handle_added_node(), using the record's target as the node's parent.
 */
function handle_mutation(mutations, data)
{
    /*
     * Chromium: for an unknown reason mutation.target is not always the same as
     * node.parentElement. The former is the correct one.
     */
    for (const {addedNodes, target} of mutations) {
        for (const added of addedNodes)
            handle_added_node(added, target, data);
    }
}
|
177
|
|
178
|
/*
 * Wrap up the on-the-fly modification: handle mutation records still queued
 * in the observer, finish the root html element (and everything still open
 * under it) and stop observing.
 */
function finish_processing(data)
{
    const {observer, html_element} = data;

    const pending = observer.takeRecords();
    handle_mutation(pending, data);

    finish_node(html_element, data);

    observer.disconnect();
}
|
184
|
|
185
|
/*
 * Start sanitizing the document rooted at `html_element' while it loads.
 *
 * html_element - root element to observe
 * policy       - policy object handed to the sanitizing functions
 * consumers    - optional callbacks merged into the shared state; this file
 *                reads `writer' (receives chunks of reconstructed HTML) and
 *                `node_eater' (receives each added node and its parent)
 *
 * Returns a function that must be called once loading is over; it processes
 * outstanding mutations, finishes the root element and disconnects the
 * observer.
 */
function modify_on_the_fly(html_element, policy, consumers)
{
    const uniq = gen_nonce();
    /*
     * The `s' (dotAll) flag is needed because serialized elements may contain
     * newlines (e.g. inside attribute values); without it exec() would return
     * null for such elements.
     */
    const uniq_reg = new RegExp(`^(.*)${uniq}(.*)$`, "s");
    const data = {policy, html_element, uniq, uniq_reg, ...consumers};

    start_node(data.html_element, data);

    const observer = new MutationObserver(m => handle_mutation(m, data));
    observer.observe(data.html_element, {
        attributes: true,
        childList: true,
        subtree: true
    });

    data.observer = observer;

    return () => finish_processing(data);
}
|
204
|
|
205
|
/*
|
206
|
* EXPORTS_START
|
207
|
* EXPORT modify_on_the_fly
|
208
|
* EXPORTS_END
|
209
|
*/
|