Project

General

Profile

« Previous | Next » 

Revision 96efcc33

Added by koszko over 1 year ago

improve script blocking in non-HTML documents (XML)

View differences:

content/policy_enforcing.js
45 45

  
46 46
#FROM common/misc.js IMPORT gen_nonce, csp_header_regex
47 47

  
48
const html_ns = "http://www.w3.org/1999/xhtml";
49
const svg_ns = "http://www.w3.org/2000/svg";
50

  
48 51
document.content_loaded = document.readyState === "complete";
49 52
const wait_loaded = e => e.content_loaded ? Promise.resolve() :
50 53
      new Promise(c => e.addEventListener("DOMContentLoaded", c, {once: true}));
......
203 206
     */
204 207
    if (some_attr_blocked) {
205 208
	const replacement_elem = document.createElement("a");
209

  
210
	/* Prevent this node from being processed by our observer. */
211
	replacement_elem.haketilo_trusted_node = true;
212

  
206 213
	element.replaceWith(replacement_elem);
207 214
	replacement_elem.replaceWith(element);
208 215
    }
......
221 228
    element.haketilo_sanitized_onevent = true;
222 229

  
223 230
    for (const attribute_node of [...(element.attributes || [])]) {
224
	const attr = attribute_node.localName, attr_lo = attr.toLowerCase();;
225
	if (!/^on/.test(attr_lo) || !(attr_lo in element.wrappedJSObject))
231
	const attr = attribute_node.localName, attr_lo = attr.toLowerCase();
232
	if (!/^on/.test(attr_lo) || !(attr_lo in element))
226 233
	    continue;
227 234

  
228 235
	/*
......
246 253
}
247 254
#ENDIF
248 255

  
249
function start_mo_sanitizing(doc) {
250
    if (!doc.content_loaded) {
251
	function mutation_handler(mutation) {
252
	    mutation.addedNodes.forEach(sanitize_element_urls);
256
/*
257
 * Sanitize elements on-the-fly as they appear using MutationObserver.
258
 *
259
 * Under Abrowser 97 it was observed that MutationObserver does not always work
260
 * as it should. When trying to observe nodes of an XMLDocument the behavior was
261
 * as if the "subtree" option to MutationObserver.observe() was ignored. To work
262
 * around this we avoid using the "subtree" option altogether and have the same
263
 * code work in all scenarios.
264
 */
265
function MOSanitizer(root) {
266
    this.root = root;
267

  
268
    this.recursively_sanitize(root);
269

  
270
    this.mo = new MutationObserver(ms => this.handle_mutations(ms));
271
}
272

  
273
MOSanitizer.prototype.observe = function() {
274
    let elem = this.root;
275
    while (elem && !elem.haketilo_trusted_node) {
276
	this.mo.observe(elem, {childList: true});
277
	elem = elem.lastElementChild;
278
    }
279
}
280

  
281
MOSanitizer.prototype.handle_mutations = function(mutations) {
282
    for (const mut of mutations) {
283
	for (const new_node of mut.addedNodes)
284
	    this.recursively_sanitize(new_node);
285
    }
286

  
287
    this.mo.disconnect();
288
    this.observe();
289
}
290

  
291
MOSanitizer.prototype.recursively_sanitize = function(elem) {
292
    const to_process = [elem];
293

  
294
    while (to_process.length > 0) {
295
	const current_elem = to_process.pop();
296

  
297
	if (current_elem.haketilo_trusted_node ||
298
	    current_elem.nodeType !== this.root.ELEMENT_NODE)
299
	    continue;
300

  
301
	to_process.push(...current_elem.children);
302

  
303
	sanitize_element_urls(current_elem);
253 304
#IF MOZILLA
254
	    mutation.addedNodes.forEach(sanitize_element_onevent);
305
	sanitize_element_onevent(current_elem);
255 306
#ENDIF
256
	}
257
	const mo = new MutationObserver(ms => ms.forEach(mutation_handler));
258
	mo.observe(doc, {childList: true, subtree: true});
259
	wait_loaded(doc).then(() => mo.disconnect());
260 307
    }
261 308
}
262 309

  
310
MOSanitizer.prototype.start = function() {
311
    this.recursively_sanitize(this.root);
312
    this.observe();
313
}
314

  
315
MOSanitizer.prototype.stop = function() {
316
    this.mo.disconnect();
317
}
318

  
263 319
#IF MOZILLA
264 320
/*
265 321
 * Normally, we block scripts with CSP. However, Mozilla does optimizations that
......
270 326
 * applying this CSP to non-inline `<scripts>' in certain scenarios.
271 327
 */
272 328
function prevent_script_execution(event) {
273
    if (!event.target.haketilo_payload)
274
	event.preventDefault();
329
    event.preventDefault();
275 330
}
276 331
#ENDIF
277 332

  
......
285 340
 * javascript execution.
286 341
 */
287 342
async function sanitize_document(doc, policy) {
343
    const root = doc.documentElement;
344
    const substitute_doc =
345
	  new DOMParser().parseFromString("<!DOCTYPE html>", "text/html");
346

  
288 347
#IF MOZILLA
289 348
    /*
290 349
     * Blocking of scripts that are in the DOM from the beginning. Needed for
291 350
     * Mozilla.
292 351
     */
293 352
    const listener_args = ["beforescriptexecute", prevent_script_execution];
353

  
294 354
    doc.addEventListener(...listener_args);
355
    substitute_doc.addEventListener(...listener_args);
356

  
295 357
    wait_loaded(doc).then(() => doc.removeEventListener(...listener_args));
296 358

  
297 359
    sanitize_tree_urls(doc.documentElement);
298 360
    sanitize_tree_onevent(doc.documentElement);
299 361
#ENDIF
300 362

  
363
    if (!doc.content_loaded) {
364
	const sanitizer = new MOSanitizer(doc.documentElement);
365
	sanitizer.start();
366
	wait_loaded(doc).then(() => sanitizer.stop());
367
    }
368

  
301 369
    /*
302 370
     * Ensure our CSP rules are employed from the beginning. This CSP injection
303 371
     * method is, when possible, going to be applied together with CSP rules
......
322 390
     * Root node gets hijacked now, to be re-attached after <head> is loaded
323 391
     * and sanitized.
324 392
     */
325
    const root = doc.documentElement;
326 393
    root.replaceWith(temporary_html);
394
#IF MOZILLA
395
    /*
396
     * To be able to handle the onbeforescriptexecute event for scripts that
397
     * appear under a detached document.
398
     */
399
    substitute_doc.documentElement.replaceWith(root);
400
#ENDIF
327 401

  
328 402
    /*
329 403
     * When we don't inject payload, we neither block document's CSP `<meta>'
......
336 410
	    .forEach(m => sanitize_meta(m, policy));
337 411
    }
338 412

  
339
    sanitize_tree_urls(root);
340
    root.querySelectorAll("script").forEach(s => sanitize_script(s, policy));
413
    const scripts = [...root.getElementsByTagNameNS(html_ns, "script"),
414
		     ...root.getElementsByTagNameNS(svg_ns, "svg")];
415
    scripts.forEach(s => sanitize_script(s, policy));
341 416
    temporary_html.replaceWith(root);
342
    root.querySelectorAll("script").forEach(s => desanitize_script(s, policy));
343
#IF MOZILLA
344
    sanitize_tree_onevent(root);
345
#ENDIF
346

  
347
    start_mo_sanitizing(doc);
417
    scripts.forEach(s => desanitize_script(s, policy));
348 418
}
349 419

  
350 420
async function _disable_service_workers() {
test/haketilo_test/data/pages/scripts_to_block_1.html
29 29
    </script>
30 30
  </head>
31 31
  <body>
32
    <button id="clickme1"
33
	    onclick="window.__run = [...(window.__run || []), 'on'];"
34
	    blocked-onclick="some useful data">
35
      Click Meee!
36
    </button>
37
    <a id="clickme2"
38
       href="javascript:window.__run = [...(window.__run || []), 'href'];void(0);">
39
      Click Meee!
40
    </a>
41
    <iframe src="javascript:void(window.parent.__run = [...(window.parent.__run || []), 'src']);">
42
    </iframe>
43
    <object data="javascript:window.__run = [...(window.__run || []), 'data'];">
44
    </object>
32
    <!--
33
	Put all objects under a <div> to make sure the Mutation Observer does
34
	indeed correctly report changes in subtrees (there are problems with
35
	this in XML documents).
36
      -->
37
    <div>
38
      <button id="clickme1"
39
	      onclick="window.__run = [...(window.__run || []), 'on'];"
40
	      blocked-onclick="some useful data">
41
	Click Meee!
42
      </button>
43
      <a id="clickme2"
44
	 href="javascript:window.__run = [...(window.__run || []), 'href'];void(0);">
45
	Click Meee!
46
      </a>
47
      <iframe src="javascript:void(window.parent.__run = [...(window.parent.__run || []), 'src']);">
48
      </iframe>
49
      <object data="javascript:window.__run = [...(window.__run || []), 'data'];">
50
      </object>
51
    </div>
45 52
  </body>
46 53
</html>
test/haketilo_test/data/pages/scripts_to_block_2.xml
1
<?xml version="1.0" encoding="UTF-8"?>
2
<!--
3
    SPDX-License-Identifier: CC0-1.0
4

  
5
    A testing XML document with various scripts that need to get blocked.
6

  
7
    This file is part of Haketilo.
8

  
9
    Copyright (C) 2021, 2022 Wojtek Kosior <koszko@koszko.org>
10

  
11
    This program is free software: you can redistribute it and/or modify
12
    it under the terms of the CC0 1.0 Universal License as published by
13
    the Creative Commons Corporation.
14

  
15
    This program is distributed in the hope that it will be useful,
16
    but WITHOUT ANY WARRANTY; without even the implied warranty of
17
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18
    CC0 1.0 Universal License for more details.
19
  -->
20

  
21
<fruits>
22

  
23
  <!--
24
      The following will not execute since it is not recognized as either HTML
25
      or SVG script.
26
  -->
27
  <script>
28
    window.__run = [...(window.__run || []), 'banana'];
29
  </script>
30

  
31
  <html:img xmlns:html="http://www.w3.org/1999/xhtml"
32
	    src=""
33
	    onload="window.__run = [...(window.__run || []), 'melon'];console.log('delme melon')">
34
  </html:img>
35

  
36
  <!-- Will execute -->
37
  <html:script xmlns:html="http://www.w3.org/1999/xhtml">
38
    window.__run = [...(window.__run || []), 'grape'];
39
  </html:script>
40

  
41
  <!-- Will also execute -->
42
  <vector-graphics:script xmlns:vector-graphics="http://www.w3.org/2000/svg">
43
    window.__run = [...(window.__run || []), 'raspberry'];
44
  </vector-graphics:script>
45

  
46
  <apple>
47
    <svg viewBox="0 0 10 14" xmlns="http://www.w3.org/2000/svg">
48
      <!-- Will run when clicked -->
49
      <circle id="idaret_circle" cx="5" cy="5" r="4"
50
	      onclick="window.__run = [...(window.__run || []), 'idaret'];" />
51
      <!-- Will *NOT* run when clicked -->
52
      <circle id="nowamak_circle" cx="5" cy="13" r="4"
53
	      some-unknown:onclick="window.__run = [...(window.__run || []), 'nowamak'];"
54
	      xmlns:some-unknown="https://example.org/blah/blah" />
55
    </svg>
56
  </apple>
57
  <!--
58
      In case of a wrong namespace URI (or lack thereof), the svg subtree will not
59
      be recognized as SVG at all
60
  -->
61
  <svg>
62
    <!-- Will neither run nor be drawn by the browser -->
63
    <circle id="mango_circle" cx="5" cy="5" r="4"
64
	    onclick="window.__run = [...(window.__run || []), 'mango'];" />
65
  </svg>
66
  <svg viewBox="0 0 10" xmlns="http://www.w3.org/2000/sv">
67
    <!-- Will neither run nor be drawn by the browser -->
68
    <circle id="annoying_circle" cx="5" cy="5" r="4"
69
	    onclick="window.__run = [...(window.__run || []), 'orange'];" />
70
  </svg>
71
</fruits>
test/haketilo_test/unit/test_policy_enforcing.py
73 73
@pytest.mark.parametrize('csp_off_setting', [{}, {'csp_off': True}])
74 74
def test_policy_enforcing_html(driver, execute_in_page, csp_off_setting):
75 75
    """
76
    A test case of sanitizing <script>s and intrinsic javascript in pages.
76
    A test case of sanitizing <script>s and intrinsic JavaScript in HTML pages.
77 77
    """
78
    def assert_properly_blocked():
78
    def click_all():
79 79
        for i in range(1, 3):
80 80
            driver.find_element_by_id(f'clickme{i}').click()
81 81

  
82
    def assert_properly_blocked():
83
        click_all()
84

  
82 85
        assert set(driver.execute_script('return window.__run || [];')) == set()
83 86
        assert bool(csp_off_setting) == are_scripts_allowed(driver)
84 87

  
......
98 101
        **csp_off_setting
99 102
    })
100 103

  
101
    for i in range(1, 3):
102
        driver.find_element_by_id(f'clickme{i}').click()
104
    click_all()
103 105

  
104 106
    assert set(driver.execute_script('return window.__run || [];')) == \
105 107
        {'inline', 'on', 'href', 'src', 'data'}
......
121 123

  
122 124
    assert_properly_blocked()
123 125
    assert are_scripts_allowed(driver, nonce)
126

  
127
# Test function analogous to the one for HTML pages.
128
@pytest.mark.ext_data({'content_script': content_script})
129
@pytest.mark.usefixtures('webextension')
130
@pytest.mark.parametrize('csp_off_setting', [{}, {'csp_off': True}])
131
def test_policy_enforcing_xml(driver, execute_in_page, csp_off_setting):
132
    """
133
    A test case of sanitizing <script>s and intrinsic JavaScript in XML
134
    documents.
135
    """
136
    def click_all():
137
        for name in ('idaret', 'nowamak', 'mango', 'annoying'):
138
            elem = driver.find_element_by_id(f'{name}_circle')
139
            try:
140
                elem.click()
141
            except:
142
                pass
143

  
144
    def assert_properly_blocked():
145
        click_all()
146

  
147
        try:
148
            assert set(driver.execute_script('return window.__run || [];')) == set()
149
        except:
150
            from time import sleep
151
            sleep(100000)
152
        assert bool(csp_off_setting) == are_scripts_allowed(driver)
153

  
154
    # First, see if scripts run when not blocked.
155
    get(driver, 'https://gotmyowndoma.in/scripts_to_block_2.xml', {
156
        'policy': allow_policy,
157
        **csp_off_setting
158
    })
159

  
160
    click_all()
161

  
162
    assert set(driver.execute_script('return window.__run || [];')) == \
163
        {'grape', 'raspberry', 'idaret', 'melon'}
164
    assert are_scripts_allowed(driver)
165

  
166
    # Now, verify scripts don't run when blocked.
167
    get(driver, 'https://gotmyowndoma.in/scripts_to_block_2.xml', {
168
        'policy': block_policy,
169
        **csp_off_setting
170
    })
171

  
172
    assert_properly_blocked()
173

  
174
    # Now, verify only scripts with nonce can run when payload is injected.
175
    get(driver, 'https://gotmyowndoma.in/scripts_to_block_2.xml', {
176
        'policy': payload_policy,
177
        **csp_off_setting
178
    })
179

  
180
    assert_properly_blocked()
181
    assert are_scripts_allowed(driver, nonce)
test/haketilo_test/unit/utils.py
228 228
        return driver.execute_script(
229 229
            '''
230 230
            document.haketilo_scripts_allowed = false;
231
            const script = document.createElement("script");
231
            const html_ns = "http://www.w3.org/1999/xhtml";
232
            const script = document.createElementNS(html_ns, "script");
232 233
            script.innerHTML = "document.haketilo_scripts_allowed = true;";
233 234
            if (arguments[0])
234 235
                script.setAttribute("nonce", arguments[0]);
235
            document.head.append(script);
236
            (document.head || document.documentElement).append(script);
236 237
            return document.haketilo_scripts_allowed;
237 238
            ''',
238 239
            nonce)
test/haketilo_test/world_wide_library.py
234 234

  
235 235
    'https://gotmyowndoma.in/scripts_to_block_1.html':
236 236
    (200, {}, here / 'data' / 'pages' / 'scripts_to_block_1.html'),
237
    'https://gotmyowndoma.in/scripts_to_block_2.xml':
238
    (200, {}, here / 'data' / 'pages' / 'scripts_to_block_2.xml'),
237 239

  
238 240
    'https://anotherdoma.in/resource/blocked/by/CORS.json':
239 241
    lambda command, get_params, post_params: (200, {}, some_data),

Also available in: Unified diff