Revision 7f0b5ded
Added by koszko over 1 year ago
| background/webrequest.js | ||
|---|---|---|
| 3 | 3 |
* |
| 4 | 4 |
* Function: Modify HTTP traffic using webRequest API. |
| 5 | 5 |
* |
| 6 |
* Copyright (C) 2021 Wojtek Kosior <koszko@koszko.org> |
|
| 6 |
* Copyright (C) 2021, 2022 Wojtek Kosior <koszko@koszko.org>
|
|
| 7 | 7 |
* |
| 8 | 8 |
* This program is free software: you can redistribute it and/or modify |
| 9 | 9 |
* it under the terms of the GNU General Public License as published by |
| ... | ... | |
| 41 | 41 |
* proprietary program, I am not going to enforce this in court. |
| 42 | 42 |
*/ |
| 43 | 43 |
|
| 44 |
#IMPORT common/indexeddb.js AS haketilodb |
|
| 44 |
#IMPORT common/indexeddb.js AS haketilodb |
|
| 45 |
|
|
| 45 | 46 |
#IF MOZILLA |
| 46 | 47 |
#IMPORT background/stream_filter.js |
| 47 | 48 |
#ENDIF |
| 48 | 49 |
|
| 49 | 50 |
#FROM common/browser.js IMPORT browser |
| 50 |
#FROM common/misc.js IMPORT is_privileged_url, csp_header_regex |
|
| 51 |
#FROM common/misc.js IMPORT is_privileged_url, csp_header_regex, \ |
|
| 52 |
sha256_async AS sha256 |
|
| 51 | 53 |
#FROM common/policy.js IMPORT decide_policy |
| 52 | 54 |
|
| 53 | 55 |
#FROM background/patterns_query_manager.js IMPORT tree, default_allow |
| 54 | 56 |
|
| 55 | 57 |
let secret; |
| 56 | 58 |
|
| 57 |
function on_headers_received(details) |
|
| 58 |
{
|
|
| 59 |
#IF MOZILLA |
|
| 60 |
/* |
|
| 61 |
* Under Mozilla-based browsers, responses are cached together with headers as |
|
| 62 |
* they appear *after* modifications by Haketilo. This means Haketilo's CSP |
|
| 63 |
* script-blocking headers might be present in responses loaded from cache. In |
|
| 64 |
* the meantime the user might have changed Haketilo settings to instead allow |
|
| 65 |
* the scripts on the page in question. This causes a problem and creates the |
|
| 66 |
* need to somehow restore the response headers to the state in which they |
|
| 67 |
* arrived from the server. |
|
| 68 |
* To cope with this, Haketilo will inject some additional headers with private |
|
| 69 |
* data. Those will include a hard-to-guess value derived from extension's |
|
| 70 |
* internal ID. It is assumed the internal ID has a longer lifetime than cached |
|
| 71 |
* responses. |
|
| 72 |
*/ |
|
| 73 |
|
|
| 74 |
const settings_page_url = browser.runtime.getURL("html/settings.html");
|
|
| 75 |
const header_prefix_prom = sha256(settings_page_url) |
|
| 76 |
.then(hash => `X-Haketilo-${hash}`);
|
|
| 77 |
|
|
| 78 |
/* |
|
| 79 |
* Mozilla, unlike Chrome, allows webRequest callbacks to return promises. Here |
|
| 80 |
* we leverage that to be able to use asynchronous sha256 computation. |
|
| 81 |
*/ |
|
| 82 |
async function on_headers_received(details) {
|
|
| 83 |
#IF NEVER |
|
| 84 |
} /* Help auto-indent in editors. */ |
|
| 85 |
#ENDIF |
|
| 86 |
#ELSE |
|
| 87 |
function on_headers_received(details) {
|
|
| 88 |
#ENDIF |
|
| 59 | 89 |
const url = details.url; |
| 60 | 90 |
if (is_privileged_url(details.url)) |
| 61 | 91 |
return; |
| 62 | 92 |
|
| 63 | 93 |
let headers = details.responseHeaders; |
| 64 | 94 |
|
| 95 |
#IF MOZILLA |
|
| 96 |
const prefix = await header_prefix_prom; |
|
| 97 |
|
|
| 98 |
/* |
|
| 99 |
* We assume that the original CSP headers of a response are always |
|
| 100 |
* preserved under names of the form: |
|
| 101 |
* X-Haketilo-<some_secret>-<original_name> |
|
| 102 |
* In some cases the original response may contain no CSP headers. To still |
|
| 103 |
* be able to tell whether the headers we were provided were modified by |
|
| 104 |
* Haketilo in the past, all modifications are accompanied by addition of an |
|
| 105 |
* extra header with name: |
|
| 106 |
* X-Haketilo-<some_secret> |
|
| 107 |
*/ |
|
| 108 |
|
|
| 109 |
const restore_old_headers = details.fromCache && |
|
| 110 |
!!headers.filter(h => h.name === prefix).length; |
|
| 111 |
|
|
| 112 |
if (restore_old_headers) {
|
|
| 113 |
const restored_headers = []; |
|
| 114 |
|
|
| 115 |
for (const h of headers) {
|
|
| 116 |
if (csp_header_regex.test(h.name) || h.name === prefix) |
|
| 117 |
continue; |
|
| 118 |
|
|
| 119 |
if (h.name.startsWith(prefix)) {
|
|
| 120 |
restored_headers.push({
|
|
| 121 |
name: h.name.substring(prefix.length + 1), |
|
| 122 |
value: h.value |
|
| 123 |
}); |
|
| 124 |
} else {
|
|
| 125 |
restored_headers.push(h); |
|
| 126 |
} |
|
| 127 |
} |
|
| 128 |
|
|
| 129 |
headers = restored_headers; |
|
| 130 |
} |
|
| 131 |
#ENDIF |
|
| 132 |
|
|
| 65 | 133 |
const policy = |
| 66 | 134 |
decide_policy(tree, details.url, !!default_allow.value, secret); |
| 67 |
if (policy.allow) |
|
| 68 |
return; |
|
| 69 | 135 |
|
| 70 |
if (policy.payload) |
|
| 71 |
headers = headers.filter(h => !csp_header_regex.test(h.name)); |
|
| 136 |
if (!policy.allow) {
|
|
| 137 |
#IF MOZILLA |
|
| 138 |
const to_append = [{name: prefix, value: ":)"}];
|
|
| 139 |
|
|
| 140 |
for (const h of headers.filter(h => csp_header_regex.test(h.name))) {
|
|
| 141 |
if (!policy.payload) |
|
| 142 |
to_append.push(Object.assign({}, h));
|
|
| 143 |
|
|
| 144 |
h.name = `${prefix}-${h.name}`;
|
|
| 145 |
} |
|
| 72 | 146 |
|
| 73 |
headers.push({name: "Content-Security-Policy", value: policy.csp});
|
|
| 147 |
headers.push(...to_append); |
|
| 148 |
#ELSE |
|
| 149 |
if (policy.payload) |
|
| 150 |
headers = headers.filter(h => !csp_header_regex.test(h.name)); |
|
| 151 |
#ENDIF |
|
| 152 |
|
|
| 153 |
headers.push({name: "Content-Security-Policy", value: policy.csp});
|
|
| 154 |
} |
|
| 74 | 155 |
|
| 75 | 156 |
#IF MOZILLA |
| 76 |
let skip = false; |
|
| 77 |
for (const header of headers) {
|
|
| 78 |
if (header.name.toLowerCase().trim() !== "content-disposition") |
|
| 79 |
continue; |
|
| 80 |
|
|
| 81 |
if (/^\s*attachment\s*(;.*)$/i.test(header.value)) {
|
|
| 82 |
skip = true; |
|
| 83 |
} else {
|
|
| 84 |
skip = false; |
|
| 85 |
break; |
|
| 157 |
/* |
|
| 158 |
* When page is meant to be viewed in the browser, use streamFilter to |
|
| 159 |
* inject a dummy <script> at the very beginning of it. This <script> |
|
| 160 |
* will cause extension's content scripts to run before page's first <meta> |
|
| 161 |
* tag is rendered so that they can prevent CSP rules inside <meta> tags |
|
| 162 |
* from blocking the payload we want to inject. |
|
| 163 |
*/ |
|
| 164 |
|
|
| 165 |
let use_stream_filter = !!policy.payload; |
|
| 166 |
if (use_stream_filter) {
|
|
| 167 |
for (const header of headers) {
|
|
| 168 |
if (header.name.toLowerCase().trim() !== "content-disposition") |
|
| 169 |
continue; |
|
| 170 |
|
|
| 171 |
if (/^\s*attachment\s*(;.*)$/i.test(header.value)) {
|
|
| 172 |
use_stream_filter = false; |
|
| 173 |
} else {
|
|
| 174 |
use_stream_filter = true; |
|
| 175 |
break; |
|
| 176 |
} |
|
| 86 | 177 |
} |
| 87 | 178 |
} |
| 88 |
skip = skip || (details.statusCode >= 300 && details.statusCode < 400); |
|
| 179 |
use_stream_filter = use_stream_filter && |
|
| 180 |
(details.statusCode < 300 || details.statusCode >= 400); |
|
| 89 | 181 |
|
| 90 |
if (!skip)
|
|
| 182 |
if (use_stream_filter)
|
|
| 91 | 183 |
headers = stream_filter.apply(details, headers, policy); |
| 92 | 184 |
#ENDIF |
| 93 | 185 |
|
| test/haketilo_test/unit/test_webrequest.py | ||
|---|---|---|
| 24 | 24 |
from ..script_loader import load_script |
| 25 | 25 |
from .utils import are_scripts_allowed |
| 26 | 26 |
|
| 27 |
allowed_url = 'https://site.with.scripts.allow.ed/' |
|
| 28 |
blocked_url = 'https://site.with.scripts.block.ed/' |
|
| 29 |
payload_url = 'https://site.with.paylo.ad/' |
|
| 30 |
|
|
| 27 | 31 |
def webrequest_js(): |
| 28 | 32 |
return (load_script('background/webrequest.js',
|
| 29 | 33 |
'#IMPORT common/patterns_query_tree.js AS pqt') + |
| ... | ... | |
| 34 | 38 |
default_allow = {name: "default_allow", value: true};
|
| 35 | 39 |
|
| 36 | 40 |
// Rule to block scripts. |
| 37 |
pqt.register(tree, "https://site.with.scripts.block.ed/***",
|
|
| 41 |
pqt.register(tree, "%(blocked)s***",
|
|
| 38 | 42 |
"~allow", 0); |
| 39 | 43 |
|
| 40 | 44 |
// Rule to allow scripts, but overridden by payload assignment. |
| 41 |
pqt.register(tree, "https://site.with.paylo.ad/***", "~allow", 1);
|
|
| 42 |
pqt.register(tree, "https://site.with.paylo.ad/***",
|
|
| 43 |
"somemapping", {identifier: "someresource"});
|
|
| 45 |
pqt.register(tree, "%(payload)s***", "~allow", 1);
|
|
| 46 |
pqt.register(tree, "%(payload)s***", "somemapping",
|
|
| 47 |
{identifier: "someresource"});
|
|
| 44 | 48 |
|
| 45 | 49 |
// Mock stream_filter. |
| 46 | 50 |
stream_filter.apply = (details, headers, policy) => headers; |
| 51 |
''' % {'blocked': blocked_url, 'payload': payload_url})
|
|
| 52 |
|
|
| 53 |
def webrequest_js_start_called(): |
|
| 54 |
return webrequest_js() + ';\nstart("somesecret");'
|
|
| 55 |
|
|
| 56 |
ext_url = 'moz-extension://49de6ce9-49fc-49e1-8102-7ef35286389c/html/settings.html' |
|
| 57 |
prefix = 'X-Haketilo-' + sha256(ext_url.encode()).digest().hex() |
|
| 58 |
|
|
| 59 |
# Prepare a list of headers as could be sent by a website. |
|
| 60 |
sample_csp_header = {
|
|
| 61 |
'name': 'Content-Security-Policy', |
|
| 62 |
'value': "script-src 'self';" |
|
| 63 |
} |
|
| 64 |
sample_csp_header_idx = 7 |
|
| 65 |
|
|
| 66 |
sample_headers = [ |
|
| 67 |
{'name': 'Content-Type', 'value': 'text/html;charset=utf-8'},
|
|
| 68 |
{'name': 'Content-Length', 'value': '61954'},
|
|
| 69 |
{'name': 'Content-Language', 'value': 'en'},
|
|
| 70 |
{'name': 'Expires', 'value': 'Mon, 12 Mar 2012 11:04...'},
|
|
| 71 |
{'name': 'Last-Modified', 'value': 'Fri, 26 Jul 2013 22:50...'},
|
|
| 72 |
{'name': 'Cache-Control', 'value': 'max-age=0, s-maxage=86...'},
|
|
| 73 |
{'name': 'Age', 'value': '224'},
|
|
| 74 |
{'name': 'Server', 'value': 'nginx/1.1.19'},
|
|
| 75 |
{'name': 'Date', 'value': 'Thu, 10 Mar 2022 12:09...'}
|
|
| 76 |
] |
|
| 77 |
|
|
| 78 |
sample_headers.insert(sample_csp_header_idx, sample_csp_header) |
|
| 79 |
|
|
| 80 |
# Prepare a list of headers as would be crafted by Haketilo when there is a |
|
| 81 |
# payload to inject. |
|
| 82 |
nonce_source = f'somemapping:someresource:{payload_url}:somesecret'.encode()
|
|
| 83 |
nonce = f'nonce-{sha256(nonce_source).digest().hex()}'
|
|
| 84 |
|
|
| 85 |
payload_csp_header = {
|
|
| 86 |
'name': f'Content-Security-Policy', |
|
| 87 |
'value': ("prefetch-src 'none'; script-src-attr 'none'; "
|
|
| 88 |
f"script-src '{nonce}'; script-src-elem '{nonce}';")
|
|
| 89 |
} |
|
| 90 |
|
|
| 91 |
sample_payload_headers = [ |
|
| 92 |
*sample_headers, |
|
| 93 |
{'name': prefix, 'value': ':)'},
|
|
| 94 |
payload_csp_header |
|
| 95 |
] |
|
| 96 |
|
|
| 97 |
sample_payload_headers[sample_csp_header_idx] = {
|
|
| 98 |
**sample_csp_header, |
|
| 99 |
'name': f'{prefix}-{sample_csp_header["name"]}',
|
|
| 100 |
} |
|
| 101 |
|
|
| 102 |
# Prepare a list of headers as would be crafted by Haketilo when scripts are |
|
| 103 |
# blocked. |
|
| 104 |
sample_blocked_headers = [*sample_payload_headers] |
|
| 105 |
sample_blocked_headers.pop() |
|
| 106 |
sample_blocked_headers.append(sample_csp_header) |
|
| 107 |
sample_blocked_headers.append({
|
|
| 108 |
'name': f'Content-Security-Policy', |
|
| 109 |
'value': ("prefetch-src 'none'; script-src-attr 'none'; "
|
|
| 110 |
f"script-src 'none'; script-src-elem 'none';") |
|
| 111 |
}) |
|
| 112 |
|
|
| 113 |
@pytest.mark.get_page('https://gotmyowndoma.in')
|
|
| 114 |
@pytest.mark.parametrize('params', [
|
|
| 115 |
(sample_headers, allowed_url), |
|
| 116 |
(sample_blocked_headers, blocked_url), |
|
| 117 |
(sample_payload_headers, payload_url), |
|
| 118 |
]) |
|
| 119 |
def test_webrequest_on_headers_received(driver, execute_in_page, params): |
|
| 120 |
"""Unit-test the on_headers_received() function.""" |
|
| 121 |
headers_out, url = params |
|
| 122 |
|
|
| 123 |
execute_in_page( |
|
| 124 |
'''{
|
|
| 125 |
// Mock browser object. |
|
| 126 |
const url = arguments[0]; |
|
| 127 |
this.browser = {runtime: {getURL: () => url}};
|
|
| 128 |
}''', |
|
| 129 |
ext_url) |
|
| 130 |
|
|
| 131 |
execute_in_page(webrequest_js()) |
|
| 132 |
|
|
| 133 |
execute_in_page('secret = "somesecret";')
|
|
| 134 |
|
|
| 135 |
for headers_in in [ |
|
| 136 |
sample_headers, |
|
| 137 |
sample_blocked_headers, |
|
| 138 |
sample_payload_headers |
|
| 139 |
]: |
|
| 140 |
details = {'url': url, 'responseHeaders': headers_in, 'fromCache': True}
|
|
| 141 |
res = execute_in_page('returnval(on_headers_received(arguments[0]));',
|
|
| 142 |
details) |
|
| 47 | 143 |
|
| 48 |
// Mock secret and start webrequest operations. |
|
| 49 |
start("somesecret");
|
|
| 50 |
''') |
|
| 144 |
assert res == {'responseHeaders': headers_out}
|
|
| 51 | 145 |
|
| 52 |
@pytest.mark.ext_data({'background_script': webrequest_js})
|
|
| 146 |
@pytest.mark.ext_data({'background_script': webrequest_js_start_called})
|
|
| 53 | 147 |
@pytest.mark.usefixtures('webextension')
|
| 54 |
def test_on_headers_received(driver, execute_in_page): |
|
| 148 |
def test_webrequest_real_pages(driver, execute_in_page): |
|
| 149 |
""" |
|
| 150 |
Test webRequest-based header modifications by loading actual pages and |
|
| 151 |
attempting to run scripts within them. |
|
| 152 |
""" |
|
| 55 | 153 |
for attempt in range(10): |
| 56 | 154 |
driver.get('https://site.with.scripts.block.ed/')
|
| 57 | 155 |
|
| ... | ... | |
| 59 | 157 |
break |
| 60 | 158 |
assert attempt != 9 |
| 61 | 159 |
|
| 62 |
driver.get('https://site.with.scripts.allow.ed/')
|
|
| 160 |
driver.get(allowed_url)
|
|
| 63 | 161 |
assert are_scripts_allowed(driver) |
| 64 | 162 |
|
| 65 |
driver.get('https://site.with.paylo.ad/')
|
|
| 163 |
driver.get(payload_url)
|
|
| 66 | 164 |
assert not are_scripts_allowed(driver) |
| 67 | 165 |
source = 'somemapping:someresource:https://site.with.paylo.ad/index.html:somesecret' |
| 68 | 166 |
assert are_scripts_allowed(driver, sha256(source.encode()).digest().hex()) |
Also available in: Unified diff
don't double-modify response headers retrieved from cache