Project

General

Profile

Download (6.68 KB) Statistics
| Branch: | Tag: | Revision:

haketilo / common / patterns.js @ f8dedf60

1
/**
2
 * This file is part of Haketilo.
3
 *
4
 * Function: Operations on page URL patterns.
5
 *
6
 * Copyright (C) 2021 Wojtek Kosior
7
 *
8
 * This program is free software: you can redistribute it and/or modify
9
 * it under the terms of the GNU General Public License as published by
10
 * the Free Software Foundation, either version 3 of the License, or
11
 * (at your option) any later version.
12
 *
13
 * This program is distributed in the hope that it will be useful,
14
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16
 * GNU General Public License for more details.
17
 *
18
 * As additional permission under GNU GPL version 3 section 7, you
19
 * may distribute forms of that code without the copy of the GNU
20
 * GPL normally required by section 4, provided you include this
21
 * license notice and, in case of non-source distribution, a URL
22
 * through which recipients can access the Corresponding Source.
23
 * If you modify file(s) with this exception, you may extend this
24
 * exception to your version of the file(s), but you are not
25
 * obligated to do so. If you do not wish to do so, delete this
26
 * exception statement from your version.
27
 *
28
 * As a special exception to the GPL, any HTML file which merely
29
 * makes function calls to this code, and for that purpose
30
 * includes it by reference shall be deemed a separate work for
31
 * copyright law purposes. If you modify this code, you may extend
32
 * this exception to your version of the code, but you are not
33
 * obligated to do so. If you do not wish to do so, delete this
34
 * exception statement from your version.
35
 *
36
 * You should have received a copy of the GNU General Public License
37
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
38
 *
39
 * I, Wojtek Kosior, thereby promise not to sue for violation of this file's
40
 * license. Although I request that you do not make use of this code in a
41
 * proprietary program, I am not going to enforce this in court.
42
 */
43

    
44
/*
 * Limits applied by deconstruct_url() when it is called with use_limits=true.
 *
 * URL_PATH_LEN   - maximum number of path segments kept,
 * URL_PATH_CHARS - path length (in characters) above which the path string
 *                  gets shortened,
 * DOMAIN_LEN     - maximum number of domain labels kept,
 * DOMAIN_CHARS   - domain length (in characters) above which leading labels
 *                  get dropped.
 */
const MAX = {
    URL_PATH_LEN:   12,
    URL_PATH_CHARS: 255,
    DOMAIN_LEN:     7,
    DOMAIN_CHARS:   100
};
50

    
51
/* Splits "<proto>://<rest>" into the protocol name [1] and the rest [2]. */
const proto_regex = /^(\w+):\/\/(.*)$/;

/* Building blocks (regex source fragments) for the per-protocol regexes. */
const user_re = "[^/?#@]+@";
const domain_re = "[.*a-zA-Z0-9-]+";
const path_re = "[^?#]*";
const query_re = "\\??[^#]*";

/* Groups: [1] domain, [2] path, [3] query (with its leading "?", if any). */
const http_regex = new RegExp(`^(${domain_re})(${path_re})(${query_re}).*`);

/* Groups: [1] absolute path (must start with "/"). */
const file_regex = new RegExp(`^(/${path_re}).*`);

/* Groups: [1] optional "user@" part, [2] domain, [3] path. */
const ftp_regex = new RegExp(`^(${user_re})?(${domain_re})(${path_re}).*`);
63

    
64
/*
 * Run `regex` against `string` and return the match array.
 *
 * If the regex does not match, `error_msg` itself is thrown (as given by the
 * caller, not wrapped in an Error object).
 */
function match_or_throw(regex, string, error_msg)
{
    const match = regex.exec(string);
    if (match === null)
        throw error_msg;

    return match;
}
72

    
73
/*
 * Split `url` into the components used for pattern matching.
 *
 * The returned object has:
 *   proto          - protocol name ("file", "ftp", "http" or "https"),
 *   domain         - array of domain labels (not set for "file" URLs;
 *                    lower-cased for "http"/"https"),
 *   path           - array of non-empty path segments,
 *   query          - query string, set for "http"/"https" only,
 *   trailing_slash - whether the path string ended with "/",
 *   domain_truncated, path_truncated - set to true only when the respective
 *                    component got shortened to fit the limits in MAX.
 *
 * With use_limits=false every limit is treated as Infinity, so nothing gets
 * truncated.
 *
 * Throws a string (`bad url '...'` or `unsupported protocol in url '...'`)
 * when the URL cannot be parsed.
 */
function deconstruct_url(url, use_limits=true)
{
    const max = Object.assign({}, MAX);
    if (!use_limits) {
        for (const key in MAX)
            max[key] = Infinity;
    }

    const matcher = (re, str) => match_or_throw(re, str, `bad url '${url}'`);

    const proto_match = matcher(proto_regex, url);
    const deco = {proto: proto_match[1]};

    if (deco.proto === "file") {
        deco.path = matcher(file_regex, proto_match[2])[1];
    } else if (deco.proto === "ftp") {
        /* Group 1 is the optional "user@" part, which is dropped here. */
        [deco.domain, deco.path] =
            matcher(ftp_regex, proto_match[2]).slice(2, 4);
    } else if (deco.proto === "http" || deco.proto === "https") {
        [deco.domain, deco.path, deco.query] =
            matcher(http_regex, proto_match[2]).slice(1, 4);
        deco.domain = deco.domain.toLowerCase();
    } else {
        throw `unsupported protocol in url '${url}'`;
    }

    /* Must be computed before the path string gets shortened or split. */
    deco.trailing_slash = deco.path[deco.path.length - 1] === "/";

    /* Character limits only apply to URLs that have a domain. */
    if (deco.domain) {
        if (deco.domain.length > max.DOMAIN_CHARS) {
            /*
             * Keep only the labels that follow the first dot found at or
             * after the position where the allowed suffix starts.
             */
            const idx = deco.domain.indexOf(".", deco.domain.length -
                                            max.DOMAIN_CHARS);
            if (idx === -1)
                deco.domain = [];
            else
                deco.domain = deco.domain.substring(idx + 1);

            deco.domain_truncated = true;
        }

        if (deco.path.length > max.URL_PATH_CHARS) {
            /*
             * NOTE(review): this drops only the last segment, so the result
             * may still be longer than URL_PATH_CHARS - confirm whether
             * lastIndexOf("/", max.URL_PATH_CHARS) was intended.
             */
            deco.path = deco.path.substring(0, deco.path.lastIndexOf("/"));
            deco.path_truncated = true;
        }
    }

    /* domain may already be an (empty) array if it got fully truncated. */
    if (typeof deco.domain === "string") {
        deco.domain = deco.domain.split(".");
        /* Drop leading labels so that at most DOMAIN_LEN remain. */
        if (deco.domain.splice(0, deco.domain.length - max.DOMAIN_LEN).length
            > 0)
            deco.domain_truncated = true;
    }

    deco.path = deco.path.split("/").filter(s => s !== "");
    /* Drop trailing segments so that at most URL_PATH_LEN remain. */
    if (deco.domain && deco.path.splice(max.URL_PATH_LEN).length > 0)
        deco.path_truncated = true;

    return deco;
}
132
#EXPORT deconstruct_url
133

    
134
/*
 * Yield domain patterns for `deco.domain` (an array of labels), from the most
 * specific to the least specific.
 *
 * For each suffix of the label array that has at least 2 labels:
 *   - the full domain itself is yielded literally (unless truncated),
 *   - the suffix without the first label is yielded with "*." (unless
 *     truncated),
 *   - shorter suffixes are yielded with "**.",
 *   - every suffix is yielded with "***.".
 *
 * The loop stops before the last label, so a single-label domain (or an empty
 * label array) yields nothing.
 */
function* each_domain_pattern(deco)
{
    for (let slice = 0; slice < deco.domain.length - 1; slice++) {
        const domain_part = deco.domain.slice(slice).join(".");
        if (slice === 0 && !deco.domain_truncated)
            yield domain_part;
        if (slice === 1 && !deco.domain_truncated)
            yield "*." + domain_part;
        if (slice > 1)
            yield "**." + domain_part;
        yield "***." + domain_part;
    }
}
148

    
149
/*
 * Yield path patterns for `deco.path` (an array of segments), from the most
 * specific to the least specific.
 *
 * Prefixes of the segment array are visited from the full path down to the
 * empty prefix:
 *   - the full path is yielded literally (with and/or without the trailing
 *     slash) unless the path was truncated; the empty path of a "file" URL is
 *     not yielded without its slash,
 *   - the prefix one segment shorter is yielded with "/*" (unless truncated
 *     or the omitted segment is itself "*"),
 *   - shorter prefixes are yielded with "/**",
 *   - every prefix is yielded with "/***", except when that would merely
 *     repeat a literal "***" last segment of a non-truncated path.
 */
function* each_path_pattern(deco)
{
    for (let slice = deco.path.length; slice >= 0; slice--) {
        const path_part = ["", ...deco.path.slice(0, slice)].join("/");
        if (slice === deco.path.length && !deco.path_truncated) {
            if (deco.trailing_slash)
                yield path_part + "/";
            if (slice > 0 || deco.proto !== "file")
                yield path_part;
        }
        if (slice === deco.path.length - 1 && !deco.path_truncated &&
            deco.path[slice] !== "*")
            yield path_part + "/*";
        if (slice < deco.path.length - 1)
            yield path_part + "/**";
        if (slice !== deco.path.length - 1 || deco.path_truncated ||
            deco.path[slice] !== "***")
            yield path_part + "/***";
    }
}
170

    
171
/*
 * Generate every possible pattern that matches url.
 *
 * Patterns are produced as "<proto>://<domain pattern><path pattern>", with
 * domain patterns in the outer loop and path patterns in the inner loop. URLs
 * without a domain (e.g. "file" URLs) use an empty domain part.
 *
 * deconstruct_url() never returns undefined - it throws on a malformed or
 * unsupported URL - so that error surfaces to the caller when iteration
 * starts.
 */
function* each_url_pattern(url)
{
    const deco = deconstruct_url(url);

    const all_domains = deco.domain ? each_domain_pattern(deco) : [""];
    for (const domain of all_domains) {
        for (const path of each_path_pattern(deco))
            yield `${deco.proto}://${domain}${path}`;
    }
}
187
#EXPORT each_url_pattern
188

    
189
/* Address of the wiki page that documents the URL pattern syntax. */
const patterns_doc_url =
      "https://hydrillabugs.koszko.org/projects/haketilo/wiki/URL_patterns";
191
#EXPORT patterns_doc_url
192

    
193
/*
 * Build a URL string back from an object of the shape returned by
 * deconstruct_url(): "<proto>://<domain><path>[/]".
 *
 * Only `proto`, `domain`, `path` and `trailing_slash` are used; any other
 * property of `deco` (e.g. `query`) is not included in the result.
 *
 * `deco.domain` is not set for "file" URLs; it is then treated as an empty
 * domain instead of causing a TypeError.
 */
function reconstruct_url(deco)
{
    const domain = (deco.domain || []).join(".");
    const path = ["", ...deco.path].join("/");
    const trail = deco.trailing_slash ? "/" : "";
    return `${deco.proto}://${domain}${path}${trail}`;
}
200
#EXPORT reconstruct_url
201

    
202
/*
 * Check that `url_pattern` can be parsed and return its normalized form, i.e.
 * the result of passing it through deconstruct_url() and reconstruct_url().
 *
 * If either step throws, a <span> element is thrown instead. It holds a
 * message saying the pattern is invalid, with a link to the patterns
 * documentation. The pattern itself is inserted as a text node.
 */
function validate_normalize_url_pattern(url_pattern)
{
    try {
        return reconstruct_url(deconstruct_url(url_pattern));
    } catch(e) {
        const patterns_doc_link = document.createElement("a");
        patterns_doc_link.href = patterns_doc_url;
        patterns_doc_link.innerText = "here";
        const msg = document.createElement("span");
        msg.prepend(`'${url_pattern}' is not a valid URL pattern. See `,
                    patterns_doc_link, " for more details.");
        throw msg;
    }
}
216
#EXPORT validate_normalize_url_pattern
(8-8/11)