Project

General

Profile

Download (5.47 KB) Statistics
| Branch: | Tag: | Revision:

haketilo / common / patterns.js @ 263d03d5

1
/**
2
 * This file is part of Haketilo.
3
 *
4
 * Function: Operations on page URL patterns.
5
 *
6
 * Copyright (C) 2021 Wojtek Kosior
7
 *
8
 * This program is free software: you can redistribute it and/or modify
9
 * it under the terms of the GNU General Public License as published by
10
 * the Free Software Foundation, either version 3 of the License, or
11
 * (at your option) any later version.
12
 *
13
 * This program is distributed in the hope that it will be useful,
14
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16
 * GNU General Public License for more details.
17
 *
18
 * As additional permission under GNU GPL version 3 section 7, you
19
 * may distribute forms of that code without the copy of the GNU
20
 * GPL normally required by section 4, provided you include this
21
 * license notice and, in case of non-source distribution, a URL
22
 * through which recipients can access the Corresponding Source.
23
 * If you modify file(s) with this exception, you may extend this
24
 * exception to your version of the file(s), but you are not
25
 * obligated to do so. If you do not wish to do so, delete this
26
 * exception statement from your version.
27
 *
28
 * As a special exception to the GPL, any HTML file which merely
29
 * makes function calls to this code, and for that purpose
30
 * includes it by reference shall be deemed a separate work for
31
 * copyright law purposes. If you modify this code, you may extend
32
 * this exception to your version of the code, but you are not
33
 * obligated to do so. If you do not wish to do so, delete this
34
 * exception statement from your version.
35
 *
36
 * You should have received a copy of the GNU General Public License
37
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
38
 *
39
 * I, Wojtek Kosior, thereby promise not to sue for violation of this file's
40
 * license. Although I request that you do not make use this code in a
41
 * proprietary program, I am not going to enforce this in court.
42
 */
43

    
44
/*
 * Limits applied by deconstruct_url() to URLs that have a domain part.
 * The *_LEN values cap the number of path segments / domain labels kept,
 * the *_CHARS values are the raw string lengths above which the path /
 * domain gets shortened before being split.
 */
const MAX_URL_PATH_LEN = 12;
const MAX_URL_PATH_CHARS = 255;
const MAX_DOMAIN_LEN = 7;
const MAX_DOMAIN_CHARS = 100;

/* Captures the scheme name (group 1) and everything after "://" (group 2). */
const proto_regex = /^(\w+):\/\/(.*)$/;

/* Building blocks for the per-scheme regexes below. */
const user_re = "[^/?#@]+@";
const domain_re = "[^/?#]+";
const path_re = "[^?#]*";
const query_re = "\\??[^#]*";

/* Groups: 1 - domain, 2 - path, 3 - query (with its leading "?", if any). */
const http_regex = new RegExp(`^(${domain_re})(${path_re})(${query_re}).*`);

/* Groups: 1 - path. */
const file_regex = new RegExp(`^(${path_re}).*`);

/* Groups: 1 - optional "user@" prefix, 2 - domain, 3 - path. */
const ftp_regex = new RegExp(`^(${user_re})?(${domain_re})(${path_re}).*`);
61

    
62
/*
 * Split `url` into the pieces needed for pattern generation.
 *
 * Returns undefined when `url` has no "scheme://" prefix or when the part
 * after the prefix does not match the regex used for its scheme. Otherwise
 * returns an object with the following properties:
 *   proto            - scheme name found before "://"
 *   domain           - array of domain labels, at most MAX_DOMAIN_LEN of them
 *                      (rightmost ones are kept); absent for "file" URLs
 *   path             - array of non-empty path segments; it starts with an
 *                      empty string when the path began with "/" or when no
 *                      segments are left
 *   query            - query string; only set for schemes other than "file"
 *                      and "ftp"
 *   trailing_dash    - true when the raw path ended with "/"
 *   domain_truncated - set to true only when some part of the domain got
 *                      dropped
 *   path_truncated   - set to true only when some part of the path got
 *                      dropped
 */
function deconstruct_url(url)
{
    const proto_match = proto_regex.exec(url);
    if (proto_match === null)
        return undefined;

    const deco = {proto: proto_match[1]};
    const after_proto = proto_match[2];

    if (deco.proto === "file") {
        const file_match = file_regex.exec(after_proto);
        if (file_match === null)
            return undefined;
        deco.path = file_match[1];
    } else if (deco.proto === "ftp") {
        /*
         * ftp_regex requires a non-empty domain, so inputs like "ftp://" or
         * "ftp:///x" do not match. Report them as unparsable instead of
         * throwing on a null match.
         */
        const ftp_match = ftp_regex.exec(after_proto);
        if (ftp_match === null)
            return undefined;
        /* Group 1 is the optional "user@" prefix; it is not kept. */
        [deco.domain, deco.path] = ftp_match.slice(2, 4);
    } else {
        const http_match = http_regex.exec(after_proto);
        if (http_match === null)
            return undefined;
        [deco.domain, deco.path, deco.query] = http_match.slice(1, 4);
    }

    /* Both flags are computed from the raw path, before any truncation. */
    const leading_dash = deco.path[0] === "/";
    deco.trailing_dash = deco.path[deco.path.length - 1] === "/";

    /* Character limits only apply to URLs that have a domain. */
    if (deco.domain) {
        if (deco.domain.length > MAX_DOMAIN_CHARS) {
            /*
             * Keep only whole labels that fit in the last MAX_DOMAIN_CHARS
             * characters: cut right after the first "." found there.
             */
            const idx = deco.domain.indexOf(".", deco.domain.length -
                                            MAX_DOMAIN_CHARS);
            if (idx === -1)
                deco.domain = [];  /* no label boundary - nothing is kept */
            else
                deco.domain = deco.domain.substring(idx + 1);

            deco.domain_truncated = true;
        }

        if (deco.path.length > MAX_URL_PATH_CHARS) {
            /* Drop everything from the last "/" onwards. */
            deco.path = deco.path.substring(0, deco.path.lastIndexOf("/"));
            deco.path_truncated = true;
        }
    }

    /* domain is already an (empty) array if it got fully dropped above. */
    if (typeof deco.domain === "string") {
        deco.domain = deco.domain.split(".");
        /* Remove leftmost labels so that at most MAX_DOMAIN_LEN remain. */
        const dropped_labels =
              deco.domain.splice(0, deco.domain.length - MAX_DOMAIN_LEN);
        if (dropped_labels.length > 0)
            deco.domain_truncated = true;
    }

    deco.path = deco.path.split("/").filter(s => s !== "");
    /* The segment count limit, too, only applies to URLs with a domain. */
    if (deco.domain && deco.path.splice(MAX_URL_PATH_LEN).length > 0)
        deco.path_truncated = true;
    /* A leading "" makes the segments join back into a "/"-prefixed path. */
    if (leading_dash || deco.path.length === 0)
        deco.path.unshift("");

    return deco;
}
117

    
118
/*
 * Yield domain patterns for `deco.domain` (an array of domain labels),
 * starting with the ones that keep the most labels.
 *
 * For each number of dropped leftmost labels the generator yields:
 *   - the domain itself, when nothing is dropped,
 *   - "*." + remainder, when exactly one label is dropped,
 *   - "**." + remainder, when two or more labels are dropped,
 *   - "***." + remainder, in every case.
 * The first two forms are skipped when `deco.domain_truncated` is set.
 * The remainder always consists of at least two labels, so nothing at all
 * is yielded for a domain with fewer than two labels.
 * NOTE(review): this means single-label hosts produce no patterns; confirm
 * this is intended.
 */
function* each_domain_pattern(deco)
{
    const is_exact_known = !deco.domain_truncated;

    for (let slice = 0; slice < deco.domain.length - 1; slice++) {
        const domain_part = deco.domain.slice(slice).join(".");

        if (slice === 0 && is_exact_known)
            yield domain_part;
        if (slice === 1 && is_exact_known)
            yield "*." + domain_part;
        if (slice > 1)
            yield "**." + domain_part;
        yield "***." + domain_part;
    }
}
132

    
133
/*
 * Yield path patterns for `deco.path` (an array of path segments), starting
 * with the ones that keep the most segments.
 *
 * For each number of dropped trailing segments the generator yields:
 *   - the path itself (preceded by its "/"-terminated form when
 *     `deco.trailing_dash` is set), when nothing is dropped,
 *   - prefix + "/*", when exactly one segment is dropped,
 *   - prefix + "/**", when two or more segments are dropped,
 *   - prefix + "/***", in every case.
 * The first two forms are skipped when `deco.path_truncated` is set. When
 * the single dropped segment is itself literally "*" or "***" (and the path
 * is not truncated), the corresponding wildcard form is skipped, because the
 * same string has already been yielded as the full path.
 */
function* each_path_pattern(deco)
{
    const segment_count = deco.path.length;

    for (let slice = segment_count; slice > 0; slice--) {
        const path_part = deco.path.slice(0, slice).join("/");
        const is_full_path = slice === segment_count;
        const is_one_dropped = slice === segment_count - 1;
        /* First segment left out of path_part; undefined for the full path. */
        const next_segment = deco.path[slice];

        if (is_full_path && !deco.path_truncated) {
            if (deco.trailing_dash)
                yield path_part + "/";
            yield path_part;
        }
        if (is_one_dropped && !deco.path_truncated && next_segment !== "*")
            yield path_part + "/*";
        if (slice < segment_count - 1)
            yield path_part + "/**";
        if (!is_one_dropped || deco.path_truncated || next_segment !== "***")
            yield path_part + "/***";
    }
}
153

    
154
/*
 * Generate every possible pattern that matches url.
 *
 * Patterns have the form "<proto>://<domain pattern><path pattern>" and are
 * produced for every combination of the values yielded by
 * each_domain_pattern() and each_path_pattern(). URLs without a domain part
 * use an empty string in place of the domain pattern.
 *
 * When url cannot be deconstructed, an error is logged to the console and
 * the generator finishes without yielding anything.
 */
function* each_url_pattern(url)
{
    const deco = deconstruct_url(url);

    if (deco === undefined) {
        console.error("bad url format", url);
        return false;
    }

    const all_domains = deco.domain ? each_domain_pattern(deco) : [""];
    for (const domain of all_domains) {
        /* Path patterns are regenerated for each domain pattern. */
        for (const path of each_path_pattern(deco))
            yield `${deco.proto}://${domain}${path}`;
    }
}
170

    
171
/*
 * EXPORTS_START
 * EXPORT each_url_pattern
 * EXPORTS_END
 */
(8-8/16)