Project

General

Profile

Download (5.1 KB) Statistics
| Branch: | Tag: | Revision:

haketilo / common / patterns.js @ 48f76d70

1
/**
 * Hachette operations on page url patterns
 *
 * Copyright (C) 2021 Wojtek Kosior
 * Redistribution terms are gathered in the `copyright' file.
 */
7

    
8
/* Splits "<proto>://<rest>" into the protocol name and everything after it. */
const proto_regex = /^(\w+):\/\/(.*)$/;

/* Regex source fragments for the parts that may follow "<proto>://". */
const user_re = "[^/?#@]+@";
const domain_re = "[^/?#]+";
const path_re = "[^?#]*";
const query_re = "\\??[^#]*";

/* Captures: 1 - domain, 2 - path, 3 - query. */
const http_regex = new RegExp(`^(${domain_re})(${path_re})(${query_re}).*`);

/* Captures: 1 - path. */
const file_regex = new RegExp(`^(${path_re}).*`);

/* Captures: 1 - optional "user@", 2 - domain, 3 - path. */
const ftp_regex = new RegExp(`^(${user_re})?(${domain_re})(${path_re}).*`);

/*
 * Split a url (or url pattern) into its components.
 *
 * Returns undefined when `url` does not have the "<proto>://<rest>" shape or
 * when <rest> cannot be parsed for the given protocol. Otherwise returns an
 * object with:
 *   proto         - protocol name (text before "://"),
 *   domain        - array of dot-separated domain labels (absent for "file"),
 *   path          - array of non-empty path segments; gets a leading "" when
 *                   the path started with "/" or has no segments at all,
 *   query         - query string (only set for protocols other than "file"
 *                   and "ftp"),
 *   trailing_dash - whether the path text ended with "/".
 */
function deconstruct_url(url)
{
    const proto_match = proto_regex.exec(url);
    if (proto_match === null)
        return undefined;

    const deco = {proto: proto_match[1]};

    if (deco.proto === "file") {
        /* Every fragment of file_regex may match empty text, so exec() */
        /* cannot fail here. */
        deco.path = file_regex.exec(proto_match[2])[1];
    } else if (deco.proto === "ftp") {
        const ftp_match = ftp_regex.exec(proto_match[2]);
        /* No domain present (e.g. "ftp:///x"): report a bad url, do not throw. */
        if (!ftp_match)
            return undefined;
        /* Group 1 is the optional "user@" part, which is not kept. */
        [deco.domain, deco.path] = ftp_match.slice(2, 4);
    } else {
        const http_match = http_regex.exec(proto_match[2]);
        if (!http_match)
            return undefined;
        [deco.domain, deco.path, deco.query] = http_match.slice(1, 4);
    }

    if (deco.domain)
        deco.domain = deco.domain.split(".");

    /* Both flags are computed on the path text, before it becomes an array. */
    const leading_dash = deco.path[0] === "/";
    deco.trailing_dash = deco.path[deco.path.length - 1] === "/";
    deco.path = deco.path.split("/").filter(s => s !== "");
    if (leading_dash || deco.path.length === 0)
        deco.path.unshift("");

    return deco;
}
51

    
52
/*
 * Check whether the domain of a url is matched by the domain of a pattern.
 *
 * Both arguments are arrays of domain labels (e.g. ["www", "example", "com"]).
 * Be sane: both arguments should be arrays of length >= 2.
 *
 * Labels are compared right to left. Only the leftmost pattern label is
 * treated as a possible wildcard:
 *   "*"   - url has exactly as many labels as the pattern,
 *   "**"  - url has more labels than the pattern,
 *   "***" - url has at least as many labels as the pattern minus one.
 * Any other leftmost label must equal the url label and the label counts must
 * be the same.
 */
function domain_matches(url_domain, pattern_domain)
{
    const length_difference = url_domain.length - pattern_domain.length;

    for (let i = 1; i <= url_domain.length; i++) {
        const url_part = url_domain[url_domain.length - i];
        const pattern_part = pattern_domain[pattern_domain.length - i];

        /* Reached the leftmost pattern label - it decides the result. */
        if (pattern_domain.length === i) {
            if (pattern_part === "*")
                return length_difference === 0;
            if (pattern_part === "**")
                return length_difference > 0;
            if (pattern_part === "***")
                return true;
            return length_difference === 0 && pattern_part === url_part;
        }

        if (pattern_part !== url_part)
            return false;
    }

    /*
     * All url labels were consumed and matched. The pattern can still match
     * if its only remaining label is a "***" standing for zero labels.
     */
    return pattern_domain.length === url_domain.length + 1 &&
        pattern_domain[0] === "***";
}
78

    
79
/*
 * Check whether the path of a url is matched by the path of a pattern.
 *
 * url_path and pattern_path are arrays of path segments; the *_trailing_dash
 * flags tell whether the respective path text ended with "/".
 *
 * Segments are compared left to right. Only the last pattern segment is
 * treated as a possible wildcard:
 *   "*"   - url has exactly as many segments as the pattern,
 *   "**"  - url has more segments than the pattern (or literally contains
 *           "**" in that position),
 *   "***" - url has at least as many segments as the pattern minus one.
 * A pattern with a trailing "/" only matches a url with a trailing "/"; this
 * is enforced for non-wildcard matches and for the literal "**" case.
 */
function path_matches(url_path, url_trailing_dash,
                      pattern_path, pattern_trailing_dash)
{
    const dashes_ok = !(pattern_trailing_dash && !url_trailing_dash);

    if (pattern_path.length === 0)
        return url_path.length === 0 && dashes_ok;

    const length_difference = url_path.length - pattern_path.length;

    for (let i = 0; i < url_path.length; i++) {
        /* Reached the last pattern segment - it decides the result. */
        if (pattern_path.length === i + 1) {
            if (pattern_path[i] === "*")
                return length_difference === 0;
            if (pattern_path[i] === "**") {
                return length_difference > 0 ||
                    (url_path[i] === "**" && dashes_ok);
            }
            if (pattern_path[i] === "***")
                return length_difference >= 0;
            return length_difference === 0 &&
                pattern_path[i] === url_path[i] && dashes_ok;
        }

        if (pattern_path[i] !== url_path[i])
            return false;
    }

    /*
     * All url segments were consumed and matched. The pattern can still match
     * if its only remaining segment is a "***" standing for zero segments
     * (e.g. "/a/***" against "/a"), same as domain_matches() does for domains.
     */
    return pattern_path.length === url_path.length + 1 &&
        pattern_path[pattern_path.length - 1] === "***";
}
109

    
110
/*
 * Check whether `url` is matched by the url pattern `pattern`.
 *
 * Both strings are split with deconstruct_url(); if either cannot be split,
 * the problem is logged and false is returned. A match requires equal
 * protocols, domains that are either both absent or accepted by
 * domain_matches(), and paths accepted by path_matches(). A "file" pattern
 * with a trailing "/" never matches.
 */
function url_matches(url, pattern)
{
    const url_deco = deconstruct_url(url);
    const pattern_deco = deconstruct_url(pattern);

    if (url_deco === undefined || pattern_deco === undefined) {
        console.log(`bad comparison: ${url} and ${pattern}`);
        return false;
    }

    return pattern_deco.proto === url_deco.proto &&
        !(pattern_deco.proto === "file" && pattern_deco.trailing_dash) &&
        /* Either both have a domain or neither does. */
        !!url_deco.domain === !!pattern_deco.domain &&
        (!url_deco.domain ||
         domain_matches(url_deco.domain, pattern_deco.domain)) &&
        path_matches(url_deco.path, url_deco.trailing_dash,
                     pattern_deco.path, pattern_deco.trailing_dash);
}
128

    
129
/*
 * Generate the domain parts of patterns for a domain given as an array of
 * labels (e.g. ["www", "example", "com"]).
 *
 * For every suffix of the label list, starting with the whole list, yields:
 *   - the suffix itself, only when it is the whole domain,
 *   - "*." + suffix, only when exactly one leading label was dropped,
 *   - "**." + suffix, only when more than one leading label was dropped,
 *   - "***." + suffix, always.
 */
function* each_domain_pattern(domain_segments)
{
    for (let slice = 0; slice < domain_segments.length; slice++) {
        const domain_part = domain_segments.slice(slice).join(".");

        if (slice === 0)
            yield domain_part;
        if (slice === 1)
            yield "*." + domain_part;
        if (slice > 1)
            yield "**." + domain_part;
        yield "***." + domain_part;
    }
}
143

    
144
/*
 * Generate the path parts of patterns for a path given as an array of
 * segments (as produced by deconstruct_url(), i.e. usually starting with "").
 *
 * For every prefix of the segment list, starting with the whole list and
 * ending with the first segment alone, yields:
 *   - the prefix with "/" appended (only if trailing_dash is set) and then the
 *     bare prefix, only when the prefix is the whole path,
 *   - prefix + "/*", only when exactly one segment was dropped and that
 *     segment is not itself "*" (which would repeat the literal path),
 *   - prefix + "/**", only when more than one segment was dropped,
 *   - prefix + "/***", unless that would repeat a path literally ending in
 *     "***".
 */
function* each_path_pattern(path_segments, trailing_dash)
{
    const segment_count = path_segments.length;
    const last_segment = path_segments[segment_count - 1];

    for (let slice = segment_count; slice > 0; slice--) {
        const path_part = path_segments.slice(0, slice).join("/");

        if (slice === segment_count) {
            if (trailing_dash)
                yield path_part + "/";
            yield path_part;
        }
        if (slice === segment_count - 1 && path_segments[slice] !== "*")
            yield path_part + "/*";
        if (slice < segment_count - 1)
            yield path_part + "/**";
        if (slice < segment_count - 1 || last_segment !== "***")
            yield path_part + "/***";
    }
}
163

    
164
/*
 * Generate every possible pattern that matches url.
 *
 * The url is split with deconstruct_url(); if that fails, the problem is
 * logged and nothing is yielded. Otherwise every domain variant from
 * each_domain_pattern() (or just "" when the url has no domain) is combined
 * with every path variant from each_path_pattern(), domain variants forming
 * the outer loop.
 */
function* each_url_pattern(url)
{
    const deco = deconstruct_url(url);

    if (deco === undefined) {
        console.log("bad url format", url);
        return false;
    }

    const all_domains = deco.domain ? each_domain_pattern(deco.domain) : [""];
    for (const domain of all_domains) {
        /* The path generator is re-created for each domain variant. */
        for (const path of each_path_pattern(deco.path, deco.trailing_dash))
            yield `${deco.proto}://${domain}${path}`;
    }
}
180

    
181
/*
 * EXPORTS_START
 * EXPORT url_matches
 * EXPORT each_url_pattern
 * EXPORTS_END
 */
(8-8/14)