Project

General

Profile

Download (4.87 KB) Statistics
| Branch: | Tag: | Revision:

haketilo / common / patterns.js @ 53837634

1
/**
2
 * Hachette operations on page url patterns
3
 *
4
 * Copyright (C) 2021 Wojtek Kosior
5
 * Redistribution terms are gathered in the `copyright' file.
6
 */
7

    
8
/* Splits a URL into its protocol (group 1) and whatever follows "://". */
const proto_regex = /^(\w+):\/\/(.*)$/;

/* Regex source fragments for the parts that may follow "://". */
const domain_re = "[^/?#]+";
const path_re = "[^?#]*";
const query_re = "\\??[^#]*";

/* Captures domain, path and query (in that order); the rest is ignored. */
const http_regex = new RegExp(`^(${domain_re})(${path_re})(${query_re}).*`);

/* Captures only the path; used for URLs that carry no domain (file://). */
const file_regex = new RegExp(`^(${path_re}).*`);
17

    
18
/*
 * Split a URL (or URL pattern) into the pieces used for matching.
 *
 * Returns undefined when `url` does not look like `<proto>://<rest>` or,
 * for protocols other than "file", when no domain follows "://".
 *
 * Otherwise returns an object with:
 *   proto          protocol name (text before "://"),
 *   domain         array of "."-separated labels (absent for "file"),
 *   query          query part, including "?" if present (absent for "file"),
 *   path           array of non-empty "/"-separated segments; an empty
 *                  string is prepended when the path started with "/" or
 *                  contained no segments at all,
 *   trailing_dash  true when the path text ended with "/".
 */
function deconstruct_url(url)
{
    const proto_match = proto_regex.exec(url);
    if (proto_match === null)
        return undefined;

    const deco = {proto: proto_match[1]};

    if (deco.proto === "file") {
        /* file_regex also matches the empty string, so exec() never fails. */
        deco.path = file_regex.exec(proto_match[2])[1];
    } else {
        const http_match = http_regex.exec(proto_match[2]);
        if (!http_match)
            return undefined;
        [deco.domain, deco.path, deco.query] = http_match.slice(1, 4);
        deco.domain = deco.domain.split(".");
    }

    /* Both flags must be read before the path string becomes an array. */
    const leading_dash = deco.path[0] === "/";
    deco.trailing_dash = deco.path[deco.path.length - 1] === "/";
    deco.path = deco.path.split("/").filter(s => s !== "");
    if (leading_dash || deco.path.length === 0)
        deco.path.unshift("");

    return deco;
}
44

    
45
/*
 * Tell whether a URL's domain satisfies a pattern's domain. Both arguments
 * are arrays of domain labels (e.g. ["www", "example", "com"]).
 *
 * Labels are compared from the rightmost one. Only the pattern's leftmost
 * label may act as a wildcard:
 *   "*"    url must have exactly as many labels as the pattern,
 *   "**"   url must have more labels than the pattern,
 *   "***"  any number of labels is accepted in its place, including none.
 * Any other leftmost label has to be equal to the url's leftmost label.
 *
 * Be sane: both arguments should be arrays of length >= 2
 */
function domain_matches(url_domain, pattern_domain)
{
    const length_difference = url_domain.length - pattern_domain.length;

    for (let i = 1; i <= url_domain.length; i++) {
        const url_part = url_domain[url_domain.length - i];
        const pattern_part = pattern_domain[pattern_domain.length - i];

        /* Reached the pattern's leftmost label - the verdict is made here. */
        if (pattern_domain.length === i) {
            if (pattern_part === "*")
                return length_difference === 0;
            if (pattern_part === "**")
                return length_difference > 0;
            if (pattern_part === "***")
                return true;
            return length_difference === 0 && pattern_part === url_part;
        }

        if (pattern_part !== url_part)
            return false;
    }

    /*
     * The pattern has more labels than the url. That is only acceptable if
     * the single extra label is a "***" standing for zero labels.
     */
    return pattern_domain.length === url_domain.length + 1 &&
        pattern_domain[0] === "***";
}
71

    
72
/*
 * Tell whether a URL's path satisfies a pattern's path. Paths are arrays of
 * segments; the *_trailing_dash flags say whether the respective path text
 * ended with "/".
 *
 * Segments are compared from the first one. Only the pattern's last segment
 * may act as a wildcard:
 *   "*"    url must have exactly as many segments as the pattern,
 *   "**"   url must have more segments than the pattern (or literally
 *          contain "**" at that position),
 *   "***"  url must have at least as many segments as the pattern.
 * Any other last segment has to be equal to the url's segment at the same
 * position, with no url segments following it.
 *
 * A pattern with a trailing slash additionally requires the url to have
 * one; this is only enforced on the literal-comparison paths below.
 */
function path_matches(url_path, url_trailing_dash,
                      pattern_path, pattern_trailing_dash)
{
    const dashes_ok = !(pattern_trailing_dash && !url_trailing_dash);

    if (pattern_path.length === 0)
        return url_path.length === 0 && dashes_ok;

    const length_difference = url_path.length - pattern_path.length;

    for (let i = 0; i < url_path.length; i++) {
        /* Reached the pattern's last segment - the verdict is made here. */
        if (pattern_path.length === i + 1) {
            if (pattern_path[i] === "*")
                return length_difference === 0;
            if (pattern_path[i] === "**") {
                return length_difference > 0 ||
                    (url_path[i] === "**" && dashes_ok);
            }
            if (pattern_path[i] === "***")
                return length_difference >= 0;
            return length_difference === 0 &&
                pattern_path[i] === url_path[i] && dashes_ok;
        }

        if (pattern_path[i] !== url_path[i])
            return false;
    }

    /* The url ran out of segments before the pattern's last one. */
    return false;
}
102

    
103
/*
 * Tell whether `url` is matched by `pattern`. Both are deconstructed the
 * same way; if either cannot be deconstructed, the problem is logged and
 * false is returned.
 *
 * For a match the protocols must be equal, either both or neither may have
 * a domain, and the domain (if any) and path must satisfy the pattern's.
 * A "file" pattern ending with "/" never matches anything.
 */
function url_matches(url, pattern)
{
    const url_deco = deconstruct_url(url);
    const pattern_deco = deconstruct_url(pattern);

    if (url_deco === undefined || pattern_deco === undefined) {
        console.log(`bad comparison: ${url} and ${pattern}`);
        return false;
    }

    return pattern_deco.proto === url_deco.proto &&
        !(pattern_deco.proto === "file" && pattern_deco.trailing_dash) &&
        !!url_deco.domain === !!pattern_deco.domain &&
        (!url_deco.domain ||
         domain_matches(url_deco.domain, pattern_deco.domain)) &&
        path_matches(url_deco.path, url_deco.trailing_dash,
                     pattern_deco.path, pattern_deco.trailing_dash);
}
121

    
122
/*
 * Yield domain patterns built from an array of domain labels, going from
 * the full domain towards its shortest suffix.
 *
 * For the suffix obtained by dropping `slice` leading labels this yields:
 *   slice === 0   the suffix itself (the exact domain),
 *   slice === 1   "*." + suffix,
 *   slice > 1     "**." + suffix,
 * always followed by "***." + suffix.
 */
function* each_domain_pattern(domain_segments)
{
    for (let slice = 0; slice < domain_segments.length; slice++) {
        const domain_part = domain_segments.slice(slice).join(".");
        if (slice === 0)
            yield domain_part;
        if (slice === 1)
            yield "*." + domain_part;
        if (slice > 1)
            yield "**." + domain_part;
        yield "***." + domain_part;
    }
}
136

    
137
/*
 * Yield path patterns built from an array of path segments, going from the
 * full path towards its shortest prefix.
 *
 * For the prefix made of the first `slice` segments this yields:
 *   - the full path itself, preceded by its variant with "/" appended when
 *     `trailing_dash` is set (full path only),
 *   - prefix + "/*" when exactly one segment was dropped and that segment
 *     is not itself "*" (which would duplicate the full path),
 *   - prefix + "/**" when more than one segment was dropped,
 *   - prefix + "/***", except where it would merely repeat a trailing "***"
 *     segment of the path.
 */
function* each_path_pattern(path_segments, trailing_dash)
{
    for (let slice = path_segments.length; slice > 0; slice--) {
        const path_part = path_segments.slice(0, slice).join("/");
        if (slice === path_segments.length) {
            if (trailing_dash)
                yield path_part + "/";
            yield path_part;
        }
        if (slice === path_segments.length - 1 && path_segments[slice] !== "*")
            yield path_part + "/*";
        if (slice < path_segments.length - 1)
            yield path_part + "/**";
        if (slice < path_segments.length - 1 ||
            path_segments[path_segments.length - 1] !== "***")
            yield path_part + "/***";
    }
}
156

    
157
/*
 * Generate every possible pattern that matches url.
 *
 * Patterns are yielded as strings of the form `<proto>://<domain><path>`,
 * combining every domain pattern with every path pattern (domain varies in
 * the outer loop). URLs without a domain (file://) use an empty domain.
 *
 * If `url` cannot be deconstructed, the problem is logged and the generator
 * finishes without yielding anything (its return value is then false).
 */
function* each_url_pattern(url)
{
    const deco = deconstruct_url(url);

    if (deco === undefined) {
        console.log("bad url format", url);
        return false;
    }

    const all_domains = deco.domain ? each_domain_pattern(deco.domain) : [""];
    for (const domain of all_domains) {
        for (const path of each_path_pattern(deco.path, deco.trailing_dash))
            yield `${deco.proto}://${domain}${path}`;
    }
}
173

    
174
/*
175
 * EXPORTS_START
176
 * EXPORT url_matches
177
 * EXPORT each_url_pattern
178
 * EXPORTS_END
179
 */
(8-8/14)