Project

General

Profile

Download (4.79 KB) Statistics
| Branch: | Tag: | Revision:

haketilo / common / patterns.js @ 6b12a034

1
/**
2
 * Hachette operations on page url patterns
3
 *
4
 * Copyright (C) 2021 Wojtek Kosior
5
 * Redistribution terms are gathered in the `copyright' file.
6
 */
7

    
8
/* Regex source fragments for the individual components of a url. */
const proto_re = "[a-zA-Z]*://";
const domain_re = "[^/?#]+";
const segments_re = "/[^?#]*";
const query_re = "\\?[^#]*";

/*
 * Matches a whole url. Capture groups, in order: protocol (with `://'),
 * domain, path segments (optional), query string (optional). Anything from
 * `#' onwards is matched but not captured.
 */
const url_regex = new RegExp([
    "^",
    "(" + proto_re + ")",
    "(" + domain_re + ")",
    "(" + segments_re + ")?",
    "(" + query_re + ")?",
    "#?.*$"
].join(""));
21

    
22
/*
 * Split a url (or url pattern) into its components using url_regex.
 *
 * Returns undefined when the string does not match url_regex. Otherwise
 * returns an object with:
 *   proto              - protocol including the trailing `://'
 *   domain             - array of domain labels (domain split on `.')
 *   path               - array of non-empty path segments, always preceded
 *                        by a single "" element
 *   query              - query string including `?', or undefined
 *   path_trailing_dash - whether the path part ends with `/'; undefined when
 *                        the url has no path part at all
 */
function deconstruct_url(url)
{
    const match = url_regex.exec(url);
    if (match === null)
        return undefined;

    const proto = match[1];
    const domain = match[2].split(".");
    const raw_path = match[3];
    const query = match[4];

    /* A missing path yields the falsy raw_path itself, not `false'. */
    const path_trailing_dash = raw_path ? raw_path.endsWith("/") : raw_path;

    /* Leading "" makes path.join("/") start with a slash. */
    const path = [""];
    for (const segment of (raw_path || "").split("/")) {
        if (segment !== "")
            path.push(segment);
    }

    return {proto, domain, path, query, path_trailing_dash};
}
38

    
39
/*
 * Check whether domain labels of a url match domain labels of a pattern.
 * Both arguments are arrays of labels (callers are expected to pass arrays
 * of length >= 2). Labels are compared right-to-left. Only the leftmost
 * pattern label may act as a wildcard:
 *   "*"   - url has exactly as many labels as the pattern
 *   "**"  - url has more labels than the pattern
 *   "***" - url has at least as many labels as the pattern, or exactly one
 *           label fewer (the wildcard then stands for no label at all)
 */
function domain_matches(url_domain, pattern_domain)
{
    const url_len = url_domain.length;
    const pattern_len = pattern_domain.length;
    const surplus = url_len - pattern_len;

    for (let back = 1; back <= url_len; back++) {
        const url_label = url_domain[url_len - back];
        const pattern_label = pattern_domain[pattern_len - back];

        /* Not yet at the pattern's leftmost label: require exact equality. */
        if (back !== pattern_len) {
            if (pattern_label !== url_label)
                return false;
            continue;
        }

        /* Leftmost pattern label decides the outcome. */
        switch (pattern_label) {
        case "*":
            return surplus === 0;
        case "**":
            return surplus > 0;
        case "***":
            return true;
        default:
            return surplus === 0 && pattern_label === url_label;
        }
    }

    /* Every url label matched but the pattern still has labels left. */
    return surplus === -1 && pattern_domain[0] === "***";
}
65

    
66
/*
 * Check whether path segments of a url match path segments of a pattern.
 *
 * url_path and pattern_path are arrays of segments as produced by
 * deconstruct_url(); the *_trailing_dash arguments tell whether the respective
 * path ended with `/'. A pattern with a trailing slash only matches a url
 * that also has one (checked for literal, non-wildcard matches).
 *
 * Segments are compared left-to-right. Only the last pattern segment may act
 * as a wildcard:
 *   "*"   - url has exactly as many segments as the pattern
 *   "**"  - url has more segments than the pattern (or the url segment is
 *           itself a literal "**")
 *   "***" - url has at least as many segments as the pattern, or exactly one
 *           segment fewer (the wildcard then stands for no segment at all)
 */
function path_matches(url_path, url_trailing_dash,
                      pattern_path, pattern_trailing_dash)
{
    const dashes_ok = !(pattern_trailing_dash && !url_trailing_dash);

    if (pattern_path.length === 0)
        return url_path.length === 0 && dashes_ok;

    const length_difference = url_path.length - pattern_path.length;

    for (let i = 0; i < url_path.length; i++) {
        /* Reached the last pattern segment - it decides the outcome. */
        if (pattern_path.length === i + 1) {
            if (pattern_path[i] === "*")
                return length_difference === 0;
            if (pattern_path[i] === "**") {
                return length_difference > 0 ||
                    (url_path[i] === "**" && dashes_ok);
            }
            if (pattern_path[i] === "***")
                return length_difference >= 0;
            return length_difference === 0 &&
                pattern_path[i] === url_path[i] && dashes_ok;
        }

        if (pattern_path[i] !== url_path[i])
            return false;
    }

    /*
     * All url segments matched but the pattern has segments left. This is
     * still a match if the only leftover is a final "***" standing for zero
     * segments - the same rule domain_matches() applies to a leading "***".
     */
    return length_difference === -1 &&
        pattern_path[pattern_path.length - 1] === "***";
}
96

    
97
/*
 * Tell whether url is matched by pattern. Both strings are split with
 * deconstruct_url(); if either cannot be split, the problem is logged and
 * false is returned. Protocols have to be identical; domains and paths are
 * compared with domain_matches() and path_matches().
 */
function url_matches(url, pattern)
{
    const [url_parts, pattern_parts] =
        [url, pattern].map(text => deconstruct_url(text));

    if (url_parts === undefined || pattern_parts === undefined) {
        console.log(`bad comparison: ${url} and ${pattern}`);
        return false;
    }

    if (url_parts.proto !== pattern_parts.proto)
        return false;

    const domain_ok = domain_matches(url_parts.domain, pattern_parts.domain);

    /* Path is only examined when the domain already matched. */
    return domain_ok &&
        path_matches(url_parts.path, url_parts.path_trailing_dash,
                     pattern_parts.path, pattern_parts.path_trailing_dash);
}
114

    
115
/*
 * Generate candidate patterns for url and pass each of them, as a string, to
 * callback. Generation stops early as soon as callback returns exactly
 * `false'. If url cannot be split by deconstruct_url(), the problem is logged
 * and callback is never called.
 *
 * Candidates are produced with the longest domain suffix first and, for each
 * domain candidate, the longest path prefix first.
 */
function for_each_possible_pattern(url, callback)
{
    const deco = deconstruct_url(url);

    if (deco === undefined) {
        console.log("bad url format", url);
        return;
    }

    const {proto, domain, path, path_trailing_dash} = deco;
    const depth = path.length;

    /* Path candidates do not depend on the domain, so build them once. */
    const path_patterns = [];
    for (let kept = depth; kept > 0; kept--) {
        const prefix = path.slice(0, kept).join("/");
        /* First segment left out of prefix; undefined when none was. */
        const dropped = path[kept];
        const suffixes = [];

        if (kept === depth) {
            /* Full path: literal candidates, with and without slash. */
            if (path_trailing_dash)
                suffixes.push("/");
            suffixes.push("");
        } else {
            /* Skip a wildcard when the literal path already spells it. */
            if (kept === depth - 1 && dropped !== "*")
                suffixes.push("/*");
            if (dropped !== "**" || kept < depth - 1)
                suffixes.push("/**");
        }
        if (dropped !== "***" || kept < depth)
            suffixes.push("/***");

        for (const suffix of suffixes)
            path_patterns.push(prefix + suffix);
    }

    for (let skipped = 0; skipped < domain.length; skipped++) {
        const domain_suffix = domain.slice(skipped).join(".");

        let domain_prefixes;
        if (skipped === 0)
            domain_prefixes = ["", "***."];
        else if (skipped === 1)
            domain_prefixes = ["*.", "**.", "***."];
        else
            domain_prefixes = ["**.", "***."];

        for (const domain_prefix of domain_prefixes) {
            const domain_pattern = domain_prefix + domain_suffix;

            for (const path_pattern of path_patterns) {
                if (callback(proto + domain_pattern + path_pattern) === false)
                    return;
            }
        }
    }
}
172

    
173
/*
 * Return an array with every pattern for_each_possible_pattern() generates
 * for url, in generation order. The array is empty when url cannot be parsed.
 */
function possible_patterns(url)
{
    const patterns = [];

    /*
     * Array.prototype.push must not be passed as a bare callback: detached
     * from `patterns' it loses its `this'. The wrapper also returns undefined,
     * so it can never signal for_each_possible_pattern() to stop early.
     */
    for_each_possible_pattern(url, pattern => { patterns.push(pattern); });

    return patterns;
}
180

    
181
/*
182
 * EXPORTS_START
183
 * EXPORT url_matches
184
 * EXPORT for_each_possible_pattern
185
 * EXPORT possible_patterns
186
 * EXPORTS_END
187
 */
(5-5/8)