Project

General

Profile

Download (4.07 KB) Statistics
| Branch: | Tag: | Revision:

haketilo / common / patterns.js @ 5c583de8

1
/**
2
 * This file is part of Haketilo.
3
 *
4
 * Function: Operations on page URL patterns.
5
 *
6
 * Copyright (C) 2021 Wojtek Kosior
7
 * Redistribution terms are gathered in the `copyright' file.
8
 */
9

    
10
/*
 * Upper bounds used when deconstructing URLs. *_CHARS values are compared
 * against string lengths, *_LEN values against the number of segments.
 */
const MAX = {
    /* Domain name: characters, then dot-separated labels. */
    DOMAIN_CHARS:   100,
    DOMAIN_LEN:     7,
    /* Path: characters, then slash-separated segments. */
    URL_PATH_CHARS: 255,
    URL_PATH_LEN:   12
};
16

    
17
/* Splits "<scheme>://<rest>" into the scheme (group 1) and the rest (group 2). */
const proto_regex = /^(\w+):\/\/(.*)$/;

/* Regex source fragments shared by the scheme-specific expressions below. */
const user_re = "[^/?#@]+@";
const domain_re = "[.*a-zA-Z0-9-]+";
const path_re = "[^?#]*";
const query_re = "\\??[^#]*";

/* Groups: 1 - domain, 2 - path, 3 - query. */
const http_regex = new RegExp(
    "^(" + domain_re + ")(" + path_re + ")(" + query_re + ").*"
);

/* Groups: 1 - path (has to start with a slash). */
const file_regex = new RegExp("^(/" + path_re + ").*");

/* Groups: 1 - optional "user@" part, 2 - domain, 3 - path. */
const ftp_regex = new RegExp(
    "^(" + user_re + ")?(" + domain_re + ")(" + path_re + ").*"
);
29

    
30
/*
 * Run `regex' against `string' and hand back the resulting match array.
 * When there is no match, `error_msg' itself is thrown (as given, not wrapped
 * in an Error object).
 */
function match_or_throw(regex, string, error_msg)
{
    const result = regex.exec(string);
    if (result !== null)
	return result;

    throw error_msg;
}
38

    
39
/**
 * Split a URL (or URL pattern) into the components used for pattern matching.
 *
 * Supported schemes are "file", "ftp", "http" and "https".
 *
 * @param {string} url - the URL to take apart
 * @param {boolean} [use_limits=true] - when true, the values in MAX are
 *     enforced by truncating the domain and the path; when false, nothing
 *     gets truncated
 * @returns {Object} an object with the following properties:
 *     - proto: the scheme name,
 *     - domain: array of domain labels (not set for "file" URLs),
 *     - path: array of non-empty path segments,
 *     - query: the query part as matched, "" if there is none (only set for
 *       "http" and "https"),
 *     - trailing_dash: true if the path ended with "/",
 *     - domain_truncated, path_truncated: set to true only if a limit caused
 *       the respective component to be shortened.
 * @throws {string} a message string if the URL does not have the expected
 *     shape or uses an unsupported scheme
 */
function deconstruct_url(url, use_limits=true)
{
    /*
     * Work on a private copy of the limits. Assigning through an alias of MAX
     * would switch the limits off for every later call as well.
     */
    const max = Object.assign({}, MAX);
    if (!use_limits) {
	for (const key of Object.keys(max))
	    max[key] = Infinity;
    }

    const matcher = (re, str) => match_or_throw(re, str, `bad url '${url}'`);

    const proto_match = matcher(proto_regex, url);
    const deco = {proto: proto_match[1]};

    if (deco.proto === "file") {
	deco.path = matcher(file_regex, proto_match[2])[1];
    } else if (deco.proto === "ftp") {
	/* Group 1 is the optional "user@" part; it is not kept. */
	[deco.domain, deco.path] =
	    matcher(ftp_regex, proto_match[2]).slice(2, 4);
    } else if (deco.proto === "http" || deco.proto === "https") {
	[deco.domain, deco.path, deco.query] =
	    matcher(http_regex, proto_match[2]).slice(1, 4);
	deco.domain = deco.domain.toLowerCase();
    } else {
	throw `unsupported protocol in url '${url}'`;
    }

    /* Has to be recorded before the path gets split into segments. */
    deco.trailing_dash = deco.path[deco.path.length - 1] === "/";

    /* Character limits; these are only applied to URLs that have a domain. */
    if (deco.domain) {
	if (deco.domain.length > max.DOMAIN_CHARS) {
	    /*
	     * Keep the longest suffix that starts right after a dot and fits
	     * in the limit. If there is no such dot, nothing is kept.
	     */
	    const idx = deco.domain.indexOf(".", deco.domain.length -
					    max.DOMAIN_CHARS);
	    if (idx === -1)
		deco.domain = [];
	    else
		deco.domain = deco.domain.substring(idx + 1);

	    deco.domain_truncated = true;
	}

	if (deco.path.length > max.URL_PATH_CHARS) {
	    /*
	     * Only the last segment is dropped here, so the result can still
	     * be longer than the limit.
	     * NOTE(review): confirm whether lastIndexOf() was meant to be
	     * bounded by max.URL_PATH_CHARS.
	     */
	    deco.path = deco.path.substring(0, deco.path.lastIndexOf("/"));
	    deco.path_truncated = true;
	}
    }

    /* The domain is already an (empty) array if nothing of it was kept. */
    if (typeof deco.domain === "string") {
	deco.domain = deco.domain.split(".");
	/* Drop leading labels so that at most max.DOMAIN_LEN remain. */
	if (deco.domain.splice(0, deco.domain.length - max.DOMAIN_LEN).length
	    > 0)
	    deco.domain_truncated = true;
    }

    deco.path = deco.path.split("/").filter(s => s !== "");
    /* Drop trailing segments so that at most max.URL_PATH_LEN remain. */
    if (deco.domain && deco.path.splice(max.URL_PATH_LEN).length > 0)
	deco.path_truncated = true;

    return deco;
}
98

    
99
/**
 * Generate the domain parts of all patterns that match a deconstructed URL.
 *
 * Leading labels get dropped one at a time, always leaving at least two
 * labels; a domain with fewer than two labels yields nothing. For each
 * remaining suffix the following are yielded, in this order:
 *   - the suffix itself, if no label was dropped and the domain was not
 *     truncated,
 *   - "*.<suffix>", if exactly one label was dropped and the domain was not
 *     truncated,
 *   - "**.<suffix>", if more than one label was dropped,
 *   - "***.<suffix>", always.
 *
 * @param {Object} deco - object with a `domain' array of labels and an
 *     optional `domain_truncated' flag
 * @yields {string} domain part of a pattern
 */
function* each_domain_pattern(deco)
{
    for (let slice = 0; slice < deco.domain.length - 1; slice++) {
	const domain_part = deco.domain.slice(slice).join(".");

	if (slice === 0 && !deco.domain_truncated)
	    yield domain_part;
	if (slice === 1 && !deco.domain_truncated)
	    yield "*." + domain_part;
	if (slice > 1)
	    yield "**." + domain_part;
	yield "***." + domain_part;
    }
}
113

    
114
/**
 * Generate the path parts of all patterns that match a deconstructed URL.
 *
 * Trailing segments get dropped one at a time, down to the empty path. For
 * each remaining prefix the following are yielded, in this order:
 *   - the prefix followed by "/" (only if the URL had a trailing slash) and
 *     the bare prefix (except for an empty "file" path), if no segment was
 *     dropped and the path was not truncated,
 *   - "<prefix>/*", if exactly one segment was dropped and the path was not
 *     truncated,
 *   - "<prefix>/**", if more than one segment was dropped,
 *   - "<prefix>/***".
 * "<prefix>/*" and "<prefix>/***" are left out when they would only repeat
 * the full path, which is the case when the last segment is itself "*" or
 * "***".
 *
 * @param {Object} deco - object with `proto', a `path' array of segments and
 *     the optional `trailing_dash' and `path_truncated' flags
 * @yields {string} path part of a pattern
 */
function* each_path_pattern(deco)
{
    const segment_count = deco.path.length;

    for (let slice = segment_count; slice >= 0; slice--) {
	const path_part = ["", ...deco.path.slice(0, slice)].join("/");
	/* True when exactly the last segment has been dropped. */
	const dropped_one = slice === segment_count - 1;

	if (slice === segment_count && !deco.path_truncated) {
	    if (deco.trailing_dash)
		yield path_part + "/";
	    if (slice > 0 || deco.proto !== "file")
		yield path_part;
	}
	if (dropped_one && !deco.path_truncated && deco.path[slice] !== "*")
	    yield path_part + "/*";
	if (slice < segment_count - 1)
	    yield path_part + "/**";
	if (!dropped_one || deco.path_truncated || deco.path[slice] !== "***")
	    yield path_part + "/***";
    }
}
135

    
136
/**
 * Generate every possible pattern that matches url.
 *
 * Patterns are built by combining each domain pattern with each path pattern
 * of the deconstructed URL. URLs without a domain ("file" URLs) only get the
 * path patterns, appended directly after "<proto>://".
 *
 * deconstruct_url() either returns an object or throws, so there is no
 * "undefined" result to check for here; its exception surfaces when the
 * first value is requested from this generator.
 *
 * @param {string} url - the URL to generate patterns for
 * @yields {string} a pattern matching `url'
 */
function* each_url_pattern(url)
{
    const deco = deconstruct_url(url);

    const all_domains = deco.domain ? each_domain_pattern(deco) : [""];
    for (const domain of all_domains) {
	for (const path of each_path_pattern(deco))
	    yield `${deco.proto}://${domain}${path}`;
    }
}
152

    
153
/*
154
 * EXPORTS_START
155
 * EXPORT each_url_pattern
156
 * EXPORT deconstruct_url
157
 * EXPORTS_END
158
 */
(8-8/16)