Project

General

Profile

Download (3.92 KB) Statistics
| Branch: | Tag: | Revision:

haketilo / common / patterns.js @ 69e53743

1
/**
 * This file is part of Haketilo.
 *
 * Function: Operations on page URL patterns.
 *
 * Copyright (C) 2021 Wojtek Kosior
 * Redistribution terms are gathered in the `copyright' file.
 */
9

    
10
/*
 * Limits applied by deconstruct_url() to URLs that have a domain:
 *   URL_PATH_LEN   - maximum number of path segments kept,
 *   URL_PATH_CHARS - path length (in characters) above which the path is cut,
 *   DOMAIN_LEN     - maximum number of domain labels kept,
 *   DOMAIN_CHARS   - domain length (in characters) above which it is cut.
 */
const MAX = {
    URL_PATH_LEN:   12,
    URL_PATH_CHARS: 255,
    DOMAIN_LEN:     7,
    DOMAIN_CHARS:   100
};

/* Splits "<proto>://<rest>" into the protocol name and everything after it. */
const proto_regex = /^(\w+):\/\/(.*)$/;

/* Regex source fragments for the individual components of a URL. */
const user_re = "[^/?#@]+@";
const domain_re = "[^/?#]+";
const path_re = "[^?#]*";
const query_re = "\\??[^#]*";

/* Groups: 1 - domain, 2 - path, 3 - query (with its leading "?" if any). */
const http_regex = new RegExp(`^(${domain_re})(${path_re})(${query_re}).*`);

/* Groups: 1 - path. */
const file_regex = new RegExp(`^(${path_re}).*`);

/* Groups: 1 - optional "user@" prefix, 2 - domain, 3 - path. */
const ftp_regex = new RegExp(`^(${user_re})?(${domain_re})(${path_re}).*`);
29

    
30
/*
 * Split `url` into the components used for pattern matching.
 *
 * Arguments:
 *   url        - string of the form "<proto>://<rest>"; supported protocols
 *                are file, ftp, http and https.
 *   use_limits - when true (the default) the limits from MAX are applied to
 *                URLs that have a domain; when false nothing is truncated.
 *
 * Returns an object with:
 *   proto            - protocol name,
 *   domain           - array of domain labels (absent for file URLs),
 *   path             - array of non-empty path segments,
 *   query            - query string (http and https only),
 *   trailing_dash    - true when the path ended with "/",
 *   domain_truncated - set to true when part of the domain was dropped,
 *   path_truncated   - set to true when part of the path was dropped.
 * Returns undefined when the part after "<proto>://" of an http, https or ftp
 * URL does not start with a domain.
 *
 * Throws a string when `url` lacks the "<proto>://" prefix or uses an
 * unsupported protocol.
 */
function deconstruct_url(url, use_limits=true)
{
    /*
     * Work on a private copy of the limits. Writing Infinity into MAX itself
     * would silently disable the limits for every later call.
     */
    const max = Object.assign({}, MAX);
    if (!use_limits) {
	for (const key of Object.keys(max))
	    max[key] = Infinity;
    }

    const proto_match = proto_regex.exec(url);
    if (proto_match === null)
	throw `bad url '${url}'`;

    const deco = {proto: proto_match[1]};

    if (deco.proto === "file") {
	/* file_regex can match the empty string, so exec() never fails here. */
	deco.path = file_regex.exec(proto_match[2])[1];
    } else if (deco.proto === "ftp") {
	const ftp_match = ftp_regex.exec(proto_match[2]);
	if (!ftp_match)
	    return undefined;
	/* Group 1 is the optional "user@" part, which is not used. */
	[deco.domain, deco.path] = ftp_match.slice(2, 4);
    } else if (deco.proto === "http" || deco.proto === "https") {
	const http_match = http_regex.exec(proto_match[2]);
	if (!http_match)
	    return undefined;
	[deco.domain, deco.path, deco.query] = http_match.slice(1, 4);
    } else {
	throw `unsupported protocol in url '${url}'`;
    }

    deco.trailing_dash = deco.path[deco.path.length - 1] === "/";

    if (deco.domain) {
	if (deco.domain.length > max.DOMAIN_CHARS) {
	    /*
	     * Keep only whole labels from the tail of the domain: cut at the
	     * first dot found within the last DOMAIN_CHARS characters.
	     */
	    const idx = deco.domain.indexOf(".", deco.domain.length -
					    max.DOMAIN_CHARS);
	    if (idx === -1)
		deco.domain = [];
	    else
		deco.domain = deco.domain.substring(idx + 1);

	    deco.domain_truncated = true;
	}

	if (deco.path.length > max.URL_PATH_CHARS) {
	    /* Drop everything from the last "/" onwards. */
	    deco.path = deco.path.substring(0, deco.path.lastIndexOf("/"));
	    deco.path_truncated = true;
	}
    }

    /* The domain is already an (empty) array if no label fit above. */
    if (typeof deco.domain === "string") {
	deco.domain = deco.domain.split(".");
	/* Remove leading labels so that at most DOMAIN_LEN trailing ones stay. */
	if (deco.domain.splice(0, deco.domain.length - max.DOMAIN_LEN).length
	    > 0)
	    deco.domain_truncated = true;
    }

    deco.path = deco.path.split("/").filter(s => s !== "");
    /* Only URLs with a domain have their number of path segments limited. */
    if (deco.domain && deco.path.splice(max.URL_PATH_LEN).length > 0)
	deco.path_truncated = true;

    return deco;
}
90

    
91
/*
 * Generate domain patterns for a deconstructed URL, starting with the full
 * list of labels in `deco.domain` and then dropping one leading label at a
 * time. The last remaining label is never used on its own, so a domain of
 * fewer than two labels produces no patterns.
 *
 * For each suffix:
 *   - the literal domain is yielded when no label was dropped,
 *   - a "*." pattern is yielded when exactly one label was dropped,
 *   - a "**." pattern is yielded when two or more labels were dropped,
 *   - a "***." pattern is always yielded.
 * The literal and "*." forms are skipped when `deco.domain_truncated` is set.
 */
function* each_domain_pattern(deco)
{
    for (let slice = 0; slice < deco.domain.length - 1; slice++) {
	const domain_part = deco.domain.slice(slice).join(".");
	if (slice === 0 && !deco.domain_truncated)
	    yield domain_part;
	if (slice === 1 && !deco.domain_truncated)
	    yield "*." + domain_part;
	if (slice > 1)
	    yield "**." + domain_part;
	yield "***." + domain_part;
    }
}
105

    
106
/*
 * Generate path patterns for a deconstructed URL, starting with all segments
 * of `deco.path` and then dropping one trailing segment at a time, down to
 * the empty path.
 *
 * For the complete, untruncated path the literal path is yielded, preceded
 * by its trailing-slash form when the URL had one. The empty path gets no
 * trailing-slash form and is not yielded at all for file URLs.
 * For shorter prefixes "/*" stands for exactly one dropped segment and "/**"
 * for two or more; "/***" is yielded for every prefix. A wildcard is skipped
 * when the single dropped segment already is that very wildcard. The literal
 * and "/*" forms are skipped when `deco.path_truncated` is set.
 */
function* each_path_pattern(deco)
{
    for (let slice = deco.path.length; slice >= 0; slice--) {
	/* The leading "" makes join() start every non-empty path with "/". */
	const path_part = ["", ...deco.path.slice(0, slice)].join("/");
	if (slice === deco.path.length && !deco.path_truncated) {
	    if (deco.trailing_dash && path_part !== "")
		yield path_part + "/";
	    if (path_part !== "" || deco.proto !== "file")
		yield path_part;
	}
	if (slice === deco.path.length - 1 && !deco.path_truncated &&
	    deco.path[slice] !== "*")
	    yield path_part + "/*";
	if (slice < deco.path.length - 1)
	    yield path_part + "/**";
	if (slice !== deco.path.length - 1 || deco.path_truncated ||
	    deco.path[slice] !== "***")
	    yield path_part + "/***";
    }
}
127

    
128
/*
 * Generate every possible pattern that matches url.
 *
 * Each pattern has the form "<proto>://<domain pattern><path pattern>". All
 * path patterns are produced for one domain pattern before moving on to the
 * next one. URLs without a domain (file URLs) use an empty domain part.
 *
 * Nothing is generated, and an error is logged to the console, when
 * deconstruct_url() returns undefined for `url`. Anything thrown by
 * deconstruct_url() propagates to the caller.
 */
function* each_url_pattern(url)
{
    const deco = deconstruct_url(url);

    if (deco === undefined) {
	console.error("bad url format", url);
	return false;
    }

    const all_domains = deco.domain ? each_domain_pattern(deco) : [""];
    for (const domain of all_domains) {
	for (const path of each_path_pattern(deco))
	    yield `${deco.proto}://${domain}${path}`;
    }
}
144

    
145
/*
 * EXPORTS_START
 * EXPORT each_url_pattern
 * EXPORT deconstruct_url
 * EXPORTS_END
 */
(8-8/16)