Project

General

Profile

Download (3.72 KB) Statistics
| Branch: | Tag: | Revision:

haketilo / common / patterns.js @ 2bd35bc4

1
/**
 * This file is part of Haketilo.
 *
 * Function: Operations on page URL patterns.
 *
 * Copyright (C) 2021 Wojtek Kosior
 * Redistribution terms are gathered in the `copyright' file.
 */
9

    
10
/* Limits applied by deconstruct_url() to the URLs it splits. */
/* Maximum number of path segments kept. */
const MAX_URL_PATH_LEN = 12;
/* Path strings longer than this many characters get cut. */
const MAX_URL_PATH_CHARS = 255;
/* Maximum number of dot-separated domain labels kept. */
const MAX_DOMAIN_LEN = 7;
/* Domain strings longer than this many characters get cut. */
const MAX_DOMAIN_CHARS = 100;

/* Splits "<proto>://<rest>" into the protocol name and the remainder. */
const proto_regex = /^(\w+):\/\/(.*)$/;

/* Regex source fragments for the individual URL components. */
const user_re = "[^/?#@]+@";
const domain_re = "[^/?#]+";
const path_re = "[^?#]*";
const query_re = "\\??[^#]*";

/* Groups: 1 - domain, 2 - path, 3 - query. Requires a non-empty domain. */
const http_regex = new RegExp(`^(${domain_re})(${path_re})(${query_re}).*`);

/* Groups: 1 - path. */
const file_regex = new RegExp(`^(${path_re}).*`);

/*
 * Groups: 1 - optional "user@" prefix, 2 - domain, 3 - path. Requires
 * a non-empty domain.
 */
const ftp_regex = new RegExp(`^(${user_re})?(${domain_re})(${path_re}).*`);
27

    
28
/*
 * Split url into the components used for generating patterns.
 *
 * Returns undefined when url has no "<proto>://" prefix or when the part
 * after the prefix does not match the regex used for its protocol.
 * Otherwise returns an object with these properties:
 *   proto            - protocol name, without "://"
 *   domain           - array of domain labels (not set for "file" URLs);
 *                      at most MAX_DOMAIN_LEN rightmost labels are kept
 *   path             - array of non-empty path segments, preceded by ""
 *                      when the path started with "/" or had no segments
 *   query            - query string (only set for protocols other than
 *                      "file" and "ftp")
 *   trailing_dash    - true when the path string ended with "/"
 *   domain_truncated - set to true when the domain got shortened
 *   path_truncated   - set to true when the path got shortened
 */
function deconstruct_url(url)
{
    const proto_match = proto_regex.exec(url);
    if (proto_match === null)
        return undefined;

    const deco = {proto: proto_match[1]};

    if (deco.proto === "file") {
        /* file_regex accepts an empty path, so it matches any remainder. */
        deco.path = file_regex.exec(proto_match[2])[1];
    } else if (deco.proto === "ftp") {
        /*
         * exec() returns null when the domain is empty (e.g. "ftp://" or
         * "ftp:///dir"). Report that the same way the http branch does
         * instead of throwing TypeError.
         */
        const ftp_match = ftp_regex.exec(proto_match[2]);
        if (!ftp_match)
            return undefined;
        /* Group 1 is the optional "user@" part, which is not used. */
        [deco.domain, deco.path] = ftp_match.slice(2, 4);
    } else {
        const http_match = http_regex.exec(proto_match[2]);
        if (!http_match)
            return undefined;
        [deco.domain, deco.path, deco.query] = http_match.slice(1, 4);
    }

    /* Must be recorded now; splitting the path below loses this. */
    const leading_dash = deco.path[0] === "/";
    deco.trailing_dash = deco.path[deco.path.length - 1] === "/";

    if (deco.domain) {
        if (deco.domain.length > MAX_DOMAIN_CHARS) {
            /* Keep only whole labels from the right end of the domain. */
            const idx = deco.domain.indexOf(".", deco.domain.length -
                                            MAX_DOMAIN_CHARS);
            if (idx === -1)
                deco.domain = [];
            else
                deco.domain = deco.domain.substring(idx + 1);

            deco.domain_truncated = true;
        }

        if (deco.path.length > MAX_URL_PATH_CHARS) {
            /*
             * NOTE(review): this drops only the last segment, so the
             * result may still exceed MAX_URL_PATH_CHARS - confirm
             * whether that is intended.
             */
            deco.path = deco.path.substring(0, deco.path.lastIndexOf("/"));
            deco.path_truncated = true;
        }
    }

    /* Domain is already an array if it got emptied above. */
    if (typeof deco.domain === "string") {
        deco.domain = deco.domain.split(".");
        /* Remove leftmost labels in excess of MAX_DOMAIN_LEN. */
        if (deco.domain.splice(0, deco.domain.length - MAX_DOMAIN_LEN).length
            > 0)
            deco.domain_truncated = true;
    }

    deco.path = deco.path.split("/").filter(s => s !== "");
    /* Segment count is only limited for URLs that have a domain. */
    if (deco.domain && deco.path.splice(MAX_URL_PATH_LEN).length > 0)
        deco.path_truncated = true;
    if (leading_dash || deco.path.length === 0)
        deco.path.unshift("");

    return deco;
}
83

    
84
/*
 * Generate domain patterns for a deconstructed URL.
 *
 * deco.domain is an array of domain labels. For each suffix of it that
 * has at least 2 labels, going from the longest one, this yields:
 *   - the full domain itself (only when no leading labels were removed
 *     and deco.domain_truncated is not set),
 *   - "*." + suffix when exactly one leading label was removed and
 *     deco.domain_truncated is not set,
 *   - "**." + suffix when more than one leading label was removed,
 *   - "***." + suffix in every case.
 * Nothing is yielded for domains with fewer than 2 labels.
 */
function* each_domain_pattern(deco)
{
    for (let slice = 0; slice < deco.domain.length - 1; slice++) {
        const domain_part = deco.domain.slice(slice).join(".");

        if (slice === 0 && !deco.domain_truncated)
            yield domain_part;
        if (slice === 1 && !deco.domain_truncated)
            yield "*." + domain_part;
        if (slice > 1)
            yield "**." + domain_part;
        yield "***." + domain_part;
    }
}
98

    
99
/*
 * Generate path patterns for a deconstructed URL.
 *
 * deco.path is an array of path segments. Prefixes of it are processed
 * from the longest one to the one with a single segment. For each prefix
 * (joined with "/") this yields:
 *   - for the full path, unless deco.path_truncated is set: the prefix
 *     followed by "/" (only when deco.trailing_dash is set) and then the
 *     prefix itself,
 *   - prefix + "/*" when exactly one segment was dropped, unless
 *     deco.path_truncated is set or the dropped segment is literally "*",
 *   - prefix + "/**" when more than one segment was dropped,
 *   - prefix + "/***", except when exactly one segment was dropped,
 *     deco.path_truncated is not set and that segment is literally "***".
 */
function* each_path_pattern(deco)
{
    for (let slice = deco.path.length; slice > 0; slice--) {
        const path_part = deco.path.slice(0, slice).join("/");

        if (slice === deco.path.length && !deco.path_truncated) {
            if (deco.trailing_dash)
                yield path_part + "/";
            yield path_part;
        }
        if (slice === deco.path.length - 1 && !deco.path_truncated &&
            deco.path[slice] !== "*")
            yield path_part + "/*";
        if (slice < deco.path.length - 1)
            yield path_part + "/**";
        if (slice !== deco.path.length - 1 || deco.path_truncated ||
            deco.path[slice] !== "***")
            yield path_part + "/***";
    }
}
119

    
120
/*
 * Generate every possible pattern that matches url.
 *
 * Each yielded pattern is "<proto>://<domain pattern><path pattern>". Every
 * domain pattern is combined with every path pattern. URLs without
 * a domain use an empty domain part.
 *
 * If url cannot be deconstructed, an error is logged and the generator
 * finishes without yielding anything.
 */
function* each_url_pattern(url)
{
    const deco = deconstruct_url(url);

    if (deco === undefined) {
        console.error("bad url format", url);
        return false;
    }

    const all_domains = deco.domain ? each_domain_pattern(deco) : [""];
    for (const domain of all_domains) {
        for (const path of each_path_pattern(deco))
            yield `${deco.proto}://${domain}${path}`;
    }
}
136

    
137
/*
 * EXPORTS_START
 * EXPORT each_url_pattern
 * EXPORTS_END
 */
(8-8/16)