Project

General

Profile

« Previous | Next » 

Revision 69e53743

Added by koszko almost 2 years ago

implement more efficient querying of URL patterns

View differences:

common/patterns.js
7 7
 * Redistribution terms are gathered in the `copyright' file.
8 8
 */
9 9

  
10
const MAX_URL_PATH_LEN = 12;
11
const MAX_URL_PATH_CHARS = 255;
12
const MAX_DOMAIN_LEN = 7;
13
const MAX_DOMAIN_CHARS = 100;
10
const MAX = {
11
    URL_PATH_LEN:   12,
12
    URL_PATH_CHARS: 255,
13
    DOMAIN_LEN:     7,
14
    DOMAIN_CHARS:   100
15
};
14 16

  
15 17
const proto_regex = /^(\w+):\/\/(.*)$/;
16 18

  
......
25 27

  
26 28
const ftp_regex = new RegExp(`^(${user_re})?(${domain_re})(${path_re}).*`);
27 29

  
28
function deconstruct_url(url)
30
function deconstruct_url(url, use_limits=true)
29 31
{
32
    const max = MAX;
33
    if (!use_limits) {
34
	for (key in MAX)
35
	    max[key] = Infinity;
36
    }
37

  
30 38
    const proto_match = proto_regex.exec(url);
31 39
    if (proto_match === null)
32
	return undefined;
40
	throw `bad url '${url}'`;
33 41

  
34 42
    const deco = {proto: proto_match[1]};
35 43

  
......
37 45
	deco.path = file_regex.exec(proto_match[2])[1];
38 46
    } else if (deco.proto === "ftp") {
39 47
	[deco.domain, deco.path] = ftp_regex.exec(proto_match[2]).slice(2, 4);
40
    } else {
48
    } else if (deco.proto === "http" || deco.proto === "https") {
41 49
	const http_match = http_regex.exec(proto_match[2]);
42 50
	if (!http_match)
43 51
	    return undefined;
44 52
	[deco.domain, deco.path, deco.query] = http_match.slice(1, 4);
53
    } else {
54
	throw `unsupported protocol in url '${url}'`;
45 55
    }
46 56

  
47
    const leading_dash = deco.path[0] === "/";
48 57
    deco.trailing_dash = deco.path[deco.path.length - 1] === "/";
49 58

  
50 59
    if (deco.domain) {
51
	if (deco.domain.length > MAX_DOMAIN_CHARS) {
60
	if (deco.domain.length > max.DOMAIN_CHARS) {
52 61
	    const idx = deco.domain.indexOf(".", deco.domain.length -
53
					    MAX_DOMAIN_CHARS);
62
					    max.DOMAIN_CHARS);
54 63
	    if (idx === -1)
55 64
		deco.domain = [];
56 65
	    else
......
59 68
	    deco.domain_truncated = true;
60 69
	}
61 70

  
62
	if (deco.path.length > MAX_URL_PATH_CHARS) {
71
	if (deco.path.length > max.URL_PATH_CHARS) {
63 72
	    deco.path = deco.path.substring(0, deco.path.lastIndexOf("/"));
64 73
	    deco.path_truncated = true;
65 74
	}
......
67 76

  
68 77
    if (typeof deco.domain === "string") {
69 78
	deco.domain = deco.domain.split(".");
70
	if (deco.domain.splice(0, deco.domain.length - MAX_DOMAIN_LEN).length
79
	if (deco.domain.splice(0, deco.domain.length - max.DOMAIN_LEN).length
71 80
	    > 0)
72 81
	    deco.domain_truncated = true;
73 82
    }
74 83

  
75 84
    deco.path = deco.path.split("/").filter(s => s !== "");
76
    if (deco.domain && deco.path.splice(MAX_URL_PATH_LEN).length > 0)
85
    if (deco.domain && deco.path.splice(max.URL_PATH_LEN).length > 0)
77 86
	deco.path_truncated = true;
78
    if (leading_dash || deco.path.length === 0)
79
	deco.path.unshift("");
80 87

  
81 88
    return deco;
82 89
}
......
98 105

  
99 106
function* each_path_pattern(deco)
100 107
{
101
    for (let slice = deco.path.length; slice > 0; slice--) {
102
	const path_part = deco.path.slice(0, slice).join("/");
108
    for (let slice = deco.path.length; slice >= 0; slice--) {
109
	const path_part = ["", ...deco.path.slice(0, slice)].join("/");
103 110
	const path_wildcards = [];
104 111
	if (slice === deco.path.length && !deco.path_truncated) {
105
	    if (deco.trailing_dash)
112
	    if (deco.trailing_dash && path_part !== "")
106 113
		yield path_part + "/";
107
	    yield path_part;
114
	    if (path_part !== "" || deco.proto !== "file")
115
		yield path_part;
108 116
	}
109 117
	if (slice === deco.path.length - 1 && !deco.path_truncated &&
110 118
	    deco.path[slice] !== "*")
......
137 145
/*
138 146
 * EXPORTS_START
139 147
 * EXPORT each_url_pattern
148
 * EXPORT deconstruct_url
140 149
 * EXPORTS_END
141 150
 */

Also available in: Unified diff