Project

General

Profile

« Previous | Next » 

Revision 17adc623

Added by koszko over 1 year ago

optimize Pattern Query Tree for size of its JSON.stringify()'ed representation

View differences:

common/patterns_query_tree.js
3 3
 *
4 4
 * Function: Data structure to query items by URL patterns.
5 5
 *
6
 * Copyright (C) 2021 Wojtek Kosior
6
 * Copyright (C) 2021, 2022 Wojtek Kosior <koszko@koszko.org>
7 7
 *
8 8
 * This program is free software: you can redistribute it and/or modify
9 9
 * it under the terms of the GNU General Public License as published by
......
41 41
 * proprietary program, I am not going to enforce this in court.
42 42
 */
43 43

  
44
// TODO! Modify the code to use `Object.create(null)` instead of `{}`.
45

  
46 44
#FROM common/patterns.js IMPORT deconstruct_url
47 45

  
48 46
/* "Pattern Tree" is how we refer to the data structure used for querying
......
53 51
const pattern_tree_make = () => ({})
54 52
#EXPORT  pattern_tree_make  AS make
55 53

  
56
function empty_node() {
57
    return {
58
	wildcard_matches: [null, null, null],
59
	literal_match:    null,
60
	children:         {}
61
    };
62
}
54
const empty_node = () => ({});
63 55

  
64 56
function is_empty_node(tree_node) {
65
    const children = tree_node.children;
66
    for (const key in children) {
67
	if (children.hasOwnProperty(key))
68
	    return false;
69
    }
70

  
71
    if (tree_node.wildcard_matches.reduce((a, b) => b && a !== null, 1))
57
    for (const key in tree_node)
72 58
	return false;
73 59

  
74
    return tree_node.literal_match === null;
60
    return true;
75 61
}
76 62

  
63
const wildcard_matches = node => [node["*"], node["**"], node["***"]];
64

  
77 65
const is_wildcard = segment => ["*", "**", "***"].lastIndexOf(segment) >= 0;
78 66

  
67
/*
68
 * Remove reference to given child fron node and leave the node in consistent
69
 * state afterwards, i.e. remove the "c" property if no childs are left.
70
 */
71
function delete_child(node, child_name) {
72
    if (node.c) {
73
	delete node.c[child_name];
74

  
75
	for (const key in node.c)
76
	    return;
77

  
78
	delete node.c;
79
    }
80
}
81

  
79 82
/*
80 83
 * Yields all matches of this segments sequence against the tree that starts at
81 84
 * this node. Results are produces in order from greatest to lowest pattern
82 85
 * specificity.
83 86
 */
84
function* search_sequence(tree_node, segments)
85
{
87
function* search_sequence(tree_node, segments) {
86 88
    const nodes = [tree_node];
87 89

  
88
    for (const segment of segments) {
89
	const next_node = nodes[nodes.length - 1].children[segment];
90
	if (next_node === undefined)
90
    for (const current_segment of segments) {
91
	const children = nodes[nodes.length - 1].c || {};
92
	if (!Object.hasOwnProperty.call(children, current_segment))
91 93
	    break;
92 94

  
93
	nodes.push(next_node);
95
	nodes.push(children[current_segment]);
94 96
    }
95 97

  
96 98
    const nsegments = segments.length;
......
106 108

  
107 109
    while (nodes.length) {
108 110
	const node = nodes.pop();
109
	const items = [node.literal_match, ...node.wildcard_matches];
111
	const literal_match = node.l;
112
	const items = [literal_match, ...wildcard_matches(node)];
110 113

  
111
	for (let i = 0; i < 4; i++) {
112
	    if (items[i] !== null && conds[i]())
114
	for (let i = 0; i < items.length; i++) {
115
	    if (items[i] !== undefined && conds[i]())
113 116
		yield items[i];
114 117
	}
115 118
    }
......
129 132
 * segments and value returned by item_modifier is not `null`, make the value
130 133
 * queryable by this path.
131 134
 */
132
function modify_sequence(tree_node, segments, item_modifier)
133
{
135
function modify_sequence(tree_node, segments, item_modifier) {
134 136
    const nodes = [tree_node];
135 137
    let removed = true;
136 138

  
137 139
    for (var current_segment of segments) {
138
	const child = tree_node.children[current_segment] || empty_node();
139
	tree_node.children[current_segment] = child;
140
	const children = tree_node.c || {};
141
	tree_node.c = children;
142

  
143
	const child = Object.hasOwnProperty.call(children, current_segment) ?
144
	      children[current_segment] : empty_node();
145
	children[current_segment] = child;
146

  
140 147
	tree_node = child;
141 148
	nodes.push(tree_node);
142 149
    }
143 150

  
144
    tree_node.literal_match = item_modifier(tree_node.literal_match);
145
    if (tree_node.literal_match !== null)
151
    tree_node.l = item_modifier(tree_node.l || null);
152
    if (tree_node.l === null)
153
	delete tree_node.l;
154
    else
146 155
	removed = false;
147 156

  
148 157
    let i = segments.length;
149 158

  
150 159
    if (is_wildcard(current_segment)) {
151
	const asterisks = current_segment.length - 1;
152
	const wildcards = nodes[i - 1].wildcard_matches;
153
	wildcards[asterisks] = item_modifier(wildcards[asterisks]);
154
	if (wildcards[asterisks] !== null)
160
	nodes[i - 1][current_segment] =
161
	    item_modifier(nodes[i - 1][current_segment] || null);
162
	if (nodes[i - 1][current_segment] === null)
163
	    delete nodes[i - 1][current_segment];
164
	else
155 165
	    removed = false;
156 166
    }
157 167

  
158
    while (!removed)
168
    if (!removed)
159 169
	return;
160 170

  
161 171
    while (i > 0) {
162 172
	tree_node = nodes[i--];
163 173
	if (is_empty_node(tree_node))
164
	    delete nodes[i].children[segments[i]];
174
	    delete_child(nodes[i], segments[i]);
175
	else
176
	    break;
165 177
    }
166 178
}
167 179

  
168 180
/* Helper function for modify_tree(). */
169
function modify_path(tree_node, deco, item_modifier)
170
{
181
function modify_path(tree_node, deco, item_modifier) {
171 182
    tree_node = tree_node || empty_node();
172 183
    modify_sequence(tree_node, deco.path, item_modifier);
173 184
    return is_empty_node(tree_node) ? null : tree_node;
174 185
}
175 186

  
176 187
/* Helper function for modify_tree(). */
177
function modify_domain(tree_node, deco, item_modifier)
178
{
188
function modify_domain(tree_node, deco, item_modifier) {
179 189
    const path_modifier = branch => modify_path(branch, deco, item_modifier);
180 190
    tree_node = tree_node || empty_node();
181 191
    /* We need an array of domain labels ordered most-significant-first. */
......
184 194
}
185 195

  
186 196
/* Helper function for pattern_tree_register() and pattern_tree_deregister(). */
187
function modify_tree(patterns_by_proto, pattern, item_modifier)
188
{
197
function modify_tree(patterns_by_proto, pattern, item_modifier) {
189 198
    /*
190 199
     * We pass 'false' to disable length limits on URL parts. Length limits are
191 200
     * mostly useful in case of iteration over all patterns matching given URL.
......
208 217
 * Make item queryable through the Pattern Tree that starts with the protocols
209 218
 * dictionary object passed in the first argument.
210 219
 */
211
function pattern_tree_register(patterns_by_proto, pattern, item_name, item)
212
{
220
function pattern_tree_register(patterns_by_proto, pattern, item_name, item) {
213 221
    const key_prefix = pattern[pattern.length - 1] === '/' ? '/' : '_';
214 222
    item_name = key_prefix + item_name;
215 223
    const add_item = obj => Object.assign(obj || {}, {[item_name]: item});
......
218 226
#EXPORT  pattern_tree_register  AS register
219 227

  
220 228
/* Helper function for pattern_tree_deregister(). */
221
function _remove_item(obj, item_name)
222
{
229
function _remove_item(obj, item_name) {
223 230
    obj = obj || {};
224 231
    delete obj[item_name];
225 232
    for (const key in obj)
......
233 240
 * should be pattern and name that have been earlier passed to
234 241
 * pattern_tree_register().
235 242
 */
236
function pattern_tree_deregister(patterns_by_proto, pattern, item_name)
237
{
243
function pattern_tree_deregister(patterns_by_proto, pattern, item_name) {
238 244
    const key_prefix = pattern[pattern.length - 1] === '/' ? '/' : '_';
239 245
    item_name = key_prefix + item_name;
240 246
    const remove_item = obj => _remove_item(obj, item_name);
......
244 250

  
245 251
/*
246 252
 * Yield registered items that match url. Each yielded value is an object with
247
 * key being matched item names and values being the items. One such object
253
 * keys being matched item names and values being the items. One such object
248 254
 * shall contain all items matched with given pattern specificity. Objects are
249 255
 * yielded in order from greatest to lowest pattern specificity.
250 256
 */
251
function* pattern_tree_search(patterns_by_proto, url)
252
{
257
function* pattern_tree_search(patterns_by_proto, url) {
253 258
    const deco = deconstruct_url(url, false);
254 259

  
255 260
    const tree_for_proto = patterns_by_proto[deco.proto] || empty_node();
test/haketilo_test/unit/test_patterns_query_tree.py
6 6

  
7 7
# This file is part of Haketilo
8 8
#
9
# Copyright (C) 2021, Wojtek Kosior
9
# Copyright (C) 2021, 2022 Wojtek Kosior <koszko@koszko.org>
10 10
#
11 11
# This program is free software: you can redistribute it and/or modify
12 12
# it under the terms of the CC0 1.0 Universal License as published by
......
70 70
        returnval(branch);
71 71
        }''')
72 72
    assert branch == {
73
        'literal_match': None,
74
        'wildcard_matches': [None, None, None],
75
        'children': {
73
        'c': {
76 74
            'com': {
77
                'literal_match': None,
78
                'wildcard_matches': [None, None, None],
79
                'children': {
75
                'c': {
80 76
                    'example': {
81
                        'literal_match': ['some_item'],
82
                        'wildcard_matches': [None, None, None],
83
                        'children': {
84
                        }
77
                        'l': ['some_item']
85 78
                    }
86 79
                }
87 80
            }
......
95 88
        returnval([branch, items_added]);
96 89
        }''', branch)
97 90
    assert items_added == 1
98
    assert branch['children']['com']['children']['example']['literal_match'] \
99
            == ['some_item', 'other_item']
91
    assert branch['c']['com']['c']['example']['l'] \
92
        == ['some_item', 'other_item']
100 93

  
101 94
    for i in range(3):
102 95
        for expected_array in [['third_item'], ['third_item', '4th_item']]:
......
110 103
                }''',
111 104
                branch, wildcard, expected_array[-1])
112 105
            assert items_added == 2
113
            sample = branch['children']['com']['children']['sample']
114
            assert sample['wildcard_matches'][i] == expected_array
115
            assert sample['children'][wildcard]['literal_match'] \
116
                == expected_array
106
            sample = branch['c']['com']['c']['sample']
107
            assert sample[wildcard] == expected_array
108
            assert sample['c'][wildcard]['l'] == expected_array
117 109

  
118 110
    branch, items_added = execute_in_page(
119 111
        '''{
......
124 116
        }''',
125 117
        branch)
126 118
    assert items_added == 1
127
    assert branch['children']['org']['children']['koszko']['children']['***']\
128
        ['children']['123']['literal_match'] == ['5th_item']
119
    assert branch['c']['org']['c']['koszko']['c']['***']['c']['123']['l'] \
120
        == ['5th_item']
129 121

  
130 122
    # Let's verify that removing a nonexistent element doesn't modify the tree.
131 123
    branch2, items_removed = execute_in_page(
......
150 142
        }''',
151 143
        branch)
152 144
    assert items_removed == 1
153
    assert 'org' not in branch['children']
145
    assert 'org' not in branch['c']
154 146

  
155 147
    for i in range(3):
156 148
        for expected_array in [['third_item'], None]:
......
166 158
            assert items_removed == 2
167 159
            if i == 2 and expected_array == []:
168 160
                break
169
            sample = branch['children']['com']['children'].get('sample', {})
170
            assert sample.get('wildcard_matches', [None, None, None])[i] \
161
            sample = branch['c']['com']['c'].get('sample', {})
162
            assert sample.get(wildcard) == expected_array
163
            assert sample.get('c', {}).get(wildcard, {}).get('l') \
171 164
                == expected_array
172
            assert sample.get('children', {}).get(wildcard, {})\
173
                .get('literal_match') == expected_array
174 165

  
175 166
    for i in range(2):
176 167
        branch, items_removed = execute_in_page(
......
182 173
            branch)
183 174
        assert items_removed == 1
184 175
        if i == 0:
185
            assert branch['children']['com']['children']['example']\
186
                ['literal_match'] == ['some_item']
187
        else:
188
            assert branch == {
189
                'literal_match': None,
190
                'wildcard_matches': [None, None, None],
191
                'children': {
192
                }
193
            }
176
            assert branch['c']['com']['c']['example']['l'] == ['some_item']
177

  
178
    assert branch == {}
194 179

  
195 180
@pytest.mark.get_page('https://gotmyowndoma.in')
196 181
def test_search_branch(execute_in_page):

Also available in: Unified diff