Project

General

Profile

Download (11.5 KB) Statistics
| Branch: | Tag: | Revision:

haketilo / common / sanitize_JSON.js @ 263d03d5

1
/**
2
 * This file is part of Haketilo.
3
 *
4
 * Function: Powerful, full-blown format enforcer for externally-obtained JSON.
5
 *
6
 * Copyright (C) 2021 Wojtek Kosior
7
 *
8
 * This program is free software: you can redistribute it and/or modify
9
 * it under the terms of the GNU General Public License as published by
10
 * the Free Software Foundation, either version 3 of the License, or
11
 * (at your option) any later version.
12
 *
13
 * This program is distributed in the hope that it will be useful,
14
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16
 * GNU General Public License for more details.
17
 *
18
 * As additional permission under GNU GPL version 3 section 7, you
19
 * may distribute forms of that code without the copy of the GNU
20
 * GPL normally required by section 4, provided you include this
21
 * license notice and, in case of non-source distribution, a URL
22
 * through which recipients can access the Corresponding Source.
23
 * If you modify file(s) with this exception, you may extend this
24
 * exception to your version of the file(s), but you are not
25
 * obligated to do so. If you do not wish to do so, delete this
26
 * exception statement from your version.
27
 *
28
 * As a special exception to the GPL, any HTML file which merely
29
 * makes function calls to this code, and for that purpose
30
 * includes it by reference shall be deemed a separate work for
31
 * copyright law purposes. If you modify this code, you may extend
32
 * this exception to your version of the code, but you are not
33
 * obligated to do so. If you do not wish to do so, delete this
34
 * exception statement from your version.
35
 *
36
 * You should have received a copy of the GNU General Public License
37
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
38
 *
39
 * I, Wojtek Kosior, thereby promise not to sue for violation of this file's
40
 * license. Although I request that you do not make use this code in a
41
 * proprietary program, I am not going to enforce this in court.
42
 */
43

    
44
/*
 * Module-level state shared by the sanitizer functions while a single
 * parse_json_with_schema() call is in progress.
 */

/* Path segments (like ".key" or "[0]") joined into error messages. */
var error_path;
/* Set to true before throwing when the schema itself was not understood. */
var invalid_schema;
46

    
47
/*
 * Parse `json_string` and sanitize the parsed value against `schema`.
 *
 * Returns the sanitized value. On any failure (malformed JSON, data that does
 * not fit the schema, or a schema that was not understood) a string is thrown
 * that starts with "Invalid JSON" or, for schema problems, with
 * "Invalid JSON schema".
 */
function parse_json_with_schema(schema, json_string)
{
    error_path = [];
    invalid_schema = false;

    try {
        const parsed = JSON.parse(json_string);
        return sanitize_unknown(schema, parsed);
    } catch (e) {
        const what = invalid_schema ? "Invalid JSON schema" : "Invalid JSON";
        throw `${what}: ${e}.`;
    } finally {
        /* Allow garbage collection. */
        error_path = undefined;
    }
}
61

    
62
/*
 * Build an error message that names the item currently being sanitized. The
 * location is "object" followed by all segments collected in `error_path`.
 */
function error_message(cause)
{
    const location = error_path.join("");
    return `object${location} ${cause}`;
}
66

    
67
/*
 * Sanitize `item` against `schema`, which may list alternatives:
 *     [option1, "or", option2, ..., "or", optionN]
 *     [option1, "or", option2, ..., "ordefault", default_value]
 * Options are tried in order and the first one that accepts `item` wins. With
 * "ordefault", `default_value` is returned when no option accepts the item.
 * Without it, the thrown message joins the failures of all options.
 *
 * Any other schema is handed to sanitize_unknown_no_alternatives() directly.
 */
function sanitize_unknown(schema, item)
{
    if (!Array.isArray(schema) || schema[1] === "matchentry" ||
        schema.length < 2 || !["ordefault", "or"].includes(schema[1]))
        return sanitize_unknown_no_alternatives(schema, item);

    /* Options and keywords alternate, so the total length has to be odd. */
    if ((schema.length & 1) !== 1) {
        invalid_schema = true;
        throw error_message("was not understood");
    }

    const schema_options = [];
    let has_default = false;
    let _default = undefined;

    for (let i = 0; i < schema.length; i++) {
        /* Even positions hold options, odd positions hold keywords. */
        if ((i & 1) !== 1) {
            schema_options.push(schema[i]);
            continue;
        }

        if (schema[i] === "or")
            continue;
        /* "ordefault" is only valid as the second to last element. */
        if (schema[i] === "ordefault" && schema.length === i + 2) {
            has_default = true;
            _default = schema[i + 1];
            break;
        }

        invalid_schema = true;
        throw error_message("was not understood");
    }

    /*
     * Nested sanitizers push a path segment and only pop it on success. When
     * an option fails, forget the segments it left behind. Otherwise messages
     * produced later would point at the wrong place.
     */
    const path_depth = error_path.length;
    let error_msg = undefined;

    for (const schema_option of schema_options) {
        try {
            return sanitize_unknown_no_alternatives(schema_option, item);
        } catch (e) {
            if (invalid_schema)
                throw e;

            error_path.length = path_depth;

            if (has_default)
                continue;

            error_msg = error_msg === undefined ? e : `${error_msg}, or ${e}`;
        }
    }

    if (has_default)
        return _default;

    throw error_msg;
}
123

    
124
/*
 * Sanitize `item` against a schema that contains no alternatives. The first
 * row of `checks` whose schema check accepts `schema` decides both the type
 * `item` must have and the sanitizer that processes it.
 *
 * Throws a message string if `item` has the wrong type. If no row recognizes
 * the schema, `invalid_schema` is set and a message string is thrown.
 */
function sanitize_unknown_no_alternatives(schema, item)
{
    const matching = checks.find(([schema_check]) => schema_check(schema));

    if (matching === undefined) {
        invalid_schema = true;
        throw error_message("was not understood");
    }

    const [, item_check, sanitizer, type_name] = matching;

    if (!item_check(item))
        throw error_message(`should be ${type_name} but is not`);

    return sanitizer(schema, item);
}
137

    
138
/*
 * Turn an object key into an error path segment: ".key" when the key consists
 * only of ASCII letters, digits and underscores and does not start with a
 * digit, and `["key"]` (JSON-quoted) otherwise.
 */
function key_error_path_segment(key)
{
    const is_simple_name = /^[a-zA-Z_][a-zA-Z_0-9]*$/.test(key);

    return is_simple_name ? `.${key}` : `[${JSON.stringify(key)}]`;
}
143

    
144
/*
 * Generic object - one that can contain arbitrary keys (in addition to ones
 * specified explicitly in the schema).
 *
 * `schema` is an array of the form
 *     [object_schema, "matchentry",
 *      "minentries", N,            (optional, must come before "maxentries")
 *      "maxentries", N,            (optional)
 *      key_regex, entry_schema,    (any number of such pairs)
 *      entry_schema]               (optional, applies to keys matching /.+/)
 * Keys named in `object_schema` are handled by sanitize_object(). Every other
 * key of `object` counts as a matched entry and is sanitized with the entry
 * schema of the first regex that matches it. Regexes can be given as strings.
 *
 * Returns the sanitized object. Throws a message string on failure.
 */
function sanitize_genobj(schema, object)
{
    let max_matched_entries = Infinity;
    let min_matched_entries = 0;
    let matched_entries = 0;
    const entry_schemas = [];
    /* Work on a copy - consumed elements get spliced out below. */
    schema = [...schema];

    if (schema[2] === "minentries") {
        if (schema.length < 4) {
            invalid_schema = true;
            throw error_message("was not understood");
        }

        min_matched_entries = schema[3];
        schema.splice(2, 2);
    }

    if (min_matched_entries < 0) {
        invalid_schema = true;
        throw error_message('specifies invalid "minentries" (should be a non-negative number)');
    }

    if (schema[2] === "maxentries") {
        if (schema.length < 4) {
            invalid_schema = true;
            throw error_message("was not understood");
        }

        max_matched_entries = schema[3];
        schema.splice(2, 2);
    }

    if (max_matched_entries < 0) {
        invalid_schema = true;
        throw error_message('specifies invalid "maxentries" (should be a non-negative number)');
    }

    /* Collect [regex, entry_schema] pairs from what is left after index 1. */
    while (schema.length > 2) {
        let regex = /.+/;

        /* A lone trailing element is an entry schema using the default regex. */
        if (schema.length > 3) {
            regex = schema[2];
            schema.splice(2, 1);
        }

        if (typeof regex === "string")
            regex = new RegExp(regex);

        entry_schemas.push([regex, schema[2]]);
        schema.splice(2, 1);
    }

    const result = sanitize_object(schema[0], object);

    /*
     * `result` receives keys taken from the input, possibly including one
     * named "hasOwnProperty". Never call the method through `result` itself.
     */
    const has_own = Object.prototype.hasOwnProperty;

    for (const key of Object.keys(object)) {
        /* Already handled through the explicit part of the schema. */
        if (has_own.call(result, key))
            continue;

        matched_entries += 1;
        if (matched_entries > max_matched_entries)
            throw error_message(`has more than ${max_matched_entries} matched entr${max_matched_entries === 1 ? "y" : "ies"}`);

        error_path.push(key_error_path_segment(key));

        const matching =
              entry_schemas.find(([key_regex]) => key_regex.exec(key));

        if (matching === undefined) {
            const regex_list = entry_schemas.map(i => i[0]).join(", ");
            throw error_message(`does not match any of key regexes: [${regex_list}]`);
        }

        sanitize_object_entry(result, key, matching[1], object);

        error_path.pop();
    }

    if (matched_entries < min_matched_entries)
        throw error_message(`has less than ${min_matched_entries} matched entr${min_matched_entries === 1 ? "y" : "ies"}`);

    return result;
}
237

    
238
/*
 * Sanitize `array` against an array schema of the form
 *     [item_schema..., ("repeat" | "repeatfull", (group_length)?)?,
 *      ("minlen", N)?, ("maxlen", N)?]
 * Without a repeat directive the array must have exactly as many items as
 * there are item schemas. With one, the last `group_length` (default 1) item
 * schemas are cycled over the items that follow the non-repeated ones.
 * "repeatfull" additionally requires a whole number of group repetitions.
 *
 * Returns a new array of sanitized items (items sanitized to `discard` are
 * left out). Throws a message string on failure.
 */
function sanitize_array(schema, array)
{
    let min_length = 0;
    let max_length = Infinity;
    let repeat_length = 1;
    let repeat_directive = undefined;
    let repeat = undefined;
    const result = [];

    /* Work on a copy - trailing directives get removed as they are read. */
    schema = [...schema];
    if (schema[schema.length - 2] === "maxlen") {
        max_length = schema[schema.length - 1];
        schema.splice(schema.length - 2);
    }

    if (schema[schema.length - 2] === "minlen") {
        min_length = schema[schema.length - 1];
        schema.splice(schema.length - 2);
    }

    if (["repeat", "repeatfull"].includes(schema[schema.length - 2]))
        repeat_length = schema.pop();
    if (repeat_length < 1) {
        /* The length was popped above, so the directive is the last element. */
        invalid_schema = true;
        throw error_message(`specifies invalid "${schema[schema.length - 1]}" (should be a number not smaller than 1)`);
    }
    if (["repeat", "repeatfull"].includes(schema[schema.length - 1])) {
        repeat_directive = schema.pop();
        /* What remains in `schema` describes the non-repeated leading items. */
        repeat = schema.splice(schema.length - repeat_length);
    } else if (schema.length !== array.length) {
        throw error_message(`does not have exactly ${schema.length} items`);
    }

    if (repeat_directive === "repeatfull" &&
        (array.length - schema.length) % repeat_length !== 0)
        throw error_message(`does not contain a full number of item group repetitions`);

    if (array.length < min_length)
        throw error_message(`has less than ${min_length} element${min_length === 1 ? "" : "s"}`);

    if (array.length > max_length)
        throw error_message(`has more than ${max_length} element${max_length === 1 ? "" : "s"}`);

    /* Position within the item schemas currently in use. */
    let schema_idx = 0;

    for (const [array_idx, item] of array.entries()) {
        if (schema_idx >= schema.length) {
            schema_idx = 0;
            schema = repeat;
        }

        /* Report the position in the array, not within the repeated group. */
        error_path.push(`[${array_idx}]`);
        const sanitized = sanitize_unknown(schema[schema_idx], item);
        if (sanitized !== discard)
            result.push(sanitized);
        error_path.pop();

        schema_idx++;
    }

    return result;
}
297

    
298
/*
 * Accept `string` if it matches the regex given as `schema`.
 *
 * Returns `string` unchanged. Throws a message string if it does not match.
 */
function sanitize_regex(schema, string)
{
    /*
     * A global or sticky RegExp resumes matching at its `lastIndex`, which
     * survives between calls. Start from the beginning every time so that a
     * reused schema gives the same verdict for the same string.
     */
    if (schema instanceof RegExp && (schema.global || schema.sticky))
        schema.lastIndex = 0;

    if (schema.test(string))
        return string;

    throw error_message(`does not match regex ${schema}`);
}
305

    
306
/* Matches "string" and "string:<regex>". Group 2 captures the <regex> part. */
const string_spec_regex = /^string(:(.*))?$/;

/*
 * Accept `string` against a "string" or "string:<regex>" schema. In the
 * latter case the string has to match <regex>.
 *
 * Returns `string` unchanged. Throws (through sanitize_regex()) otherwise.
 */
function sanitize_string(schema, string)
{
    const [, , pattern] = string_spec_regex.exec(schema);

    if (pattern === undefined)
        return string;

    return sanitize_regex(new RegExp(pattern), string);
}
317

    
318
/*
 * Sanitize `object` against an object schema. For every key of `schema` the
 * corresponding entry of `object` is processed by sanitize_object_entry().
 * Keys of `object` that `schema` does not name are not copied.
 *
 * Returns a new object holding the sanitized entries.
 */
function sanitize_object(schema, object)
{
    const result = {};

    for (const [key, entry_schema] of Object.entries(schema)) {
        error_path.push(key_error_path_segment(key));
        sanitize_object_entry(result, key, entry_schema, object);
        error_path.pop();
    }

    return result;
}
330

    
331
/*
 * Sanitize the entry `key` of `object` against `entry_schema` and store the
 * outcome in `result`.
 *
 * `entry_schema` may be wrapped as
 *     ["optional", schema...]
 *     ["optional", schema..., "default", default_value]
 * to permit the entry to be absent. In the second form `default_value` gets
 * stored when it is. A missing entry that is not optional causes a message
 * string to be thrown. Values sanitized to `discard` are not stored.
 */
function sanitize_object_entry(result, key, entry_schema, object)
{
    let optional = false;
    let has_default = false;
    let _default = undefined;

    if (Array.isArray(entry_schema) && entry_schema.length > 1 &&
        entry_schema[0] === "optional") {
        optional = true;
        entry_schema = entry_schema.slice(1);

        /* Last odd index - the only place "default" can appear at. */
        const idx_def = entry_schema.length - (entry_schema.length & 1) - 1;
        if (entry_schema[idx_def] === "default") {
            has_default = true;
            _default = entry_schema[idx_def + 1];
            entry_schema.splice(idx_def);
        } else if ((entry_schema.length & 1) !== 1) {
            invalid_schema = true;
            throw error_message("was not understood");
        }

        /* A single remaining element is the schema itself. */
        if (entry_schema.length < 2)
            entry_schema = entry_schema[0];
    }

    /*
     * `key` may come straight from the input. A plain assignment with the
     * key "__proto__" would replace the prototype of `result` instead of
     * creating an entry. Always define an own property.
     */
    const store = value => Object.defineProperty(result, key, {
        value,
        writable: true,
        enumerable: true,
        configurable: true
    });

    /* Only consider own entries, never inherited ones like "constructor". */
    const present = Object.prototype.hasOwnProperty.call(object, key);
    const unsanitized_value = present ? object[key] : undefined;

    if (unsanitized_value === undefined) {
        if (!optional)
            throw error_message("is missing");

        if (has_default)
            store(_default);

        return;
    }

    const sanitized = sanitize_unknown(entry_schema, unsanitized_value);
    if (sanitized !== discard)
        store(sanitized);
}
372

    
373
/* Sanitizer that accepts `item` as is. `schema` is not consulted. */
function take_literal(schema, item)
{
    return item;
}
377

    
378
/*
 * This function is used like a symbol. Other parts of code do sth like
 * `item === discard` to check if item was returned by this function.
 * Both arguments are ignored.
 */
function discard(schema, item)
{
    return discard;
}
386

    
387
/*
388
 * The following are some helper functions to categorize various
389
 * schema item specifiers (used in the array below).
390
 */
391

    
392
/* Tell whether `item` is an array with "matchentry" as its second element. */
function is_genobj_spec(item)
{
    return Array.isArray(item) && item[1] === "matchentry";
}
396

    
397
/*
 * Tell whether `item` can be used as a regex, i.e. is an object with a
 * `test` method. `null` also has type "object" and is excluded explicitly.
 */
function is_regex(item)
{
    return typeof item === "object" && item !== null &&
        typeof item.test === "function";
}
401

    
402
/* Tell whether `item` is a string accepted by `string_spec_regex`. */
function is_string_spec(item)
{
    return typeof item === "string" && string_spec_regex.test(item);
}
406

    
407
/*
 * Tell whether `item` is an object in the JSON sense. `null` and arrays also
 * have type "object" but must not pass: a `null` item would make the object
 * sanitizers fail with a TypeError and an array would get validated as if it
 * were a key-value object.
 */
function is_object(item)
{
    return typeof item === "object" && item !== null && !Array.isArray(item);
}
411

    
412
/* Make a predicate that tells whether its argument is identical to `what`. */
function eq(what)
{
    return candidate => candidate === what;
}
416

    
417
/*
 * Dispatch table used by sanitize_unknown_no_alternatives(). Each row is
 *     [schema_check, item_check, sanitizer, type_name]
 * and the first row whose `schema_check` accepts the schema is used:
 * `item_check` verifies the item's type, `sanitizer` processes the item and
 * `type_name` appears in the "should be ... but is not" message.
 *
 * Array and null checks must go before object check.
 */
const checks = [
    [is_genobj_spec, is_object,                   sanitize_genobj, "an object"],
    [Array.isArray,  Array.isArray,               sanitize_array,  "an array"],
    [eq(null),       i => i === null,             take_literal,    "null"],
    [is_regex,       i => typeof i === "string",  sanitize_regex,  "a string"],
    [is_string_spec, i => typeof i === "string",  sanitize_string, "a string"],
    [is_object,      is_object,                   sanitize_object, "an object"],
    [eq("number"),   i => typeof i === "number",  take_literal,    "a number"],
    [eq("boolean"),  i => typeof i === "boolean", take_literal,    "a boolean"],
    [eq("anything"), i => true,                   take_literal,    "dummy"],
    [eq("discard"),  i => true,                   discard,         "dummy"]
];
430

    
431
/*
432
 * EXPORTS_START
433
 * EXPORT parse_json_with_schema
434
 * EXPORTS_END
435
 */
(9-9/16)