Project

General

Profile

Download (9.73 KB) Statistics
| Branch: | Tag: | Revision:

haketilo / common / sanitize_JSON.js @ 2fa41a54

1
/**
 * part of Hachette
 * Powerful, full-blown format enforcer for externally-obtained JSON
 *
 * Copyright (C) 2021 Wojtek Kosior
 * Redistribution terms are gathered in the `copyright' file.
 */
8

    
9
/*
 * Module-level state shared by all sanitizers during a single call to
 * parse_json_with_schema().
 *
 * error_path: list of path segment strings (like `.key` or `[3]`) leading to
 * the item currently being sanitized; joined to build error messages. It is
 * only an array while parse_json_with_schema() is running.
 *
 * invalid_schema: set to true when a failure is caused by the schema itself
 * being malformed rather than by the JSON being validated.
 */
var error_path;
var invalid_schema;
11

    
12
/*
 * Parse `json_string` and validate/sanitize the resulting value against
 * `schema`.
 *
 * Returns the sanitized value produced by sanitize_unknown().
 *
 * Throws a *string* (not an Error object) of the form
 * "Invalid JSON: <cause>." when the text cannot be parsed or does not conform
 * to the schema, or "Invalid JSON schema: <cause>." when the schema itself
 * was found to be malformed.
 */
function parse_json_with_schema(schema, json_string)
{
    /* Fresh per-call state used by the sanitizers to build error messages. */
    error_path = [];
    invalid_schema = false;

    try {
        return sanitize_unknown(schema, JSON.parse(json_string));
    } catch (e) {
        throw `Invalid JSON${invalid_schema ? " schema" : ""}: ${e}.`;
    } finally {
        /* Allow garbage collection. */
        error_path = undefined;
    }
}
26

    
27
/*
 * Build an error message for the item currently being sanitized: the word
 * "object", followed by all segments currently held in `error_path`, a space
 * and the given `cause`.
 */
function error_message(cause)
{
    const location = error_path.join("");
    return `object${location} ${cause}`;
}
31

    
32
/*
 * Sanitize `item` against `schema`, where `schema` may list alternatives:
 *
 *     [schema_a, "or", schema_b, ...]
 *     [schema_a, "or", schema_b, ..., "ordefault", default_value]
 *
 * Alternatives are tried in order and the result of the first one that
 * accepts `item` is returned. If none accepts it, `default_value` is returned
 * when "ordefault" was given; otherwise a string describing every failure is
 * thrown. Any schema that is not an alternatives list is passed straight to
 * sanitize_unknown_no_alternatives().
 *
 * A malformed alternatives list sets `invalid_schema` and throws.
 */
function sanitize_unknown(schema, item)
{
    /*
     * An alternatives list is recognized by its *second element* being "or"
     * or "ordefault". (Generic object specs have "matchentry" there and array
     * specs have something else, so both fall through to the plain path.)
     */
    const lists_alternatives = Array.isArray(schema) && schema.length >= 2 &&
          ["ordefault", "or"].includes(schema[1]);

    if (!lists_alternatives)
        return sanitize_unknown_no_alternatives(schema, item);

    /* Options and connectives interleave, so the length has to be odd. */
    if ((schema.length & 1) !== 1) {
        invalid_schema = true;
        throw error_message("was not understood");
    }

    const schema_options = [];
    let has_default = false;
    let _default = undefined;

    for (let i = 1; i < schema.length; i += 2) {
        schema_options.push(schema[i - 1]);

        if (schema[i] === "or")
            continue;

        /* "ordefault" is only valid as the last connective. */
        if (schema[i] === "ordefault" && schema.length === i + 2) {
            has_default = true;
            _default = schema[i + 1];
            break;
        }

        invalid_schema = true;
        throw error_message("was not understood");
    }

    /* Without "ordefault" the final element is itself an option. */
    if (!has_default)
        schema_options.push(schema[schema.length - 1]);

    /*
     * A failed alternative may leave path segments it pushed while descending
     * into the item; remember the current depth so they can be dropped before
     * the next alternative is tried.
     */
    const path_depth = error_path.length;
    let error_msg = undefined;

    for (const schema_option of schema_options) {
        try {
            return sanitize_unknown_no_alternatives(schema_option, item);
        } catch (e) {
            /* Schema errors are never recoverable by trying another option. */
            if (invalid_schema)
                throw e;

            error_path.length = path_depth;

            if (has_default)
                continue;

            error_msg = error_msg === undefined ? e : `${error_msg}, or ${e}`;
        }
    }

    if (has_default)
        return _default;

    throw error_msg;
}
88

    
89
/*
 * Sanitize `item` against a single (non-alternative) schema.
 *
 * The first row of `checks` whose schema predicate accepts `schema` decides
 * how the item is handled: if the row's item predicate accepts `item`, the
 * row's sanitizer is called and its result returned; otherwise a
 * "should be <type> but is not" message is thrown.
 *
 * If no row recognizes `schema`, `invalid_schema` is set and a
 * "was not understood" message is thrown.
 */
function sanitize_unknown_no_alternatives(schema, item)
{
    const row = checks.find(([schema_check]) => schema_check(schema));

    if (row === undefined) {
        invalid_schema = true;
        throw error_message("was not understood");
    }

    const [, item_check, sanitizer, type_name] = row;

    if (!item_check(item))
        throw error_message(`should be ${type_name} but is not`);

    return sanitizer(schema, item);
}
102

    
103
/*
 * Turn an object key into an error path segment: `.key` when the key looks
 * like a plain identifier (letters, digits and underscores, not starting
 * with a digit), `["key"]` (JSON-quoted) otherwise.
 */
function key_error_path_segment(key)
{
    const identifier_like = /^[a-zA-Z_][a-zA-Z_0-9]*$/.test(key);

    if (identifier_like)
        return `.${key}`;

    return `[${JSON.stringify(key)}]`;
}
108

    
109
/*
 * Generic object - one that can contain arbitrary keys (in addition to ones
 * specified explicitly in the schema).
 *
 * Schema layout, as consumed below:
 *
 *     [explicit_keys_schema, "matchentry",
 *      "minentries", n,            (optional)
 *      "maxentries", n,            (optional, after "minentries" if both)
 *      key_regex, entry_schema,    (zero or more pairs)
 *      fallback_entry_schema]      (optional; applies to keys matching /.+/)
 *
 * Keys described by `explicit_keys_schema` are handled by sanitize_object().
 * Every other own key of `object` counts as a "matched entry" and is
 * sanitized with the entry schema of the first key regex that matches it.
 * A key matching no regex, or an entry count outside the min/max limits,
 * makes this function throw an error message string.
 */
function sanitize_genobj(schema, object)
{
    const has_own = Object.prototype.hasOwnProperty;
    const entry_schemas = [];
    let min_matched_entries = 0;
    let max_matched_entries = Infinity;
    let matched_entries = 0;

    /* Work on a copy - directives get consumed from index 2 onwards. */
    schema = [...schema];

    if (schema[2] === "minentries") {
        if (schema.length < 4) {
            invalid_schema = true;
            throw error_message("was not understood");
        }

        min_matched_entries = schema[3];
        schema.splice(2, 2);
    }

    /* `!(x >= 0)` also rejects NaN. */
    if (typeof min_matched_entries !== "number" ||
        !(min_matched_entries >= 0)) {
        invalid_schema = true;
        throw error_message('specifies invalid "minentries" (should be a non-negative number)');
    }

    if (schema[2] === "maxentries") {
        if (schema.length < 4) {
            invalid_schema = true;
            throw error_message("was not understood");
        }

        max_matched_entries = schema[3];
        schema.splice(2, 2);
    }

    if (typeof max_matched_entries !== "number" ||
        !(max_matched_entries >= 0)) {
        invalid_schema = true;
        throw error_message('specifies invalid "maxentries" (should be a non-negative number)');
    }

    /*
     * Consume (key_regex, entry_schema) pairs. A single trailing element is
     * an entry schema without a regex of its own; it gets /.+/.
     */
    while (schema.length > 2) {
        let regex = /.+/;

        if (schema.length > 3) {
            regex = schema[2];
            schema.splice(2, 1);
        }

        if (typeof regex === "string")
            regex = new RegExp(regex);

        entry_schemas.push([regex, schema[2]]);
        schema.splice(2, 1);
    }

    const result = sanitize_object(schema[0], object);

    for (const key of Object.keys(object)) {
        /*
         * Keys already placed in the result by sanitize_object() are done.
         * hasOwnProperty is called through Object.prototype because the
         * result may itself hold a key named "hasOwnProperty".
         */
        if (has_own.call(result, key))
            continue;

        matched_entries += 1;
        if (matched_entries > max_matched_entries)
            throw error_message(`has more than ${max_matched_entries} matched entr${max_matched_entries === 1 ? "y" : "ies"}`);

        error_path.push(key_error_path_segment(key));

        /*
         * Error messages are built before the exception starts propagating,
         * so popping in `finally` keeps messages intact while never leaving
         * a stale segment behind.
         */
        try {
            const matching = entry_schemas
                  .find(([key_regex]) => key_regex.exec(key));

            if (matching === undefined) {
                const regex_list = entry_schemas.map(i => i[0]).join(", ");
                throw error_message(`does not match any of key regexes: [${regex_list}]`);
            }

            sanitize_object_entry(result, key, matching[1], object);
        } finally {
            error_path.pop();
        }
    }

    if (matched_entries < min_matched_entries)
        throw error_message(`has less than ${min_matched_entries} matched entr${min_matched_entries === 1 ? "y" : "ies"}`);

    return result;
}
202

    
203
/*
 * Sanitize `array` against an array schema.
 *
 * Schema layout, as consumed below (trailing directives are read from the
 * end of the schema):
 *
 *     [item_schema, ...,
 *      "repeat" | "repeatfull" [, group_length],   (optional)
 *      "minlen", n,                                (optional)
 *      "maxlen", n]                                (optional, after "minlen")
 *
 * Without a repeat directive the array must have exactly as many items as
 * there are item schemas. With one, the last `group_length` (default 1) item
 * schemas form a group that is reused for all remaining items; "repeatfull"
 * additionally requires the remaining items to form whole groups.
 *
 * Returns a new array of sanitized items (items sanitized to `discard` are
 * left out). Throws an error message string on mismatch; sets
 * `invalid_schema` when the repeat group length is not a positive integer.
 */
function sanitize_array(schema, array)
{
    const repeat_directives = ["repeat", "repeatfull"];
    const result = [];
    let min_length = 0;
    let max_length = Infinity;
    let repeat_length = 1;
    let repeat_directive = undefined;
    let repeat = undefined;

    /* Work on a copy - directives get consumed from the end. */
    schema = [...schema];

    if (schema[schema.length - 2] === "maxlen") {
        max_length = schema[schema.length - 1];
        schema.splice(schema.length - 2);
    }

    if (schema[schema.length - 2] === "minlen") {
        min_length = schema[schema.length - 1];
        schema.splice(schema.length - 2);
    }

    /* Explicit group length given after the repeat directive. */
    if (repeat_directives.includes(schema[schema.length - 2]))
        repeat_length = schema.pop();

    if (!Number.isInteger(repeat_length) || repeat_length < 1) {
        invalid_schema = true;
        /* The length was popped above, so the directive is now last. */
        throw error_message(`specifies invalid "${schema[schema.length - 1]}" (should be a positive integer)`);
    }

    if (repeat_directives.includes(schema[schema.length - 1])) {
        repeat_directive = schema.pop();
        repeat = schema.splice(schema.length - repeat_length);
    } else if (schema.length !== array.length) {
        throw error_message(`does not have exactly ${schema.length} items`);
    }

    if (repeat_directive === "repeatfull" &&
        (array.length - schema.length) % repeat_length !== 0)
        throw error_message(`does not contain a full number of item group repetitions`);

    if (array.length < min_length)
        throw error_message(`has less than ${min_length} element${min_length === 1 ? "" : "s"}`);

    if (array.length > max_length)
        throw error_message(`has more than ${max_length} element${max_length === 1 ? "" : "s"}`);

    /* Position within the item schemas currently in use. */
    let schema_idx = 0;

    for (const [array_idx, item] of array.entries()) {
        /* Fixed part (or previous group) exhausted - start a new group. */
        if (schema_idx >= schema.length) {
            schema_idx = 0;
            schema = repeat;
        }

        /* Report the item's position in the array, not in the group. */
        error_path.push(`[${array_idx}]`);
        try {
            const sanitized = sanitize_unknown(schema[schema_idx], item);
            if (sanitized !== discard)
                result.push(sanitized);
        } finally {
            error_path.pop();
        }

        schema_idx++;
    }

    return result;
}
262

    
263
/*
 * Accept `string` if it matches the regular expression `schema`; return it
 * unchanged in that case, otherwise throw an error message string.
 */
function sanitize_regex(schema, string)
{
    /*
     * Global and sticky regexes remember where the previous match ended
     * (`lastIndex`) and would start the next test from there; always test
     * from the beginning so that results don't depend on earlier calls.
     */
    if (schema.global || schema.sticky)
        schema.lastIndex = 0;

    if (!schema.test(string))
        throw error_message(`does not match regex ${schema}`);

    return string;
}
270

    
271
/*
 * Matches string schema specifiers: either plain "string" or
 * "string:<something>". Capture group 2 holds the part after the colon
 * (undefined when there is no colon).
 */
const string_spec_regex = /^string(:(.*))?$/;
272

    
273
/*
 * Sanitize `string` against a string specifier `schema` ("string" or
 * "string:<regex source>"). A plain "string" specifier accepts any string;
 * otherwise the part after the colon is compiled to a RegExp and the string
 * is checked against it with sanitize_regex().
 */
function sanitize_string(schema, string)
{
    const [, , regex_source] = string_spec_regex.exec(schema);

    if (regex_source === undefined)
        return string;

    return sanitize_regex(new RegExp(regex_source), string);
}
282

    
283
/*
 * Sanitize `object` against an object schema: for every key of `schema`,
 * the corresponding entry of `object` is processed by
 * sanitize_object_entry(), which fills in the returned fresh object.
 * Keys of `object` that are not listed in `schema` are not copied.
 */
function sanitize_object(schema, object)
{
    const result = {};

    for (const [key, entry_schema] of Object.entries(schema)) {
        error_path.push(key_error_path_segment(key));

        /*
         * Error messages are built before the exception propagates, so
         * popping in `finally` never leaves a stale segment behind when an
         * entry is rejected.
         */
        try {
            sanitize_object_entry(result, key, entry_schema, object);
        } finally {
            error_path.pop();
        }
    }

    return result;
}
295

    
296
/*
 * Sanitize the entry `key` of `object` against `entry_schema` and, unless
 * the entry is discarded or absent without a default, store the outcome
 * under `key` in `result`.
 *
 * `entry_schema` can be wrapped as
 *
 *     ["optional", <schema elements...>]
 *     ["optional", <schema elements...>, "default", default_value]
 *
 * in which case a missing entry is not an error; with "default" given, the
 * default value is stored instead. A missing non-optional entry makes this
 * function throw an "is missing" message. A malformed "optional" wrapper
 * sets `invalid_schema` and throws.
 */
function sanitize_object_entry(result, key, entry_schema, object)
{
    /*
     * Define an own property instead of assigning: assigning to a key named
     * "__proto__" would change the prototype of `result` instead of storing
     * the value.
     */
    const store = value => Object.defineProperty(result, key, {
        value,
        writable: true,
        enumerable: true,
        configurable: true
    });

    let optional = false;
    let has_default = false;
    let _default = undefined;

    if (Array.isArray(entry_schema) && entry_schema.length > 1 &&
        entry_schema[0] === "optional") {
        optional = true;
        entry_schema = entry_schema.slice(1);

        /*
         * "default" can only sit at the last odd index, as what precedes it
         * is a schema of odd length (a single item or an alternatives list).
         */
        const idx_def = entry_schema.length - (entry_schema.length & 1) - 1;
        if (entry_schema[idx_def] === "default") {
            has_default = true;
            _default = entry_schema[idx_def + 1];
            entry_schema.splice(idx_def);
        } else if ((entry_schema.length & 1) !== 1) {
            invalid_schema = true;
            throw error_message("was not understood");
        }

        /* Unwrap a single remaining schema element. */
        if (entry_schema.length < 2)
            entry_schema = entry_schema[0];
    }

    /*
     * Only own properties count as data; otherwise keys such as "toString"
     * or "constructor" would pick up values inherited from Object.prototype.
     */
    const unsanitized_value =
          Object.prototype.hasOwnProperty.call(object, key) ?
          object[key] : undefined;

    if (unsanitized_value === undefined) {
        if (!optional)
            throw error_message("is missing");

        if (has_default)
            store(_default);

        return;
    }

    const sanitized = sanitize_unknown(entry_schema, unsanitized_value);
    if (sanitized !== discard)
        store(sanitized);
}
337

    
338
/*
 * Sanitizer for items that need no processing once their type has been
 * checked: returns `item` as-is. `schema` is accepted only to match the
 * common sanitizer signature.
 */
function take_literal(schema, item)
{
    const unchanged = item;
    return unchanged;
}
342

    
343
/*
 * This function is used like a symbol. Other parts of code do sth like
 * `item === discard` to check if item was returned by this function.
 *
 * As a sanitizer it ignores both of its arguments and returns itself.
 */
function discard(schema, item)
{
    const marker = discard;
    return marker;
}
351

    
352
/*
 * The following are some helper functions to categorize various
 * schema item specifiers (used in the array below).
 */
356

    
357
/*
 * Tell whether `item` is a generic object specifier, i.e. an array whose
 * second element is the string "matchentry".
 */
function is_genobj_spec(item)
{
    if (!Array.isArray(item))
        return false;

    return item[1] === "matchentry";
}
361

    
362
/*
 * Tell whether `item` can be used as a regular expression by this module:
 * a non-null object with a `test` method. `null` is rejected explicitly
 * because `typeof null` is "object" and reading a property of it throws.
 */
function is_regex(item)
{
    return item !== null && typeof item === "object" &&
        typeof item.test === "function";
}
366

    
367
/*
 * Tell whether `item` is a string specifier: a string accepted by
 * `string_spec_regex`.
 */
function is_string_spec(item)
{
    if (typeof item !== "string")
        return false;

    return string_spec_regex.test(item);
}
371

    
372
/*
 * Tell whether `item` is an object in the JSON sense: neither `null` nor an
 * array (both of which have `typeof` equal to "object"). Rejecting those
 * here makes a `null` or array item found where an object is expected fail
 * with a regular "should be an object" message rather than being handed to
 * an object sanitizer.
 */
function is_object(item)
{
    return item !== null && typeof item === "object" &&
        !Array.isArray(item);
}
376

    
377
/*
 * Make a predicate that tells whether its argument is strictly equal
 * (`===`) to `what`.
 */
function eq(what)
{
    return function (candidate) {
        return candidate === what;
    };
}
381

    
382
/*
 * Dispatch table used by sanitize_unknown_no_alternatives(). Each row is
 *
 *     [schema predicate, item predicate, sanitizer, type name for messages]
 *
 * and the first row whose schema predicate accepts the schema is used.
 *
 * Array and null checks must go before object check.
 */
const checks = [
    [is_genobj_spec, is_object,                   sanitize_genobj, "an object"],
    [Array.isArray,  Array.isArray,               sanitize_array,  "an array"],
    [eq(null),       i => i === null,             take_literal,    "null"],
    [is_regex,       i => typeof i === "string",  sanitize_regex,  "a string"],
    [is_string_spec, i => typeof i === "string",  sanitize_string, "a string"],
    [is_object,      is_object,                   sanitize_object, "an object"],
    [eq("number"),   i => typeof i === "number",  take_literal,    "a number"],
    [eq("boolean"),  i => typeof i === "boolean", take_literal,    "a boolean"],
    [eq("anything"), i => true,                   take_literal,    "dummy"],
    [eq("discard"),  i => true,                   discard,         "dummy"]
];
395

    
396
/*
 * EXPORTS_START
 * EXPORT parse_json_with_schema
 * EXPORTS_END
 */
(9-9/12)