Project

General

Profile

« Previous | Next » 

Revision 64afd5b9

Added by koszko about 2 years ago

provide a facility to sanitize externally-obtained JSON

View differences:

common/sanitize_JSON.js
1
/**
2
 * part of Hachette
3
 * Powerful, full-blown format enforcer for externally-obtained JSON
4
 *
5
 * Copyright (C) 2021 Wojtek Kosior
6
 * Redistribution terms are gathered in the `copyright' file.
7
 */
8

  
9
/*
 * Segments of the path to the item currently being sanitized. Reset to [] by
 * parse_json_with_schema(), joined by error_message() and set back to
 * undefined once parsing finishes.
 */
var error_path;
10
/*
 * Set to true when the failure was caused by the schema itself (and not by the
 * data); parse_json_with_schema() uses it to choose the error message prefix.
 */
var invalid_schema;
11

  
12
/*
 * Parse `json_string` and sanitize the parsed value against `schema`.
 *
 * Returns the sanitized value. Any failure (JSON syntax error, data not
 * matching the schema, schema not understood) is rethrown as a string that
 * starts with "Invalid JSON" or "Invalid JSON schema".
 */
function parse_json_with_schema(schema, json_string)
{
    error_path = [];
    invalid_schema = false;

    try {
        const parsed = JSON.parse(json_string);
        return sanitize_unknown(schema, parsed);
    } catch (cause) {
        const schema_note = invalid_schema ? " schema" : "";
        throw `Invalid JSON${schema_note}: ${cause}.`;
    } finally {
        /* Drop the reference so the path array can be garbage-collected. */
        error_path = undefined;
    }
}
26

  
27
/*
 * Build an error message that names the item currently being sanitized:
 * "object", followed by the accumulated path segments, followed by `cause`.
 */
function error_message(cause)
{
    const location = error_path.join("");

    return `object${location} ${cause}`;
}
31

  
32
/*
 * Sanitize `item` against `schema`, which may list alternatives.
 *
 * A schema with alternatives is an array of odd length of the form
 *     [option, "or", option, ..., "ordefault", default_value]
 * where the trailing "ordefault" pair is optional. Options are tried in order
 * and the result of the first one that accepts `item` is returned. When no
 * option accepts it, the default value is returned if one was given; otherwise
 * a message combining all failures is thrown. Any other schema is handed
 * straight to sanitize_unknown_no_alternatives().
 */
function sanitize_unknown(schema, item)
{
    /*
     * Alternatives are recognised by the keyword in the second slot. Generic
     * object specs also keep a keyword there ("matchentry") and are excluded.
     */
    if (!Array.isArray(schema) || schema[1] === "matchentry" ||
        schema.length < 2 || !["ordefault", "or"].includes(schema[1]))
        return sanitize_unknown_no_alternatives(schema, item);

    if ((schema.length & 1) !== 1) {
        invalid_schema = true;
        throw error_message("was not understood");
    }

    const schema_options = [];
    let has_default = false;
    let _default = undefined;

    for (let i = 0; i < schema.length; i++) {
        /* Even slots hold options, odd slots hold keywords. */
        if ((i & 1) !== 1) {
            schema_options.push(schema[i]);
            continue;
        }

        if (schema[i] === "or")
            continue;

        /* "ordefault" is only valid as the last keyword. */
        if (schema[i] === "ordefault" && schema.length === i + 2) {
            has_default = true;
            _default = schema[i + 1];
            break;
        }

        invalid_schema = true;
        throw error_message("was not understood");
    }

    const path_depth = error_path.length;
    let error_msg = undefined;

    for (const schema_option of schema_options) {
        try {
            return sanitize_unknown_no_alternatives(schema_option, item);
        } catch (e) {
            if (invalid_schema)
                throw e;

            /*
             * A failed attempt may have left its path segments behind; drop
             * them so that the next option starts from the right place.
             */
            error_path.length = path_depth;

            if (has_default)
                continue;

            error_msg = error_msg === undefined ? e : `${error_msg}, or ${e}`;
        }
    }

    if (has_default)
        return _default;

    throw error_msg;
}
89

  
90
/*
 * Sanitize `item` against a schema that lists no alternatives.
 *
 * The first row of `checks` whose schema predicate accepts `schema` decides:
 * when its item predicate accepts `item`, the row's sanitizer produces the
 * result, otherwise a "should be ... but is not" message is thrown. A schema
 * that no row recognises is flagged as invalid.
 */
function sanitize_unknown_no_alternatives(schema, item)
{
    for (const [schema_check, item_check, sanitizer, type_name] of checks) {
        if (!schema_check(schema))
            continue;

        if (!item_check(item))
            throw error_message(`should be ${type_name} but is not`);

        return sanitizer(schema, item);
    }

    invalid_schema = true;
    throw error_message("was not understood");
}
105

  
106
/*
 * Turn an object key into an error path segment: ".key" when the key looks
 * like an identifier, ["key"] (JSON-quoted) otherwise.
 */
function key_error_path_segment(key)
{
    const identifier_like = /^[a-zA-Z_][a-zA-Z_0-9]*$/;

    if (identifier_like.exec(key))
        return `.${key}`;

    return `[${JSON.stringify(key)}]`;
}
111

  
112
/*
 * Generic object - one that can contain arbitrary keys (in addition to ones
 * specified explicitly in the schema).
 *
 * The schema is an array [object_schema, "matchentry", ...options] where the
 * options are, in this order: an optional "minentries", number pair, an
 * optional "maxentries", number pair and then key regex, entry schema pairs.
 * The last entry schema may come without a regex, in which case /.+/ is used.
 *
 * Keys described by object_schema are handled by sanitize_object(). Every
 * other key of `object` counts as a matched entry and is sanitized with the
 * entry schema of the first regex that matches it; a key matching no regex is
 * an error.
 */
function sanitize_genobj(schema, object)
{
    let max_matched_entries = Infinity;
    let min_matched_entries = 0;
    let matched_entries = 0;
    const entry_schemas = [];
    schema = [...schema];

    if (schema[2] === "minentries") {
        if (schema.length < 4) {
            invalid_schema = true;
            throw error_message("was not understood");
        }

        min_matched_entries = schema[3];
        schema.splice(2, 2);
    }

    if (min_matched_entries < 0) {
        invalid_schema = true;
        throw error_message('specifies invalid "minentries" (should be a non-negative number)');
    }

    if (schema[2] === "maxentries") {
        if (schema.length < 4) {
            invalid_schema = true;
            throw error_message("was not understood");
        }

        max_matched_entries = schema[3];
        schema.splice(2, 2);
    }

    if (max_matched_entries < 0) {
        invalid_schema = true;
        throw error_message('specifies invalid "maxentries" (should be a non-negative number)');
    }

    /* Consume the remaining [regex,] entry_schema items. */
    while (schema.length > 2) {
        let regex = /.+/;

        if (schema.length > 3) {
            regex = schema[2];
            schema.splice(2, 1);
        }

        if (typeof regex === "string")
            regex = new RegExp(regex);

        entry_schemas.push([regex, schema[2]]);
        schema.splice(2, 1);
    }

    const result = sanitize_object(schema[0], object);

    for (const key of Object.keys(object)) {
        /*
         * Keys already present in the result were handled by the explicit
         * object schema. The check goes through Object.prototype because the
         * data may itself contain a key named "hasOwnProperty".
         */
        if (Object.prototype.hasOwnProperty.call(result, key))
            continue;

        matched_entries += 1;
        if (matched_entries > max_matched_entries)
            throw error_message(`has more than ${max_matched_entries} matched entr${max_matched_entries === 1 ? "y" : "ies"}`);

        error_path.push(key_error_path_segment(key));

        try {
            const matching = entry_schemas.find(([key_regex]) => key_regex.exec(key));

            if (matching === undefined) {
                const regex_list = entry_schemas.map(i => i[0]).join(", ");
                throw error_message(`does not match any of key regexes: [${regex_list}]`);
            }

            sanitize_object_entry(result, key, matching[1], object);
        } finally {
            /* Keep the path balanced even when the entry is rejected. */
            error_path.pop();
        }
    }

    if (matched_entries < min_matched_entries)
        throw error_message(`has less than ${min_matched_entries} matched entr${min_matched_entries === 1 ? "y" : "ies"}`);

    return result;
}
205

  
206
/*
 * Sanitize `array` against an array schema and return a new array.
 *
 * The schema lists one item schema per position. It may end with, in this
 * order:
 *   - "repeat" or "repeatfull", optionally followed by a group length
 *     (default 1): the last <group length> item schemas form a group that is
 *     cycled over for all items past the fixed prefix; "repeatfull"
 *     additionally requires a whole number of group repetitions,
 *   - "minlen", number,
 *   - "maxlen", number.
 * Without a repeat directive the array must have exactly as many items as the
 * schema. Items whose sanitizer returns `discard` are left out of the result.
 */
function sanitize_array(schema, array)
{
    const repeat_directives = ["repeat", "repeatfull"];
    let min_length = 0;
    let max_length = Infinity;
    let repeat_length = 1;
    let repeat_directive = undefined;
    let repeat = undefined;
    const result = [];

    let item_schemas = [...schema];

    if (item_schemas[item_schemas.length - 2] === "maxlen") {
        max_length = item_schemas[item_schemas.length - 1];
        item_schemas.splice(item_schemas.length - 2);
    }

    if (item_schemas[item_schemas.length - 2] === "minlen") {
        min_length = item_schemas[item_schemas.length - 1];
        item_schemas.splice(item_schemas.length - 2);
    }

    /* An explicit group length follows the repeat directive. */
    if (repeat_directives.includes(item_schemas[item_schemas.length - 2]))
        repeat_length = item_schemas.pop();

    if (repeat_length < 1) {
        invalid_schema = true;
        /* The length has been popped, so the directive is the last item now. */
        throw error_message(`specifies invalid "${item_schemas[item_schemas.length - 1]}" (should be a number not smaller than 1)`);
    }

    if (repeat_directives.includes(item_schemas[item_schemas.length - 1])) {
        repeat_directive = item_schemas.pop();
        repeat = item_schemas.splice(item_schemas.length - repeat_length);
    } else if (item_schemas.length !== array.length) {
        throw error_message(`does not have exactly ${item_schemas.length} items`);
    }

    if (repeat_directive === "repeatfull" &&
        (array.length - item_schemas.length) % repeat_length !== 0)
        throw error_message(`does not contain a full number of item group repetitions`);

    if (array.length < min_length)
        throw error_message(`has less than ${min_length} element${min_length === 1 ? "" : "s"}`);

    if (array.length > max_length)
        throw error_message(`has more than ${max_length} element${max_length === 1 ? "" : "s"}`);

    /* Position within the schema currently in use (prefix, then the group). */
    let position = 0;

    for (const [index, item] of array.entries()) {
        if (position >= item_schemas.length) {
            position = 0;
            item_schemas = repeat;
        }

        /* Report the real array index, not the position within the group. */
        error_path.push(`[${index}]`);
        try {
            const sanitized = sanitize_unknown(item_schemas[position], item);
            if (sanitized !== discard)
                result.push(sanitized);
        } finally {
            error_path.pop();
        }

        position++;
    }

    return result;
}
268

  
269
/*
 * Return `string` unchanged when the regex `schema` matches it; throw a
 * "does not match regex" message otherwise.
 */
function sanitize_regex(schema, string)
{
    /*
     * test() on a global or sticky regex resumes from lastIndex, which would
     * make the outcome depend on earlier calls with the same schema object.
     */
    if (schema.global || schema.sticky)
        schema.lastIndex = 0;

    if (schema.test(string))
        return string;

    throw error_message(`does not match regex ${schema}`);
}
277

  
278
/*
 * Matches string schema specifiers: "string" alone or "string:" followed by a
 * pattern, which ends up in capture group 2.
 */
const string_spec_regex = /^string(:(.*))?$/;
279

  
280
/*
 * Sanitize `string` against a string specifier `schema`: either "string",
 * which accepts any string, or "string:<pattern>", which accepts only strings
 * matching the regex built from <pattern>.
 */
function sanitize_string(schema, string)
{
    const pattern = string_spec_regex.exec(schema)[2];

    if (pattern === undefined)
        return string;

    let regex;
    try {
        regex = new RegExp(pattern);
    } catch (e) {
        /* A pattern that does not compile is the schema's fault, not the data's. */
        invalid_schema = true;
        throw error_message(`specifies invalid regex (${e.message})`);
    }

    return sanitize_regex(regex, string);
}
290

  
291
/*
 * Sanitize `object` against an object schema and return a new object that
 * holds only entries for keys named in the schema; each of them is processed
 * by sanitize_object_entry().
 */
function sanitize_object(schema, object)
{
    const result = {};

    for (const [key, entry_schema] of Object.entries(schema)) {
        error_path.push(key_error_path_segment(key));
        try {
            sanitize_object_entry(result, key, entry_schema, object);
        } finally {
            /* Keep the path balanced even when the entry is rejected. */
            error_path.pop();
        }
    }

    return result;
}
304

  
305
/*
 * Store `value` under `key` as a plain own property of `result`. A property
 * definition is used instead of an assignment so that a key like "__proto__"
 * becomes ordinary data rather than replacing the prototype of `result`.
 */
function store_sanitized_entry(result, key, value)
{
    Object.defineProperty(result, key, {
        value,
        writable: true,
        enumerable: true,
        configurable: true
    });
}

/*
 * Sanitize the entry `key` of `object` against `entry_schema` and store the
 * outcome in `result`.
 *
 * An entry schema of the form ["optional", ...schema] makes the entry
 * optional; it may end with "default", value to have `value` stored when the
 * entry is absent. A missing non-optional entry is an error. Nothing is
 * stored when the sanitizer returns `discard`.
 */
function sanitize_object_entry(result, key, entry_schema, object)
{
    let optional = false;
    let has_default = false;
    let _default = undefined;

    if (Array.isArray(entry_schema) && entry_schema.length > 1 &&
        entry_schema[0] === "optional") {
        optional = true;
        entry_schema = entry_schema.slice(1);

        /* "default" can only sit in the last odd slot of what remains. */
        const idx_def = entry_schema.length - (entry_schema.length & 1) - 1;
        if (entry_schema[idx_def] === "default") {
            has_default = true;
            _default = entry_schema[idx_def + 1];
            entry_schema.splice(idx_def);
        } else if ((entry_schema.length & 1) !== 1) {
            invalid_schema = true;
            throw error_message("was not understood");
        }

        /* A lone remaining item is the schema itself, not a list of options. */
        if (entry_schema.length < 2)
            entry_schema = entry_schema[0];
    }

    /* Only own properties count; inherited ones like "constructor" do not. */
    const present = Object.prototype.hasOwnProperty.call(object, key);
    const unsanitized_value = present ? object[key] : undefined;

    if (unsanitized_value === undefined) {
        if (!optional)
            throw error_message("is missing");

        if (has_default)
            store_sanitized_entry(result, key, _default);

        return;
    }

    const sanitized = sanitize_unknown(entry_schema, unsanitized_value);
    if (sanitized !== discard)
        store_sanitized_entry(result, key, sanitized);
}
347

  
348
/*
 * Sanitizer for items that need no processing once their type has been
 * checked: returns `item` as is. `schema` is accepted only to match the
 * signature shared by all sanitizers.
 */
function take_literal(schema, item)
{
    return item;
}
353

  
354
/*
 * This function is used like a symbol: it returns itself, and other parts of
 * the code compare a sanitizer's result with `discard` (`item === discard`) to
 * learn that the item is to be left out of the output.
 */
function discard(schema, item)
{
    return discard;
}
363

  
364
/*
365
 * The following are some helper functions to categorize various
366
 * schema item specifiers (used in the array below).
367
 */
368

  
369
/*
 * Tell whether `item` is a generic object specifier, i.e. an array with
 * "matchentry" in its second slot.
 */
function is_genobj_spec(item)
{
    if (!Array.isArray(item))
        return false;

    return item[1] === "matchentry";
}
373

  
374
/*
 * Tell whether `item` can be used as a regex, i.e. is an object with a test()
 * method. null is rejected explicitly because `typeof null` is "object".
 */
function is_regex(item)
{
    return item !== null && typeof item === "object" &&
        typeof item.test === "function";
}
378

  
379
/*
 * Tell whether `item` is a string specifier: the string "string", optionally
 * followed by ":" and a pattern.
 */
function is_string_spec(item)
{
    if (typeof item !== "string")
        return false;

    return string_spec_regex.test(item);
}
383

  
384
/*
 * Tell whether `item` is an object in the JSON sense. null and arrays also
 * have `typeof` "object" but are rejected, so that they cannot be passed to
 * the object sanitizers in place of a real object.
 */
function is_object(item)
{
    return item !== null && typeof item === "object" && !Array.isArray(item);
}
388

  
389
/*
 * Make a predicate that tells whether its argument is strictly equal to
 * `what`.
 */
function eq(what)
{
    return function (candidate) {
        return candidate === what;
    };
}
393

  
394
/*
 * Table consulted by sanitize_unknown_no_alternatives(). Each row holds a
 * schema predicate, an item predicate, a sanitizer and the type name used in
 * error messages. Rows are tried in order, so the array and null rows must go
 * before the generic object row.
 */
const checks = [
    [is_genobj_spec, is_object,                          sanitize_genobj, "an object"],
    [Array.isArray,  Array.isArray,                      sanitize_array,  "an array"],
    [eq(null),       eq(null),                           take_literal,    "null"],
    [is_regex,       item => typeof item === "string",   sanitize_regex,  "a string"],
    [is_string_spec, item => typeof item === "string",   sanitize_string, "a string"],
    [is_object,      is_object,                          sanitize_object, "an object"],
    [eq("number"),   item => typeof item === "number",   take_literal,    "a number"],
    [eq("boolean"),  item => typeof item === "boolean",  take_literal,    "a boolean"],
    [eq("anything"), () => true,                         take_literal,    "dummy"],
    [eq("discard"),  () => true,                         discard,         "dummy"]
];
407

  
408
/*
409
 * EXPORTS_START
410
 * EXPORT parse_json_with_schema
411
 * EXPORTS_END
412
 */

Also available in: Unified diff