1
|
/**
|
2
|
* part of Hachette
|
3
|
* Powerful, full-blown format enforcer for externally-obtained JSON
|
4
|
*
|
5
|
* Copyright (C) 2021 Wojtek Kosior
|
6
|
* Redistribution terms are gathered in the `copyright' file.
|
7
|
*/
|
8
|
|
9
|
/*
 * State shared by all sanitizing functions. It is (re)initialized at the
 * beginning of every parse_json_with_schema() call.
 */

/* Path segments (e.g. `.key' or `[0]') leading to the item being processed. */
var error_path;

/* Set to true right before an error caused by a malformed schema is thrown. */
var invalid_schema;
|
11
|
|
12
|
/*
 * Parse `json_string' and sanitize the parsed value against `schema'.
 *
 * Returns the sanitized value. On failure a string is thrown: it starts with
 * "Invalid JSON schema" when the schema was flagged as malformed and with
 * "Invalid JSON" otherwise, followed by the description of the problem.
 */
function parse_json_with_schema(schema, json_string)
{
    invalid_schema = false;
    error_path = [];

    try {
        const parsed = JSON.parse(json_string);

        return sanitize_unknown(schema, parsed);
    } catch (problem) {
        throw `Invalid JSON${invalid_schema ? " schema" : ""}: ${problem}.`;
    } finally {
        /* Drop the reference so the path array can be garbage-collected. */
        error_path = undefined;
    }
}
|
26
|
|
27
|
/*
 * Compose an error description: the word "object" followed by the path to
 * the currently processed item (all segments of `error_path' concatenated)
 * and then by `cause'.
 */
function error_message(cause)
{
    const location = error_path.join("");

    return `object${location} ${cause}`;
}
|
31
|
|
32
|
/*
 * Sanitize `item' against `schema' of any kind, including alternatives.
 *
 * Alternatives are written as an odd-length array
 *     [option, "or", option, ..., "or", option]
 * optionally ending with `"ordefault", value' instead of `"or", option'.
 * Options are tried in order and the result of the first one that accepts
 * `item' is returned. When none does, the "ordefault" value is returned if
 * one was given; otherwise a message joining the errors of all options is
 * thrown. Any other schema is passed to sanitize_unknown_no_alternatives().
 *
 * A malformed alternatives list sets `invalid_schema' and throws.
 */
function sanitize_unknown(schema, item)
{
    /*
     * It is the second element that tells alternatives apart from array
     * schemas and from generic object ("matchentry") schemas.
     */
    if (!Array.isArray(schema) || schema.length < 2 ||
        schema[1] === "matchentry" ||
        !["ordefault", "or"].includes(schema[1]))
        return sanitize_unknown_no_alternatives(schema, item);

    if ((schema.length & 1) !== 1) {
        invalid_schema = true;
        throw error_message("was not understood");
    }

    const schema_options = [];
    let has_default = false;
    let _default = undefined;

    for (let i = 0; i < schema.length; i++) {
        /* Even positions hold options, odd positions hold keywords. */
        if ((i & 1) !== 1) {
            schema_options.push(schema[i]);
            continue;
        }

        if (schema[i] === "or")
            continue;

        /* "ordefault" is only valid as the last keyword. */
        if (schema[i] === "ordefault" && schema.length === i + 2) {
            has_default = true;
            _default = schema[i + 1];
            break;
        }

        invalid_schema = true;
        throw error_message("was not understood");
    }

    /*
     * A failing option may leave extra segments on error_path (sanitizers
     * do not pop them when they throw). Remember the current depth so the
     * path can be restored before the next option is tried.
     */
    const path_depth = error_path.length;
    let error_msg = undefined;

    for (const schema_option of schema_options) {
        try {
            return sanitize_unknown_no_alternatives(schema_option, item);
        } catch (e) {
            /* A broken schema is fatal - trying other options is pointless. */
            if (invalid_schema)
                throw e;

            error_path.length = path_depth;

            if (has_default)
                continue;

            error_msg = error_msg === undefined ? e : `${error_msg}, or ${e}`;
        }
    }

    if (has_default)
        return _default;

    throw error_msg;
}
|
88
|
|
89
|
/*
 * Sanitize `item' against a schema that is not a list of alternatives.
 *
 * The first row of `checks' whose schema predicate accepts `schema' decides
 * how the item is handled: if the row's item predicate accepts `item', the
 * row's sanitizer is run and its result returned; otherwise an error naming
 * the expected type is thrown. A schema matched by no row is flagged through
 * `invalid_schema' and reported as not understood.
 */
function sanitize_unknown_no_alternatives(schema, item)
{
    const row = checks.find(([schema_check]) => schema_check(schema));

    if (row === undefined) {
        invalid_schema = true;
        throw error_message("was not understood");
    }

    const [, item_check, sanitizer, type_name] = row;

    if (!item_check(item))
        throw error_message(`should be ${type_name} but is not`);

    return sanitizer(schema, item);
}
|
102
|
|
103
|
/*
 * Turn an object key into an error path segment: `.key' for keys made only
 * of letters, digits and underscores (and not starting with a digit),
 * `["key"]' (with the key JSON-encoded) for all other keys.
 */
function key_error_path_segment(key)
{
    const identifier_like = /^[a-zA-Z_][a-zA-Z_0-9]*$/.test(key);

    if (identifier_like)
        return `.${key}`;

    return `[${JSON.stringify(key)}]`;
}
|
108
|
|
109
|
/*
 * Generic object - one that can contain arbitrary keys (in addition to ones
 * specified explicitly in the schema).
 *
 * `schema' is an array laid out as follows:
 *     schema[0] - object schema with the explicitly specified keys,
 *     schema[1] - the "matchentry" keyword,
 *     then optionally `"minentries", number',
 *     then optionally `"maxentries", number',
 *     then any number of `key_regex, entry_schema' pairs; a single trailing
 *     entry schema without a regex is matched against keys using /.+/.
 * A key regex may be a RegExp or a string (compiled with `new RegExp()').
 *
 * Every key of `object' that is not specified explicitly must match one of
 * the key regexes; its value is sanitized with the entry schema of the first
 * regex that matches. The number of such keys must fit the "minentries" and
 * "maxentries" limits. Returns the sanitized object; throws an error message
 * otherwise (with `invalid_schema' set when it is the schema that is wrong).
 */
function sanitize_genobj(schema, object)
{
    /* Safe even when the inspected object has its own "hasOwnProperty". */
    const has_own = (obj, key) =>
          Object.prototype.hasOwnProperty.call(obj, key);
    const is_valid_limit = limit => typeof limit === "number" && limit >= 0;

    let max_matched_entries = Infinity;
    let min_matched_entries = 0;
    let matched_entries = 0;
    const entry_schemas = [];

    /* Work on a copy - the code below consumes schema items with splice(). */
    schema = [...schema];

    if (schema[2] === "minentries") {
        if (schema.length < 4) {
            invalid_schema = true;
            throw error_message("was not understood");
        }

        min_matched_entries = schema[3];
        schema.splice(2, 2);
    }

    if (!is_valid_limit(min_matched_entries)) {
        invalid_schema = true;
        throw error_message('specifies invalid "minentries" (should be a non-negative number)');
    }

    if (schema[2] === "maxentries") {
        if (schema.length < 4) {
            invalid_schema = true;
            throw error_message("was not understood");
        }

        max_matched_entries = schema[3];
        schema.splice(2, 2);
    }

    if (!is_valid_limit(max_matched_entries)) {
        invalid_schema = true;
        throw error_message('specifies invalid "maxentries" (should be a non-negative number)');
    }

    /* Consume the remaining items as (regex, entry schema) pairs. */
    while (schema.length > 2) {
        let regex = /.+/;

        /* A lone last item is an entry schema that uses the default regex. */
        if (schema.length > 3) {
            regex = schema[2];
            schema.splice(2, 1);
        }

        if (typeof regex === "string")
            regex = new RegExp(regex);

        entry_schemas.push([regex, schema[2]]);
        schema.splice(2, 1);
    }

    const explicit_schema = schema[0];
    const result = sanitize_object(explicit_schema, object);

    for (const key of Object.keys(object)) {
        /*
         * Explicitly specified keys have already been handled above. Look
         * the key up in the schema rather than in `result': an explicitly
         * specified entry that got discarded is absent from `result' and
         * yet must not be matched against the key regexes.
         */
        if (has_own(explicit_schema, key))
            continue;

        matched_entries += 1;
        if (matched_entries > max_matched_entries)
            throw error_message(`has more than ${max_matched_entries} matched entr${max_matched_entries === 1 ? "y" : "ies"}`);

        error_path.push(key_error_path_segment(key));

        const matching = entry_schemas.find(([key_regex]) => key_regex.exec(key));

        if (matching === undefined) {
            const regex_list = entry_schemas.map(i => i[0]).join(", ");
            throw error_message(`does not match any of key regexes: [${regex_list}]`);
        }

        sanitize_object_entry(result, key, matching[1], object);

        error_path.pop();
    }

    if (matched_entries < min_matched_entries)
        throw error_message(`has less than ${min_matched_entries} matched entr${min_matched_entries === 1 ? "y" : "ies"}`);

    return result;
}
|
202
|
|
203
|
/*
 * Sanitize `array' against an array schema.
 *
 * The schema lists item schemas by position. Its tail may contain, in this
 * order:
 *     "repeat" or "repeatfull", optionally followed by a group length
 *         (1 when omitted),
 *     "minlen", number,
 *     "maxlen", number.
 * With a repeat directive the last `group length' item schemas form a group
 * that is applied cyclically to all items beyond the preceding, fixed ones;
 * "repeatfull" additionally requires those items to form complete groups.
 * Without a repeat directive the array must have exactly as many items as
 * the schema.
 *
 * Returns a new array with the sanitized items, leaving out those sanitized
 * to `discard'. Throws an error message on mismatch (with `invalid_schema'
 * set when the repeat group length is invalid).
 */
function sanitize_array(schema, array)
{
    let min_length = 0;
    let max_length = Infinity;
    let repeat_length = 1;
    let repeat_directive = undefined;
    let repeat = undefined;
    const result = [];

    /* Work on a copy - the schema tail gets consumed below. */
    schema = [...schema];

    if (schema[schema.length - 2] === "maxlen") {
        max_length = schema[schema.length - 1];
        schema.splice(schema.length - 2);
    }

    if (schema[schema.length - 2] === "minlen") {
        min_length = schema[schema.length - 1];
        schema.splice(schema.length - 2);
    }

    if (["repeat", "repeatfull"].includes(schema[schema.length - 2]))
        repeat_length = schema.pop();

    if (repeat_length < 1) {
        /* The length has been popped, so the directive is the last item. */
        invalid_schema = true;
        throw error_message(`specifies invalid "${schema[schema.length - 1]}" (should be number greater than 0)`);
    }

    if (["repeat", "repeatfull"].includes(schema[schema.length - 1])) {
        repeat_directive = schema.pop();
        repeat = schema.splice(schema.length - repeat_length);
    } else if (schema.length !== array.length) {
        throw error_message(`does not have exactly ${schema.length} items`);
    }

    /*
     * NOTE(review): an array shorter than the fixed part of a repeating
     * schema is accepted here; confirm whether that is intended.
     */
    if (repeat_directive === "repeatfull" &&
        (array.length - schema.length) % repeat_length !== 0)
        throw error_message(`does not contain a full number of item group repetitions`);

    if (array.length < min_length)
        throw error_message(`has less than ${min_length} element${min_length === 1 ? "" : "s"}`);

    if (array.length > max_length)
        throw error_message(`has more than ${max_length} element${max_length === 1 ? "" : "s"}`);

    /* Position within the item schemas currently in use (fixed or group). */
    let schema_idx = 0;

    for (const [array_idx, item] of array.entries()) {
        /* Schemas exhausted - (re)start the repeated group. */
        if (schema_idx >= schema.length) {
            schema_idx = 0;
            schema = repeat;
        }

        /* Report the real position in the array, not the one in the group. */
        error_path.push(`[${array_idx}]`);
        const sanitized = sanitize_unknown(schema[schema_idx], item);
        if (sanitized !== discard)
            result.push(sanitized);
        error_path.pop();

        schema_idx++;
    }

    return result;
}
|
262
|
|
263
|
/*
 * Sanitize `string' against the regex `schema': return the string when the
 * regex matches it, throw an error message otherwise.
 */
function sanitize_regex(schema, string)
{
    /*
     * For regexes with the `g' or `y' flag test() starts at lastIndex and
     * updates it, so a schema regex reused between calls would give
     * different verdicts for the same string. Always start from 0.
     */
    if (schema.global || schema.sticky)
        schema.lastIndex = 0;

    if (schema.test(string))
        return string;

    throw error_message(`does not match regex ${schema}`);
}
|
270
|
|
271
|
/*
 * Matches string specifiers used in schemas: plain "string" or
 * "string:<regex>". Capture group 2 holds the text after the colon (the
 * regex the string should match); it is undefined when there is no colon.
 */
const string_spec_regex = /^string(:(.*))?$/;
|
272
|
|
273
|
/*
 * Sanitize `string' against a string specifier. For plain "string" the
 * string is returned as is; for "string:<regex>" it is first checked
 * against a RegExp built from the part after the colon.
 */
function sanitize_string(schema, string)
{
    const spec_match = string_spec_regex.exec(schema);
    const pattern = spec_match[2];

    if (pattern !== undefined)
        return sanitize_regex(new RegExp(pattern), string);

    return string;
}
|
282
|
|
283
|
/*
 * Sanitize `object' against an object schema, i.e. an object mapping keys
 * to entry schemas. Only keys present in the schema are processed. Returns
 * a new object holding the sanitized entries.
 */
function sanitize_object(schema, object)
{
    const result = {};

    Object.keys(schema).forEach(key => {
        /* Keep the path in sync so errors can point at this entry. */
        error_path.push(key_error_path_segment(key));
        sanitize_object_entry(result, key, schema[key], object);
        error_path.pop();
    });

    return result;
}
|
295
|
|
296
|
/*
 * Store `value' under `key' as an own, ordinary data property of `result'.
 * Unlike plain assignment this does not trigger the `__proto__' setter, so
 * data coming from outside cannot replace the prototype of `result'.
 */
function set_own_entry(result, key, value)
{
    Object.defineProperty(result, key, {
        value,
        writable: true,
        enumerable: true,
        configurable: true
    });
}

/*
 * Sanitize the entry `key' of `object' against `entry_schema' and store the
 * outcome in `result'.
 *
 * An entry schema of the form ["optional", schema...] marks the entry as
 * optional; it may end with `"default", value', in which case `value' is
 * stored when the entry is missing. A missing non-optional entry causes an
 * "is missing" error to be thrown. Entries sanitized to `discard' are not
 * stored. A malformed "optional" schema sets `invalid_schema' and throws.
 */
function sanitize_object_entry(result, key, entry_schema, object)
{
    let optional = false;
    let has_default = false;
    let _default = undefined;

    if (Array.isArray(entry_schema) && entry_schema.length > 1 &&
        entry_schema[0] === "optional") {
        optional = true;
        /* Copy without the keyword - the schema passed in is not modified. */
        entry_schema = entry_schema.slice(1);

        /* Index of the last keyword position in the remaining schema. */
        const idx_def = entry_schema.length - (entry_schema.length & 1) - 1;
        if (entry_schema[idx_def] === "default") {
            has_default = true;
            _default = entry_schema[idx_def + 1];
            entry_schema.splice(idx_def);
        } else if ((entry_schema.length & 1) !== 1) {
            invalid_schema = true;
            throw error_message("was not understood");
        }

        /* A single remaining item is the schema itself, not a list. */
        if (entry_schema.length < 2)
            entry_schema = entry_schema[0];
    }

    /*
     * Only own properties count as present. Otherwise inherited ones (like
     * `toString') would be mistaken for data found in the parsed JSON.
     */
    const unsanitized_value =
          Object.prototype.hasOwnProperty.call(object, key) ?
          object[key] : undefined;

    if (unsanitized_value === undefined) {
        if (!optional)
            throw error_message("is missing");

        if (has_default)
            set_own_entry(result, key, _default);

        return;
    }

    const sanitized = sanitize_unknown(entry_schema, unsanitized_value);
    if (sanitized !== discard)
        set_own_entry(result, key, sanitized);
}
|
337
|
|
338
|
/*
 * Sanitizer for items that need no further processing: `item' is returned
 * unchanged and `schema' is not looked at.
 */
function take_literal(schema, item)
{
    return item;
}
|
342
|
|
343
|
/*
 * Sanitizer that doubles as a marker value. It always returns itself, so
 * other code can compare a sanitized value against `discard' to recognize
 * items that are to be left out of the result.
 */
function discard(schema, item)
{
    return discard;
}
|
351
|
|
352
|
/*
|
353
|
* The following are some helper functions to categorize various
|
354
|
* schema item specifiers (used in the array below).
|
355
|
*/
|
356
|
|
357
|
/*
 * Tell whether `item' is a generic object specifier, i.e. an array with
 * "matchentry" as its second element.
 */
function is_genobj_spec(item)
{
    if (!Array.isArray(item))
        return false;

    return item[1] === "matchentry";
}
|
361
|
|
362
|
/*
 * Tell whether `item' can be used as a regex, i.e. is an object with a
 * test() method. `null' is rejected explicitly: its typeof is "object" as
 * well, but accessing a property of it would throw.
 */
function is_regex(item)
{
    return item !== null && typeof item === "object" &&
        typeof item.test === "function";
}
|
366
|
|
367
|
/*
 * Tell whether `item' is a string specifier: a string accepted by
 * `string_spec_regex'.
 */
function is_string_spec(item)
{
    if (typeof item !== "string")
        return false;

    return string_spec_regex.test(item);
}
|
371
|
|
372
|
/*
 * Tell whether `item' is an object in the JSON sense. `null' and arrays,
 * although their typeof is "object" too, are rejected: used as an item
 * check this makes them get reported as not being an object instead of
 * being passed on to the object sanitizers.
 */
function is_object(item)
{
    return item !== null && typeof item === "object" &&
        !Array.isArray(item);
}
|
376
|
|
377
|
/*
 * Make a predicate that tells whether its argument is strictly equal
 * to `what'.
 */
function eq(what)
{
    return function (candidate) {
        return candidate === what;
    };
}
|
381
|
|
382
|
/*
 * Table driving sanitize_unknown_no_alternatives(). Each row consists of:
 *     a predicate recognizing the schema kind,
 *     a predicate telling whether an item has the type the schema expects,
 *     the sanitizer to run on the item,
 *     the type name to use in error messages.
 * Rows are tried in order and the first one whose schema predicate matches
 * is used. Array and null checks must go before object check.
 */
const checks = [
    [is_genobj_spec, is_object, sanitize_genobj, "an object"],
    [Array.isArray, Array.isArray, sanitize_array, "an array"],
    [eq(null), item => item === null, take_literal, "null"],
    [is_regex, item => typeof item === "string", sanitize_regex, "a string"],
    [is_string_spec, item => typeof item === "string", sanitize_string, "a string"],
    [is_object, is_object, sanitize_object, "an object"],
    [eq("number"), item => typeof item === "number", take_literal, "a number"],
    [eq("boolean"), item => typeof item === "boolean", take_literal, "a boolean"],
    [eq("anything"), () => true, take_literal, "dummy"],
    [eq("discard"), () => true, discard, "dummy"]
];
|
395
|
|
396
|
/*
|
397
|
* EXPORTS_START
|
398
|
* EXPORT parse_json_with_schema
|
399
|
* EXPORTS_END
|
400
|
*/
|