1
|
/**
|
2
|
* This file is part of Haketilo.
|
3
|
*
|
4
|
* Function: Powerful, full-blown format enforcer for externally-obtained JSON.
|
5
|
*
|
6
|
* Copyright (C) 2021 Wojtek Kosior
|
7
|
*
|
8
|
* This program is free software: you can redistribute it and/or modify
|
9
|
* it under the terms of the GNU General Public License as published by
|
10
|
* the Free Software Foundation, either version 3 of the License, or
|
11
|
* (at your option) any later version.
|
12
|
*
|
13
|
* This program is distributed in the hope that it will be useful,
|
14
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
15
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
16
|
* GNU General Public License for more details.
|
17
|
*
|
18
|
* As additional permission under GNU GPL version 3 section 7, you
|
19
|
* may distribute forms of that code without the copy of the GNU
|
20
|
* GPL normally required by section 4, provided you include this
|
21
|
* license notice and, in case of non-source distribution, a URL
|
22
|
* through which recipients can access the Corresponding Source.
|
23
|
* If you modify file(s) with this exception, you may extend this
|
24
|
* exception to your version of the file(s), but you are not
|
25
|
* obligated to do so. If you do not wish to do so, delete this
|
26
|
* exception statement from your version.
|
27
|
*
|
28
|
* As a special exception to the GPL, any HTML file which merely
|
29
|
* makes function calls to this code, and for that purpose
|
30
|
* includes it by reference shall be deemed a separate work for
|
31
|
* copyright law purposes. If you modify this code, you may extend
|
32
|
* this exception to your version of the code, but you are not
|
33
|
* obligated to do so. If you do not wish to do so, delete this
|
34
|
* exception statement from your version.
|
35
|
*
|
36
|
* You should have received a copy of the GNU General Public License
|
37
|
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
38
|
*
|
39
|
* I, Wojtek Kosior, thereby promise not to sue for violation of this file's
|
40
|
* license. Although I request that you do not make use of this code in a
|
41
|
* proprietary program, I am not going to enforce this in court.
|
42
|
*/
|
43
|
|
44
|
/*
 * Set to true as soon as the schema itself (rather than the parsed data)
 * turns out to be malformed; parse_json_with_schema() uses it to pick the
 * wording of the final error.
 */
var invalid_schema;
/*
 * Stack of path segments (strings like ".key" or "[0]") leading to the item
 * currently being sanitized. Only holds an array while a parse is running.
 */
var error_path;
|
46
|
|
47
|
/*
 * Parse `json_string` and sanitize the resulting value against `schema`.
 *
 * Returns the sanitized value. On any failure (malformed JSON, data not
 * matching the schema or a schema that could not be understood) a string
 * starting with "Invalid JSON" or "Invalid JSON schema" is thrown.
 */
function parse_json_with_schema(schema, json_string)
{
    /* Start every parse with clean module-level state. */
    invalid_schema = false;
    error_path = [];

    try {
        const parsed = JSON.parse(json_string);
        return sanitize_unknown(schema, parsed);
    } catch (e) {
        const schema_suffix = invalid_schema ? " schema" : "";
        throw `Invalid JSON${schema_suffix}: ${e}.`;
    } finally {
        /* Allow garbage collection. */
        error_path = undefined;
    }
}
|
61
|
|
62
|
/*
 * Build an error string that names the offending item by concatenating the
 * segments currently held in `error_path` after the word "object" and then
 * appending `cause`.
 */
function error_message(cause)
{
    const location = error_path.join("");

    return `object${location} ${cause}`;
}
|
66
|
|
67
|
/*
 * Sanitize `item` against `schema`, with support for alternatives.
 *
 * A schema that is an array of the form
 *     [option, "or", option, ..., "or", option]
 * or
 *     [option, "or", option, ..., "ordefault", default_value]
 * is tried option by option; the result of the first option that accepts
 * `item` is returned. If every option fails, `default_value` is returned when
 * "ordefault" was used; otherwise the failures of all options, joined with
 * ", or ", are thrown. Any other schema is passed straight to
 * sanitize_unknown_no_alternatives().
 *
 * A malformed alternative list sets `invalid_schema` and throws.
 */
function sanitize_unknown(schema, item)
{
    if (!Array.isArray(schema) || schema[1] === "matchentry" ||
        schema.length < 2 || !["ordefault", "or"].includes(schema[1]))
        return sanitize_unknown_no_alternatives(schema, item);

    /* Options and keywords alternate, so a valid list has odd length. */
    if ((schema.length & 1) !== 1) {
        invalid_schema = true;
        throw error_message("was not understood");
    }

    const schema_options = [];
    let has_default = false;
    let _default = undefined;

    for (let i = 0; i < schema.length; i++) {
        if ((i & 1) !== 1) {
            schema_options.push(schema[i]);
            continue;
        }

        if (schema[i] === "or")
            continue;
        /* "ordefault" is only allowed right before the last element. */
        if (schema[i] === "ordefault" && schema.length === i + 2) {
            has_default = true;
            _default = schema[i + 1];
            break;
        }

        invalid_schema = true;
        throw error_message("was not understood");
    }

    /*
     * A failing option may have thrown from deep inside the item and left its
     * path segments on `error_path`. Remember the current depth so that the
     * path can be restored before the next option (or the caller) continues.
     */
    const path_depth = error_path.length;
    let error_msg = undefined;

    for (const schema_option of schema_options) {
        try {
            return sanitize_unknown_no_alternatives(schema_option, item);
        } catch (e) {
            if (invalid_schema)
                throw e;

            error_path.length = path_depth;

            if (has_default)
                continue;

            if (error_msg === undefined)
                error_msg = e;
            else
                error_msg = `${error_msg}, or ${e}`;
        }
    }

    if (has_default)
        return _default;

    throw error_msg;
}
|
123
|
|
124
|
/*
 * Sanitize `item` against a schema that is not a list of alternatives.
 *
 * The first row of `checks` whose schema predicate accepts `schema` decides
 * how the item is handled: the item must satisfy that row's item predicate
 * and is then run through that row's sanitizer. A schema no row recognizes
 * sets `invalid_schema` and throws.
 */
function sanitize_unknown_no_alternatives(schema, item)
{
    const matching = checks.find(([schema_check]) => schema_check(schema));

    if (matching === undefined) {
        invalid_schema = true;
        throw error_message("was not understood");
    }

    const [, item_check, sanitizer, type_name] = matching;

    if (!item_check(item))
        throw error_message(`should be ${type_name} but is not`);

    return sanitizer(schema, item);
}
|
137
|
|
138
|
/*
 * Turn an object key into a path segment for error messages: keys that look
 * like identifiers become `.key`, all others become `["key"]` with the key
 * JSON-quoted.
 */
function key_error_path_segment(key)
{
    const identifier_like = /^[a-zA-Z_][a-zA-Z_0-9]*$/;

    if (identifier_like.test(key))
        return `.${key}`;

    const quoted = JSON.stringify(key);
    return `[${quoted}]`;
}
|
143
|
|
144
|
/*
 * Generic object - one that can contain arbitrary keys (in addition to ones
 * specified explicitly in the schema).
 *
 * `schema` is an array laid out as
 *     [object_schema, "matchentry",
 *      ("minentries", number,)? ("maxentries", number,)?
 *      (key_regex, entry_schema)..., entry_schema?]
 * where `object_schema` describes the explicitly named keys. Every key of
 * `object` that did not end up in the result of sanitizing against
 * `object_schema` counts as a "matched entry": it must match one of the key
 * regexes (given as RegExp objects or as strings) and its value is sanitized
 * with the entry schema of the first regex it matches. When only a single
 * element remains in the list it is taken as an entry schema with the default
 * key regex /.+/.
 *
 * Returns the sanitized object. Throws a string when the number of matched
 * entries is out of bounds, a key matches no regex or an entry fails
 * sanitization; malformed directives additionally set `invalid_schema`.
 */
function sanitize_genobj(schema, object)
{
    let max_matched_entries = Infinity;
    let min_matched_entries = 0;
    let matched_entries = 0;
    const entry_schemas = [];
    /* Work on a copy - consumed directives get spliced out below. */
    schema = [...schema];

    if (schema[2] === "minentries") {
        if (schema.length < 4) {
            invalid_schema = true;
            throw error_message("was not understood");
        }

        min_matched_entries = schema[3];
        schema.splice(2, 2);
    }

    if (min_matched_entries < 0) {
        invalid_schema = true;
        throw error_message('specifies invalid "minentries" (should be a non-negative number)');
    }

    if (schema[2] === "maxentries") {
        if (schema.length < 4) {
            invalid_schema = true;
            throw error_message("was not understood");
        }

        max_matched_entries = schema[3];
        schema.splice(2, 2);
    }

    if (max_matched_entries < 0) {
        invalid_schema = true;
        throw error_message('specifies invalid "maxentries" (should be a non-negative number)');
    }

    /* Consume the remaining (key regex, entry schema) pairs. */
    while (schema.length > 2) {
        let regex = /.+/;

        if (schema.length > 3) {
            regex = schema[2];
            schema.splice(2, 1);
        }

        if (typeof regex === "string")
            regex = new RegExp(regex);

        entry_schemas.push([regex, schema[2]]);
        schema.splice(2, 1);
    }

    const result = sanitize_object(schema[0], object);

    /*
     * `result` gets filled with keys taken from external JSON, one of which
     * may be called "hasOwnProperty" and shadow the inherited method. Always
     * go through the Object.prototype implementation.
     */
    const has_own = Object.prototype.hasOwnProperty;

    for (const key of Object.keys(object)) {
        if (has_own.call(result, key))
            continue;

        matched_entries += 1;
        if (matched_entries > max_matched_entries)
            throw error_message(`has more than ${max_matched_entries} matched entr${max_matched_entries === 1 ? "y" : "ies"}`);

        error_path.push(key_error_path_segment(key));

        let match = false;
        for (const [key_regex, entry_schema] of entry_schemas) {
            if (!key_regex.exec(key))
                continue;

            match = true;

            sanitize_object_entry(result, key, entry_schema, object);
            break;
        }

        if (!match) {
            const regex_list = entry_schemas.map(i => i[0]).join(", ");
            throw error_message(`does not match any of key regexes: [${regex_list}]`);
        }

        error_path.pop();
    }

    if (matched_entries < min_matched_entries)
        throw error_message(`has less than ${min_matched_entries} matched entr${min_matched_entries === 1 ? "y" : "ies"}`);

    return result;
}
|
237
|
|
238
|
/*
 * Sanitize `array` against an array schema.
 *
 * `schema` lists one item schema per expected element and may end with these
 * directives (in this order):
 *     "repeat" | "repeatfull" [, group_length]
 *     "minlen", number
 *     "maxlen", number
 * With a repeat directive the last `group_length` (default 1) item schemas
 * form a group that is applied cyclically to all elements past the preceding
 * fixed part; "repeatfull" additionally requires those elements to form whole
 * groups. Without a repeat directive the array must have exactly as many
 * elements as there are item schemas.
 *
 * Returns a new array with the sanitized elements (elements sanitized to
 * `discard` are left out). Throws a string on mismatch; an invalid group
 * length additionally sets `invalid_schema`.
 */
function sanitize_array(schema, array)
{
    let min_length = 0;
    let max_length = Infinity;
    let repeat_length = 1;
    let repeat_directive = undefined;
    let repeat = undefined;
    const result = [];

    /* Work on a copy - directives get removed as they are consumed. */
    schema = [...schema];
    if (schema[schema.length - 2] === "maxlen") {
        max_length = schema[schema.length - 1];
        schema.splice(schema.length - 2);
    }

    if (schema[schema.length - 2] === "minlen") {
        min_length = schema[schema.length - 1];
        schema.splice(schema.length - 2);
    }

    if (["repeat", "repeatfull"].includes(schema[schema.length - 2]))
        repeat_length = schema.pop();
    if (repeat_length < 1) {
        invalid_schema = true;
        /* The count has been popped, so the directive is the last element. */
        throw error_message(`specifies invalid "${schema[schema.length - 1]}" (should be a number not less than 1)`);
    }
    if (["repeat", "repeatfull"].includes(schema[schema.length - 1])) {
        repeat_directive = schema.pop();
        repeat = schema.splice(schema.length - repeat_length);
    } else if (schema.length !== array.length) {
        throw error_message(`does not have exactly ${schema.length} items`);
    }

    if (repeat_directive === "repeatfull" &&
        (array.length - schema.length) % repeat_length !== 0)
        throw error_message(`does not contain a full number of item group repetitions`);

    if (array.length < min_length)
        throw error_message(`has less than ${min_length} element${min_length === 1 ? "" : "s"}`);

    if (array.length > max_length)
        throw error_message(`has more than ${max_length} element${max_length === 1 ? "" : "s"}`);

    /*
     * `schema_idx` walks the item schemas and wraps around within the repeated
     * group; the error path has to use the real array index instead.
     */
    let schema_idx = 0;

    for (const [array_idx, item] of array.entries()) {
        if (schema_idx >= schema.length) {
            schema_idx = 0;
            schema = repeat;
        }

        error_path.push(`[${array_idx}]`);
        const sanitized = sanitize_unknown(schema[schema_idx], item);
        if (sanitized !== discard)
            result.push(sanitized);
        error_path.pop();

        schema_idx++;
    }

    return result;
}
|
297
|
|
298
|
/*
 * Accept `string` if it matches the regex `schema`; returns the string
 * unchanged or throws an error string naming the regex.
 */
function sanitize_regex(schema, string)
{
    /*
     * test() on a global or sticky regex starts at `lastIndex` and updates
     * it. Schema regexes get reused between calls, so start from 0 each time
     * to keep the outcome independent of earlier matches.
     */
    if (schema.global || schema.sticky)
        schema.lastIndex = 0;

    if (schema.test(string))
        return string;

    throw error_message(`does not match regex ${schema}`);
}
|
305
|
|
306
|
/*
 * Recognizes string schema specifiers: either plain "string" or "string:"
 * followed by a pattern, which ends up in capture group 2.
 */
const string_spec_regex = new RegExp("^string(:(.*))?$");
|
307
|
|
308
|
/*
 * Sanitize `string` against a string specifier. Plain "string" accepts any
 * string; "string:<pattern>" additionally requires the string to match the
 * regex compiled from <pattern>.
 */
function sanitize_string(schema, string)
{
    const [, , pattern] = string_spec_regex.exec(schema);

    return pattern === undefined ?
        string : sanitize_regex(new RegExp(pattern), string);
}
|
317
|
|
318
|
/*
 * Sanitize `object` against an object schema: every key named in `schema` is
 * processed with its entry schema and the outcome is collected in a fresh
 * object, which is returned. Keys of `object` absent from `schema` are
 * ignored.
 */
function sanitize_object(schema, object)
{
    const sanitized = {};

    for (const key of Object.keys(schema)) {
        /* Make error messages from below point at this entry. */
        error_path.push(key_error_path_segment(key));
        sanitize_object_entry(sanitized, key, schema[key], object);
        error_path.pop();
    }

    return sanitized;
}
|
330
|
|
331
|
/*
 * Sanitize the entry `key` of `object` with `entry_schema` and store the
 * outcome under the same key in `result`.
 *
 * An entry schema of the form ["optional", ...schema] makes the entry
 * optional; it may end with `"default", value`, in which case `value` is
 * stored when the entry is absent. What remains after stripping these is the
 * actual schema (a single remaining element is unwrapped from the array).
 *
 * Throws a string when a non-optional entry is missing or when its value
 * fails sanitization. A value sanitized to `discard` is not stored.
 */
function sanitize_object_entry(result, key, entry_schema, object)
{
    let optional = false;
    let has_default = false;
    let _default = undefined;

    /*
     * Keys come from external JSON. A plain `result[key] = value` with the
     * key "__proto__" would replace the prototype of `result` rather than
     * create an entry, so define an own data property explicitly.
     */
    const store = value => Object.defineProperty(result, key, {
        value,
        writable: true,
        enumerable: true,
        configurable: true
    });

    if (Array.isArray(entry_schema) && entry_schema.length > 1 &&
        entry_schema[0] === "optional") {
        optional = true;
        entry_schema = entry_schema.slice(1);

        /* Index of the last odd position - where "default" would sit. */
        const idx_def = entry_schema.length - (entry_schema.length & 1) - 1;
        if (entry_schema[idx_def] === "default") {
            has_default = true;
            _default = entry_schema[idx_def + 1];
            entry_schema.splice(idx_def);
        } else if ((entry_schema.length & 1) !== 1) {
            invalid_schema = true;
            throw error_message("was not understood");
        }

        if (entry_schema.length < 2)
            entry_schema = entry_schema[0];
    }

    const unsanitized_value = object[key];
    if (unsanitized_value === undefined) {
        if (!optional)
            throw error_message("is missing");

        if (has_default)
            store(_default);

        return;
    }

    const sanitized = sanitize_unknown(entry_schema, unsanitized_value);
    if (sanitized !== discard)
        store(sanitized);
}
|
372
|
|
373
|
/*
 * Sanitizer for items that need no processing once their type has been
 * checked: hands `item` back untouched. `schema` is accepted only so that
 * the signature matches the other sanitizers.
 */
function take_literal(schema, item)
{
    const untouched = item;

    return untouched;
}
|
377
|
|
378
|
/*
 * Sanitizer that drops the item. It returns itself, which makes the function
 * double as a unique marker: other code compares a sanitized value with
 * `discard` (`value === discard`) to find out that it should be left out.
 */
function discard(schema, item)
{
    const marker = discard;

    return marker;
}
|
386
|
|
387
|
/*
|
388
|
* The following are some helper functions to categorize various
|
389
|
* schema item specifiers (used in the array below).
|
390
|
*/
|
391
|
|
392
|
/*
 * Tell whether a schema item describes a generic object, i.e. is an array
 * with "matchentry" as its second element.
 */
function is_genobj_spec(item)
{
    if (!Array.isArray(item))
        return false;

    return item[1] === "matchentry";
}
|
396
|
|
397
|
/*
 * Tell whether a schema item can be used as a regex: any non-null object
 * with a callable `test` member qualifies.
 */
function is_regex(item)
{
    /* `typeof null` is "object" too, so rule null out before probing it. */
    return item !== null && typeof item === "object" &&
        typeof item.test === "function";
}
|
401
|
|
402
|
/*
 * Tell whether a schema item is a string specifier, i.e. a string accepted
 * by `string_spec_regex`.
 */
function is_string_spec(item)
{
    if (typeof item !== "string")
        return false;

    return string_spec_regex.test(item);
}
|
406
|
|
407
|
/*
 * Tell whether `item` is a plain JSON-style object. `typeof` also reports
 * "object" for null and for arrays; both are excluded here because this
 * predicate is applied to parsed data as well, where letting them through
 * would make an array or null pass for an object.
 */
function is_object(item)
{
    return typeof item === "object" && item !== null && !Array.isArray(item);
}
|
411
|
|
412
|
/*
 * Make a predicate that checks its argument for strict equality with `what`.
 */
function eq(what)
{
    return function (candidate) {
        return candidate === what;
    };
}
|
416
|
|
417
|
/*
 * Dispatch table used to sanitize a single item. Every row holds:
 *     [schema predicate, item predicate, sanitizer, type name for errors]
 * Rows are tried in order and the first one whose schema predicate matches
 * wins, hence: array and null checks must go before object check.
 */
const checks = (() => {
    const item_is_string = i => typeof i === "string";
    const item_is_number = i => typeof i === "number";
    const item_is_boolean = i => typeof i === "boolean";
    const item_is_null = i => i === null;
    const item_is_anything = i => true;

    return [
        [is_genobj_spec, is_object, sanitize_genobj, "an object"],
        [Array.isArray, Array.isArray, sanitize_array, "an array"],
        [eq(null), item_is_null, take_literal, "null"],
        [is_regex, item_is_string, sanitize_regex, "a string"],
        [is_string_spec, item_is_string, sanitize_string, "a string"],
        [is_object, is_object, sanitize_object, "an object"],
        [eq("number"), item_is_number, take_literal, "a number"],
        [eq("boolean"), item_is_boolean, take_literal, "a boolean"],
        [eq("anything"), item_is_anything, take_literal, "dummy"],
        [eq("discard"), item_is_anything, discard, "dummy"]
    ];
})();
|
430
|
|
431
|
/*
|
432
|
* EXPORTS_START
|
433
|
* EXPORT parse_json_with_schema
|
434
|
* EXPORTS_END
|
435
|
*/
|