Revision 64afd5b9
Added by koszko about 2 years ago
common/sanitize_JSON.js | ||
---|---|---|
1 |
/** |
|
2 |
* part of Hachette |
|
3 |
* Powerful, full-blown format enforcer for externally-obtained JSON |
|
4 |
* |
|
5 |
* Copyright (C) 2021 Wojtek Kosior |
|
6 |
* Redistribution terms are gathered in the `copyright' file. |
|
7 |
*/ |
|
8 |
|
|
9 |
/*
 * Module-level state shared by all sanitizers during a single
 * parse_json_with_schema() call.
 *
 * error_path     - array of path segments (such as ".key" or "[0]") that
 *                  error_message() joins to describe the offending value.
 * invalid_schema - set to true when a failure is caused by the schema rather
 *                  than by the data being checked.
 */
var error_path;
var invalid_schema;
|
11 |
|
|
12 |
/*
 * Parse `json_string` and sanitize the parsed value against `schema`.
 *
 * Returns whatever sanitize_unknown() returns for the parsed value.
 *
 * Anything thrown while parsing or sanitizing is re-thrown as a string that
 * starts with "Invalid JSON: ", or with "Invalid JSON schema: " when the
 * `invalid_schema` flag got set along the way.
 */
function parse_json_with_schema(schema, json_string)
{
    /* Reset the per-call state used by error_message() and the sanitizers. */
    error_path = [];
    invalid_schema = false;

    try {
        return sanitize_unknown(schema, JSON.parse(json_string));
    } catch (e) {
        throw `Invalid JSON${invalid_schema ? " schema" : ""}: ${e}.`;
    } finally {
        /* Allow garbage collection. */
        error_path = undefined;
    }
}
|
26 |
|
|
27 |
/*
 * Build an error string of the form "object<path> <cause>", where <path> is
 * the concatenation of all segments currently stored in `error_path`.
 */
function error_message(cause)
{
    return `object${error_path.join("")} ${cause}`;
}
|
31 |
|
|
32 |
/*
 * Sanitize `item` against `schema`, with support for alternatives.
 *
 * A schema of the form [option1, "or", option2, ...] is satisfied when `item`
 * passes any of the options; they are tried in order and the first success
 * is returned. The list may end with `"ordefault", value`, in which case
 * `value` is returned when no option matches. Every other schema is handed
 * to sanitize_unknown_no_alternatives() as is.
 *
 * Throws an error string when nothing matches and there is no default. When
 * the alternatives list itself is malformed, `invalid_schema` is set before
 * throwing.
 */
function sanitize_unknown(schema, item)
{
    /*
     * Alternatives are recognized by the keyword in the second slot. Looking
     * at schema[1] (and not at the whole array) is what makes this branch
     * reachable at all. Generic object specs hold "matchentry" there and so
     * take the single-schema path.
     */
    if (!Array.isArray(schema) || schema.length < 2 ||
        !["ordefault", "or"].includes(schema[1]))
        return sanitize_unknown_no_alternatives(schema, item);

    /* Options and keywords alternate, hence a valid list has odd length. */
    if ((schema.length & 1) !== 1) {
        invalid_schema = true;
        throw error_message("was not understood");
    }

    const schema_options = [];
    let has_default = false;
    let _default = undefined;

    for (let i = 0; i < schema.length; i++) {
        /* Even slots hold option schemas, odd slots hold keywords. */
        if ((i & 1) !== 1) {
            schema_options.push(schema[i]);
            continue;
        }

        if (schema[i] === "or")
            continue;

        /* "ordefault" is only valid as the second-to-last element. */
        if (schema[i] === "ordefault" && schema.length === i + 2) {
            has_default = true;
            _default = schema[i + 1];
            break;
        }

        invalid_schema = true;
        throw error_message("was not understood");
    }

    const path_depth = error_path.length;
    let error_msg = undefined;

    for (const schema_option of schema_options) {
        try {
            return sanitize_unknown_no_alternatives(schema_option, item);
        } catch (e) {
            /* A broken schema is fatal, do not try further options. */
            if (invalid_schema)
                throw e;

            /*
             * A failed option may have left its path segments behind; drop
             * them so that later messages describe the right location.
             */
            error_path.length = path_depth;

            if (has_default)
                continue;

            error_msg = error_msg === undefined ? e : `${error_msg}, or ${e}`;
        }
    }

    if (has_default)
        return _default;

    throw error_msg;
}
|
89 |
|
|
90 |
/*
 * Sanitize `item` against a single (non-alternative) schema.
 *
 * The rows of `checks` are tried in order. The first row whose schema check
 * accepts `schema` decides the outcome: if its item check accepts `item`,
 * the row's sanitizer is called and its result returned; otherwise an error
 * string naming the expected type is thrown.
 *
 * If no row recognizes `schema`, `invalid_schema` is set and an error string
 * is thrown.
 */
function sanitize_unknown_no_alternatives(schema, item)
{
    for (const [schema_check, item_check, sanitizer, type_name] of checks) {
        if (!schema_check(schema))
            continue;

        if (item_check(item))
            return sanitizer(schema, item);

        throw error_message(`should be ${type_name} but is not`);
    }

    invalid_schema = true;
    throw error_message("was not understood");
}
|
105 |
|
|
106 |
/*
 * Turn an object key into a path segment for error messages: `.key` when the
 * key looks like a plain identifier, `["key"]` (JSON-quoted) otherwise.
 */
function key_error_path_segment(key)
{
    return /^[a-zA-Z_][a-zA-Z_0-9]*$/.exec(key) ?
        `.${key}` : `[${JSON.stringify(key)}]`;
}
|
111 |
|
|
112 |
/*
 * Generic object - one that can contain arbitrary keys (in addition to ones
 * specified explicitly in the schema).
 *
 * Schema layout, as consumed below:
 *   [fixed_schema, "matchentry",
 *    "minentries", n,          (optional pair)
 *    "maxentries", m,          (optional pair, after "minentries")
 *    key_regex, entry_schema,  (any number of pairs)
 *    entry_schema]             (optional; applies to keys matching /.+/)
 *
 * `fixed_schema` is applied with sanitize_object(). Every key of `object`
 * that did not end up in that result counts as a "matched entry" and is
 * sanitized with the entry schema of the first key regex that matches it.
 * Key regexes given as strings are compiled with `new RegExp()`.
 *
 * Throws an error string when a key matches no regex or when the number of
 * matched entries is outside the "minentries"/"maxentries" bounds. A
 * malformed schema additionally sets `invalid_schema`.
 */
function sanitize_genobj(schema, object)
{
    const fixed_schema = schema[0];
    /* Everything that follows [fixed_schema, "matchentry"]. */
    const rest = schema.slice(2);
    let min_matched_entries = 0;
    let max_matched_entries = Infinity;

    if (rest[0] === "minentries") {
        if (rest.length < 2) {
            invalid_schema = true;
            throw error_message("was not understood");
        }

        min_matched_entries = rest[1];
        rest.splice(0, 2);
    }

    /* `!(x >= 0)` also rejects NaN, which `x < 0` would let through. */
    if (typeof min_matched_entries !== "number" ||
        !(min_matched_entries >= 0)) {
        invalid_schema = true;
        throw error_message('specifies invalid "minentries" (should be a non-negative number)');
    }

    if (rest[0] === "maxentries") {
        if (rest.length < 2) {
            invalid_schema = true;
            throw error_message("was not understood");
        }

        max_matched_entries = rest[1];
        rest.splice(0, 2);
    }

    if (typeof max_matched_entries !== "number" ||
        !(max_matched_entries >= 0)) {
        invalid_schema = true;
        throw error_message('specifies invalid "maxentries" (should be a non-negative number)');
    }

    const entry_schemas = [];
    while (rest.length > 0) {
        /* A trailing entry schema without a regex applies to /.+/ keys. */
        let regex = /.+/;

        if (rest.length > 1)
            regex = rest.shift();

        if (typeof regex === "string")
            regex = new RegExp(regex);

        entry_schemas.push([regex, rest.shift()]);
    }

    const result = sanitize_object(fixed_schema, object);
    let matched_entries = 0;

    for (const key of Object.keys(object)) {
        /*
         * Call hasOwnProperty through the prototype: `result` may hold an
         * externally-supplied key that shadows the method.
         */
        if (Object.prototype.hasOwnProperty.call(result, key))
            continue;

        matched_entries += 1;
        if (matched_entries > max_matched_entries)
            throw error_message(`has more than ${max_matched_entries} matched entr${max_matched_entries === 1 ? "y" : "ies"}`);

        error_path.push(key_error_path_segment(key));
        try {
            const matching =
                entry_schemas.find(([key_regex]) => key_regex.exec(key));

            if (matching === undefined) {
                const regex_list = entry_schemas.map(i => i[0]).join(", ");
                throw error_message(`does not match any of key regexes: [${regex_list}]`);
            }

            sanitize_object_entry(result, key, matching[1], object);
        } finally {
            /* Keep error_path balanced even when an entry is rejected. */
            error_path.pop();
        }
    }

    if (matched_entries < min_matched_entries)
        throw error_message(`has less than ${min_matched_entries} matched entr${min_matched_entries === 1 ? "y" : "ies"}`);

    return result;
}
|
205 |
|
|
206 |
/*
 * Sanitize `array` against an array schema.
 *
 * Schema layout, as consumed below (trailing parts are optional):
 *   [item_schema, ...,
 *    "repeat" | "repeatfull", count,   (count may be omitted, defaults to 1)
 *    "minlen", n,
 *    "maxlen", m]
 *
 * Without a repeat directive the array must have exactly as many items as
 * there are item schemas. With one, the last `count` item schemas form a
 * group that is cycled through for all items following the remaining
 * (leading) item schemas. "repeatfull" additionally requires the number of
 * items after the leading ones to be a multiple of `count`.
 *
 * Returns a new array with the sanitized items; items whose sanitizer
 * returned `discard` are left out. Throws an error string on mismatch and
 * sets `invalid_schema` when the repeat count is invalid.
 */
function sanitize_array(schema, array)
{
    let min_length = 0;
    let max_length = Infinity;
    let repeat_length = 1;
    let repeat_directive = undefined;
    let repeat = undefined;
    const result = [];

    schema = [...schema];
    if (schema[schema.length - 2] === "maxlen") {
        max_length = schema[schema.length - 1];
        schema.splice(schema.length - 2);
    }

    if (schema[schema.length - 2] === "minlen") {
        min_length = schema[schema.length - 1];
        schema.splice(schema.length - 2);
    }

    if (["repeat", "repeatfull"].includes(schema[schema.length - 2]))
        repeat_length = schema.pop();
    if (repeat_length < 1) {
        invalid_schema = true;
        /* After the pop() above the directive is the last element. */
        throw error_message(`specifies invalid "${schema[schema.length - 1]}" (should be a number not smaller than 1)`);
    }
    if (["repeat", "repeatfull"].includes(schema[schema.length - 1])) {
        repeat_directive = schema.pop();
        repeat = schema.splice(schema.length - repeat_length);
    } else if (schema.length !== array.length) {
        throw error_message(`does not have exactly ${schema.length} items`);
    }

    if (repeat_directive === "repeatfull" &&
        (array.length - schema.length) % repeat_length !== 0)
        throw error_message(`does not contain a full number of item group repetitions`);

    if (array.length < min_length)
        throw error_message(`has less than ${min_length} element${min_length === 1 ? "" : "s"}`);

    if (array.length > max_length)
        throw error_message(`has more than ${max_length} element${max_length === 1 ? "" : "s"}`);

    /* Schemas currently being walked: leading ones first, then the group. */
    let current_schemas = schema;
    let schema_idx = 0;

    for (let item_idx = 0; item_idx < array.length; item_idx++) {
        if (schema_idx >= current_schemas.length) {
            schema_idx = 0;
            current_schemas = repeat;
        }

        /* Report the position within the array, not within the schema. */
        error_path.push(`[${item_idx}]`);
        try {
            const sanitized =
                sanitize_unknown(current_schemas[schema_idx], array[item_idx]);
            if (sanitized !== discard)
                result.push(sanitized);
        } finally {
            /* Keep error_path balanced even when an item is rejected. */
            error_path.pop();
        }

        schema_idx++;
    }

    return result;
}
|
268 |
|
|
269 |
/*
 * Return `string` if it matches the regex `schema`; otherwise throw an error
 * string that names the regex.
 */
function sanitize_regex(schema, string)
{
    /*
     * test() on a global or sticky regex starts at `lastIndex` and updates
     * it, which would make results depend on earlier calls. Always start
     * matching from the beginning.
     */
    if (schema.global || schema.sticky)
        schema.lastIndex = 0;

    if (schema.test(string))
        return string;

    throw error_message(`does not match regex ${schema}`);
}
|
277 |
|
|
278 |
/*
 * Recognizes string specifiers: either plain "string" or "string:<regex>".
 * Capture group 2 holds the part after the colon, if any.
 */
const string_spec_regex = /^string(:(.*))?$/;

/*
 * Sanitize `string` against a string specifier `schema`.
 *
 * For plain "string" the value is returned unchanged. For "string:<regex>"
 * the part after the colon is compiled with `new RegExp()` and the value is
 * checked with sanitize_regex().
 */
function sanitize_string(schema, string)
{
    const regex = string_spec_regex.exec(schema)[2];

    if (regex === undefined)
        return string;

    return sanitize_regex(new RegExp(regex), string);
}
|
290 |
|
|
291 |
/*
 * Sanitize `object` against an object schema, i.e. a mapping from keys to
 * entry schemas.
 *
 * Returns a new object built by calling sanitize_object_entry() for every
 * key listed in `schema`; keys of `object` that the schema does not mention
 * are not copied.
 */
function sanitize_object(schema, object)
{
    const result = {};

    for (const [key, entry_schema] of Object.entries(schema)) {
        error_path.push(key_error_path_segment(key));
        try {
            sanitize_object_entry(result, key, entry_schema, object);
        } finally {
            /* Keep error_path balanced even when an entry is rejected. */
            error_path.pop();
        }
    }

    return result;
}
|
304 |
|
|
305 |
/*
 * Define `key` on `target` as a plain own data property. Unlike assignment,
 * this does not trigger setters such as the one behind "__proto__".
 */
function set_own_entry(target, key, value)
{
    Object.defineProperty(target, key, {
        value,
        writable: true,
        enumerable: true,
        configurable: true
    });
}

/*
 * Sanitize the entry `key` of `object` and store the outcome in `result`.
 *
 * `entry_schema` may be wrapped as
 *   ["optional", schema..., "default", value]
 * ("default" and its value may be omitted). A missing entry then is not an
 * error; when a default is given it is stored in `result` instead.
 *
 * A missing non-optional entry throws an error string. A sanitized value
 * equal to `discard` is not stored. A malformed "optional" wrapper sets
 * `invalid_schema` and throws.
 */
function sanitize_object_entry(result, key, entry_schema, object)
{
    let optional = false;
    let has_default = false;
    let _default = undefined;

    if (Array.isArray(entry_schema) && entry_schema.length > 1 &&
        entry_schema[0] === "optional") {
        optional = true;
        entry_schema = entry_schema.slice(1);

        /* Last odd index: where "default" would sit in an alternatives list. */
        const idx_def = entry_schema.length - (entry_schema.length & 1) - 1;
        if (entry_schema[idx_def] === "default") {
            has_default = true;
            _default = entry_schema[idx_def + 1];
            entry_schema.splice(idx_def);
        } else if ((entry_schema.length & 1) !== 1) {
            invalid_schema = true;
            throw error_message("was not understood");
        }

        /* A single remaining element is the schema itself, unwrap it. */
        if (entry_schema.length < 2)
            entry_schema = entry_schema[0];
    }

    /*
     * Only look at own properties, so that inherited ones (e.g. "toString")
     * are not mistaken for data.
     */
    const unsanitized_value =
        Object.prototype.hasOwnProperty.call(object, key) ?
        object[key] : undefined;

    if (unsanitized_value === undefined) {
        if (!optional)
            throw error_message("is missing");

        if (has_default)
            set_own_entry(result, key, _default);

        return;
    }

    const sanitized = sanitize_unknown(entry_schema, unsanitized_value);
    if (sanitized !== discard)
        set_own_entry(result, key, sanitized);
}
|
347 |
|
|
348 |
/*
 * Sanitizer for values that need no further processing: returns `item`
 * unchanged. `schema` is accepted only to match the sanitizer signature used
 * in `checks`.
 */
function take_literal(schema, item)
{
    return item;
}
|
353 |
|
|
354 |
/*
 * This function is used like a symbol. Other parts of code do sth like
 * `item === discard` to check if item was returned by this function.
 *
 * Both arguments are ignored; the function always returns itself.
 */
function discard(schema, item)
{
    return discard;
}
|
363 |
|
|
364 |
/* |
|
365 |
* The following are some helper functions to categorize various |
|
366 |
* schema item specifiers (used in the array below). |
|
367 |
*/ |
|
368 |
|
|
369 |
/*
 * Tell whether `item` is a generic object specifier, i.e. an array whose
 * second element is "matchentry".
 */
function is_genobj_spec(item)
{
    return Array.isArray(item) && item[1] === "matchentry";
}
|
373 |
|
|
374 |
/*
 * Tell whether `item` can be used as a regex, i.e. is a non-null object with
 * a `test` method.
 */
function is_regex(item)
{
    /* `typeof null` is "object" too, so null has to be excluded first. */
    return item !== null && typeof item === "object" &&
        typeof item.test === "function";
}
|
378 |
|
|
379 |
/*
 * Tell whether `item` is a string specifier, i.e. a string accepted by
 * `string_spec_regex`.
 */
function is_string_spec(item)
{
    return typeof item === "string" && string_spec_regex.test(item);
}
|
383 |
|
|
384 |
/*
 * Tell whether `item` is an object in the JSON sense: a non-null,
 * non-array value for which `typeof` gives "object".
 *
 * null and arrays have to be excluded explicitly because `typeof` reports
 * "object" for them as well.
 */
function is_object(item)
{
    return item !== null && typeof item === "object" && !Array.isArray(item);
}
|
388 |
|
|
389 |
/*
 * Make a predicate that tells whether its argument is strictly equal (`===`)
 * to `what`.
 */
function eq(what)
{
    return i => i === what;
}
|
393 |
|
|
394 |
/*
 * Dispatch table used by sanitize_unknown_no_alternatives(). Each row is
 *   [schema_check, item_check, sanitizer, type_name]
 * and the first row whose schema_check accepts the schema is the one used.
 *
 * Array and null checks must go before object check.
 */
const checks = [
    [is_genobj_spec, is_object, sanitize_genobj, "an object"],
    [Array.isArray, Array.isArray, sanitize_array, "an array"],
    [eq(null), i => i === null, take_literal, "null"],
    [is_regex, i => typeof i === "string", sanitize_regex, "a string"],
    [is_string_spec, i => typeof i === "string", sanitize_string, "a string"],
    [is_object, is_object, sanitize_object, "an object"],
    [eq("number"), i => typeof i === "number", take_literal, "a number"],
    [eq("boolean"), i => typeof i === "boolean", take_literal, "a boolean"],
    [eq("anything"), i => true, take_literal, "dummy"],
    [eq("discard"), i => true, discard, "dummy"]
];
|
407 |
|
|
408 |
/* |
|
409 |
* EXPORTS_START |
|
410 |
* EXPORT parse_json_with_schema |
|
411 |
* EXPORTS_END |
|
412 |
*/ |
Also available in: Unified diff
provide a facility to sanitize externally-obtained JSON