The problem you are experiencing is that your data is recursive but your schema is linear, i.e. "flat". You attempted to encode meaning into strings like "boolean" and "array:employee". This is a poor way to represent a recursive structure as a schema.
If you want to build a proper schema validator, start by designing the pieces used to make the schemas. Start with the basics and work your way up -
design
// main.js
import { primitives, validate } from "./schema.js"
const [tnumber, tboolean, tstring] = primitives()
const temployee = ...
const tschema = ...
const mydata = ...
validate(tschema, mydata)
With our primitives defined, we can make more advanced types, like temployee and tschema -
// main.js
import { primitives, validate, required, optional } from "./schema.js"
const [tnumber, tboolean, tstring] = primitives()
// Schema for one employee record: each key maps to the validator for that field.
const temployee = {
name: required(tstring),
title: required(tstring),
salary: optional(tnumber),
remote: optional(tboolean),
// `reports` is a getter so `tschema`, which is declared after this object,
// is only looked up when the property is read rather than while the object literal is built.
get reports() { return optional(tschema) } // recursive types supported!
}
// A one-element array schema: every element of the data must match temployee.
const tschema = [temployee] // array of temployee!
const mydata = ...
validate(tschema, mydata)
implement
Now we start the Schema module -
primitives - generates symbolic primitive types
required - type guards against null values
optional - type validates only if value is present
// schema.js
// Primitive type tags. They are defined once at module scope so that
// validatePrimitive, which compares its schema argument against the names
// tnumber / tboolean / tstring, sees the very same symbols handed out to callers.
const tnumber = Symbol("number")
const tboolean = Symbol("boolean")
const tstring = Symbol("string")

/**
 * Generates the symbolic primitive types.
 *
 * The first three values are always the shared tags for number, boolean and
 * string, in that order, so the intended usage keeps working:
 *
 *   const [tnumber, tboolean, tstring] = primitives()
 *
 * Every call yields the same three symbols, so schemas built in different
 * places agree with each other and with validatePrimitive.
 *
 * After those three the generator keeps yielding fresh anonymous symbols
 * forever (the original behaviour, kept for backward compatibility);
 * validatePrimitive does not recognise them.
 *
 * @yields {symbol} a primitive type tag
 */
function* primitives() {
  yield tnumber
  yield tboolean
  yield tstring
  while (true) yield Symbol()
}
/**
 * Builds a validator that rejects missing values before checking the type.
 *
 * @param {*} t - schema the value must satisfy
 * @returns {function(*): void} validator that throws Error(`cannot be null`)
 *   when given null or undefined, and otherwise hands the value to validate(t, v)
 */
const required = t => {
  return v => {
    const isMissing = v === null || v === undefined
    if (isMissing) {
      throw Error(`cannot be null`)
    }
    validate(t, v)
  }
}
/**
 * Builds a validator that only checks the type when a value is present.
 *
 * @param {*} t - schema the value must satisfy when it is not null/undefined
 * @returns {function(*): void} validator that does nothing for null or
 *   undefined, and otherwise hands the value to validate(t, v)
 */
const optional = t => {
  return v => {
    // Absent values are acceptable, so there is nothing to check.
    if (v === null || v === undefined) {
      return
    }
    validate(t, v)
  }
}
export { primitives, required, optional }
Next we'll write an internal helper, validatePrimitive, for validating basic types -
// schema.js (continued)
/**
 * Checks value `v` against one of the primitive type tags.
 *
 * The tags tnumber, tboolean and tstring are read from the enclosing scope.
 * NOTE(review): they must be the same symbols that callers obtained from
 * primitives() — confirm they are actually defined in this module.
 *
 * The check compares `v.constructor`, so both primitive values and their
 * wrapper objects are accepted; null and undefined never match because
 * `v?.constructor` is undefined for them.
 *
 * @param {symbol} t - primitive type tag
 * @param {*} v - value to check
 * @returns {void}
 * @throws {Error} when `v` is not of the tagged type, or `t` is not a known tag
 */
function validatePrimitive(t, v) {
  if (t === tnumber) {
    if (v?.constructor != Number) {
      throw Error(`${v} is not a number`)
    }
  }
  else if (t === tboolean) {
    if (v?.constructor != Boolean) {
      throw Error(`${v} is not a boolean`)
    }
  }
  else if (t === tstring) {
    if (v?.constructor != String) {
      throw Error(`${v} is not a string`)
    }
  }
  else {
    throw Error(`unsupported primitive type`)
  }
}
Finally we write the public validate interface. It's recursive because both the schemas and the data we are validating are recursive. This harmony of data and code makes it easier for us to think about the problem and write a program that solves it -
// schema.js (continued)
/**
 * Validates value `v` against schema `t`, recursing through nested schemas.
 *
 * The schema form is chosen by `t.constructor`:
 *  - Symbol   : primitive type tag, delegated to validatePrimitive
 *  - Array    : must contain exactly one element schema; `v` must be an array
 *               and each of its entries is validated against that schema
 *  - Object   : `v` must be a non-null object; each key of the schema is
 *               validated against the same key of `v` (extra keys in `v` are ignored)
 *  - Function : custom validator, called with `v`
 *
 * @param {*} t - schema
 * @param {*} v - value to validate
 * @returns {void} returns normally when `v` is valid
 * @throws {Error} when `v` does not match `t`, or `t` is not a supported schema
 */
function validate(t, v) {
  switch (t?.constructor) {
    case Symbol:
      return validatePrimitive(t, v)
    case Array:
      if (t.length !== 1) throw Error("Array schema must specify exactly one type")
      // Without this guard a number or boolean has no keys and would pass
      // silently, a string would be validated character by character, and
      // null/undefined would surface as an opaque TypeError from Object.keys.
      if (!Array.isArray(v))
        throw Error(`expected an array but got ${v === null ? "null" : typeof v}`)
      for (const k of Object.keys(v))
        validate(t[0], v[k])
      break
    case Object:
      // Property access on a primitive yields undefined for every key, so a
      // schema made only of optional fields would otherwise accept e.g. 5.
      if (v === null || typeof v !== "object")
        throw Error(`expected an object but got ${v === null ? "null" : typeof v}`)
      for (const k of Object.keys(t))
        validate(t[k], v[k])
      break
    case Function:
      t(v)
      break
    default:
      throw Error(`unsupported schema: ${t}`)
  }
}
export { ..., validate }
run it
import { primitives, required, optional, validate } from "./schema.js"
const [tnumber, tboolean, tstring] = primitives()
// Schema for one employee record: each key maps to the validator for that field.
const temployee = {
name: required(tstring),
title: required(tstring),
salary: optional(tnumber),
remote: optional(tboolean),
// `reports` is a getter so `tschema`, which is declared after this object,
// is only looked up when the property is read rather than while the object literal is built.
get reports() { return optional(tschema) }
}
// A one-element array schema: every element of the data must match temployee.
const tschema = [temployee] // array of temployee
const employees = [
{ name: 'alice',
title: "ceo",
salary: 100,
reports: [{
name: "bob",
title: "cfo",
salary: 10,
reports: [{
name: 'zorp',
title:"controller",
salary: 40
}],
}],
},
…
]
validate(tschema, employees) // throws an Error only if invalid
what's next?
You could design more schema tools like -
withDefault(t, defaultValue) - substitutes null values with a default
const temployee = {
name: tstring,
remote: withDefault(tboolean, false)
}
const tstudent = {
name: tstring,
major: withDefault(tstring, "undeclared")
}
const tcourse = {
teacher: temployee,
enrollments: withDefault([tstudent], [])
}
inRange(min, max) - numeric range guards
const temployee = {
name: tstring,
salary: inRange(0, Infinity) // negative salary invalid!
}
oneOf(t, choices) - inclusive value guards
const temployee = {
name: tstring,
title: oneOf(tstring, ["exec", "vp", "staff"]) // must be one of these!
}
We could improve the error messages by adding try..catch around the recursive calls. This allows us to add context to the point of failure so the user knows the full path to the offending leaf -
// schema.js (continued)
/**
 * Validates value `v` against schema `t`, recursing through nested schemas
 * and prefixing every failure with the path to the offending leaf.
 *
 * The schema form is chosen by `t.constructor`:
 *  - Symbol   : primitive type tag, delegated to validatePrimitive
 *  - Array    : must contain exactly one element schema; `v` must be an array
 *               and each of its entries is validated against that schema
 *  - Object   : `v` must be a non-null object; each key of the schema is
 *               validated against the same key of `v`
 *  - Function : custom validator, called with `v`
 *
 * @param {*} t - schema
 * @param {*} v - value to validate
 * @returns {void} returns normally when `v` is valid
 * @throws {Error} when `v` does not match `t`, or `t` is not a supported
 *   schema; errors raised while checking a child carry the child's error as `cause`
 */
function validate(t, v) {
  switch (t?.constructor) {
    case Symbol:
      return validatePrimitive(t, v)
    case Array:
      if (t.length !== 1) throw Error("Array schema must specify exactly one type")
      // Shape guard sits outside the try so it is not reported as a child
      // failure; previously a null value produced "undefinedth child invalid".
      if (!Array.isArray(v))
        throw Error(`expected an array but got ${v === null ? "null" : typeof v}`)
      for (const k of Object.keys(v)) {
        try {
          validate(t[0], v[k])
        }
        catch (err) {
          // Keep the original error reachable for callers that need its stack.
          throw Error(`${k}th child invalid: ${err.message}`, { cause: err })
        }
      }
      break
    case Object:
      // Property access on a primitive yields undefined for every key, so a
      // schema made only of optional fields would otherwise accept e.g. 5.
      if (v === null || typeof v !== "object")
        throw Error(`expected an object but got ${v === null ? "null" : typeof v}`)
      for (const k of Object.keys(t)) {
        try {
          validate(t[k], v[k])
        }
        catch (err) {
          throw Error(`${k} invalid: ${err.message}`, { cause: err })
        }
      }
      break
    case Function:
      t(v)
      break
    default:
      throw Error(`unsupported schema: ${t}`)
  }
}
Maybe export common types like -
temail - a valid email address
tphone - numeric string with accepted punctuation
tpassword - string at least 20 characters
Choose "required" or "optional" as the default behavior. Currently these have the same effect -
const temployee = {
name: required(tstring),
...
}
const temployee = {
name: tstring, // null is not a string, so null will fail validation
...
}
This means required is implicit and we can remove it from our Schema module. Users are expected to use optional or withDefault when nullish values (null or undefined) are acceptable.
remarks
Remember all complex things are made from combining simple things. If you design things that cannot be combined, you are writing dead-end code.
This means we can write sophisticated validation expressions by combining other validation expressions! Consider adding validation combinators like "and" and "or", among others.
const tuser = {
newPassword:
// password must be
// at least 20 characters
// AND at most 40 characters
// AND include 2 symbols
and(minLength(20), maxLength(40), requireSymbols(2))
...
}
const tuser = {
newPassword:
// password can be
// at least 20 characters
// OR 8 characters AND includes 2 symbols
or(minLength(20), and(requireSymbols(2), minLength(8)))
...
}