There are several Python libraries available for validating JSON data, especially when it comes to complex schemas with fixed and user-defined keys. Here are some commonly used libraries, each with unique strengths and options for managing dynamic structures.
The most common are jsonschema, marshmallow, and pydantic; a complete example for each follows.
Using jsonschema
from jsonschema import validate, ValidationError
# JSON Schema for the payload: fixed string keys "a" and "b", a dynamic
# mapping "c", and a list of objects "d". All four keys are required and
# no other top-level keys are allowed.
schema = dict(
    type="object",
    properties=dict(
        a=dict(type="string"),
        b=dict(type="string"),
        c=dict(
            type="object",
            # Keys of "c" that match ^custom_ must hold a list of objects ...
            patternProperties={
                "^custom_": dict(type="array", items=dict(type="object")),
            },
            # ... and keys that do not match the pattern are rejected.
            additionalProperties=False,
        ),
        d=dict(type="array", items=dict(type="object")),
    ),
    required=["a", "b", "c", "d"],
    additionalProperties=False,
)
# Sample payload: two fixed string fields, a "c" mapping whose keys carry
# the custom_ prefix, and a "d" list of small objects.
data = dict(
    a="some value",
    b="another value",
    c=dict(
        custom_a=[dict(key1="value1"), dict(key2="value2")],
        custom_b=[dict(key3="value3")],
    ),
    d=[dict(key4="value4"), dict(key5="value5")],
)
# Check the sample payload against the schema and report the outcome.
try:
    validate(data, schema)
except ValidationError as err:
    # err.message carries the description of the failure.
    print("Validation failed:", err.message)
else:
    print("Validation successful!")
Using marshmallow
from marshmallow import Schema, fields, validate, ValidationError
class CustomEntrySchema(Schema):
    """Schema for one free-form entry dict, as nested inside 'c' and 'd'.

    No fields are declared; the Meta option below decides what happens to
    the keys an entry actually contains.
    """

    class Meta:
        # Undeclared keys are accepted and passed through rather than rejected,
        # so each entry may contain any keys and values.
        unknown = "include"
class MainSchema(Schema):
    """Top-level payload schema: required fields 'a', 'b', 'c' and 'd'."""

    # Fixed string fields.
    a = fields.String(required=True)
    b = fields.String(required=True)

    # Dynamic mapping: every key must match ^custom_ and every value is a
    # list of free-form entries.
    c = fields.Dict(
        required=True,
        keys=fields.String(validate=validate.Regexp(r"^custom_")),
        values=fields.List(fields.Nested(CustomEntrySchema)),
    )

    # List of free-form entries.
    d = fields.List(fields.Nested(CustomEntrySchema), required=True)
# Sample payload, assembled key by key: the fixed string fields first,
# then the custom_-prefixed mapping "c" and the entry list "d".
data = {"a": "some value", "b": "another value"}
data["c"] = {
    "custom_a": [{"key1": "value1"}, {"key2": "value2"}],
    "custom_b": [{"key3": "value3"}],
}
data["d"] = [{"key4": "value4"}, {"key5": "value5"}]
# Instantiate the schema, load the sample payload through it, and report
# the outcome.
schema = MainSchema()
try:
    schema.load(data)
except ValidationError as err:
    # err.messages holds the collected validation errors.
    print("Validation failed:", err.messages)
else:
    print("Validation successful!")
Using pydantic
from pydantic import BaseModel, Field, ValidationError, RootModel, model_validator
from typing import List, Dict
import re
class CustomEntryModel(RootModel[Dict[str, str]]):
    """One entry of 'c' or 'd': a dict with arbitrary string keys.

    The root type is Dict[str, str], so the key names are free but both
    keys and values are declared as strings.
    NOTE(review): widen the value type if entries should also carry
    non-string values.
    """
class MainModel(BaseModel):
    """Top-level payload model with fixed fields 'a', 'b' and 'd' plus a
    dynamic mapping 'c' whose keys must start with "custom_".

    NOTE(review): no `extra` setting is configured here, so the handling of
    unknown top-level keys is left at pydantic's default -- confirm it matches
    the strictness you need.
    """

    a: str
    b: str
    c: Dict[str, List[CustomEntryModel]]  # key prefix is enforced by the validator below
    d: List[CustomEntryModel]

    @model_validator(mode="before")
    @classmethod
    def validate_custom_keys(cls, values):
        """Reject any string key in 'c' that does not start with "custom_".

        Runs before field validation, so `values` is the raw, unvalidated
        input. Only the key prefix is checked here; wrong types (non-dict
        input, non-dict 'c', non-string keys) are passed through untouched
        so that field validation reports them as regular validation errors
        instead of this hook crashing with a TypeError or AttributeError.

        Returns:
            The input, unchanged.

        Raises:
            ValueError: if a string key in 'c' lacks the "custom_" prefix.
        """
        # Raw input is not guaranteed to be a dict in "before" mode.
        if not isinstance(values, dict):
            return values

        c_data = values.get("c")
        # A missing or mistyped 'c' is the field validator's job to report.
        if isinstance(c_data, dict):
            for key in c_data:
                # re.match would raise TypeError on a non-string key.
                if isinstance(key, str) and not re.match(r'^custom_', key):
                    raise ValueError(f"Key '{key}' in 'c' must start with 'custom_'")
        return values
# Sample payload: start from the fixed string fields, then attach the
# custom_-prefixed mapping "c" and the entry list "d".
data = {"a": "some value", "b": "another value"}
data.update(
    c={
        "custom_a": [{"key1": "value1"}, {"key2": "value2"}],
        "custom_b": [{"key3": "value3"}],
    },
    d=[{"key4": "value4"}, {"key5": "value5"}],
)
# Build the model from the sample payload; construction performs the
# validation. Report the outcome.
try:
    model = MainModel(**data)
except ValidationError as err:
    print("Validation failed:", err)
else:
    print("Validation successful!")
Output
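When I ran all three examples together in a single script, in the order shown, each validation succeeded. (The order matters: each example re-imports `ValidationError`, the marshmallow example re-imports `validate`, and `schema` and `data` are reassigned along the way, so every snippet relies on the names bound most recently.)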
Validation successful!
Validation successful!
Validation successful!