# Read one example JSON file into an R list.
#
# file: name of the JSON file to read.
# dir:  sub-directory of "json-schema" that holds the file.
#
# The path is assembled with here::here("json-schema", dir, file). Vectors are
# simplified on read, but data frame simplification is switched off.
load_json_example <- function(file, dir = "modified-hubmeta-examples") {
  example_path <- here::here("json-schema", dir, file)
  jsonlite::read_json(
    path = example_path,
    simplifyVector = TRUE,
    simplifyDataFrame = FALSE
  )
}
# Validate the file at `path` with the `validator` object created elsewhere
# in this file (called with verbose = TRUE).
#
# Returns the validation result when it is TRUE. Otherwise the "errors"
# attribute of the result is opened with View(), and the value of that
# View() call is what gets returned.
apply_validator <- function(path) {
  validated <- validator(path, verbose = TRUE)
  # Failure path first: show the attached errors and stop here.
  if (!validated) {
    return(View(attr(validated, "errors")))
  }
  validated
}
Using JSON schema to validate Hub Metadata
File names & paths
# File names of the two modified example hub metadata files.
simple_mod_file <- "simple-hubmeta-mod.json"
complex_mod_file <- "complex-hubmeta-mod.json"

# Paths to each example, built with here::here().
simple_mod_path <- here::here(
  "json-schema",
  "modified-hubmeta-examples",
  simple_mod_file
)
complex_mod_path <- here::here(
  "json-schema",
  "modified-hubmeta-examples",
  complex_mod_file
)
Modified examples
I’ve modified the original examples, primarily to enable generalised validation of multiple round specifications. This will affect current hubUtils functionality, but now is a good time to change it if necessary.
Simple modified example
hub-infrastructure-experiments/json-schema/modified-hubmeta-examples/simple-hubmeta-mod.json
You can navigate the file by clicking below
simple = FileAttachment("modified-hubmeta-examples/simple-hubmeta-mod.json").json()
simple
Complex modified example
hub-infrastructure-experiments/json-schema/modified-hubmeta-examples/complex-hubmeta-mod.json
You can navigate the file by clicking below
complex = FileAttachment("modified-hubmeta-examples/complex-hubmeta-mod.json").json()
complex
Validate
Load the validator from hub-infrastructure-experiments/json-schema/hubmeta-schema.json
You can navigate the schema below
schema = FileAttachment("hubmeta-schema.json").json()
schema
# Build a validator from hubmeta-schema.json using the "ajv" engine.
validator <- jsonvalidate::json_validator(
  schema = here::here("json-schema", "hubmeta-schema.json"),
  engine = "ajv"
)
# Validate the simple and the complex modified examples.
validator(simple_mod_path, verbose = TRUE)
[1] TRUE
validator(complex_mod_path, verbose = TRUE)
[1] TRUE
# Inspect the "errors" attribute attached to the complex example's result.
attr(validator(complex_mod_path, verbose = TRUE), "errors")
NULL
The 3 errors still present when validating the complex hubmeta arise from the fact that I’ve not yet figured out how to handle values which either should be a typed array or could contain a "$ref" = "#defs" key-value pair. Given this functionality should be available to any property, I need to figure out how to encode that in the schema (rather than for each property individually).
Experiments to address $ref validation
For context see https://github.com/Infectious-Disease-Modeling-Hubs/schemas/issues/1
# Read JSON into an R list
complex_mod_path <- here::here(
  "json-schema",
  "modified-hubmeta-examples",
  "complex-hubmeta-mod.json"
)
json_list <- jsonlite::read_json(
  complex_mod_path,
  simplifyVector = TRUE,
  simplifyDataFrame = FALSE
)
# Attempt at serialising without unboxing.
# All vectors serialised as arrays
jsonlite::toJSON(
  json_list,
  null = "null",
  na = "string",
  pretty = TRUE
)
{
"rounds": [
{
"round_id": ["round-1"],
"model_tasks": [
{
"task_ids": {
"origin_date": {
"required": ["2022-09-03"],
"optional": null
},
"scenario_id": {
"required": [1],
"optional": null
},
"location": {
"required": [
{
"$ref": ["#/$defs/task_ids/location/us_states"]
}
],
"optional": ["US"]
},
"target": {
"required": null,
"optional": ["weekly rate"]
},
"horizon": {
"required": null,
"optional": [1, 2]
}
},
"output_types": {
"mean": {
"type_id": {
"required": null,
"optional": ["NA"]
},
"value": {
"type": ["integer"],
"minimum": [0]
}
},
"quantile": {
"type_id": {
"required": [0.25, 0.5, 0.75],
"optional": [0.1, 0.2, 0.3, 0.4, 0.6, 0.7, 0.8, 0.9]
},
"value": {
"type": ["numeric"],
"minimum": [0],
"maximum": [1]
}
},
"cdf": {
"type_id": {
"required": [10, 20],
"optional": null
},
"value": {
"type": ["numeric"],
"minimum": [0],
"maximum": [1]
}
}
}
},
{
"task_ids": {
"origin_date": {
"required": ["2022-09-03"],
"optional": null
},
"scenario_id": {
"required": [1],
"optional": null
},
"location": {
"required": [
{
"$ref": ["#/$defs/task_ids/location/us_states"]
}
],
"optional": ["US"]
},
"target": {
"required": null,
"optional": ["peak week"]
},
"horizon": {
"required": null,
"optional": ["NA"]
}
},
"output_types": {
"cdf": {
"type_id": {
"required": ["EW202240", "EW202241", "EW202242", "EW202243", "EW202244", "EW202245", "EW202246", "EW202247", "EW202248", "EW202249", "EW202250", "EW202251", "EW202252", "EW202301", "EW202302", "EW202303", "EW202304", "EW202305", "EW202306", "EW202307", "EW202308", "EW202309", "EW202310", "EW202311", "EW202312", "EW202313", "EW202314", "EW202315", "EW202316", "EW202317", "EW202318", "EW202319", "EW202320"],
"optional": null
},
"value": {
"type": ["numeric"],
"minimum": [0]
}
}
}
}
],
"submissions_due": {
"start": ["2022-09-01"],
"end": ["2022-09-05"]
}
},
{
"round_id": ["round-2"],
"model_tasks": [
{
"task_ids": {
"origin_date": {
"required": ["2022-10-01"],
"optional": null
},
"scenario_id": {
"required": null,
"optional": [2, 3]
},
"location": {
"required": [
{
"$ref": ["#/$defs/task_ids/location/us_states"]
}
],
"optional": ["US"]
},
"target": {
"required": null,
"optional": ["weekly rate"]
},
"age_group": {
"required": null,
"optional": ["0-5", "6-18", "19-24", "25-64", "65+"]
},
"horizon": {
"required": null,
"optional": [1, 2]
}
},
"output_types": {
"quantile": {
"type_id": {
"required": [0.25, 0.5, 0.75],
"optional": [0.1, 0.2, 0.3, 0.4, 0.6, 0.7, 0.8, 0.9]
},
"value": {
"type": ["integer"],
"minimum": [0]
}
}
}
}
],
"submissions_due": {
"start": ["2022-09-28"],
"end": ["2022-10-01"]
},
"last_data_date": ["2022-09-30"]
}
],
"$defs": {
"task_ids": {
"location": {
"us_states": ["01", "02", "04", "05", "06", "08", "09", "10", "11", "12", "13", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", "30", "31", "32", "33", "34", "35", "36", "37", "38", "39", "40", "41", "42", "44", "45", "46", "47", "48", "49", "50", "51", "53", "54", "55", "56"]
}
}
}
}
# Attempt at serialising with unboxing.
# All 1 length vectors serialised as single values
jsonlite::toJSON(
  json_list,
  null = "null",
  na = "string",
  pretty = TRUE,
  auto_unbox = TRUE
)
{
"rounds": [
{
"round_id": "round-1",
"model_tasks": [
{
"task_ids": {
"origin_date": {
"required": "2022-09-03",
"optional": null
},
"scenario_id": {
"required": 1,
"optional": null
},
"location": {
"required": [
{
"$ref": "#/$defs/task_ids/location/us_states"
}
],
"optional": "US"
},
"target": {
"required": null,
"optional": "weekly rate"
},
"horizon": {
"required": null,
"optional": [1, 2]
}
},
"output_types": {
"mean": {
"type_id": {
"required": null,
"optional": "NA"
},
"value": {
"type": "integer",
"minimum": 0
}
},
"quantile": {
"type_id": {
"required": [0.25, 0.5, 0.75],
"optional": [0.1, 0.2, 0.3, 0.4, 0.6, 0.7, 0.8, 0.9]
},
"value": {
"type": "numeric",
"minimum": 0,
"maximum": 1
}
},
"cdf": {
"type_id": {
"required": [10, 20],
"optional": null
},
"value": {
"type": "numeric",
"minimum": 0,
"maximum": 1
}
}
}
},
{
"task_ids": {
"origin_date": {
"required": "2022-09-03",
"optional": null
},
"scenario_id": {
"required": 1,
"optional": null
},
"location": {
"required": [
{
"$ref": "#/$defs/task_ids/location/us_states"
}
],
"optional": "US"
},
"target": {
"required": null,
"optional": "peak week"
},
"horizon": {
"required": null,
"optional": "NA"
}
},
"output_types": {
"cdf": {
"type_id": {
"required": ["EW202240", "EW202241", "EW202242", "EW202243", "EW202244", "EW202245", "EW202246", "EW202247", "EW202248", "EW202249", "EW202250", "EW202251", "EW202252", "EW202301", "EW202302", "EW202303", "EW202304", "EW202305", "EW202306", "EW202307", "EW202308", "EW202309", "EW202310", "EW202311", "EW202312", "EW202313", "EW202314", "EW202315", "EW202316", "EW202317", "EW202318", "EW202319", "EW202320"],
"optional": null
},
"value": {
"type": "numeric",
"minimum": 0
}
}
}
}
],
"submissions_due": {
"start": "2022-09-01",
"end": "2022-09-05"
}
},
{
"round_id": "round-2",
"model_tasks": [
{
"task_ids": {
"origin_date": {
"required": "2022-10-01",
"optional": null
},
"scenario_id": {
"required": null,
"optional": [2, 3]
},
"location": {
"required": [
{
"$ref": "#/$defs/task_ids/location/us_states"
}
],
"optional": "US"
},
"target": {
"required": null,
"optional": "weekly rate"
},
"age_group": {
"required": null,
"optional": ["0-5", "6-18", "19-24", "25-64", "65+"]
},
"horizon": {
"required": null,
"optional": [1, 2]
}
},
"output_types": {
"quantile": {
"type_id": {
"required": [0.25, 0.5, 0.75],
"optional": [0.1, 0.2, 0.3, 0.4, 0.6, 0.7, 0.8, 0.9]
},
"value": {
"type": "integer",
"minimum": 0
}
}
}
}
],
"submissions_due": {
"start": "2022-09-28",
"end": "2022-10-01"
},
"last_data_date": "2022-09-30"
}
],
"$defs": {
"task_ids": {
"location": {
"us_states": ["01", "02", "04", "05", "06", "08", "09", "10", "11", "12", "13", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", "30", "31", "32", "33", "34", "35", "36", "37", "38", "39", "40", "41", "42", "44", "45", "46", "47", "48", "49", "50", "51", "53", "54", "55", "56"]
}
}
}
}
# Read JSON into an R list
complex_mod_path <- here::here(
  "json-schema",
  "modified-hubmeta-examples",
  "complex-hubmeta-mod.json"
)
json_list <- jsonlite::read_json(
  complex_mod_path,
  simplifyVector = TRUE,
  simplifyDataFrame = FALSE
)
# Create new schema instance
schema <- jsonvalidate::json_schema$new(
  schema = here::here("json-schema", "hubmeta-schema.json"),
  engine = "ajv"
)
# Use Schema to serialise list to JSON
json <- schema$serialise(json_list)
Error in context_eval(join(src), private$context, serialize, await): TypeError: Cannot convert undefined or null to object
# Use Schema to validate JSON
schema$validate(json)
Error in get_string(json): object 'json' not found