Skip to content

Commit

Permalink
feat: Evaluate method
Browse files Browse the repository at this point in the history
Signed-off-by: Dmitry Dygalo <[email protected]>
  • Loading branch information
Stranger6667 committed Jan 30, 2025
1 parent 33bce5a commit b464673
Show file tree
Hide file tree
Showing 3 changed files with 14,507 additions and 0 deletions.
81 changes: 81 additions & 0 deletions analyze_schemas.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
import json
from pathlib import Path
from jschon import create_catalog, JSON, JSONSchema, URI, LocalSource


def load_test_suite(path):
    """Parse the JSON file at ``path`` and return the decoded document.

    Args:
        path: Location of the JSON file; anything accepted by ``open``.

    Returns:
        The value produced by ``json.load`` for the file's content.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # Decode explicitly as UTF-8: without `encoding`, `open` falls back to the
    # locale's preferred encoding, which is not UTF-8 on every platform and
    # would garble or reject non-ASCII characters in the file.
    with open(path, encoding="utf-8") as f:
        return json.load(f)


def process_test_case(schema, test_cases):
    """Collect jschon's "basic" output errors for the invalid instances of one test group.

    Args:
        schema: Raw schema value of the test group.
        test_cases: The group's test entries; each is a mapping holding the
            instance under "data" and, optionally, a "valid" flag.

    Returns:
        A list of ``{"instance": ..., "errors": [...]}`` dicts, one per entry
        whose "valid" flag is falsy. The list is empty when the schema is the
        skipped ``$defs`` layout or when no entry is marked invalid.
    """
    # Schemas whose `$defs` is exactly this mapping are skipped outright.
    skipped_defs = {"true": True, "false": False}
    if isinstance(schema, dict) and schema.get("$defs") == skipped_defs:
        return []

    compiled = JSONSchema(
        schema,
        uri=URI("urn:test"),
        metaschema_uri=URI("https://json-schema.org/draft/2020-12/schema"),
    )

    collected = []
    for case in test_cases:
        # Entries without a "valid" key count as valid and are ignored.
        if case.get("valid", True):
            continue

        payload = case["data"]
        basic_output = compiled.evaluate(JSON(payload)).output("basic")
        collected.append({"instance": payload, "errors": basic_output.get("errors", [])})

    return collected


def main():
    """Generate the expected "basic" output dataset for the draft 2020-12 test suite.

    Creates a jschon 2020-12 catalog, registers local sources for the suite's
    remote schemas (``http://localhost:1234/``) and for the draft 2020-12
    metaschemas, runs every test group found in the suite directory through
    ``process_test_case`` and writes the groups that produced at least one
    entry to ``output_basic_draft2020_12.json`` in the current working
    directory.

    All input paths are relative — presumably the script is meant to be run
    from the repository root (TODO confirm).
    """
    catalog = create_catalog("2020-12")

    test_suite_path = Path("crates/jsonschema/tests/suite/tests/draft2020-12")

    catalog.add_uri_source(
        URI("http://localhost:1234/"),
        LocalSource(Path("crates/jsonschema/tests/suite/remotes/"), suffix=""),
    )
    catalog.add_uri_source(
        URI("https://json-schema.org/draft/2020-12/"),
        LocalSource(
            Path("crates/jsonschema-referencing/metaschemas/draft2020-12"),
            suffix=".json",
        ),
    )

    output = {"tests": []}

    # `Path.glob` yields entries in arbitrary (filesystem) order; sorting keeps
    # the generated dataset identical between runs and machines.
    for test_file in sorted(test_suite_path.glob("*.json")):
        test_cases = load_test_suite(test_file)

        for test_group in test_cases:
            schema = test_group["schema"]
            instances = process_test_case(schema, test_group["tests"])

            # Groups that yielded no entries are left out of the dataset.
            if instances:
                output["tests"].append(
                    {
                        "schema": schema,
                        "schema_id": test_file.stem,
                        "instances": instances,
                    }
                )

    # Save the dataset
    with open("output_basic_draft2020_12.json", "w", encoding="utf-8") as f:
        json.dump(output, f, indent=2)


# Script entry point: run the generator only when this file is executed directly.
if __name__ == "__main__":
    main()
127 changes: 127 additions & 0 deletions crates/jsonschema/tests/output.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
use std::collections::BTreeSet;

use ahash::HashSet;
use serde_json::json;
use test_case::test_case;

Expand Down Expand Up @@ -1017,3 +1020,127 @@ fn test_additional_properties_basic_output(
panic!("\nExpected:\n{}\n\nGot:\n{}\n", expected_str, actual_str);
}
}

/// Root document of the embedded `output_basic_draft2020_12.json` expectations file.
#[derive(serde::Deserialize)]
struct TestSuite {
    /// Every schema together with the instances recorded for it.
    tests: Vec<OutputTest>,
}

/// One schema from the expectations file and the instances recorded against it.
#[derive(serde::Deserialize)]
struct OutputTest {
    /// The schema to build a validator from.
    schema: serde_json::Value,
    /// Label of the group this schema belongs to; `test_error_locations` filters on it.
    schema_id: String,
    /// Instances with the errors expected for each of them.
    instances: Vec<Instance>,
}

/// An instance paired with the errors expected when it is validated.
#[derive(serde::Deserialize)]
struct Instance {
    /// The JSON value to validate.
    instance: serde_json::Value,
    /// Expected errors, kept as raw JSON values.
    errors: Vec<serde_json::Value>,
}

/// Checks that the error locations in this crate's `basic` output match the recorded dataset.
///
/// Expectations are read from `output_basic_draft2020_12.json`, embedded at compile time.
/// For every instance, the set of `(keywordLocation, instanceLocation, absoluteKeywordLocation)`
/// triplets produced by the validator must equal the set recorded in the dataset. Only these
/// three fields are compared, and duplicates collapse because both sides are sets.
///
/// # Panics
///
/// Panics if the dataset cannot be parsed, a schema fails to compile, an instance unexpectedly
/// validates, an output unit cannot be serialized, or the two sets of locations differ.
#[test]
fn test_error_locations() {
    /// Reads the string stored under `key`, falling back to an empty string when the key is
    /// missing or does not hold a string.
    fn location(err: &serde_json::Value, key: &str) -> String {
        err.get(key)
            .and_then(|v| v.as_str())
            .unwrap_or("")
            .to_string()
    }

    /// Extracts only `keywordLocation`, `instanceLocation`, and `absoluteKeywordLocation`.
    fn normalize_error(err: &serde_json::Value) -> (String, String, String) {
        (
            location(err, "keywordLocation"),
            location(err, "instanceLocation"),
            location(err, "absoluteKeywordLocation"),
        )
    }

    let data: TestSuite = serde_json::from_str(include_str!("output_basic_draft2020_12.json"))
        .expect("Invalid output tests");

    for mut test in data.tests {
        // Only the `oneOf` group is compared; every other group is skipped.
        if test.schema_id != "oneOf" {
            continue;
        }
        // Object schemas without an `$id` get `urn:test`, the URI the generator script
        // (`analyze_schemas.py`) assigns to every schema it evaluates.
        if test.schema.is_object() && test.schema["$id"].is_null() {
            test.schema["$id"] = "urn:test".into();
        }
        let validator = jsonschema::validator_for(&test.schema).expect("Invalid schema");

        for instance in test.instances {
            let jsonschema::BasicOutput::Invalid(result) =
                validator.apply(&instance.instance).basic()
            else {
                panic!(
                    "Instance {:?} should not be valid against schema {}",
                    &instance.instance, &test.schema_id
                );
            };

            let expected_errors: HashSet<_> = instance.errors.iter().map(normalize_error).collect();
            let actual_errors: HashSet<_> = result
                .iter()
                .map(|e| {
                    // A unit that fails to serialize must fail the test loudly instead of
                    // being compared as an all-empty triplet.
                    let unit =
                        serde_json::to_value(e).expect("Output unit should be serializable");
                    normalize_error(&unit)
                })
                .collect();

            // Ordered sets keep the failure report stable between runs.
            let missing_errors: BTreeSet<_> = expected_errors.difference(&actual_errors).collect();
            let extra_errors: BTreeSet<_> = actual_errors.difference(&expected_errors).collect();

            if !missing_errors.is_empty() || !extra_errors.is_empty() {
                panic!(
                    "\nMismatched errors for instance:\n\
                    - Schema ID: {}\n\
                    - Schema:\n{}\n\
                    - Instance:\n{}\n\
                    - Expected count: {}\n\
                    - Actual count: {}\n\
                    - Missing errors:\n{}\n\
                    - Extra errors:\n{}\n",
                    test.schema_id,
                    serde_json::to_string_pretty(&test.schema)
                        .unwrap_or_else(|_| "<failed to serialize schema>".to_string()),
                    serde_json::to_string_pretty(&instance.instance)
                        .unwrap_or_else(|_| "<failed to serialize instance>".to_string()),
                    expected_errors.len(),
                    actual_errors.len(),
                    format_error_list(&missing_errors),
                    format_error_list(&extra_errors)
                );
            }
        }
    }
}

/// Renders location triplets as a bullet list for use in failure messages.
///
/// Each element of `errors` is a `(keywordLocation, instanceLocation, absoluteKeywordLocation)`
/// tuple. Entries are emitted in the set's iteration order and joined with newlines; an empty
/// set is rendered as the ` (none)` placeholder.
fn format_error_list(errors: &BTreeSet<&(String, String, String)>) -> String {
    if errors.is_empty() {
        String::from(" (none)")
    } else {
        let mut bullets = Vec::with_capacity(errors.len());
        for (keyword_location, instance_location, absolute_location) in errors.iter().copied() {
            bullets.push(format!(
                " - keyword: {}\n instance: {}\n absolute: {}",
                keyword_location, instance_location, absolute_location
            ));
        }
        bullets.join("\n")
    }
}
Loading

0 comments on commit b464673

Please sign in to comment.