Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Incorrect output data #705

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 81 additions & 0 deletions analyze_schemas.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
import json
from pathlib import Path
from jschon import create_catalog, JSON, JSONSchema, URI, LocalSource


def load_test_suite(path):
    """Read one JSON test-suite file and return its parsed content.

    Args:
        path: Location of the JSON file (anything ``open`` accepts).

    Returns:
        The decoded JSON document.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale encoding when a file contains non-ASCII characters.
    with open(path, encoding="utf-8") as f:
        return json.load(f)


def process_test_case(schema, test_cases):
    """Collect the "basic" output errors for every invalid test of one group.

    Args:
        schema: The raw schema of the test group (a dict or a boolean).
        test_cases: The group's tests; each is a mapping with a ``data``
            entry and an optional ``valid`` flag.

    Returns:
        A list of ``{"instance": ..., "errors": [...]}`` dicts, one per test
        whose ``valid`` flag is falsy. Empty when the schema is skipped.
    """
    # Schemas whose `$defs` is exactly this mapping are skipped.
    # NOTE(review): the reason for the skip is not visible here - confirm.
    skipped_defs = {
        "true": True,
        "false": False,
    }
    if isinstance(schema, dict) and schema.get("$defs") == skipped_defs:
        return []

    compiled = JSONSchema(
        schema,
        uri=URI("urn:test"),
        metaschema_uri=URI("https://json-schema.org/draft/2020-12/schema"),
    )

    collected = []
    for case in test_cases:
        # Tests without a `valid` flag count as valid and are ignored.
        if case.get("valid", True):
            continue

        data = case["data"]
        basic_output = compiled.evaluate(JSON(data)).output("basic")
        collected.append(
            {"instance": data, "errors": basic_output.get("errors", [])}
        )

    return collected


def main():
    """Generate ``output_basic_draft2020_12.json`` from the draft 2020-12 suite.

    Registers local sources for the suite's remote schemas and for the
    2020-12 metaschemas, runs every test group of every ``*.json`` file in
    the suite directory through ``process_test_case`` and writes the groups
    that produced at least one entry to the output file.
    """
    catalog = create_catalog("2020-12")

    test_suite_path = Path("crates/jsonschema/tests/suite/tests/draft2020-12")

    catalog.add_uri_source(
        URI("http://localhost:1234/"),
        LocalSource(Path("crates/jsonschema/tests/suite/remotes/"), suffix=""),
    )
    catalog.add_uri_source(
        URI("https://json-schema.org/draft/2020-12/"),
        LocalSource(
            Path("crates/jsonschema-referencing/metaschemas/draft2020-12"),
            suffix=".json",
        ),
    )

    output = {"tests": []}

    # Process each test file. Directory listing order depends on the
    # filesystem, so sort to keep the generated dataset reproducible.
    for test_file in sorted(test_suite_path.glob("*.json")):
        test_cases = load_test_suite(test_file)

        for test_group in test_cases:
            schema = test_group["schema"]
            instances = process_test_case(schema, test_group["tests"])

            if instances:
                output["tests"].append(
                    {
                        "schema": schema,
                        "schema_id": test_file.stem,
                        "instances": instances,
                    }
                )

    # Save the dataset.
    # NOTE(review): this writes into the current working directory, while
    # crates/jsonschema/tests/output.rs pulls the file in with
    # `include_str!("output_basic_draft2020_12.json")` - confirm the file is
    # copied next to that test after generation.
    with open("output_basic_draft2020_12.json", "w", encoding="utf-8") as f:
        json.dump(output, f, indent=2)


# Run the dataset generation only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    main()
127 changes: 127 additions & 0 deletions crates/jsonschema/tests/output.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
use std::collections::BTreeSet;

use ahash::HashSet;
use serde_json::json;
use test_case::test_case;

Expand Down Expand Up @@ -1017,3 +1020,127 @@ fn test_additional_properties_basic_output(
panic!("\nExpected:\n{}\n\nGot:\n{}\n", expected_str, actual_str);
}
}

/// Top-level shape of `output_basic_draft2020_12.json`, the fixture that
/// `test_error_locations` deserializes.
#[derive(serde::Deserialize)]
struct TestSuite {
    /// One entry per schema that has at least one recorded instance.
    tests: Vec<OutputTest>,
}

/// A schema together with the instances recorded for it in the fixture.
#[derive(serde::Deserialize)]
struct OutputTest {
    /// The schema the validator is built from.
    schema: serde_json::Value,
    /// Identifier of the test group; `analyze_schemas.py` fills it with the
    /// stem of the test-suite file the schema came from (e.g. `oneOf`).
    schema_id: String,
    /// Instances paired with the errors expected for them.
    instances: Vec<Instance>,
}

/// One instance and the errors recorded for it in the fixture.
#[derive(serde::Deserialize)]
struct Instance {
    /// The data to validate; `test_error_locations` requires it to be invalid.
    instance: serde_json::Value,
    /// Expected error units; only their `keywordLocation`, `instanceLocation`
    /// and `absoluteKeywordLocation` members are compared by the test.
    errors: Vec<serde_json::Value>,
}

/// Compares the error locations of the `basic` output against the expected
/// ones stored in `output_basic_draft2020_12.json`.
///
/// Errors are compared as sets of
/// `(keywordLocation, instanceLocation, absoluteKeywordLocation)` triplets, so
/// neither their order nor their messages matter.
///
/// # Panics
///
/// Panics when the fixture or a schema is invalid, when an instance is
/// reported as valid, or when the expected and actual triplets differ.
#[test]
fn test_error_locations() {
    /// Reduces an output unit to its
    /// `(keywordLocation, instanceLocation, absoluteKeywordLocation)` triplet.
    /// A member that is missing or not a string becomes an empty string.
    fn normalize_error(err: &serde_json::Value) -> (String, String, String) {
        let location = |key: &str| {
            err.get(key)
                .and_then(serde_json::Value::as_str)
                .unwrap_or("")
                .to_string()
        };
        (
            location("keywordLocation"),
            location("instanceLocation"),
            location("absoluteKeywordLocation"),
        )
    }

    let data: TestSuite = serde_json::from_str(include_str!("output_basic_draft2020_12.json"))
        .expect("Invalid output tests");

    for mut test in data.tests {
        // NOTE(review): only the `oneOf` group is checked; every other group in
        // the fixture is skipped - confirm whether this filter is temporary.
        if test.schema_id != "oneOf" {
            continue;
        }
        // Object schemas without an `$id` get `urn:test`, the same URI the
        // generator script assigns - presumably so that absolute keyword
        // locations line up with the expected data.
        if test.schema.is_object() && test.schema["$id"].is_null() {
            test.schema["$id"] = "urn:test".into();
        }
        let validator = jsonschema::validator_for(&test.schema).expect("Invalid schema");

        for instance in test.instances {
            let jsonschema::BasicOutput::Invalid(result) =
                validator.apply(&instance.instance).basic()
            else {
                panic!(
                    "Instance {:?} should not be valid against schema {}",
                    &instance.instance, &test.schema_id
                );
            };

            let expected_errors: HashSet<_> =
                instance.errors.iter().map(normalize_error).collect();
            let actual_errors: HashSet<_> = result
                .iter()
                .map(|unit| {
                    // A unit that cannot be serialized must fail the test
                    // instead of being compared as an empty triplet.
                    let value = serde_json::to_value(unit)
                        .expect("Output unit should serialize to JSON");
                    normalize_error(&value)
                })
                .collect();

            // Ordered sets keep the failure report stable between runs.
            let missing_errors: BTreeSet<_> =
                expected_errors.difference(&actual_errors).collect();
            let extra_errors: BTreeSet<_> =
                actual_errors.difference(&expected_errors).collect();

            if !missing_errors.is_empty() || !extra_errors.is_empty() {
                panic!(
                    "\nMismatched errors for instance:\n\
                    - Schema ID: {}\n\
                    - Schema:\n{}\n\
                    - Instance:\n{}\n\
                    - Expected count: {}\n\
                    - Actual count: {}\n\
                    - Missing errors:\n{}\n\
                    - Extra errors:\n{}\n",
                    test.schema_id,
                    serde_json::to_string_pretty(&test.schema)
                        .unwrap_or_else(|_| "<failed to serialize schema>".to_string()),
                    serde_json::to_string_pretty(&instance.instance)
                        .unwrap_or_else(|_| "<failed to serialize instance>".to_string()),
                    expected_errors.len(),
                    actual_errors.len(),
                    format_error_list(&missing_errors),
                    format_error_list(&extra_errors)
                );
            }
        }
    }
}

/// Renders `(keywordLocation, instanceLocation, absoluteKeywordLocation)` triplets as a
/// multi-line list, one entry per triplet in the set's order.
///
/// An empty set is rendered as a `(none)` placeholder.
fn format_error_list(errors: &BTreeSet<&(String, String, String)>) -> String {
    let mut entries = Vec::with_capacity(errors.len());
    for triplet in errors {
        let (keyword, instance, absolute) = *triplet;
        entries.push(format!(
            " - keyword: {}\n instance: {}\n absolute: {}",
            keyword, instance, absolute
        ));
    }
    if entries.is_empty() {
        " (none)".to_string()
    } else {
        entries.join("\n")
    }
}
Loading
Loading