94 lines
2.2 KiB
JSON
94 lines
2.2 KiB
JSON
{
|
|
"run_id": "eval-xJm3dBJ6oZ",
|
|
"timestamp": "2026-03-23T13:48:54.952Z",
|
|
"cases_total": 10,
|
|
"metrics": {
|
|
"schema_validation_pass_rate": 100,
|
|
"intent_class_accuracy": 80,
|
|
"route_hint_accuracy": 100,
|
|
"causal_flag_accuracy": 100,
|
|
"high_confidence_error_rate": 0
|
|
},
|
|
"results": [
|
|
{
|
|
"case_id": "NQ-001",
|
|
"validation_passed": true,
|
|
"intent_match": true,
|
|
"route_match": true,
|
|
"causal_flags_match": true,
|
|
"trace_id": "phW6TQF_zwo4ps"
|
|
},
|
|
{
|
|
"case_id": "NQ-002",
|
|
"validation_passed": true,
|
|
"intent_match": true,
|
|
"route_match": true,
|
|
"causal_flags_match": true,
|
|
"trace_id": "HFw3C2vcTq98Jl"
|
|
},
|
|
{
|
|
"case_id": "NQ-003",
|
|
"validation_passed": true,
|
|
"intent_match": true,
|
|
"route_match": true,
|
|
"causal_flags_match": true,
|
|
"trace_id": "SHAUEgGSDSpOnQ"
|
|
},
|
|
{
|
|
"case_id": "NQ-004",
|
|
"validation_passed": true,
|
|
"intent_match": true,
|
|
"route_match": true,
|
|
"causal_flags_match": true,
|
|
"trace_id": "psgRyAubKKbNIb"
|
|
},
|
|
{
|
|
"case_id": "NQ-005",
|
|
"validation_passed": true,
|
|
"intent_match": false,
|
|
"route_match": true,
|
|
"causal_flags_match": true,
|
|
"trace_id": "_8FofWSTg5zVKK"
|
|
},
|
|
{
|
|
"case_id": "NQ-006",
|
|
"validation_passed": true,
|
|
"intent_match": true,
|
|
"route_match": true,
|
|
"causal_flags_match": true,
|
|
"trace_id": "NVvbOsKjvEDCJ2"
|
|
},
|
|
{
|
|
"case_id": "NQ-007",
|
|
"validation_passed": true,
|
|
"intent_match": true,
|
|
"route_match": true,
|
|
"causal_flags_match": true,
|
|
"trace_id": "7gponohJuP_3ya"
|
|
},
|
|
{
|
|
"case_id": "NQ-008",
|
|
"validation_passed": true,
|
|
"intent_match": true,
|
|
"route_match": true,
|
|
"causal_flags_match": true,
|
|
"trace_id": "HdD4JkYZzN3jOX"
|
|
},
|
|
{
|
|
"case_id": "NQ-009",
|
|
"validation_passed": true,
|
|
"intent_match": false,
|
|
"route_match": true,
|
|
"causal_flags_match": true,
|
|
"trace_id": "ApCNbWg0IG-BPE"
|
|
},
|
|
{
|
|
"case_id": "NQ-010",
|
|
"validation_passed": true,
|
|
"intent_match": true,
|
|
"route_match": true,
|
|
"causal_flags_match": true,
|
|
"trace_id": "Aram-Qxzh6zRSP"
|
|
}
|
|
]
|
|
} |