102 lines
2.4 KiB
JSON
102 lines
2.4 KiB
JSON
{
|
|
"run_id": "eval-YxrhL2dCcH",
|
|
"timestamp": "2026-03-23T13:55:23.242Z",
|
|
"cases_total": 11,
|
|
"metrics": {
|
|
"schema_validation_pass_rate": 100,
|
|
"intent_class_accuracy": 72.73,
|
|
"route_hint_accuracy": 90.91,
|
|
"causal_flag_accuracy": 81.82,
|
|
"high_confidence_error_rate": 9.09
|
|
},
|
|
"results": [
|
|
{
|
|
"case_id": "NQ-001",
|
|
"validation_passed": true,
|
|
"intent_match": true,
|
|
"route_match": true,
|
|
"causal_flags_match": true,
|
|
"trace_id": "WBCFIgEMf1OFC-"
|
|
},
|
|
{
|
|
"case_id": "NQ-002",
|
|
"validation_passed": true,
|
|
"intent_match": true,
|
|
"route_match": true,
|
|
"causal_flags_match": true,
|
|
"trace_id": "COIYD-5V_3EChj"
|
|
},
|
|
{
|
|
"case_id": "NQ-003",
|
|
"validation_passed": true,
|
|
"intent_match": true,
|
|
"route_match": true,
|
|
"causal_flags_match": true,
|
|
"trace_id": "8s-4eCbeXywVVK"
|
|
},
|
|
{
|
|
"case_id": "NQ-004",
|
|
"validation_passed": true,
|
|
"intent_match": false,
|
|
"route_match": true,
|
|
"causal_flags_match": false,
|
|
"trace_id": "h5BLdC1oBO0wY6"
|
|
},
|
|
{
|
|
"case_id": "NQ-005",
|
|
"validation_passed": true,
|
|
"intent_match": true,
|
|
"route_match": true,
|
|
"causal_flags_match": true,
|
|
"trace_id": "p7A-wb3EfMen5P"
|
|
},
|
|
{
|
|
"case_id": "NQ-006",
|
|
"validation_passed": true,
|
|
"intent_match": true,
|
|
"route_match": true,
|
|
"causal_flags_match": true,
|
|
"trace_id": "Um8MUe8yfaKLxw"
|
|
},
|
|
{
|
|
"case_id": "NQ-007",
|
|
"validation_passed": true,
|
|
"intent_match": true,
|
|
"route_match": true,
|
|
"causal_flags_match": true,
|
|
"trace_id": "I6V3tELpvi6k1i"
|
|
},
|
|
{
|
|
"case_id": "NQ-008",
|
|
"validation_passed": true,
|
|
"intent_match": false,
|
|
"route_match": true,
|
|
"causal_flags_match": false,
|
|
"trace_id": "iel5ScdccVZ4zT"
|
|
},
|
|
{
|
|
"case_id": "NQ-009",
|
|
"validation_passed": true,
|
|
"intent_match": false,
|
|
"route_match": false,
|
|
"causal_flags_match": true,
|
|
"trace_id": "1C9MATbKvo5FnF"
|
|
},
|
|
{
|
|
"case_id": "NQ-010",
|
|
"validation_passed": true,
|
|
"intent_match": true,
|
|
"route_match": true,
|
|
"causal_flags_match": true,
|
|
"trace_id": "ti9EWEi85p-RnD"
|
|
},
|
|
{
|
|
"case_id": "NQ-1774273974528",
|
|
"validation_passed": true,
|
|
"intent_match": true,
|
|
"route_match": true,
|
|
"causal_flags_match": true,
|
|
"trace_id": "lSYWnMUueaZgc2"
|
|
}
|
|
]
|
|
} |