NODEDC_1C/llm_normalizer/data/eval_cases/eval-YxrhL2dCcH.report.json

103 lines
2.4 KiB
JSON

{
"run_id": "eval-YxrhL2dCcH",
"timestamp": "2026-03-23T13:55:23.242Z",
"cases_total": 11,
"metrics": {
"schema_validation_pass_rate": 100,
"intent_class_accuracy": 72.73,
"route_hint_accuracy": 90.91,
"causal_flag_accuracy": 81.82,
"high_confidence_error_rate": 9.09
},
"results": [
{
"case_id": "NQ-001",
"validation_passed": true,
"intent_match": true,
"route_match": true,
"causal_flags_match": true,
"trace_id": "WBCFIgEMf1OFC-"
},
{
"case_id": "NQ-002",
"validation_passed": true,
"intent_match": true,
"route_match": true,
"causal_flags_match": true,
"trace_id": "COIYD-5V_3EChj"
},
{
"case_id": "NQ-003",
"validation_passed": true,
"intent_match": true,
"route_match": true,
"causal_flags_match": true,
"trace_id": "8s-4eCbeXywVVK"
},
{
"case_id": "NQ-004",
"validation_passed": true,
"intent_match": false,
"route_match": true,
"causal_flags_match": false,
"trace_id": "h5BLdC1oBO0wY6"
},
{
"case_id": "NQ-005",
"validation_passed": true,
"intent_match": true,
"route_match": true,
"causal_flags_match": true,
"trace_id": "p7A-wb3EfMen5P"
},
{
"case_id": "NQ-006",
"validation_passed": true,
"intent_match": true,
"route_match": true,
"causal_flags_match": true,
"trace_id": "Um8MUe8yfaKLxw"
},
{
"case_id": "NQ-007",
"validation_passed": true,
"intent_match": true,
"route_match": true,
"causal_flags_match": true,
"trace_id": "I6V3tELpvi6k1i"
},
{
"case_id": "NQ-008",
"validation_passed": true,
"intent_match": false,
"route_match": true,
"causal_flags_match": false,
"trace_id": "iel5ScdccVZ4zT"
},
{
"case_id": "NQ-009",
"validation_passed": true,
"intent_match": false,
"route_match": false,
"causal_flags_match": true,
"trace_id": "1C9MATbKvo5FnF"
},
{
"case_id": "NQ-010",
"validation_passed": true,
"intent_match": true,
"route_match": true,
"causal_flags_match": true,
"trace_id": "ti9EWEi85p-RnD"
},
{
"case_id": "NQ-1774273974528",
"validation_passed": true,
"intent_match": true,
"route_match": true,
"causal_flags_match": true,
"trace_id": "lSYWnMUueaZgc2"
}
]
}