NODEDC_1C/llm_normalizer/data/eval_cases/eval-wnexicBv1L.report.json

112 lines
3.2 KiB
JSON

{
"run_id": "eval-wnexicBv1L",
"timestamp": "2026-03-26T17:04:55.449Z",
"mode": "single-pass-strict",
"use_mock": true,
"prompt_version": "normalizer_v2_0_2",
"schema_version": "v2_0_2",
"dataset": {
"source": "inline_raw_questions",
"file": null,
"raw_questions_count": 2
},
"cases_total": 2,
"metrics": {
"schema_validation_pass_rate": 100,
"scope_detection_accuracy": null,
"scope_in_scope_rate": 100,
"multi_intent_detected_rate": 0,
"clarification_required_rate": 0,
"avg_fragments_per_message": 1,
"out_of_scope_fragment_rate": 0,
"routed_fragment_rate": 100,
"no_route_fragment_rate": 0,
"route_resolution_accuracy": null,
"no_route_precision": null,
"false_no_route_rate": null,
"execution_state_consistency_rate": 100,
"executable_with_soft_assumptions_rate": 100,
"soft_assumption_used_fragment_rate": 100,
"clarification_precision": null,
"clarification_recall": null,
"false_clarification_rate": null
},
"budget": {
"requests_total": 0,
"retries_used": 0
},
"clarification_eval": {
"labeled_cases": 0,
"true_positive": 0,
"false_positive": 0,
"false_negative": 0
},
"route_eval": {
"labeled_cases": 0,
"correct_cases": 0,
"expected_routed_cases": 0,
"no_route_true_positive": 0,
"no_route_false_positive": 0
},
"scope_eval": {
"labeled_cases": 0,
"correct_cases": 0
},
"execution_state_eval": {
"checks_total": 2,
"checks_passed": 2
},
"route_distribution": {
"store_feature_risk": 2
},
"fallback_distribution": {
"none": 2
},
"results": [
{
"case_id": "BQ-001",
"raw_question": "Проверь счет 60 за июнь 2020",
"validation_passed": true,
"message_in_scope": true,
"scope_confidence": "high",
"contains_multiple_tasks": false,
"fragments_total": 1,
"in_scope_fragments": 1,
"out_of_scope_fragments": 0,
"unclear_fragments": 0,
"fallback_type": "none",
"predicted_route_status": "routed",
"expected_route_status": null,
"predicted_no_route_reason": null,
"expected_no_route_reason": null,
"predicted_clarification_required": false,
"expected_clarification_required": null,
"executable_with_soft_assumptions_fragments": 1,
"trace_id": "LChjIRCQvUVBJI",
"request_count_for_case": 0
},
{
"case_id": "BQ-002",
"raw_question": "Покажи риски по счету 97",
"validation_passed": true,
"message_in_scope": true,
"scope_confidence": "high",
"contains_multiple_tasks": false,
"fragments_total": 1,
"in_scope_fragments": 1,
"out_of_scope_fragments": 0,
"unclear_fragments": 0,
"fallback_type": "none",
"predicted_route_status": "routed",
"expected_route_status": null,
"predicted_no_route_reason": null,
"expected_no_route_reason": null,
"predicted_clarification_required": false,
"expected_clarification_required": null,
"executable_with_soft_assumptions_fragments": 1,
"trace_id": "jBg8Projzr0MZ0",
"request_count_for_case": 0
}
]
}