NODEDC_1C/llm_normalizer/data/eval_cases/eval-IpucvMroko.report.json

84 lines
2.3 KiB
JSON

{
"run_id": "eval-IpucvMroko",
"timestamp": "2026-03-23T17:59:01.977Z",
"mode": "single-pass-strict",
"use_mock": true,
"prompt_version": "normalizer_v2",
"schema_version": "v2",
"dataset": {
"source": "inline_raw_questions",
"file": null,
"raw_questions_count": 3
},
"cases_total": 3,
"metrics": {
"schema_validation_pass_rate": 100,
"scope_in_scope_rate": 33.33,
"multi_intent_detected_rate": 0,
"clarification_required_rate": 33.33,
"avg_fragments_per_message": 1,
"out_of_scope_fragment_rate": 33.33,
"routed_fragment_rate": 33.33,
"no_route_fragment_rate": 66.67
},
"budget": {
"requests_total": 0,
"retries_used": 0
},
"route_distribution": {
"store_feature_risk": 1,
"no_route": 2
},
"fallback_distribution": {
"clarification": 1,
"out_of_scope": 2
},
"results": [
{
"case_id": "BQ-001",
"raw_question": "Проверь хвосты по поставщикам и разложи цепочку",
"validation_passed": true,
"message_in_scope": true,
"scope_confidence": "high",
"contains_multiple_tasks": false,
"fragments_total": 1,
"in_scope_fragments": 1,
"out_of_scope_fragments": 0,
"unclear_fragments": 0,
"fallback_type": "clarification",
"trace_id": "lzwdL3W9zRCLVe",
"request_count_for_case": 0
},
{
"case_id": "BQ-002",
"raw_question": "Как вообще по ФСБУ",
"validation_passed": true,
"message_in_scope": false,
"scope_confidence": "low",
"contains_multiple_tasks": false,
"fragments_total": 1,
"in_scope_fragments": 0,
"out_of_scope_fragments": 1,
"unclear_fragments": 0,
"fallback_type": "out_of_scope",
"trace_id": "b2LgkaTILQ8a4G",
"request_count_for_case": 0
},
{
"case_id": "BQ-003",
"raw_question": "Покажи топ рисков за июнь 2020",
"validation_passed": true,
"message_in_scope": false,
"scope_confidence": "low",
"contains_multiple_tasks": false,
"fragments_total": 1,
"in_scope_fragments": 0,
"out_of_scope_fragments": 0,
"unclear_fragments": 1,
"fallback_type": "out_of_scope",
"trace_id": "DYWHGwbQ2K4p1K",
"request_count_for_case": 0
}
]
}