{ "run_id": "eval-xk1NE5ndVV", "timestamp": "2026-03-23T14:35:48.176Z", "mode": "single-pass-strict", "use_mock": true, "prompt_version": "normalizer_v1_1", "dataset": { "source": "file", "file": "normalizer_eval_v1_1_30cases.json" }, "cases_total": 30, "metrics": { "schema_validation_pass_rate": 100, "intent_class_accuracy": 83.33, "route_hint_accuracy": 100, "causal_flag_accuracy": 100, "high_confidence_error_rate": 0 }, "baseline_metrics": { "schema_validation_pass_rate": 100, "intent_class_accuracy": 72.73, "route_hint_accuracy": 90.91, "causal_flag_accuracy": 81.82, "high_confidence_error_rate": 9.09 }, "baseline_delta": { "schema_validation_pass_rate": 0, "intent_class_accuracy": 10.6, "route_hint_accuracy": 9.09, "causal_flag_accuracy": 18.18, "high_confidence_error_rate": -9.09 }, "class_accuracy": { "cross_entity": { "total": 10, "passed": 10, "accuracy_percent": 100 }, "heavy_analytical": { "total": 5, "passed": 5, "accuracy_percent": 100 }, "drilldown_explain": { "total": 5, "passed": 5, "accuracy_percent": 100 }, "rule_based_account_control": { "total": 5, "passed": 5, "accuracy_percent": 100 }, "anomaly_probe": { "total": 2, "passed": 0, "accuracy_percent": 0 }, "ambiguous_human_query": { "total": 1, "passed": 0, "accuracy_percent": 0 }, "period_close_risk": { "total": 2, "passed": 0, "accuracy_percent": 0 } }, "budget": { "requests_total": 0, "retries_used": 0, "guidance": { "forensic_calls_max": 10, "final_eval_calls_max": 30, "target_total_calls_max": 40, "hard_cap_calls_max": 45 } }, "mismatches": [ { "case_id": "NQ-005", "expected_intent_class": "anomaly_probe", "actual_intent_class": "rule_based_account_control", "expected_route_hint": "store_feature_risk", "actual_route_hint": "store_feature_risk", "expected_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false }, "actual_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false, "needs_exact_object_trace": false, "needs_ranking": false, "needs_anomaly_summary": true, "needs_runtime_truth": false, "needs_period_cut": false, "needs_evidence": false }, "comment": "Route chosen correctly, but intent_class drifted into a neighboring taxonomy bucket.", "trace_id": "lO6b7E4iQGlEAf" }, { "case_id": "NQ-009", "expected_intent_class": "ambiguous_human_query", "actual_intent_class": "heavy_analytical", "expected_route_hint": "batch_refresh_then_store", "actual_route_hint": "batch_refresh_then_store", "expected_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false }, "actual_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false, "needs_exact_object_trace": false, "needs_ranking": true, "needs_anomaly_summary": false, "needs_runtime_truth": false, "needs_period_cut": true, "needs_evidence": false }, "comment": "Route chosen correctly, but intent_class drifted into a neighboring taxonomy bucket.", "trace_id": "Gk7UiRhZg-AapE" }, { "case_id": "V11-OT-003", "expected_intent_class": "period_close_risk", "actual_intent_class": "heavy_analytical", "expected_route_hint": "batch_refresh_then_store", "actual_route_hint": "batch_refresh_then_store", "expected_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false }, "actual_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false, "needs_exact_object_trace": false, "needs_ranking": true, "needs_anomaly_summary": false, "needs_runtime_truth": false, "needs_period_cut": true, "needs_evidence": false }, "comment": "Route chosen correctly, but intent_class drifted into a neighboring taxonomy bucket.", "trace_id": "i4o4wkvxfHKUpe" }, { "case_id": "V11-OT-004", "expected_intent_class": "anomaly_probe", "actual_intent_class": "rule_based_account_control", "expected_route_hint": "store_feature_risk", "actual_route_hint": "store_feature_risk", "expected_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false }, "actual_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false, "needs_exact_object_trace": false, "needs_ranking": false, "needs_anomaly_summary": true, "needs_runtime_truth": false, "needs_period_cut": true, "needs_evidence": false }, "comment": "Route chosen correctly, but intent_class drifted into a neighboring taxonomy bucket.", "trace_id": "2riratZ9G8TMN_" }, { "case_id": "V11-OT-005", "expected_intent_class": "period_close_risk", "actual_intent_class": "heavy_analytical", "expected_route_hint": "batch_refresh_then_store", "actual_route_hint": "batch_refresh_then_store", "expected_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false }, "actual_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false, "needs_exact_object_trace": false, "needs_ranking": true, "needs_anomaly_summary": false, "needs_runtime_truth": false, "needs_period_cut": false, "needs_evidence": false }, "comment": "Route chosen correctly, but intent_class drifted into a neighboring taxonomy bucket.", "trace_id": "aw6boDZZ59P2sF" } ], "bad_confidence_cases": [], "results": [ { "case_id": "NQ-001", "raw_question": "По каким поставщикам на конец июня не бьются взаиморасчеты, покажи документы, оплаты и хвосты.", "validation_passed": true, "intent_match": true, "route_match": true, "causal_flags_match": true, "expected_intent_class": "cross_entity", "actual_intent_class": "cross_entity", "expected_route_hint": "hybrid_store_plus_live", "actual_route_hint": "hybrid_store_plus_live", "expected_requires": { "needs_cross_entity_join": true, "needs_causal_chain": true }, "actual_requires": { "needs_cross_entity_join": true, "needs_causal_chain": true, "needs_exact_object_trace": false, "needs_ranking": false, "needs_anomaly_summary": false, "needs_runtime_truth": false, "needs_period_cut": true, "needs_evidence": true }, "confidence_overall": "medium", "trace_id": "M0qLQbv0uSFcP_", "request_count_for_case": 0 }, { "case_id": "NQ-006", "raw_question": "По каким реализациям 90/62 хвосты не закрылись оплатой, разложи по цепочке документов.", "validation_passed": true, "intent_match": true, "route_match": true, "causal_flags_match": true, "expected_intent_class": "cross_entity", "actual_intent_class": "cross_entity", "expected_route_hint": "hybrid_store_plus_live", "actual_route_hint": "hybrid_store_plus_live", "expected_requires": { "needs_cross_entity_join": true, "needs_causal_chain": true }, "actual_requires": { "needs_cross_entity_join": true, "needs_causal_chain": true, "needs_exact_object_trace": false, "needs_ranking": false, "needs_anomaly_summary": false, "needs_runtime_truth": false, "needs_period_cut": false, "needs_evidence": true }, "confidence_overall": "low", "trace_id": "GRYZNvT3wt8Z2G", "request_count_for_case": 0 }, { "case_id": "V11-CE-003", "raw_question": "Где в июне не сходится 60/51: разложи по документу, оплате и закрывающему, чем подтверждается каждый шаг.", "validation_passed": true, "intent_match": true, "route_match": true, "causal_flags_match": true, "expected_intent_class": "cross_entity", "actual_intent_class": "cross_entity", "expected_route_hint": "hybrid_store_plus_live", "actual_route_hint": "hybrid_store_plus_live", "expected_requires": { "needs_cross_entity_join": true, "needs_causal_chain": true }, "actual_requires": { "needs_cross_entity_join": true, "needs_causal_chain": true, "needs_exact_object_trace": false, "needs_ranking": false, "needs_anomaly_summary": false, "needs_runtime_truth": false, "needs_period_cut": true, "needs_evidence": true }, "confidence_overall": "medium", "trace_id": "l-k8uL2wR12Yu7", "request_count_for_case": 0 }, { "case_id": "V11-CE-004", "raw_question": "Разложи по контрагентам цепочку: отгрузка -> оплата -> закрывающий, чтобы понять где рвется подтверждение.", "validation_passed": true, "intent_match": true, "route_match": true, "causal_flags_match": true, "expected_intent_class": "cross_entity", "actual_intent_class": "cross_entity", "expected_route_hint": "hybrid_store_plus_live", "actual_route_hint": "hybrid_store_plus_live", "expected_requires": { "needs_cross_entity_join": true, "needs_causal_chain": true }, "actual_requires": { "needs_cross_entity_join": true, "needs_causal_chain": true, "needs_exact_object_trace": false, "needs_ranking": false, "needs_anomaly_summary": false, "needs_runtime_truth": false, "needs_period_cut": false, "needs_evidence": true }, "confidence_overall": "low", "trace_id": "Dq25fAd3L1t5EM", "request_count_for_case": 0 }, { "case_id": "V11-CE-005", "raw_question": "По поставщикам где повисло в цепочке поступление-оплата-закрытие по 60, покажи проблемные связки.", "validation_passed": true, "intent_match": true, "route_match": true, "causal_flags_match": true, "expected_intent_class": "cross_entity", "actual_intent_class": "cross_entity", "expected_route_hint": "hybrid_store_plus_live", "actual_route_hint": "hybrid_store_plus_live", "expected_requires": { "needs_cross_entity_join": true, "needs_causal_chain": true }, "actual_requires": { "needs_cross_entity_join": true, "needs_causal_chain": true, "needs_exact_object_trace": false, "needs_ranking": false, "needs_anomaly_summary": false, "needs_runtime_truth": false, "needs_period_cut": false, "needs_evidence": true }, "confidence_overall": "low", "trace_id": "nC1bMGxs-fCBso", "request_count_for_case": 0 }, { "case_id": "V11-CE-006", "raw_question": "Найди где по 62 не собралось: нужен разбор по документам, оплатам и проводкам с причинно-следственной цепочкой.", "validation_passed": true, "intent_match": true, "route_match": true, "causal_flags_match": true, "expected_intent_class": "cross_entity", "actual_intent_class": "cross_entity", "expected_route_hint": "hybrid_store_plus_live", "actual_route_hint": "hybrid_store_plus_live", "expected_requires": { "needs_cross_entity_join": true, "needs_causal_chain": true }, "actual_requires": { "needs_cross_entity_join": true, "needs_causal_chain": true, "needs_exact_object_trace": false, "needs_ranking": false, "needs_anomaly_summary": false, "needs_runtime_truth": false, "needs_period_cut": false, "needs_evidence": true }, "confidence_overall": "low", "trace_id": "9jmHTGw-seHVRN", "request_count_for_case": 0 }, { "case_id": "V11-CE-007", "raw_question": "Покажи по июню все случаи когда реализация без оплаты и где в цепочке ошибка подтверждения.", "validation_passed": true, "intent_match": true, "route_match": true, "causal_flags_match": true, "expected_intent_class": "cross_entity", "actual_intent_class": "cross_entity", "expected_route_hint": "hybrid_store_plus_live", "actual_route_hint": "hybrid_store_plus_live", "expected_requires": { "needs_cross_entity_join": true, "needs_causal_chain": true }, "actual_requires": { "needs_cross_entity_join": true, "needs_causal_chain": true, "needs_exact_object_trace": false, "needs_ranking": false, "needs_anomaly_summary": false, "needs_runtime_truth": false, "needs_period_cut": true, "needs_evidence": true }, "confidence_overall": "medium", "trace_id": "Shno6vSxK1KaU_", "request_count_for_case": 0 }, { "case_id": "V11-CE-008", "raw_question": "Сделай причинный разбор хвостов по 60: документ, оплата, проводка, закрывающий, где пошло криво.", "validation_passed": true, "intent_match": true, "route_match": true, "causal_flags_match": true, "expected_intent_class": "cross_entity", "actual_intent_class": "cross_entity", "expected_route_hint": "hybrid_store_plus_live", "actual_route_hint": "hybrid_store_plus_live", "expected_requires": { "needs_cross_entity_join": true, "needs_causal_chain": true }, "actual_requires": { "needs_cross_entity_join": true, "needs_causal_chain": true, "needs_exact_object_trace": false, "needs_ranking": false, "needs_anomaly_summary": false, "needs_runtime_truth": false, "needs_period_cut": false, "needs_evidence": true }, "confidence_overall": "low", "trace_id": "x6VKGs-iLjiEQZ", "request_count_for_case": 0 }, { "case_id": "V11-CE-009", "raw_question": "Почему у части покупателей не видно закрытия, разложи цепочку документов и оплат по июню.", "validation_passed": true, "intent_match": true, "route_match": true, "causal_flags_match": true, "expected_intent_class": "cross_entity", "actual_intent_class": "cross_entity", "expected_route_hint": "hybrid_store_plus_live", "actual_route_hint": "hybrid_store_plus_live", "expected_requires": { "needs_cross_entity_join": true, "needs_causal_chain": true }, "actual_requires": { "needs_cross_entity_join": true, "needs_causal_chain": true, "needs_exact_object_trace": false, "needs_ranking": false, "needs_anomaly_summary": false, "needs_runtime_truth": false, "needs_period_cut": true, "needs_evidence": true }, "confidence_overall": "medium", "trace_id": "x8dzpFWkQkXAaJ", "request_count_for_case": 0 }, { "case_id": "V11-CE-010", "raw_question": "У кого из контрагентов в июне хвосты между 60 и банком, разложи по документам/оплатам/закрывающим.", "validation_passed": true, "intent_match": true, "route_match": true, "causal_flags_match": true, "expected_intent_class": "cross_entity", "actual_intent_class": "cross_entity", "expected_route_hint": "hybrid_store_plus_live", "actual_route_hint": "hybrid_store_plus_live", "expected_requires": { "needs_cross_entity_join": true, "needs_causal_chain": true }, "actual_requires": { "needs_cross_entity_join": true, "needs_causal_chain": true, "needs_exact_object_trace": false, "needs_ranking": false, "needs_anomaly_summary": false, "needs_runtime_truth": false, "needs_period_cut": true, "needs_evidence": true }, "confidence_overall": "medium", "trace_id": "NdmcqZDJKUyktD", "request_count_for_case": 0 }, { "case_id": "NQ-002", "raw_question": "Сделай рейтинг самых рисковых хвостов перед закрытием периода за июнь.", "validation_passed": true, "intent_match": true, "route_match": true, "causal_flags_match": true, "expected_intent_class": "heavy_analytical", "actual_intent_class": "heavy_analytical", "expected_route_hint": "batch_refresh_then_store", "actual_route_hint": "batch_refresh_then_store", "expected_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false }, "actual_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false, "needs_exact_object_trace": false, "needs_ranking": true, "needs_anomaly_summary": true, "needs_runtime_truth": false, "needs_period_cut": true, "needs_evidence": false }, "confidence_overall": "medium", "trace_id": "sGbMyIfaek6Urk", "request_count_for_case": 0 }, { "case_id": "NQ-007", "raw_question": "Что у нас выглядит самым проблемным перед закрытием июня, если смотреть на компанию в целом?", "validation_passed": true, "intent_match": true, "route_match": true, "causal_flags_match": true, "expected_intent_class": "heavy_analytical", "actual_intent_class": "heavy_analytical", "expected_route_hint": "batch_refresh_then_store", "actual_route_hint": "batch_refresh_then_store", "expected_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false }, "actual_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false, "needs_exact_object_trace": false, "needs_ranking": true, "needs_anomaly_summary": false, "needs_runtime_truth": false, "needs_period_cut": true, "needs_evidence": false }, "confidence_overall": "medium", "trace_id": "d8EWv8U6yyHhsr", "request_count_for_case": 0 }, { "case_id": "V11-HA-003", "raw_question": "Собери топ-10 риск-зон учета по июню и приоритизируй, куда лезть сначала.", "validation_passed": true, "intent_match": true, "route_match": true, "causal_flags_match": true, "expected_intent_class": "heavy_analytical", "actual_intent_class": "heavy_analytical", "expected_route_hint": "batch_refresh_then_store", "actual_route_hint": "batch_refresh_then_store", "expected_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false }, "actual_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false, "needs_exact_object_trace": false, "needs_ranking": true, "needs_anomaly_summary": true, "needs_runtime_truth": false, "needs_period_cut": true, "needs_evidence": false }, "confidence_overall": "medium", "trace_id": "2Z_F539WXmzOKY", "request_count_for_case": 0 }, { "case_id": "V11-HA-004", "raw_question": "Дай обзорный риск-срез перед сдачей отчетности: где максимальная концентрация ошибок.", "validation_passed": true, "intent_match": true, "route_match": true, "causal_flags_match": true, "expected_intent_class": "heavy_analytical", "actual_intent_class": "heavy_analytical", "expected_route_hint": "batch_refresh_then_store", "actual_route_hint": "batch_refresh_then_store", "expected_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false }, "actual_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false, "needs_exact_object_trace": false, "needs_ranking": true, "needs_anomaly_summary": true, "needs_runtime_truth": false, "needs_period_cut": false, "needs_evidence": false }, "confidence_overall": "low", "trace_id": "WOLy5XIs2Gp3TH", "request_count_for_case": 0 }, { "case_id": "V11-HA-005", "raw_question": "Сделай приоритизированный обзор ручных проверок по компании за июнь.", "validation_passed": true, "intent_match": true, "route_match": true, "causal_flags_match": true, "expected_intent_class": "heavy_analytical", "actual_intent_class": "heavy_analytical", "expected_route_hint": "batch_refresh_then_store", "actual_route_hint": "batch_refresh_then_store", "expected_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false }, "actual_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false, "needs_exact_object_trace": false, "needs_ranking": true, "needs_anomaly_summary": false, "needs_runtime_truth": false, "needs_period_cut": true, "needs_evidence": false }, "confidence_overall": "medium", "trace_id": "D7GXkCRK4LYQsd", "request_count_for_case": 0 }, { "case_id": "NQ-003", "raw_question": "Покажи документ по номеру 000123 и строку проводки, нужен точный source-of-record.", "validation_passed": true, "intent_match": true, "route_match": true, "causal_flags_match": true, "expected_intent_class": "drilldown_explain", "actual_intent_class": "drilldown_explain", "expected_route_hint": "live_mcp_drilldown", "actual_route_hint": "live_mcp_drilldown", "expected_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false }, "actual_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false, "needs_exact_object_trace": true, "needs_ranking": false, "needs_anomaly_summary": false, "needs_runtime_truth": true, "needs_period_cut": false, "needs_evidence": true }, "confidence_overall": "low", "trace_id": "LabIQZzDA0B3f9", "request_count_for_case": 0 }, { "case_id": "NQ-008", "raw_question": "Покажи по банку документ №TRX-88 и связанную проводку по 51.", "validation_passed": true, "intent_match": true, "route_match": true, "causal_flags_match": true, "expected_intent_class": "drilldown_explain", "actual_intent_class": "drilldown_explain", "expected_route_hint": "live_mcp_drilldown", "actual_route_hint": "live_mcp_drilldown", "expected_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false }, "actual_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false, "needs_exact_object_trace": true, "needs_ranking": false, "needs_anomaly_summary": false, "needs_runtime_truth": true, "needs_period_cut": false, "needs_evidence": true }, "confidence_overall": "low", "trace_id": "9zwPrDybqODSUE", "request_count_for_case": 0 }, { "case_id": "V11-DD-003", "raw_question": "Покажи проводку по документу INV-2020-0615, нужна конкретная строка и источник.", "validation_passed": true, "intent_match": true, "route_match": true, "causal_flags_match": true, "expected_intent_class": "drilldown_explain", "actual_intent_class": "drilldown_explain", "expected_route_hint": "live_mcp_drilldown", "actual_route_hint": "live_mcp_drilldown", "expected_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false }, "actual_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false, "needs_exact_object_trace": true, "needs_ranking": false, "needs_anomaly_summary": false, "needs_runtime_truth": true, "needs_period_cut": false, "needs_evidence": true }, "confidence_overall": "low", "trace_id": "J36zx_j3ooTYEv", "request_count_for_case": 0 }, { "case_id": "V11-DD-004", "raw_question": "Дай точечный drilldown по документу №PAY-441 и его проводке по 51.", "validation_passed": true, "intent_match": true, "route_match": true, "causal_flags_match": true, "expected_intent_class": "drilldown_explain", "actual_intent_class": "drilldown_explain", "expected_route_hint": "live_mcp_drilldown", "actual_route_hint": "live_mcp_drilldown", "expected_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false }, "actual_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false, "needs_exact_object_trace": true, "needs_ranking": false, "needs_anomaly_summary": false, "needs_runtime_truth": true, "needs_period_cut": false, "needs_evidence": true }, "confidence_overall": "low", "trace_id": "jG7D_g3E-D_sMq", "request_count_for_case": 0 }, { "case_id": "V11-DD-005", "raw_question": "Покажи карточку конкретной операции DOC-7781 и связанную проводку.", "validation_passed": true, "intent_match": true, "route_match": true, "causal_flags_match": true, "expected_intent_class": "drilldown_explain", "actual_intent_class": "drilldown_explain", "expected_route_hint": "live_mcp_drilldown", "actual_route_hint": "live_mcp_drilldown", "expected_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false }, "actual_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false, "needs_exact_object_trace": true, "needs_ranking": false, "needs_anomaly_summary": false, "needs_runtime_truth": true, "needs_period_cut": false, "needs_evidence": true }, "confidence_overall": "low", "trace_id": "fE_w5vzkNkzUzb", "request_count_for_case": 0 }, { "case_id": "NQ-004", "raw_question": "По 97 счету проверь, где возможна ошибка дат начала и окончания списания.", "validation_passed": true, "intent_match": true, "route_match": true, "causal_flags_match": true, "expected_intent_class": "rule_based_account_control", "actual_intent_class": "rule_based_account_control", "expected_route_hint": "store_feature_risk", "actual_route_hint": "store_feature_risk", "expected_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false }, "actual_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false, "needs_exact_object_trace": false, "needs_ranking": false, "needs_anomaly_summary": false, "needs_runtime_truth": false, "needs_period_cut": false, "needs_evidence": false }, "confidence_overall": "low", "trace_id": "3OILIBmF5h-YN-", "request_count_for_case": 0 }, { "case_id": "V11-RB-002", "raw_question": "Проверь контрольные правила по ОС: где ошибки в сроках амортизации и учетной группе.", "validation_passed": true, "intent_match": true, "route_match": true, "causal_flags_match": true, "expected_intent_class": "rule_based_account_control", "actual_intent_class": "rule_based_account_control", "expected_route_hint": "store_feature_risk", "actual_route_hint": "store_feature_risk", "expected_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false }, "actual_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false, "needs_exact_object_trace": false, "needs_ranking": false, "needs_anomaly_summary": false, "needs_runtime_truth": false, "needs_period_cut": false, "needs_evidence": false }, "confidence_overall": "low", "trace_id": "EshIW8vaOGxjhH", "request_count_for_case": 0 }, { "case_id": "V11-RB-003", "raw_question": "По 10 счету проверь где нарушены правила оценки остатков.", "validation_passed": true, "intent_match": true, "route_match": true, "causal_flags_match": true, "expected_intent_class": "rule_based_account_control", "actual_intent_class": "rule_based_account_control", "expected_route_hint": "store_feature_risk", "actual_route_hint": "store_feature_risk", "expected_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false }, "actual_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false, "needs_exact_object_trace": false, "needs_ranking": false, "needs_anomaly_summary": false, "needs_runtime_truth": false, "needs_period_cut": false, "needs_evidence": false }, "confidence_overall": "low", "trace_id": "llnpzqJoB6RxGY", "request_count_for_case": 0 }, { "case_id": "V11-RB-004", "raw_question": "По НДС на 68.02 найди нарушения контрольных правил расчета за июнь.", "validation_passed": true, "intent_match": true, "route_match": true, "causal_flags_match": true, "expected_intent_class": "rule_based_account_control", "actual_intent_class": "rule_based_account_control", "expected_route_hint": "store_feature_risk", "actual_route_hint": "store_feature_risk", "expected_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false }, "actual_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false, "needs_exact_object_trace": false, "needs_ranking": false, "needs_anomaly_summary": false, "needs_runtime_truth": false, "needs_period_cut": true, "needs_evidence": false }, "confidence_overall": "medium", "trace_id": "NJs3r150DkVdqZ", "request_count_for_case": 0 }, { "case_id": "V11-RB-005", "raw_question": "Проверь учетные настройки списания на 97 и покажи где высокий риск ручной ошибки.", "validation_passed": true, "intent_match": true, "route_match": true, "causal_flags_match": true, "expected_intent_class": "rule_based_account_control", "actual_intent_class": "rule_based_account_control", "expected_route_hint": "store_feature_risk", "actual_route_hint": "store_feature_risk", "expected_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false }, "actual_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false, "needs_exact_object_trace": false, "needs_ranking": false, "needs_anomaly_summary": true, "needs_runtime_truth": false, "needs_period_cut": false, "needs_evidence": false }, "confidence_overall": "low", "trace_id": "RhidN0FhNQqEk4", "request_count_for_case": 0 }, { "case_id": "NQ-005", "raw_question": "Есть ли аномальные материалы на счете 10, которые зависли и выглядят нелогично?", "validation_passed": true, "intent_match": false, "route_match": true, "causal_flags_match": true, "expected_intent_class": "anomaly_probe", "actual_intent_class": "rule_based_account_control", "expected_route_hint": "store_feature_risk", "actual_route_hint": "store_feature_risk", "expected_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false }, "actual_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false, "needs_exact_object_trace": false, "needs_ranking": false, "needs_anomaly_summary": true, "needs_runtime_truth": false, "needs_period_cut": false, "needs_evidence": false }, "confidence_overall": "low", "trace_id": "lO6b7E4iQGlEAf", "request_count_for_case": 0 }, { "case_id": "NQ-009", "raw_question": "Где у нас пахнет ручной ошибкой по июню?", "validation_passed": true, "intent_match": false, "route_match": true, "causal_flags_match": true, "expected_intent_class": "ambiguous_human_query", "actual_intent_class": "heavy_analytical", "expected_route_hint": "batch_refresh_then_store", "actual_route_hint": "batch_refresh_then_store", "expected_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false }, "actual_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false, "needs_exact_object_trace": false, "needs_ranking": true, "needs_anomaly_summary": false, "needs_runtime_truth": false, "needs_period_cut": true, "needs_evidence": false }, "confidence_overall": "medium", "trace_id": "Gk7UiRhZg-AapE", "request_count_for_case": 0 }, { "case_id": "V11-OT-003", "raw_question": "Перед закрытием периода что у нас может взорваться в последний день?", "validation_passed": true, "intent_match": false, "route_match": true, "causal_flags_match": true, "expected_intent_class": "period_close_risk", "actual_intent_class": "heavy_analytical", "expected_route_hint": "batch_refresh_then_store", "actual_route_hint": "batch_refresh_then_store", "expected_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false }, "actual_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false, "needs_exact_object_trace": false, "needs_ranking": true, "needs_anomaly_summary": false, "needs_runtime_truth": false, "needs_period_cut": true, "needs_evidence": false }, "confidence_overall": "medium", "trace_id": "i4o4wkvxfHKUpe", "request_count_for_case": 0 }, { "case_id": "V11-OT-004", "raw_question": "Где по июню выглядит подозрительно, но без точечного документа, просто дай зоны риска.", "validation_passed": true, "intent_match": false, "route_match": true, "causal_flags_match": true, "expected_intent_class": "anomaly_probe", "actual_intent_class": "rule_based_account_control", "expected_route_hint": "store_feature_risk", "actual_route_hint": "store_feature_risk", "expected_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false }, "actual_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false, "needs_exact_object_trace": false, "needs_ranking": false, "needs_anomaly_summary": true, "needs_runtime_truth": false, "needs_period_cut": true, "needs_evidence": false }, "confidence_overall": "medium", "trace_id": "2riratZ9G8TMN_", "request_count_for_case": 0 }, { "case_id": "V11-OT-005", "raw_question": "Че-то все криво на предзакрытии, где самые опасные места?", "validation_passed": true, "intent_match": false, "route_match": true, "causal_flags_match": true, "expected_intent_class": "period_close_risk", "actual_intent_class": "heavy_analytical", "expected_route_hint": "batch_refresh_then_store", "actual_route_hint": "batch_refresh_then_store", "expected_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false }, "actual_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false, "needs_exact_object_trace": false, "needs_ranking": true, "needs_anomaly_summary": false, "needs_runtime_truth": false, "needs_period_cut": false, "needs_evidence": false }, "confidence_overall": "low", "trace_id": "aw6boDZZ59P2sF", "request_count_for_case": 0 } ] }