{ "run_id": "eval-h3k8TyTFuu", "timestamp": "2026-03-23T16:30:36.413Z", "mode": "single-pass-strict", "use_mock": false, "prompt_version": "normalizer_v1_1_2_1", "dataset": { "source": "file", "file": "normalizer_eval_v1_1_2_1_30cases.json" }, "cases_total": 30, "metrics": { "schema_validation_pass_rate": 100, "intent_class_accuracy": 70, "route_hint_accuracy": 80, "causal_flag_accuracy": 60, "high_confidence_error_rate": 3.33 }, "baseline_metrics": { "schema_validation_pass_rate": 100, "intent_class_accuracy": 72.73, "route_hint_accuracy": 90.91, "causal_flag_accuracy": 81.82, "high_confidence_error_rate": 9.09 }, "baseline_delta": { "schema_validation_pass_rate": 0, "intent_class_accuracy": -2.73, "route_hint_accuracy": -10.91, "causal_flag_accuracy": -21.82, "high_confidence_error_rate": -5.76 }, "class_accuracy": { "cross_entity": { "total": 14, "passed": 14, "accuracy_percent": 100 }, "anomaly_probe": { "total": 6, "passed": 3, "accuracy_percent": 50 }, "heavy_analytical": { "total": 5, "passed": 1, "accuracy_percent": 20 }, "rule_based_account_control": { "total": 5, "passed": 3, "accuracy_percent": 60 } }, "budget": { "requests_total": 30, "retries_used": 0, "guidance": { "forensic_calls_max": 10, "final_eval_calls_max": 30, "target_total_calls_max": 40, "hard_cap_calls_max": 45 } }, "mismatches": [ { "case_id": "V1121-B1-06", "expected_intent_class": "anomaly_probe", "actual_intent_class": "cross_entity", "expected_route_hint": "store_feature_risk", "actual_route_hint": "store_feature_risk", "expected_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false }, "actual_requires": { "needs_cross_entity_join": true, "needs_causal_chain": true, "needs_exact_object_trace": false, "needs_ranking": false, "needs_anomaly_summary": true, "needs_runtime_truth": false, "needs_period_cut": false, "needs_evidence": false }, "comment": "Route chosen correctly, but intent_class drifted into a neighboring taxonomy bucket.", "trace_id": "UKCM7zvsU6WUSG" }, { "case_id": "V1121-B2-01", "expected_intent_class": "heavy_analytical", "actual_intent_class": "cross_entity", "expected_route_hint": "batch_refresh_then_store", "actual_route_hint": "hybrid_store_plus_live", "expected_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false }, "actual_requires": { "needs_cross_entity_join": true, "needs_causal_chain": true, "needs_exact_object_trace": false, "needs_ranking": false, "needs_anomaly_summary": true, "needs_runtime_truth": false, "needs_period_cut": true, "needs_evidence": false }, "comment": "Both intent and route misclassified; likely lexical ambiguity in causal vs risk wording.", "trace_id": "DjcpTNQM8KgQCi" }, { "case_id": "V1121-B2-03", "expected_intent_class": "heavy_analytical", "actual_intent_class": "cross_entity", "expected_route_hint": "batch_refresh_then_store", "actual_route_hint": "store_feature_risk", "expected_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false }, "actual_requires": { "needs_cross_entity_join": true, "needs_causal_chain": true, "needs_exact_object_trace": false, "needs_ranking": true, "needs_anomaly_summary": false, "needs_runtime_truth": false, "needs_period_cut": false, "needs_evidence": false }, "comment": "Both intent and route misclassified; likely lexical ambiguity in causal vs risk wording.", "trace_id": "_ghrU-zDCd7_58" }, { "case_id": "V1121-B2-06", "expected_intent_class": "heavy_analytical", "actual_intent_class": "cross_entity", "expected_route_hint": "batch_refresh_then_store", "actual_route_hint": "hybrid_store_plus_live", "expected_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false }, "actual_requires": { "needs_cross_entity_join": true, "needs_causal_chain": true, "needs_exact_object_trace": false, "needs_ranking": false, "needs_anomaly_summary": false, "needs_runtime_truth": false, "needs_period_cut": false, "needs_evidence": false }, "comment": "Both intent and route misclassified; likely lexical ambiguity in causal vs risk wording.", "trace_id": "vfAte4AVljlOJD" }, { "case_id": "V1121-B3-01", "expected_intent_class": "cross_entity", "actual_intent_class": "cross_entity", "expected_route_hint": "hybrid_store_plus_live", "actual_route_hint": "store_feature_risk", "expected_requires": { "needs_cross_entity_join": true, "needs_causal_chain": true }, "actual_requires": { "needs_cross_entity_join": true, "needs_causal_chain": true, "needs_exact_object_trace": false, "needs_ranking": false, "needs_anomaly_summary": true, "needs_runtime_truth": false, "needs_period_cut": false, "needs_evidence": false }, "comment": "Intent understood, but route_hint selected a weaker execution route.", "trace_id": "DKB3P_fnbRPEQx" }, { "case_id": "V1121-B3-02", "expected_intent_class": "cross_entity", "actual_intent_class": "cross_entity", "expected_route_hint": "hybrid_store_plus_live", "actual_route_hint": "store_feature_risk", "expected_requires": { "needs_cross_entity_join": true, "needs_causal_chain": true }, "actual_requires": { "needs_cross_entity_join": true, "needs_causal_chain": true, "needs_exact_object_trace": false, "needs_ranking": false, "needs_anomaly_summary": false, "needs_runtime_truth": false, "needs_period_cut": false, "needs_evidence": false }, "comment": "Intent understood, but route_hint selected a weaker execution route.", "trace_id": "dG5iqslpfsCAs-" }, { "case_id": "V1121-B3-03", "expected_intent_class": "anomaly_probe", "actual_intent_class": "anomaly_probe", "expected_route_hint": "store_feature_risk", "actual_route_hint": "store_feature_risk", "expected_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false }, "actual_requires": { "needs_cross_entity_join": false, "needs_causal_chain": true, "needs_exact_object_trace": false, "needs_ranking": false, "needs_anomaly_summary": true, "needs_runtime_truth": false, "needs_period_cut": false, "needs_evidence": false }, "comment": "Causal flags are inconsistent with expected relationship depth.", "trace_id": "R7Qmh0qYqvLtFM" }, { "case_id": "V1121-B3-05", "expected_intent_class": "heavy_analytical", "actual_intent_class": "cross_entity", "expected_route_hint": "batch_refresh_then_store", "actual_route_hint": "hybrid_store_plus_live", "expected_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false }, "actual_requires": { "needs_cross_entity_join": true, "needs_causal_chain": true, "needs_exact_object_trace": false, "needs_ranking": false, "needs_anomaly_summary": true, "needs_runtime_truth": false, "needs_period_cut": false, "needs_evidence": false }, "comment": "Both intent and route misclassified; likely lexical ambiguity in causal vs risk wording.", "trace_id": "68el6FsqLTj0Wr" }, { "case_id": "V1121-B4-02", "expected_intent_class": "anomaly_probe", "actual_intent_class": "anomaly_probe", "expected_route_hint": "store_feature_risk", "actual_route_hint": "store_feature_risk", "expected_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false }, "actual_requires": { "needs_cross_entity_join": true, "needs_causal_chain": true, "needs_exact_object_trace": false, "needs_ranking": false, "needs_anomaly_summary": true, "needs_runtime_truth": false, "needs_period_cut": false, "needs_evidence": false }, "comment": "Causal flags are inconsistent with expected relationship depth.", "trace_id": "OeCwt50KwUWf0j" }, { "case_id": "V1121-B5-01", "expected_intent_class": "rule_based_account_control", "actual_intent_class": "cross_entity", "expected_route_hint": "store_feature_risk", "actual_route_hint": "store_feature_risk", "expected_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false }, "actual_requires": { "needs_cross_entity_join": true, "needs_causal_chain": true, "needs_exact_object_trace": false, "needs_ranking": false, "needs_anomaly_summary": false, "needs_runtime_truth": false, "needs_period_cut": false, "needs_evidence": false }, "comment": "Route chosen correctly, but intent_class drifted into a neighboring taxonomy bucket.", "trace_id": "6QkkgoRcMhK0Gi" }, { "case_id": "V1121-B5-02", "expected_intent_class": "anomaly_probe", "actual_intent_class": "anomaly_probe", "expected_route_hint": "store_feature_risk", "actual_route_hint": "store_feature_risk", "expected_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false }, "actual_requires": { "needs_cross_entity_join": true, "needs_causal_chain": true, "needs_exact_object_trace": false, "needs_ranking": false, "needs_anomaly_summary": true, "needs_runtime_truth": false, "needs_period_cut": false, "needs_evidence": false }, "comment": "Causal flags are inconsistent with expected relationship depth.", "trace_id": "0lcF5KNdyHtHss" }, { "case_id": "V1121-B5-03", "expected_intent_class": "anomaly_probe", "actual_intent_class": "cross_entity", "expected_route_hint": "store_feature_risk", "actual_route_hint": "store_feature_risk", "expected_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false }, "actual_requires": { "needs_cross_entity_join": true, "needs_causal_chain": true, "needs_exact_object_trace": false, "needs_ranking": false, "needs_anomaly_summary": true, "needs_runtime_truth": false, "needs_period_cut": false, "needs_evidence": false }, "comment": "Route chosen correctly, but intent_class drifted into a neighboring taxonomy bucket.", "trace_id": "dEMSXmj7nUFesp" }, { "case_id": "V1121-B6-02", "expected_intent_class": "anomaly_probe", "actual_intent_class": "cross_entity", "expected_route_hint": "store_feature_risk", "actual_route_hint": "store_feature_risk", "expected_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false }, "actual_requires": { "needs_cross_entity_join": true, "needs_causal_chain": true, "needs_exact_object_trace": false, "needs_ranking": false, "needs_anomaly_summary": true, "needs_runtime_truth": false, "needs_period_cut": false, "needs_evidence": false }, "comment": "Route chosen correctly, but intent_class drifted into a neighboring taxonomy bucket.", "trace_id": "gZT6epMO1Vz9Yq" }, { "case_id": "V1121-B7-02", "expected_intent_class": "rule_based_account_control", "actual_intent_class": "anomaly_probe", "expected_route_hint": "store_feature_risk", "actual_route_hint": "store_feature_risk", "expected_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false }, "actual_requires": { "needs_cross_entity_join": false, "needs_causal_chain": true, "needs_exact_object_trace": false, "needs_ranking": false, "needs_anomaly_summary": true, "needs_runtime_truth": false, "needs_period_cut": false, "needs_evidence": false }, "comment": "Route chosen correctly, but intent_class drifted into a neighboring taxonomy bucket.", "trace_id": "0lNcvDtvYn5C63" } ], "bad_confidence_cases": [ { "case_id": "V1121-B2-06", "confidence_overall": "high", "intent_match": false, "route_match": false, "causal_match": false, "trace_id": "vfAte4AVljlOJD" } ], "results": [ { "case_id": "V1121-B1-01", "raw_question": "По каким поставщикам у нас на конец месяца остались хвосты, которые уже не похожи на обычную задержку документов, а выглядят как реальная проблема в цепочке?", "validation_passed": true, "intent_match": true, "route_match": true, "causal_flags_match": true, "expected_intent_class": "cross_entity", "actual_intent_class": "cross_entity", "expected_route_hint": "hybrid_store_plus_live", "actual_route_hint": "hybrid_store_plus_live", "expected_requires": { "needs_cross_entity_join": true, "needs_causal_chain": true }, "actual_requires": { "needs_cross_entity_join": true, "needs_causal_chain": true, "needs_exact_object_trace": false, "needs_ranking": false, "needs_anomaly_summary": false, "needs_runtime_truth": false, "needs_period_cut": false, "needs_evidence": false }, "confidence_overall": "medium", "trace_id": "7L4ARgMdcABam4", "request_count_for_case": 1 }, { "case_id": "V1121-B1-02", "raw_question": "Где по покупателям у нас висит история \"отгрузили - денег нет - закрытия нет\", и по каким контрагентам это уже требует ручной проверки?", "validation_passed": true, "intent_match": true, "route_match": true, "causal_flags_match": true, "expected_intent_class": "cross_entity", "actual_intent_class": "cross_entity", "expected_route_hint": "hybrid_store_plus_live", "actual_route_hint": "hybrid_store_plus_live", "expected_requires": { "needs_cross_entity_join": true, "needs_causal_chain": true }, "actual_requires": { "needs_cross_entity_join": true, "needs_causal_chain": true, "needs_exact_object_trace": false, "needs_ranking": false, "needs_anomaly_summary": true, "needs_runtime_truth": false, "needs_period_cut": false, "needs_evidence": false }, "confidence_overall": "medium", "trace_id": "fzUNOFmwmU2v_A", "request_count_for_case": 1 }, { "case_id": "V1121-B1-03", "raw_question": "Покажи контрагентов, по которым сальдо у нас, скорее всего, не совпадет с их актом сверки, если его запросить прямо сейчас.", "validation_passed": true, "intent_match": true, "route_match": true, "causal_flags_match": true, "expected_intent_class": "cross_entity", "actual_intent_class": "cross_entity", "expected_route_hint": "hybrid_store_plus_live", "actual_route_hint": "hybrid_store_plus_live", "expected_requires": { "needs_cross_entity_join": true, "needs_causal_chain": true }, "actual_requires": { "needs_cross_entity_join": true, "needs_causal_chain": true, "needs_exact_object_trace": false, "needs_ranking": false, "needs_anomaly_summary": false, "needs_runtime_truth": false, "needs_period_cut": false, "needs_evidence": false }, "confidence_overall": "medium", "trace_id": "clytLLPPo6L-ZM", "request_count_for_case": 1 }, { "case_id": "V1121-B1-04", "raw_question": "Где у нас есть оплаты, но не хватает документов, которые должны были закрыть взаиморасчеты?", "validation_passed": true, "intent_match": true, "route_match": true, "causal_flags_match": true, "expected_intent_class": "cross_entity", "actual_intent_class": "cross_entity", "expected_route_hint": "hybrid_store_plus_live", "actual_route_hint": "hybrid_store_plus_live", "expected_requires": { "needs_cross_entity_join": true, "needs_causal_chain": true }, "actual_requires": { "needs_cross_entity_join": true, "needs_causal_chain": true, "needs_exact_object_trace": false, "needs_ranking": false, "needs_anomaly_summary": false, "needs_runtime_truth": false, "needs_period_cut": false, "needs_evidence": false }, "confidence_overall": "medium", "trace_id": "2EPBctOTQBi61R", "request_count_for_case": 1 }, { "case_id": "V1121-B1-05", "raw_question": "По каким контрагентам, наоборот, документы есть, а нормального закрытия оплатами не видно?", "validation_passed": true, "intent_match": true, "route_match": true, "causal_flags_match": true, "expected_intent_class": "cross_entity", "actual_intent_class": "cross_entity", "expected_route_hint": "hybrid_store_plus_live", "actual_route_hint": "hybrid_store_plus_live", "expected_requires": { "needs_cross_entity_join": true, "needs_causal_chain": true }, "actual_requires": { "needs_cross_entity_join": true, "needs_causal_chain": true, "needs_exact_object_trace": false, "needs_ranking": false, "needs_anomaly_summary": false, "needs_runtime_truth": false, "needs_period_cut": false, "needs_evidence": false }, "confidence_overall": "medium", "trace_id": "pH8V994Y8zzGru", "request_count_for_case": 1 }, { "case_id": "V1121-B1-06", "raw_question": "Есть ли такие зависшие авансы, которые уже давно надо было либо закрыть, либо хотя бы перепроверить руками?", "validation_passed": true, "intent_match": false, "route_match": true, "causal_flags_match": false, "expected_intent_class": "anomaly_probe", "actual_intent_class": "cross_entity", "expected_route_hint": "store_feature_risk", "actual_route_hint": "store_feature_risk", "expected_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false }, "actual_requires": { "needs_cross_entity_join": true, "needs_causal_chain": true, "needs_exact_object_trace": false, "needs_ranking": false, "needs_anomaly_summary": true, "needs_runtime_truth": false, "needs_period_cut": false, "needs_evidence": false }, "confidence_overall": "medium", "trace_id": "UKCM7zvsU6WUSG", "request_count_for_case": 1 }, { "case_id": "V1121-B2-01", "raw_question": "Какие реализации на конец периода выглядят так, будто они зависли и будут портить картину по выручке, если их не проверить заранее?", "validation_passed": true, "intent_match": false, "route_match": false, "causal_flags_match": false, "expected_intent_class": "heavy_analytical", "actual_intent_class": "cross_entity", "expected_route_hint": "batch_refresh_then_store", "actual_route_hint": "hybrid_store_plus_live", "expected_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false }, "actual_requires": { "needs_cross_entity_join": true, "needs_causal_chain": true, "needs_exact_object_trace": false, "needs_ranking": false, "needs_anomaly_summary": true, "needs_runtime_truth": false, "needs_period_cut": true, "needs_evidence": false }, "confidence_overall": "medium", "trace_id": "DjcpTNQM8KgQCi", "request_count_for_case": 1 }, { "case_id": "V1121-B2-02", "raw_question": "По каким отгрузкам видно, что проблема не просто в том, что клиент не оплатил, а в том, что сама связка документов собрана криво?", "validation_passed": true, "intent_match": true, "route_match": true, "causal_flags_match": true, "expected_intent_class": "cross_entity", "actual_intent_class": "cross_entity", "expected_route_hint": "hybrid_store_plus_live", "actual_route_hint": "hybrid_store_plus_live", "expected_requires": { "needs_cross_entity_join": true, "needs_causal_chain": true }, "actual_requires": { "needs_cross_entity_join": true, "needs_causal_chain": true, "needs_exact_object_trace": false, "needs_ranking": false, "needs_anomaly_summary": false, "needs_runtime_truth": false, "needs_period_cut": false, "needs_evidence": false }, "confidence_overall": "medium", "trace_id": "b8JTHbxO6YuBij", "request_count_for_case": 1 }, { "case_id": "V1121-B2-03", "raw_question": "Покажи реализации, где хвост выглядит особенно неприятно: сумма не маленькая, возраст хвоста уже заметный, и при этом не видно нормального завершения цепочки.", "validation_passed": true, "intent_match": false, "route_match": false, "causal_flags_match": false, "expected_intent_class": "heavy_analytical", "actual_intent_class": "cross_entity", "expected_route_hint": "batch_refresh_then_store", "actual_route_hint": "store_feature_risk", "expected_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false }, "actual_requires": { "needs_cross_entity_join": true, "needs_causal_chain": true, "needs_exact_object_trace": false, "needs_ranking": true, "needs_anomaly_summary": false, "needs_runtime_truth": false, "needs_period_cut": false, "needs_evidence": false }, "confidence_overall": "medium", "trace_id": "_ghrU-zDCd7_58", "request_count_for_case": 1 }, { "case_id": "V1121-B2-04", "raw_question": "Где по 90/62 история похожа на \"вроде все проведено, но если копнуть, закрытие держится на кривой связке\"?", "validation_passed": true, "intent_match": true, "route_match": true, "causal_flags_match": true, "expected_intent_class": "cross_entity", "actual_intent_class": "cross_entity", "expected_route_hint": "hybrid_store_plus_live", "actual_route_hint": "hybrid_store_plus_live", "expected_requires": { "needs_cross_entity_join": true, "needs_causal_chain": true }, "actual_requires": { "needs_cross_entity_join": true, "needs_causal_chain": true, "needs_exact_object_trace": false, "needs_ranking": false, "needs_anomaly_summary": false, "needs_runtime_truth": false, "needs_period_cut": false, "needs_evidence": false }, "confidence_overall": "medium", "trace_id": "SBMs3pLfp4FCdz", "request_count_for_case": 1 }, { "case_id": "V1121-B2-05", "raw_question": "Есть ли случаи, где реализация попала в период, а подтверждающие документы или оплата до сих пор живут в какой-то полуразобранной логике?", "validation_passed": true, "intent_match": true, "route_match": true, "causal_flags_match": true, "expected_intent_class": "cross_entity", "actual_intent_class": "cross_entity", "expected_route_hint": "hybrid_store_plus_live", "actual_route_hint": "hybrid_store_plus_live", "expected_requires": { "needs_cross_entity_join": true, "needs_causal_chain": true }, "actual_requires": { "needs_cross_entity_join": true, "needs_causal_chain": true, "needs_exact_object_trace": false, "needs_ranking": false, "needs_anomaly_summary": false, "needs_runtime_truth": false, "needs_period_cut": false, "needs_evidence": false }, "confidence_overall": "medium", "trace_id": "elTaYImxTBzijg", "request_count_for_case": 1 }, { "case_id": "V1121-B2-06", "raw_question": "По каким продажам на конец месяца видно, что бухгалтер потом будет долго распутывать, почему все это не сошлось нормально?", "validation_passed": true, "intent_match": false, "route_match": false, "causal_flags_match": false, "expected_intent_class": "heavy_analytical", "actual_intent_class": "cross_entity", "expected_route_hint": "batch_refresh_then_store", "actual_route_hint": "hybrid_store_plus_live", "expected_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false }, "actual_requires": { "needs_cross_entity_join": true, "needs_causal_chain": true, "needs_exact_object_trace": false, "needs_ranking": false, "needs_anomaly_summary": false, "needs_runtime_truth": false, "needs_period_cut": false, "needs_evidence": false }, "confidence_overall": "high", "trace_id": "vfAte4AVljlOJD", "request_count_for_case": 1 }, { "case_id": "V1121-B3-01", "raw_question": "Какие банковские движения выглядят так, будто выписка есть, а нормального отражения в учете под ней не хватает?", "validation_passed": true, "intent_match": true, "route_match": false, "causal_flags_match": true, "expected_intent_class": "cross_entity", "actual_intent_class": "cross_entity", "expected_route_hint": "hybrid_store_plus_live", "actual_route_hint": "store_feature_risk", "expected_requires": { "needs_cross_entity_join": true, "needs_causal_chain": true }, "actual_requires": { "needs_cross_entity_join": true, "needs_causal_chain": true, "needs_exact_object_trace": false, "needs_ranking": false, "needs_anomaly_summary": true, "needs_runtime_truth": false, "needs_period_cut": false, "needs_evidence": false }, "confidence_overall": "medium", "trace_id": "DKB3P_fnbRPEQx", "request_count_for_case": 1 }, { "case_id": "V1121-B3-02", "raw_question": "Где по банку можно заподозрить, что документ и проводка вроде есть, но логика операции все равно не собрана в нормальную цепочку?", "validation_passed": true, "intent_match": true, "route_match": false, "causal_flags_match": true, "expected_intent_class": "cross_entity", "actual_intent_class": "cross_entity", "expected_route_hint": "hybrid_store_plus_live", "actual_route_hint": "store_feature_risk", "expected_requires": { "needs_cross_entity_join": true, "needs_causal_chain": true }, "actual_requires": { "needs_cross_entity_join": true, "needs_causal_chain": true, "needs_exact_object_trace": false, "needs_ranking": false, "needs_anomaly_summary": false, "needs_runtime_truth": false, "needs_period_cut": false, "needs_evidence": false }, "confidence_overall": "medium", "trace_id": "dG5iqslpfsCAs-", "request_count_for_case": 1 }, { "case_id": "V1121-B3-03", "raw_question": "Есть ли движения по счету 51, которые выглядят корректно по сумме, но по смыслу оставляют после себя подозрительный хвост?", "validation_passed": true, "intent_match": true, "route_match": true, "causal_flags_match": false, "expected_intent_class": "anomaly_probe", "actual_intent_class": "anomaly_probe", "expected_route_hint": "store_feature_risk", "actual_route_hint": "store_feature_risk", "expected_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false }, "actual_requires": { "needs_cross_entity_join": false, "needs_causal_chain": true, "needs_exact_object_trace": false, "needs_ranking": false, "needs_anomaly_summary": true, "needs_runtime_truth": false, "needs_period_cut": false, "needs_evidence": false }, "confidence_overall": "medium", "trace_id": "R7Qmh0qYqvLtFM", "request_count_for_case": 1 }, { "case_id": "V1121-B3-04", "raw_question": "Покажи банковские кейсы, где, скорее всего, проблема не в платеже как таковом, а в том, что он не туда лег или не тем документом закрылся.", "validation_passed": true, "intent_match": true, "route_match": true, "causal_flags_match": true, "expected_intent_class": "cross_entity", "actual_intent_class": "cross_entity", "expected_route_hint": "hybrid_store_plus_live", "actual_route_hint": "hybrid_store_plus_live", "expected_requires": { "needs_cross_entity_join": true, "needs_causal_chain": true }, "actual_requires": { "needs_cross_entity_join": true, "needs_causal_chain": true, "needs_exact_object_trace": false, "needs_ranking": false, "needs_anomaly_summary": false, "needs_runtime_truth": false, "needs_period_cut": false, "needs_evidence": false }, "confidence_overall": "medium", "trace_id": "ZcOCCDGyPv7OXQ", "request_count_for_case": 1 }, { "case_id": "V1121-B3-05", "raw_question": "Где банк и бухгалтерский контур, скорее всего, расходятся не по одной строке, а по паттерну, который уже начинает повторяться?", "validation_passed": true, "intent_match": false, "route_match": false, "causal_flags_match": false, "expected_intent_class": "heavy_analytical", "actual_intent_class": "cross_entity", "expected_route_hint": "batch_refresh_then_store", "actual_route_hint": "hybrid_store_plus_live", "expected_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false }, "actual_requires": { "needs_cross_entity_join": true, "needs_causal_chain": true, "needs_exact_object_trace": false, "needs_ranking": false, "needs_anomaly_summary": true, "needs_runtime_truth": false, "needs_period_cut": false, "needs_evidence": false }, "confidence_overall": "medium", "trace_id": "68el6FsqLTj0Wr", "request_count_for_case": 1 }, { "case_id": "V1121-B4-01", "raw_question": "Какие товарные позиции выглядят так, будто их уже продавали, а нормального прихода под них в базе не видно?", "validation_passed": true, "intent_match": true, "route_match": true, "causal_flags_match": true, "expected_intent_class": "cross_entity", "actual_intent_class": "cross_entity", "expected_route_hint": "hybrid_store_plus_live", "actual_route_hint": "hybrid_store_plus_live", "expected_requires": { "needs_cross_entity_join": true, "needs_causal_chain": true }, "actual_requires": { "needs_cross_entity_join": true, "needs_causal_chain": true, "needs_exact_object_trace": false, "needs_ranking": false, "needs_anomaly_summary": false, "needs_runtime_truth": false, "needs_period_cut": false, "needs_evidence": false }, "confidence_overall": "medium", "trace_id": "QjumNdr-CETXfz", "request_count_for_case": 1 }, { "case_id": "V1121-B4-02", "raw_question": "Где по товарам у нас отрицательные или подозрительные остатки, которые, скорее всего, связаны не с жизнью, а с ошибкой в учете?", "validation_passed": true, "intent_match": true, "route_match": true, "causal_flags_match": false, "expected_intent_class": "anomaly_probe", "actual_intent_class": "anomaly_probe", "expected_route_hint": "store_feature_risk", "actual_route_hint": "store_feature_risk", "expected_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false }, "actual_requires": { "needs_cross_entity_join": true, "needs_causal_chain": true, "needs_exact_object_trace": false, "needs_ranking": false, "needs_anomaly_summary": true, "needs_runtime_truth": false, "needs_period_cut": false, "needs_evidence": false }, "confidence_overall": "medium", "trace_id": "OeCwt50KwUWf0j", "request_count_for_case": 1 }, { "case_id": "V1121-B4-03", "raw_question": "Есть ли случаи, где приход и реализация вроде есть оба, но даты между ними выглядят так, будто кто-то завел документы задним числом или с ошибкой?", "validation_passed": true, "intent_match": true, "route_match": true, "causal_flags_match": true, "expected_intent_class": "cross_entity", "actual_intent_class": "cross_entity", "expected_route_hint": "hybrid_store_plus_live", "actual_route_hint": "hybrid_store_plus_live", "expected_requires": { "needs_cross_entity_join": true, "needs_causal_chain": true }, "actual_requires": { "needs_cross_entity_join": true, "needs_causal_chain": true, "needs_exact_object_trace": false, "needs_ranking": false, "needs_anomaly_summary": false, "needs_runtime_truth": false, "needs_period_cut": false, "needs_evidence": false }, "confidence_overall": "medium", "trace_id": "57yG2XnBCG74ZO", "request_count_for_case": 1 }, { "case_id": "V1121-B4-04", "raw_question": "Покажи товарные хвосты, которые сильнее всего искажают картину периода и требуют проверки до закрытия месяца.", "validation_passed": true, "intent_match": true, "route_match": true, "causal_flags_match": true, "expected_intent_class": "heavy_analytical", "actual_intent_class": "heavy_analytical", "expected_route_hint": "batch_refresh_then_store", "actual_route_hint": "batch_refresh_then_store", "expected_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false }, "actual_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false, "needs_exact_object_trace": false, "needs_ranking": true, "needs_anomaly_summary": true, "needs_runtime_truth": false, "needs_period_cut": true, "needs_evidence": false }, "confidence_overall": "medium", "trace_id": "AvpSzYOujxuRJZ", "request_count_for_case": 1 }, { "case_id": "V1121-B4-05", "raw_question": "Где по складу и реализации видно, что себестоимость продажи подтверждена слабо или вообще опирается на кривую цепочку?", "validation_passed": true, "intent_match": true, "route_match": true, "causal_flags_match": true, "expected_intent_class": "cross_entity", "actual_intent_class": "cross_entity", "expected_route_hint": "hybrid_store_plus_live", "actual_route_hint": "hybrid_store_plus_live", "expected_requires": { "needs_cross_entity_join": true, "needs_causal_chain": true }, "actual_requires": { "needs_cross_entity_join": true, "needs_causal_chain": true, "needs_exact_object_trace": false, "needs_ranking": false, "needs_anomaly_summary": false, "needs_runtime_truth": false, "needs_period_cut": false, "needs_evidence": false }, "confidence_overall": "medium", "trace_id": "n7E9BbeQif1ag5", "request_count_for_case": 1 }, { "case_id": "V1121-B5-01", "raw_question": "Что сейчас лежит на 10 счете так, будто это уже давно надо было либо списать, либо хотя бы проверить, почему оно до сих пор висит?", "validation_passed": true, "intent_match": false, "route_match": true, "causal_flags_match": false, "expected_intent_class": "rule_based_account_control", "actual_intent_class": "cross_entity", "expected_route_hint": "store_feature_risk", "actual_route_hint": "store_feature_risk", "expected_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false }, "actual_requires": { "needs_cross_entity_join": true, "needs_causal_chain": true, "needs_exact_object_trace": false, "needs_ranking": false, "needs_anomaly_summary": false, "needs_runtime_truth": false, "needs_period_cut": false, "needs_evidence": false }, "confidence_overall": "medium", "trace_id": "6QkkgoRcMhK0Gi", "request_count_for_case": 1 }, { "case_id": "V1121-B5-02", "raw_question": "Есть ли материалы, по которым остаток выглядит нелогично: движения были, хозяйственная логика слабая, а в учете все еще что-то торчит?", "validation_passed": true, "intent_match": true, "route_match": true, "causal_flags_match": false, "expected_intent_class": "anomaly_probe", "actual_intent_class": "anomaly_probe", "expected_route_hint": "store_feature_risk", "actual_route_hint": "store_feature_risk", "expected_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false }, "actual_requires": { "needs_cross_entity_join": true, "needs_causal_chain": true, "needs_exact_object_trace": false, "needs_ranking": false, "needs_anomaly_summary": true, "needs_runtime_truth": false, "needs_period_cut": false, "needs_evidence": false }, "confidence_overall": "medium", "trace_id": "0lcF5KNdyHtHss", "request_count_for_case": 1 }, { "case_id": "V1121-B5-03", "raw_question": "Покажи позиции по материалам, где возможен эффект \"вроде сумма не огромная, но учетная логика выглядит криво\".", "validation_passed": true, "intent_match": false, "route_match": true, "causal_flags_match": false, "expected_intent_class": "anomaly_probe", "actual_intent_class": "cross_entity", "expected_route_hint": "store_feature_risk", "actual_route_hint": "store_feature_risk", "expected_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false }, "actual_requires": { "needs_cross_entity_join": true, "needs_causal_chain": true, "needs_exact_object_trace": false, "needs_ranking": false, "needs_anomaly_summary": true, "needs_runtime_truth": false, "needs_period_cut": false, "needs_evidence": false }, "confidence_overall": "medium", "trace_id": "dEMSXmj7nUFesp", "request_count_for_case": 1 }, { "case_id": "V1121-B6-01", "raw_question": "Какие записи на 97 счете больше всего похожи на ошибку в датах начала, конца или самом сроке списания?", "validation_passed": true, "intent_match": true, "route_match": true, "causal_flags_match": true, "expected_intent_class": "rule_based_account_control", "actual_intent_class": "rule_based_account_control", "expected_route_hint": "store_feature_risk", "actual_route_hint": "store_feature_risk", "expected_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false }, "actual_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false, "needs_exact_object_trace": false, "needs_ranking": false, "needs_anomaly_summary": false, "needs_runtime_truth": false, "needs_period_cut": false, "needs_evidence": false }, "confidence_overall": "medium", "trace_id": "CWLmVHuj0_EbgR", "request_count_for_case": 1 }, { "case_id": "V1121-B6-02", "raw_question": "Есть ли такие расходы будущих периодов, которые заведены, но по ним не видно нормальной ежемесячной жизни, как будто запись повисла сама по себе?", "validation_passed": true, "intent_match": false, "route_match": true, "causal_flags_match": false, "expected_intent_class": "anomaly_probe", "actual_intent_class": "cross_entity", "expected_route_hint": "store_feature_risk", "actual_route_hint": "store_feature_risk", "expected_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false }, "actual_requires": { "needs_cross_entity_join": true, "needs_causal_chain": true, "needs_exact_object_trace": false, "needs_ranking": false, "needs_anomaly_summary": true, "needs_runtime_truth": false, "needs_period_cut": false, "needs_evidence": false }, "confidence_overall": "medium", "trace_id": "gZT6epMO1Vz9Yq", "request_count_for_case": 1 }, { "case_id": "V1121-B6-03", "raw_question": "Покажи кейсы по 97 счету, где срок документа и срок списания визуально противоречат друг другу.", "validation_passed": true, "intent_match": true, "route_match": true, "causal_flags_match": true, "expected_intent_class": "rule_based_account_control", "actual_intent_class": "rule_based_account_control", "expected_route_hint": "store_feature_risk", "actual_route_hint": "store_feature_risk", "expected_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false }, "actual_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false, "needs_exact_object_trace": false, "needs_ranking": false, "needs_anomaly_summary": false, "needs_runtime_truth": false, "needs_period_cut": false, "needs_evidence": false }, "confidence_overall": "medium", "trace_id": "ELh5esCGqt7MjA", "request_count_for_case": 1 }, { "case_id": "V1121-B7-01", "raw_question": "Есть ли основные средства, по которым параметры карточки выглядят так, будто амортизацию им задали не по логике объекта, а \"как получилось\"?", "validation_passed": true, "intent_match": true, "route_match": true, "causal_flags_match": true, "expected_intent_class": "rule_based_account_control", "actual_intent_class": "rule_based_account_control", "expected_route_hint": "store_feature_risk", "actual_route_hint": "store_feature_risk", "expected_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false }, "actual_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false, "needs_exact_object_trace": false, "needs_ranking": false, "needs_anomaly_summary": false, "needs_runtime_truth": false, "needs_period_cut": false, "needs_evidence": false }, "confidence_overall": "medium", "trace_id": "JalgiJUrUL9DHW", "request_count_for_case": 1 }, { "case_id": "V1121-B7-02", "raw_question": "Покажи объекты ОС, где риск не в сумме, а в том, что карточка и логика начисления выглядят подозрительно и могут аукнуться позже.", "validation_passed": true, "intent_match": false, "route_match": true, "causal_flags_match": false, "expected_intent_class": "rule_based_account_control", "actual_intent_class": "anomaly_probe", "expected_route_hint": "store_feature_risk", "actual_route_hint": "store_feature_risk", "expected_requires": { "needs_cross_entity_join": false, "needs_causal_chain": false }, "actual_requires": { "needs_cross_entity_join": false, "needs_causal_chain": true, "needs_exact_object_trace": false, "needs_ranking": false, "needs_anomaly_summary": true, "needs_runtime_truth": false, "needs_period_cut": false, "needs_evidence": false }, "confidence_overall": "medium", "trace_id": "0lNcvDtvYn5C63", "request_count_for_case": 1 } ] }