NODEDC_1C/llm_normalizer/data/eval_cases/eval-h3k8TyTFuu.report.json

1272 lines
48 KiB
JSON
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"run_id": "eval-h3k8TyTFuu",
"timestamp": "2026-03-23T16:30:36.413Z",
"mode": "single-pass-strict",
"use_mock": false,
"prompt_version": "normalizer_v1_1_2_1",
"dataset": {
"source": "file",
"file": "normalizer_eval_v1_1_2_1_30cases.json"
},
"cases_total": 30,
"metrics": {
"schema_validation_pass_rate": 100,
"intent_class_accuracy": 70,
"route_hint_accuracy": 80,
"causal_flag_accuracy": 60,
"high_confidence_error_rate": 3.33
},
"baseline_metrics": {
"schema_validation_pass_rate": 100,
"intent_class_accuracy": 72.73,
"route_hint_accuracy": 90.91,
"causal_flag_accuracy": 81.82,
"high_confidence_error_rate": 9.09
},
"baseline_delta": {
"schema_validation_pass_rate": 0,
"intent_class_accuracy": -2.73,
"route_hint_accuracy": -10.91,
"causal_flag_accuracy": -21.82,
"high_confidence_error_rate": -5.76
},
"class_accuracy": {
"cross_entity": {
"total": 14,
"passed": 14,
"accuracy_percent": 100
},
"anomaly_probe": {
"total": 6,
"passed": 3,
"accuracy_percent": 50
},
"heavy_analytical": {
"total": 5,
"passed": 1,
"accuracy_percent": 20
},
"rule_based_account_control": {
"total": 5,
"passed": 3,
"accuracy_percent": 60
}
},
"budget": {
"requests_total": 30,
"retries_used": 0,
"guidance": {
"forensic_calls_max": 10,
"final_eval_calls_max": 30,
"target_total_calls_max": 40,
"hard_cap_calls_max": 45
}
},
"mismatches": [
{
"case_id": "V1121-B1-06",
"expected_intent_class": "anomaly_probe",
"actual_intent_class": "cross_entity",
"expected_route_hint": "store_feature_risk",
"actual_route_hint": "store_feature_risk",
"expected_requires": {
"needs_cross_entity_join": false,
"needs_causal_chain": false
},
"actual_requires": {
"needs_cross_entity_join": true,
"needs_causal_chain": true,
"needs_exact_object_trace": false,
"needs_ranking": false,
"needs_anomaly_summary": true,
"needs_runtime_truth": false,
"needs_period_cut": false,
"needs_evidence": false
},
"comment": "Route chosen correctly, but intent_class drifted into a neighboring taxonomy bucket.",
"trace_id": "UKCM7zvsU6WUSG"
},
{
"case_id": "V1121-B2-01",
"expected_intent_class": "heavy_analytical",
"actual_intent_class": "cross_entity",
"expected_route_hint": "batch_refresh_then_store",
"actual_route_hint": "hybrid_store_plus_live",
"expected_requires": {
"needs_cross_entity_join": false,
"needs_causal_chain": false
},
"actual_requires": {
"needs_cross_entity_join": true,
"needs_causal_chain": true,
"needs_exact_object_trace": false,
"needs_ranking": false,
"needs_anomaly_summary": true,
"needs_runtime_truth": false,
"needs_period_cut": true,
"needs_evidence": false
},
"comment": "Both intent and route misclassified; likely lexical ambiguity in causal vs risk wording.",
"trace_id": "DjcpTNQM8KgQCi"
},
{
"case_id": "V1121-B2-03",
"expected_intent_class": "heavy_analytical",
"actual_intent_class": "cross_entity",
"expected_route_hint": "batch_refresh_then_store",
"actual_route_hint": "store_feature_risk",
"expected_requires": {
"needs_cross_entity_join": false,
"needs_causal_chain": false
},
"actual_requires": {
"needs_cross_entity_join": true,
"needs_causal_chain": true,
"needs_exact_object_trace": false,
"needs_ranking": true,
"needs_anomaly_summary": false,
"needs_runtime_truth": false,
"needs_period_cut": false,
"needs_evidence": false
},
"comment": "Both intent and route misclassified; likely lexical ambiguity in causal vs risk wording.",
"trace_id": "_ghrU-zDCd7_58"
},
{
"case_id": "V1121-B2-06",
"expected_intent_class": "heavy_analytical",
"actual_intent_class": "cross_entity",
"expected_route_hint": "batch_refresh_then_store",
"actual_route_hint": "hybrid_store_plus_live",
"expected_requires": {
"needs_cross_entity_join": false,
"needs_causal_chain": false
},
"actual_requires": {
"needs_cross_entity_join": true,
"needs_causal_chain": true,
"needs_exact_object_trace": false,
"needs_ranking": false,
"needs_anomaly_summary": false,
"needs_runtime_truth": false,
"needs_period_cut": false,
"needs_evidence": false
},
"comment": "Both intent and route misclassified; likely lexical ambiguity in causal vs risk wording.",
"trace_id": "vfAte4AVljlOJD"
},
{
"case_id": "V1121-B3-01",
"expected_intent_class": "cross_entity",
"actual_intent_class": "cross_entity",
"expected_route_hint": "hybrid_store_plus_live",
"actual_route_hint": "store_feature_risk",
"expected_requires": {
"needs_cross_entity_join": true,
"needs_causal_chain": true
},
"actual_requires": {
"needs_cross_entity_join": true,
"needs_causal_chain": true,
"needs_exact_object_trace": false,
"needs_ranking": false,
"needs_anomaly_summary": true,
"needs_runtime_truth": false,
"needs_period_cut": false,
"needs_evidence": false
},
"comment": "Intent understood, but route_hint selected a weaker execution route.",
"trace_id": "DKB3P_fnbRPEQx"
},
{
"case_id": "V1121-B3-02",
"expected_intent_class": "cross_entity",
"actual_intent_class": "cross_entity",
"expected_route_hint": "hybrid_store_plus_live",
"actual_route_hint": "store_feature_risk",
"expected_requires": {
"needs_cross_entity_join": true,
"needs_causal_chain": true
},
"actual_requires": {
"needs_cross_entity_join": true,
"needs_causal_chain": true,
"needs_exact_object_trace": false,
"needs_ranking": false,
"needs_anomaly_summary": false,
"needs_runtime_truth": false,
"needs_period_cut": false,
"needs_evidence": false
},
"comment": "Intent understood, but route_hint selected a weaker execution route.",
"trace_id": "dG5iqslpfsCAs-"
},
{
"case_id": "V1121-B3-03",
"expected_intent_class": "anomaly_probe",
"actual_intent_class": "anomaly_probe",
"expected_route_hint": "store_feature_risk",
"actual_route_hint": "store_feature_risk",
"expected_requires": {
"needs_cross_entity_join": false,
"needs_causal_chain": false
},
"actual_requires": {
"needs_cross_entity_join": false,
"needs_causal_chain": true,
"needs_exact_object_trace": false,
"needs_ranking": false,
"needs_anomaly_summary": true,
"needs_runtime_truth": false,
"needs_period_cut": false,
"needs_evidence": false
},
"comment": "Causal flags are inconsistent with expected relationship depth.",
"trace_id": "R7Qmh0qYqvLtFM"
},
{
"case_id": "V1121-B3-05",
"expected_intent_class": "heavy_analytical",
"actual_intent_class": "cross_entity",
"expected_route_hint": "batch_refresh_then_store",
"actual_route_hint": "hybrid_store_plus_live",
"expected_requires": {
"needs_cross_entity_join": false,
"needs_causal_chain": false
},
"actual_requires": {
"needs_cross_entity_join": true,
"needs_causal_chain": true,
"needs_exact_object_trace": false,
"needs_ranking": false,
"needs_anomaly_summary": true,
"needs_runtime_truth": false,
"needs_period_cut": false,
"needs_evidence": false
},
"comment": "Both intent and route misclassified; likely lexical ambiguity in causal vs risk wording.",
"trace_id": "68el6FsqLTj0Wr"
},
{
"case_id": "V1121-B4-02",
"expected_intent_class": "anomaly_probe",
"actual_intent_class": "anomaly_probe",
"expected_route_hint": "store_feature_risk",
"actual_route_hint": "store_feature_risk",
"expected_requires": {
"needs_cross_entity_join": false,
"needs_causal_chain": false
},
"actual_requires": {
"needs_cross_entity_join": true,
"needs_causal_chain": true,
"needs_exact_object_trace": false,
"needs_ranking": false,
"needs_anomaly_summary": true,
"needs_runtime_truth": false,
"needs_period_cut": false,
"needs_evidence": false
},
"comment": "Causal flags are inconsistent with expected relationship depth.",
"trace_id": "OeCwt50KwUWf0j"
},
{
"case_id": "V1121-B5-01",
"expected_intent_class": "rule_based_account_control",
"actual_intent_class": "cross_entity",
"expected_route_hint": "store_feature_risk",
"actual_route_hint": "store_feature_risk",
"expected_requires": {
"needs_cross_entity_join": false,
"needs_causal_chain": false
},
"actual_requires": {
"needs_cross_entity_join": true,
"needs_causal_chain": true,
"needs_exact_object_trace": false,
"needs_ranking": false,
"needs_anomaly_summary": false,
"needs_runtime_truth": false,
"needs_period_cut": false,
"needs_evidence": false
},
"comment": "Route chosen correctly, but intent_class drifted into a neighboring taxonomy bucket.",
"trace_id": "6QkkgoRcMhK0Gi"
},
{
"case_id": "V1121-B5-02",
"expected_intent_class": "anomaly_probe",
"actual_intent_class": "anomaly_probe",
"expected_route_hint": "store_feature_risk",
"actual_route_hint": "store_feature_risk",
"expected_requires": {
"needs_cross_entity_join": false,
"needs_causal_chain": false
},
"actual_requires": {
"needs_cross_entity_join": true,
"needs_causal_chain": true,
"needs_exact_object_trace": false,
"needs_ranking": false,
"needs_anomaly_summary": true,
"needs_runtime_truth": false,
"needs_period_cut": false,
"needs_evidence": false
},
"comment": "Causal flags are inconsistent with expected relationship depth.",
"trace_id": "0lcF5KNdyHtHss"
},
{
"case_id": "V1121-B5-03",
"expected_intent_class": "anomaly_probe",
"actual_intent_class": "cross_entity",
"expected_route_hint": "store_feature_risk",
"actual_route_hint": "store_feature_risk",
"expected_requires": {
"needs_cross_entity_join": false,
"needs_causal_chain": false
},
"actual_requires": {
"needs_cross_entity_join": true,
"needs_causal_chain": true,
"needs_exact_object_trace": false,
"needs_ranking": false,
"needs_anomaly_summary": true,
"needs_runtime_truth": false,
"needs_period_cut": false,
"needs_evidence": false
},
"comment": "Route chosen correctly, but intent_class drifted into a neighboring taxonomy bucket.",
"trace_id": "dEMSXmj7nUFesp"
},
{
"case_id": "V1121-B6-02",
"expected_intent_class": "anomaly_probe",
"actual_intent_class": "cross_entity",
"expected_route_hint": "store_feature_risk",
"actual_route_hint": "store_feature_risk",
"expected_requires": {
"needs_cross_entity_join": false,
"needs_causal_chain": false
},
"actual_requires": {
"needs_cross_entity_join": true,
"needs_causal_chain": true,
"needs_exact_object_trace": false,
"needs_ranking": false,
"needs_anomaly_summary": true,
"needs_runtime_truth": false,
"needs_period_cut": false,
"needs_evidence": false
},
"comment": "Route chosen correctly, but intent_class drifted into a neighboring taxonomy bucket.",
"trace_id": "gZT6epMO1Vz9Yq"
},
{
"case_id": "V1121-B7-02",
"expected_intent_class": "rule_based_account_control",
"actual_intent_class": "anomaly_probe",
"expected_route_hint": "store_feature_risk",
"actual_route_hint": "store_feature_risk",
"expected_requires": {
"needs_cross_entity_join": false,
"needs_causal_chain": false
},
"actual_requires": {
"needs_cross_entity_join": false,
"needs_causal_chain": true,
"needs_exact_object_trace": false,
"needs_ranking": false,
"needs_anomaly_summary": true,
"needs_runtime_truth": false,
"needs_period_cut": false,
"needs_evidence": false
},
"comment": "Route chosen correctly, but intent_class drifted into a neighboring taxonomy bucket.",
"trace_id": "0lNcvDtvYn5C63"
}
],
"bad_confidence_cases": [
{
"case_id": "V1121-B2-06",
"confidence_overall": "high",
"intent_match": false,
"route_match": false,
"causal_match": false,
"trace_id": "vfAte4AVljlOJD"
}
],
"results": [
{
"case_id": "V1121-B1-01",
"raw_question": "По каким поставщикам у нас на конец месяца остались хвосты, которые уже не похожи на обычную задержку документов, а выглядят как реальная проблема в цепочке?",
"validation_passed": true,
"intent_match": true,
"route_match": true,
"causal_flags_match": true,
"expected_intent_class": "cross_entity",
"actual_intent_class": "cross_entity",
"expected_route_hint": "hybrid_store_plus_live",
"actual_route_hint": "hybrid_store_plus_live",
"expected_requires": {
"needs_cross_entity_join": true,
"needs_causal_chain": true
},
"actual_requires": {
"needs_cross_entity_join": true,
"needs_causal_chain": true,
"needs_exact_object_trace": false,
"needs_ranking": false,
"needs_anomaly_summary": false,
"needs_runtime_truth": false,
"needs_period_cut": false,
"needs_evidence": false
},
"confidence_overall": "medium",
"trace_id": "7L4ARgMdcABam4",
"request_count_for_case": 1
},
{
"case_id": "V1121-B1-02",
"raw_question": "Где по покупателям у нас висит история \"отгрузили - денег нет - закрытия нет\", и по каким контрагентам это уже требует ручной проверки?",
"validation_passed": true,
"intent_match": true,
"route_match": true,
"causal_flags_match": true,
"expected_intent_class": "cross_entity",
"actual_intent_class": "cross_entity",
"expected_route_hint": "hybrid_store_plus_live",
"actual_route_hint": "hybrid_store_plus_live",
"expected_requires": {
"needs_cross_entity_join": true,
"needs_causal_chain": true
},
"actual_requires": {
"needs_cross_entity_join": true,
"needs_causal_chain": true,
"needs_exact_object_trace": false,
"needs_ranking": false,
"needs_anomaly_summary": true,
"needs_runtime_truth": false,
"needs_period_cut": false,
"needs_evidence": false
},
"confidence_overall": "medium",
"trace_id": "fzUNOFmwmU2v_A",
"request_count_for_case": 1
},
{
"case_id": "V1121-B1-03",
"raw_question": "Покажи контрагентов, по которым сальдо у нас, скорее всего, не совпадет с их актом сверки, если его запросить прямо сейчас.",
"validation_passed": true,
"intent_match": true,
"route_match": true,
"causal_flags_match": true,
"expected_intent_class": "cross_entity",
"actual_intent_class": "cross_entity",
"expected_route_hint": "hybrid_store_plus_live",
"actual_route_hint": "hybrid_store_plus_live",
"expected_requires": {
"needs_cross_entity_join": true,
"needs_causal_chain": true
},
"actual_requires": {
"needs_cross_entity_join": true,
"needs_causal_chain": true,
"needs_exact_object_trace": false,
"needs_ranking": false,
"needs_anomaly_summary": false,
"needs_runtime_truth": false,
"needs_period_cut": false,
"needs_evidence": false
},
"confidence_overall": "medium",
"trace_id": "clytLLPPo6L-ZM",
"request_count_for_case": 1
},
{
"case_id": "V1121-B1-04",
"raw_question": "Где у нас есть оплаты, но не хватает документов, которые должны были закрыть взаиморасчеты?",
"validation_passed": true,
"intent_match": true,
"route_match": true,
"causal_flags_match": true,
"expected_intent_class": "cross_entity",
"actual_intent_class": "cross_entity",
"expected_route_hint": "hybrid_store_plus_live",
"actual_route_hint": "hybrid_store_plus_live",
"expected_requires": {
"needs_cross_entity_join": true,
"needs_causal_chain": true
},
"actual_requires": {
"needs_cross_entity_join": true,
"needs_causal_chain": true,
"needs_exact_object_trace": false,
"needs_ranking": false,
"needs_anomaly_summary": false,
"needs_runtime_truth": false,
"needs_period_cut": false,
"needs_evidence": false
},
"confidence_overall": "medium",
"trace_id": "2EPBctOTQBi61R",
"request_count_for_case": 1
},
{
"case_id": "V1121-B1-05",
"raw_question": "По каким контрагентам, наоборот, документы есть, а нормального закрытия оплатами не видно?",
"validation_passed": true,
"intent_match": true,
"route_match": true,
"causal_flags_match": true,
"expected_intent_class": "cross_entity",
"actual_intent_class": "cross_entity",
"expected_route_hint": "hybrid_store_plus_live",
"actual_route_hint": "hybrid_store_plus_live",
"expected_requires": {
"needs_cross_entity_join": true,
"needs_causal_chain": true
},
"actual_requires": {
"needs_cross_entity_join": true,
"needs_causal_chain": true,
"needs_exact_object_trace": false,
"needs_ranking": false,
"needs_anomaly_summary": false,
"needs_runtime_truth": false,
"needs_period_cut": false,
"needs_evidence": false
},
"confidence_overall": "medium",
"trace_id": "pH8V994Y8zzGru",
"request_count_for_case": 1
},
{
"case_id": "V1121-B1-06",
"raw_question": "Есть ли такие зависшие авансы, которые уже давно надо было либо закрыть, либо хотя бы перепроверить руками?",
"validation_passed": true,
"intent_match": false,
"route_match": true,
"causal_flags_match": false,
"expected_intent_class": "anomaly_probe",
"actual_intent_class": "cross_entity",
"expected_route_hint": "store_feature_risk",
"actual_route_hint": "store_feature_risk",
"expected_requires": {
"needs_cross_entity_join": false,
"needs_causal_chain": false
},
"actual_requires": {
"needs_cross_entity_join": true,
"needs_causal_chain": true,
"needs_exact_object_trace": false,
"needs_ranking": false,
"needs_anomaly_summary": true,
"needs_runtime_truth": false,
"needs_period_cut": false,
"needs_evidence": false
},
"confidence_overall": "medium",
"trace_id": "UKCM7zvsU6WUSG",
"request_count_for_case": 1
},
{
"case_id": "V1121-B2-01",
"raw_question": "Какие реализации на конец периода выглядят так, будто они зависли и будут портить картину по выручке, если их не проверить заранее?",
"validation_passed": true,
"intent_match": false,
"route_match": false,
"causal_flags_match": false,
"expected_intent_class": "heavy_analytical",
"actual_intent_class": "cross_entity",
"expected_route_hint": "batch_refresh_then_store",
"actual_route_hint": "hybrid_store_plus_live",
"expected_requires": {
"needs_cross_entity_join": false,
"needs_causal_chain": false
},
"actual_requires": {
"needs_cross_entity_join": true,
"needs_causal_chain": true,
"needs_exact_object_trace": false,
"needs_ranking": false,
"needs_anomaly_summary": true,
"needs_runtime_truth": false,
"needs_period_cut": true,
"needs_evidence": false
},
"confidence_overall": "medium",
"trace_id": "DjcpTNQM8KgQCi",
"request_count_for_case": 1
},
{
"case_id": "V1121-B2-02",
"raw_question": "По каким отгрузкам видно, что проблема не просто в том, что клиент не оплатил, а в том, что сама связка документов собрана криво?",
"validation_passed": true,
"intent_match": true,
"route_match": true,
"causal_flags_match": true,
"expected_intent_class": "cross_entity",
"actual_intent_class": "cross_entity",
"expected_route_hint": "hybrid_store_plus_live",
"actual_route_hint": "hybrid_store_plus_live",
"expected_requires": {
"needs_cross_entity_join": true,
"needs_causal_chain": true
},
"actual_requires": {
"needs_cross_entity_join": true,
"needs_causal_chain": true,
"needs_exact_object_trace": false,
"needs_ranking": false,
"needs_anomaly_summary": false,
"needs_runtime_truth": false,
"needs_period_cut": false,
"needs_evidence": false
},
"confidence_overall": "medium",
"trace_id": "b8JTHbxO6YuBij",
"request_count_for_case": 1
},
{
"case_id": "V1121-B2-03",
"raw_question": "Покажи реализации, где хвост выглядит особенно неприятно: сумма не маленькая, возраст хвоста уже заметный, и при этом не видно нормального завершения цепочки.",
"validation_passed": true,
"intent_match": false,
"route_match": false,
"causal_flags_match": false,
"expected_intent_class": "heavy_analytical",
"actual_intent_class": "cross_entity",
"expected_route_hint": "batch_refresh_then_store",
"actual_route_hint": "store_feature_risk",
"expected_requires": {
"needs_cross_entity_join": false,
"needs_causal_chain": false
},
"actual_requires": {
"needs_cross_entity_join": true,
"needs_causal_chain": true,
"needs_exact_object_trace": false,
"needs_ranking": true,
"needs_anomaly_summary": false,
"needs_runtime_truth": false,
"needs_period_cut": false,
"needs_evidence": false
},
"confidence_overall": "medium",
"trace_id": "_ghrU-zDCd7_58",
"request_count_for_case": 1
},
{
"case_id": "V1121-B2-04",
"raw_question": "Где по 90/62 история похожа на \"вроде все проведено, но если копнуть, закрытие держится на кривой связке\"?",
"validation_passed": true,
"intent_match": true,
"route_match": true,
"causal_flags_match": true,
"expected_intent_class": "cross_entity",
"actual_intent_class": "cross_entity",
"expected_route_hint": "hybrid_store_plus_live",
"actual_route_hint": "hybrid_store_plus_live",
"expected_requires": {
"needs_cross_entity_join": true,
"needs_causal_chain": true
},
"actual_requires": {
"needs_cross_entity_join": true,
"needs_causal_chain": true,
"needs_exact_object_trace": false,
"needs_ranking": false,
"needs_anomaly_summary": false,
"needs_runtime_truth": false,
"needs_period_cut": false,
"needs_evidence": false
},
"confidence_overall": "medium",
"trace_id": "SBMs3pLfp4FCdz",
"request_count_for_case": 1
},
{
"case_id": "V1121-B2-05",
"raw_question": "Есть ли случаи, где реализация попала в период, а подтверждающие документы или оплата до сих пор живут в какой-то полуразобранной логике?",
"validation_passed": true,
"intent_match": true,
"route_match": true,
"causal_flags_match": true,
"expected_intent_class": "cross_entity",
"actual_intent_class": "cross_entity",
"expected_route_hint": "hybrid_store_plus_live",
"actual_route_hint": "hybrid_store_plus_live",
"expected_requires": {
"needs_cross_entity_join": true,
"needs_causal_chain": true
},
"actual_requires": {
"needs_cross_entity_join": true,
"needs_causal_chain": true,
"needs_exact_object_trace": false,
"needs_ranking": false,
"needs_anomaly_summary": false,
"needs_runtime_truth": false,
"needs_period_cut": false,
"needs_evidence": false
},
"confidence_overall": "medium",
"trace_id": "elTaYImxTBzijg",
"request_count_for_case": 1
},
{
"case_id": "V1121-B2-06",
"raw_question": "По каким продажам на конец месяца видно, что бухгалтер потом будет долго распутывать, почему все это не сошлось нормально?",
"validation_passed": true,
"intent_match": false,
"route_match": false,
"causal_flags_match": false,
"expected_intent_class": "heavy_analytical",
"actual_intent_class": "cross_entity",
"expected_route_hint": "batch_refresh_then_store",
"actual_route_hint": "hybrid_store_plus_live",
"expected_requires": {
"needs_cross_entity_join": false,
"needs_causal_chain": false
},
"actual_requires": {
"needs_cross_entity_join": true,
"needs_causal_chain": true,
"needs_exact_object_trace": false,
"needs_ranking": false,
"needs_anomaly_summary": false,
"needs_runtime_truth": false,
"needs_period_cut": false,
"needs_evidence": false
},
"confidence_overall": "high",
"trace_id": "vfAte4AVljlOJD",
"request_count_for_case": 1
},
{
"case_id": "V1121-B3-01",
"raw_question": "Какие банковские движения выглядят так, будто выписка есть, а нормального отражения в учете под ней не хватает?",
"validation_passed": true,
"intent_match": true,
"route_match": false,
"causal_flags_match": true,
"expected_intent_class": "cross_entity",
"actual_intent_class": "cross_entity",
"expected_route_hint": "hybrid_store_plus_live",
"actual_route_hint": "store_feature_risk",
"expected_requires": {
"needs_cross_entity_join": true,
"needs_causal_chain": true
},
"actual_requires": {
"needs_cross_entity_join": true,
"needs_causal_chain": true,
"needs_exact_object_trace": false,
"needs_ranking": false,
"needs_anomaly_summary": true,
"needs_runtime_truth": false,
"needs_period_cut": false,
"needs_evidence": false
},
"confidence_overall": "medium",
"trace_id": "DKB3P_fnbRPEQx",
"request_count_for_case": 1
},
{
"case_id": "V1121-B3-02",
"raw_question": "Где по банку можно заподозрить, что документ и проводка вроде есть, но логика операции все равно не собрана в нормальную цепочку?",
"validation_passed": true,
"intent_match": true,
"route_match": false,
"causal_flags_match": true,
"expected_intent_class": "cross_entity",
"actual_intent_class": "cross_entity",
"expected_route_hint": "hybrid_store_plus_live",
"actual_route_hint": "store_feature_risk",
"expected_requires": {
"needs_cross_entity_join": true,
"needs_causal_chain": true
},
"actual_requires": {
"needs_cross_entity_join": true,
"needs_causal_chain": true,
"needs_exact_object_trace": false,
"needs_ranking": false,
"needs_anomaly_summary": false,
"needs_runtime_truth": false,
"needs_period_cut": false,
"needs_evidence": false
},
"confidence_overall": "medium",
"trace_id": "dG5iqslpfsCAs-",
"request_count_for_case": 1
},
{
"case_id": "V1121-B3-03",
"raw_question": "Есть ли движения по счету 51, которые выглядят корректно по сумме, но по смыслу оставляют после себя подозрительный хвост?",
"validation_passed": true,
"intent_match": true,
"route_match": true,
"causal_flags_match": false,
"expected_intent_class": "anomaly_probe",
"actual_intent_class": "anomaly_probe",
"expected_route_hint": "store_feature_risk",
"actual_route_hint": "store_feature_risk",
"expected_requires": {
"needs_cross_entity_join": false,
"needs_causal_chain": false
},
"actual_requires": {
"needs_cross_entity_join": false,
"needs_causal_chain": true,
"needs_exact_object_trace": false,
"needs_ranking": false,
"needs_anomaly_summary": true,
"needs_runtime_truth": false,
"needs_period_cut": false,
"needs_evidence": false
},
"confidence_overall": "medium",
"trace_id": "R7Qmh0qYqvLtFM",
"request_count_for_case": 1
},
{
"case_id": "V1121-B3-04",
"raw_question": "Покажи банковские кейсы, где, скорее всего, проблема не в платеже как таковом, а в том, что он не туда лег или не тем документом закрылся.",
"validation_passed": true,
"intent_match": true,
"route_match": true,
"causal_flags_match": true,
"expected_intent_class": "cross_entity",
"actual_intent_class": "cross_entity",
"expected_route_hint": "hybrid_store_plus_live",
"actual_route_hint": "hybrid_store_plus_live",
"expected_requires": {
"needs_cross_entity_join": true,
"needs_causal_chain": true
},
"actual_requires": {
"needs_cross_entity_join": true,
"needs_causal_chain": true,
"needs_exact_object_trace": false,
"needs_ranking": false,
"needs_anomaly_summary": false,
"needs_runtime_truth": false,
"needs_period_cut": false,
"needs_evidence": false
},
"confidence_overall": "medium",
"trace_id": "ZcOCCDGyPv7OXQ",
"request_count_for_case": 1
},
{
"case_id": "V1121-B3-05",
"raw_question": "Где банк и бухгалтерский контур, скорее всего, расходятся не по одной строке, а по паттерну, который уже начинает повторяться?",
"validation_passed": true,
"intent_match": false,
"route_match": false,
"causal_flags_match": false,
"expected_intent_class": "heavy_analytical",
"actual_intent_class": "cross_entity",
"expected_route_hint": "batch_refresh_then_store",
"actual_route_hint": "hybrid_store_plus_live",
"expected_requires": {
"needs_cross_entity_join": false,
"needs_causal_chain": false
},
"actual_requires": {
"needs_cross_entity_join": true,
"needs_causal_chain": true,
"needs_exact_object_trace": false,
"needs_ranking": false,
"needs_anomaly_summary": true,
"needs_runtime_truth": false,
"needs_period_cut": false,
"needs_evidence": false
},
"confidence_overall": "medium",
"trace_id": "68el6FsqLTj0Wr",
"request_count_for_case": 1
},
{
"case_id": "V1121-B4-01",
"raw_question": "Какие товарные позиции выглядят так, будто их уже продавали, а нормального прихода под них в базе не видно?",
"validation_passed": true,
"intent_match": true,
"route_match": true,
"causal_flags_match": true,
"expected_intent_class": "cross_entity",
"actual_intent_class": "cross_entity",
"expected_route_hint": "hybrid_store_plus_live",
"actual_route_hint": "hybrid_store_plus_live",
"expected_requires": {
"needs_cross_entity_join": true,
"needs_causal_chain": true
},
"actual_requires": {
"needs_cross_entity_join": true,
"needs_causal_chain": true,
"needs_exact_object_trace": false,
"needs_ranking": false,
"needs_anomaly_summary": false,
"needs_runtime_truth": false,
"needs_period_cut": false,
"needs_evidence": false
},
"confidence_overall": "medium",
"trace_id": "QjumNdr-CETXfz",
"request_count_for_case": 1
},
{
"case_id": "V1121-B4-02",
"raw_question": "Где по товарам у нас отрицательные или подозрительные остатки, которые, скорее всего, связаны не с жизнью, а с ошибкой в учете?",
"validation_passed": true,
"intent_match": true,
"route_match": true,
"causal_flags_match": false,
"expected_intent_class": "anomaly_probe",
"actual_intent_class": "anomaly_probe",
"expected_route_hint": "store_feature_risk",
"actual_route_hint": "store_feature_risk",
"expected_requires": {
"needs_cross_entity_join": false,
"needs_causal_chain": false
},
"actual_requires": {
"needs_cross_entity_join": true,
"needs_causal_chain": true,
"needs_exact_object_trace": false,
"needs_ranking": false,
"needs_anomaly_summary": true,
"needs_runtime_truth": false,
"needs_period_cut": false,
"needs_evidence": false
},
"confidence_overall": "medium",
"trace_id": "OeCwt50KwUWf0j",
"request_count_for_case": 1
},
{
"case_id": "V1121-B4-03",
"raw_question": "Есть ли случаи, где приход и реализация вроде есть оба, но даты между ними выглядят так, будто кто-то завел документы задним числом или с ошибкой?",
"validation_passed": true,
"intent_match": true,
"route_match": true,
"causal_flags_match": true,
"expected_intent_class": "cross_entity",
"actual_intent_class": "cross_entity",
"expected_route_hint": "hybrid_store_plus_live",
"actual_route_hint": "hybrid_store_plus_live",
"expected_requires": {
"needs_cross_entity_join": true,
"needs_causal_chain": true
},
"actual_requires": {
"needs_cross_entity_join": true,
"needs_causal_chain": true,
"needs_exact_object_trace": false,
"needs_ranking": false,
"needs_anomaly_summary": false,
"needs_runtime_truth": false,
"needs_period_cut": false,
"needs_evidence": false
},
"confidence_overall": "medium",
"trace_id": "57yG2XnBCG74ZO",
"request_count_for_case": 1
},
{
"case_id": "V1121-B4-04",
"raw_question": "Покажи товарные хвосты, которые сильнее всего искажают картину периода и требуют проверки до закрытия месяца.",
"validation_passed": true,
"intent_match": true,
"route_match": true,
"causal_flags_match": true,
"expected_intent_class": "heavy_analytical",
"actual_intent_class": "heavy_analytical",
"expected_route_hint": "batch_refresh_then_store",
"actual_route_hint": "batch_refresh_then_store",
"expected_requires": {
"needs_cross_entity_join": false,
"needs_causal_chain": false
},
"actual_requires": {
"needs_cross_entity_join": false,
"needs_causal_chain": false,
"needs_exact_object_trace": false,
"needs_ranking": true,
"needs_anomaly_summary": true,
"needs_runtime_truth": false,
"needs_period_cut": true,
"needs_evidence": false
},
"confidence_overall": "medium",
"trace_id": "AvpSzYOujxuRJZ",
"request_count_for_case": 1
},
{
"case_id": "V1121-B4-05",
"raw_question": "Где по складу и реализации видно, что себестоимость продажи подтверждена слабо или вообще опирается на кривую цепочку?",
"validation_passed": true,
"intent_match": true,
"route_match": true,
"causal_flags_match": true,
"expected_intent_class": "cross_entity",
"actual_intent_class": "cross_entity",
"expected_route_hint": "hybrid_store_plus_live",
"actual_route_hint": "hybrid_store_plus_live",
"expected_requires": {
"needs_cross_entity_join": true,
"needs_causal_chain": true
},
"actual_requires": {
"needs_cross_entity_join": true,
"needs_causal_chain": true,
"needs_exact_object_trace": false,
"needs_ranking": false,
"needs_anomaly_summary": false,
"needs_runtime_truth": false,
"needs_period_cut": false,
"needs_evidence": false
},
"confidence_overall": "medium",
"trace_id": "n7E9BbeQif1ag5",
"request_count_for_case": 1
},
{
"case_id": "V1121-B5-01",
"raw_question": "Что сейчас лежит на 10 счете так, будто это уже давно надо было либо списать, либо хотя бы проверить, почему оно до сих пор висит?",
"validation_passed": true,
"intent_match": false,
"route_match": true,
"causal_flags_match": false,
"expected_intent_class": "rule_based_account_control",
"actual_intent_class": "cross_entity",
"expected_route_hint": "store_feature_risk",
"actual_route_hint": "store_feature_risk",
"expected_requires": {
"needs_cross_entity_join": false,
"needs_causal_chain": false
},
"actual_requires": {
"needs_cross_entity_join": true,
"needs_causal_chain": true,
"needs_exact_object_trace": false,
"needs_ranking": false,
"needs_anomaly_summary": false,
"needs_runtime_truth": false,
"needs_period_cut": false,
"needs_evidence": false
},
"confidence_overall": "medium",
"trace_id": "6QkkgoRcMhK0Gi",
"request_count_for_case": 1
},
{
"case_id": "V1121-B5-02",
"raw_question": "Есть ли материалы, по которым остаток выглядит нелогично: движения были, хозяйственная логика слабая, а в учете все еще что-то торчит?",
"validation_passed": true,
"intent_match": true,
"route_match": true,
"causal_flags_match": false,
"expected_intent_class": "anomaly_probe",
"actual_intent_class": "anomaly_probe",
"expected_route_hint": "store_feature_risk",
"actual_route_hint": "store_feature_risk",
"expected_requires": {
"needs_cross_entity_join": false,
"needs_causal_chain": false
},
"actual_requires": {
"needs_cross_entity_join": true,
"needs_causal_chain": true,
"needs_exact_object_trace": false,
"needs_ranking": false,
"needs_anomaly_summary": true,
"needs_runtime_truth": false,
"needs_period_cut": false,
"needs_evidence": false
},
"confidence_overall": "medium",
"trace_id": "0lcF5KNdyHtHss",
"request_count_for_case": 1
},
{
"case_id": "V1121-B5-03",
"raw_question": "Покажи позиции по материалам, где возможен эффект \"вроде сумма не огромная, но учетная логика выглядит криво\".",
"validation_passed": true,
"intent_match": false,
"route_match": true,
"causal_flags_match": false,
"expected_intent_class": "anomaly_probe",
"actual_intent_class": "cross_entity",
"expected_route_hint": "store_feature_risk",
"actual_route_hint": "store_feature_risk",
"expected_requires": {
"needs_cross_entity_join": false,
"needs_causal_chain": false
},
"actual_requires": {
"needs_cross_entity_join": true,
"needs_causal_chain": true,
"needs_exact_object_trace": false,
"needs_ranking": false,
"needs_anomaly_summary": true,
"needs_runtime_truth": false,
"needs_period_cut": false,
"needs_evidence": false
},
"confidence_overall": "medium",
"trace_id": "dEMSXmj7nUFesp",
"request_count_for_case": 1
},
{
"case_id": "V1121-B6-01",
"raw_question": "Какие записи на 97 счете больше всего похожи на ошибку в датах начала, конца или самом сроке списания?",
"validation_passed": true,
"intent_match": true,
"route_match": true,
"causal_flags_match": true,
"expected_intent_class": "rule_based_account_control",
"actual_intent_class": "rule_based_account_control",
"expected_route_hint": "store_feature_risk",
"actual_route_hint": "store_feature_risk",
"expected_requires": {
"needs_cross_entity_join": false,
"needs_causal_chain": false
},
"actual_requires": {
"needs_cross_entity_join": false,
"needs_causal_chain": false,
"needs_exact_object_trace": false,
"needs_ranking": false,
"needs_anomaly_summary": false,
"needs_runtime_truth": false,
"needs_period_cut": false,
"needs_evidence": false
},
"confidence_overall": "medium",
"trace_id": "CWLmVHuj0_EbgR",
"request_count_for_case": 1
},
{
"case_id": "V1121-B6-02",
"raw_question": "Есть ли такие расходы будущих периодов, которые заведены, но по ним не видно нормальной ежемесячной жизни, как будто запись повисла сама по себе?",
"validation_passed": true,
"intent_match": false,
"route_match": true,
"causal_flags_match": false,
"expected_intent_class": "anomaly_probe",
"actual_intent_class": "cross_entity",
"expected_route_hint": "store_feature_risk",
"actual_route_hint": "store_feature_risk",
"expected_requires": {
"needs_cross_entity_join": false,
"needs_causal_chain": false
},
"actual_requires": {
"needs_cross_entity_join": true,
"needs_causal_chain": true,
"needs_exact_object_trace": false,
"needs_ranking": false,
"needs_anomaly_summary": true,
"needs_runtime_truth": false,
"needs_period_cut": false,
"needs_evidence": false
},
"confidence_overall": "medium",
"trace_id": "gZT6epMO1Vz9Yq",
"request_count_for_case": 1
},
{
"case_id": "V1121-B6-03",
"raw_question": "Покажи кейсы по 97 счету, где срок документа и срок списания визуально противоречат друг другу.",
"validation_passed": true,
"intent_match": true,
"route_match": true,
"causal_flags_match": true,
"expected_intent_class": "rule_based_account_control",
"actual_intent_class": "rule_based_account_control",
"expected_route_hint": "store_feature_risk",
"actual_route_hint": "store_feature_risk",
"expected_requires": {
"needs_cross_entity_join": false,
"needs_causal_chain": false
},
"actual_requires": {
"needs_cross_entity_join": false,
"needs_causal_chain": false,
"needs_exact_object_trace": false,
"needs_ranking": false,
"needs_anomaly_summary": false,
"needs_runtime_truth": false,
"needs_period_cut": false,
"needs_evidence": false
},
"confidence_overall": "medium",
"trace_id": "ELh5esCGqt7MjA",
"request_count_for_case": 1
},
{
"case_id": "V1121-B7-01",
"raw_question": "Есть ли основные средства, по которым параметры карточки выглядят так, будто амортизацию им задали не по логике объекта, а \"как получилось\"?",
"validation_passed": true,
"intent_match": true,
"route_match": true,
"causal_flags_match": true,
"expected_intent_class": "rule_based_account_control",
"actual_intent_class": "rule_based_account_control",
"expected_route_hint": "store_feature_risk",
"actual_route_hint": "store_feature_risk",
"expected_requires": {
"needs_cross_entity_join": false,
"needs_causal_chain": false
},
"actual_requires": {
"needs_cross_entity_join": false,
"needs_causal_chain": false,
"needs_exact_object_trace": false,
"needs_ranking": false,
"needs_anomaly_summary": false,
"needs_runtime_truth": false,
"needs_period_cut": false,
"needs_evidence": false
},
"confidence_overall": "medium",
"trace_id": "JalgiJUrUL9DHW",
"request_count_for_case": 1
},
{
"case_id": "V1121-B7-02",
"raw_question": "Покажи объекты ОС, где риск не в сумме, а в том, что карточка и логика начисления выглядят подозрительно и могут аукнуться позже.",
"validation_passed": true,
"intent_match": false,
"route_match": true,
"causal_flags_match": false,
"expected_intent_class": "rule_based_account_control",
"actual_intent_class": "anomaly_probe",
"expected_route_hint": "store_feature_risk",
"actual_route_hint": "store_feature_risk",
"expected_requires": {
"needs_cross_entity_join": false,
"needs_causal_chain": false
},
"actual_requires": {
"needs_cross_entity_join": false,
"needs_causal_chain": true,
"needs_exact_object_trace": false,
"needs_ranking": false,
"needs_anomaly_summary": true,
"needs_runtime_truth": false,
"needs_period_cut": false,
"needs_evidence": false
},
"confidence_overall": "medium",
"trace_id": "0lNcvDtvYn5C63",
"request_count_for_case": 1
}
]
}