1272 lines
48 KiB
JSON
1272 lines
48 KiB
JSON
{
|
||
"run_id": "eval-h3k8TyTFuu",
|
||
"timestamp": "2026-03-23T16:30:36.413Z",
|
||
"mode": "single-pass-strict",
|
||
"use_mock": false,
|
||
"prompt_version": "normalizer_v1_1_2_1",
|
||
"dataset": {
|
||
"source": "file",
|
||
"file": "normalizer_eval_v1_1_2_1_30cases.json"
|
||
},
|
||
"cases_total": 30,
|
||
"metrics": {
|
||
"schema_validation_pass_rate": 100,
|
||
"intent_class_accuracy": 70,
|
||
"route_hint_accuracy": 80,
|
||
"causal_flag_accuracy": 60,
|
||
"high_confidence_error_rate": 3.33
|
||
},
|
||
"baseline_metrics": {
|
||
"schema_validation_pass_rate": 100,
|
||
"intent_class_accuracy": 72.73,
|
||
"route_hint_accuracy": 90.91,
|
||
"causal_flag_accuracy": 81.82,
|
||
"high_confidence_error_rate": 9.09
|
||
},
|
||
"baseline_delta": {
|
||
"schema_validation_pass_rate": 0,
|
||
"intent_class_accuracy": -2.73,
|
||
"route_hint_accuracy": -10.91,
|
||
"causal_flag_accuracy": -21.82,
|
||
"high_confidence_error_rate": -5.76
|
||
},
|
||
"class_accuracy": {
|
||
"cross_entity": {
|
||
"total": 14,
|
||
"passed": 14,
|
||
"accuracy_percent": 100
|
||
},
|
||
"anomaly_probe": {
|
||
"total": 6,
|
||
"passed": 3,
|
||
"accuracy_percent": 50
|
||
},
|
||
"heavy_analytical": {
|
||
"total": 5,
|
||
"passed": 1,
|
||
"accuracy_percent": 20
|
||
},
|
||
"rule_based_account_control": {
|
||
"total": 5,
|
||
"passed": 3,
|
||
"accuracy_percent": 60
|
||
}
|
||
},
|
||
"budget": {
|
||
"requests_total": 30,
|
||
"retries_used": 0,
|
||
"guidance": {
|
||
"forensic_calls_max": 10,
|
||
"final_eval_calls_max": 30,
|
||
"target_total_calls_max": 40,
|
||
"hard_cap_calls_max": 45
|
||
}
|
||
},
|
||
"mismatches": [
|
||
{
|
||
"case_id": "V1121-B1-06",
|
||
"expected_intent_class": "anomaly_probe",
|
||
"actual_intent_class": "cross_entity",
|
||
"expected_route_hint": "store_feature_risk",
|
||
"actual_route_hint": "store_feature_risk",
|
||
"expected_requires": {
|
||
"needs_cross_entity_join": false,
|
||
"needs_causal_chain": false
|
||
},
|
||
"actual_requires": {
|
||
"needs_cross_entity_join": true,
|
||
"needs_causal_chain": true,
|
||
"needs_exact_object_trace": false,
|
||
"needs_ranking": false,
|
||
"needs_anomaly_summary": true,
|
||
"needs_runtime_truth": false,
|
||
"needs_period_cut": false,
|
||
"needs_evidence": false
|
||
},
|
||
"comment": "Route chosen correctly, but intent_class drifted into a neighboring taxonomy bucket.",
|
||
"trace_id": "UKCM7zvsU6WUSG"
|
||
},
|
||
{
|
||
"case_id": "V1121-B2-01",
|
||
"expected_intent_class": "heavy_analytical",
|
||
"actual_intent_class": "cross_entity",
|
||
"expected_route_hint": "batch_refresh_then_store",
|
||
"actual_route_hint": "hybrid_store_plus_live",
|
||
"expected_requires": {
|
||
"needs_cross_entity_join": false,
|
||
"needs_causal_chain": false
|
||
},
|
||
"actual_requires": {
|
||
"needs_cross_entity_join": true,
|
||
"needs_causal_chain": true,
|
||
"needs_exact_object_trace": false,
|
||
"needs_ranking": false,
|
||
"needs_anomaly_summary": true,
|
||
"needs_runtime_truth": false,
|
||
"needs_period_cut": true,
|
||
"needs_evidence": false
|
||
},
|
||
"comment": "Both intent and route misclassified; likely lexical ambiguity in causal vs risk wording.",
|
||
"trace_id": "DjcpTNQM8KgQCi"
|
||
},
|
||
{
|
||
"case_id": "V1121-B2-03",
|
||
"expected_intent_class": "heavy_analytical",
|
||
"actual_intent_class": "cross_entity",
|
||
"expected_route_hint": "batch_refresh_then_store",
|
||
"actual_route_hint": "store_feature_risk",
|
||
"expected_requires": {
|
||
"needs_cross_entity_join": false,
|
||
"needs_causal_chain": false
|
||
},
|
||
"actual_requires": {
|
||
"needs_cross_entity_join": true,
|
||
"needs_causal_chain": true,
|
||
"needs_exact_object_trace": false,
|
||
"needs_ranking": true,
|
||
"needs_anomaly_summary": false,
|
||
"needs_runtime_truth": false,
|
||
"needs_period_cut": false,
|
||
"needs_evidence": false
|
||
},
|
||
"comment": "Both intent and route misclassified; likely lexical ambiguity in causal vs risk wording.",
|
||
"trace_id": "_ghrU-zDCd7_58"
|
||
},
|
||
{
|
||
"case_id": "V1121-B2-06",
|
||
"expected_intent_class": "heavy_analytical",
|
||
"actual_intent_class": "cross_entity",
|
||
"expected_route_hint": "batch_refresh_then_store",
|
||
"actual_route_hint": "hybrid_store_plus_live",
|
||
"expected_requires": {
|
||
"needs_cross_entity_join": false,
|
||
"needs_causal_chain": false
|
||
},
|
||
"actual_requires": {
|
||
"needs_cross_entity_join": true,
|
||
"needs_causal_chain": true,
|
||
"needs_exact_object_trace": false,
|
||
"needs_ranking": false,
|
||
"needs_anomaly_summary": false,
|
||
"needs_runtime_truth": false,
|
||
"needs_period_cut": false,
|
||
"needs_evidence": false
|
||
},
|
||
"comment": "Both intent and route misclassified; likely lexical ambiguity in causal vs risk wording.",
|
||
"trace_id": "vfAte4AVljlOJD"
|
||
},
|
||
{
|
||
"case_id": "V1121-B3-01",
|
||
"expected_intent_class": "cross_entity",
|
||
"actual_intent_class": "cross_entity",
|
||
"expected_route_hint": "hybrid_store_plus_live",
|
||
"actual_route_hint": "store_feature_risk",
|
||
"expected_requires": {
|
||
"needs_cross_entity_join": true,
|
||
"needs_causal_chain": true
|
||
},
|
||
"actual_requires": {
|
||
"needs_cross_entity_join": true,
|
||
"needs_causal_chain": true,
|
||
"needs_exact_object_trace": false,
|
||
"needs_ranking": false,
|
||
"needs_anomaly_summary": true,
|
||
"needs_runtime_truth": false,
|
||
"needs_period_cut": false,
|
||
"needs_evidence": false
|
||
},
|
||
"comment": "Intent understood, but route_hint selected a weaker execution route.",
|
||
"trace_id": "DKB3P_fnbRPEQx"
|
||
},
|
||
{
|
||
"case_id": "V1121-B3-02",
|
||
"expected_intent_class": "cross_entity",
|
||
"actual_intent_class": "cross_entity",
|
||
"expected_route_hint": "hybrid_store_plus_live",
|
||
"actual_route_hint": "store_feature_risk",
|
||
"expected_requires": {
|
||
"needs_cross_entity_join": true,
|
||
"needs_causal_chain": true
|
||
},
|
||
"actual_requires": {
|
||
"needs_cross_entity_join": true,
|
||
"needs_causal_chain": true,
|
||
"needs_exact_object_trace": false,
|
||
"needs_ranking": false,
|
||
"needs_anomaly_summary": false,
|
||
"needs_runtime_truth": false,
|
||
"needs_period_cut": false,
|
||
"needs_evidence": false
|
||
},
|
||
"comment": "Intent understood, but route_hint selected a weaker execution route.",
|
||
"trace_id": "dG5iqslpfsCAs-"
|
||
},
|
||
{
|
||
"case_id": "V1121-B3-03",
|
||
"expected_intent_class": "anomaly_probe",
|
||
"actual_intent_class": "anomaly_probe",
|
||
"expected_route_hint": "store_feature_risk",
|
||
"actual_route_hint": "store_feature_risk",
|
||
"expected_requires": {
|
||
"needs_cross_entity_join": false,
|
||
"needs_causal_chain": false
|
||
},
|
||
"actual_requires": {
|
||
"needs_cross_entity_join": false,
|
||
"needs_causal_chain": true,
|
||
"needs_exact_object_trace": false,
|
||
"needs_ranking": false,
|
||
"needs_anomaly_summary": true,
|
||
"needs_runtime_truth": false,
|
||
"needs_period_cut": false,
|
||
"needs_evidence": false
|
||
},
|
||
"comment": "Causal flags are inconsistent with expected relationship depth.",
|
||
"trace_id": "R7Qmh0qYqvLtFM"
|
||
},
|
||
{
|
||
"case_id": "V1121-B3-05",
|
||
"expected_intent_class": "heavy_analytical",
|
||
"actual_intent_class": "cross_entity",
|
||
"expected_route_hint": "batch_refresh_then_store",
|
||
"actual_route_hint": "hybrid_store_plus_live",
|
||
"expected_requires": {
|
||
"needs_cross_entity_join": false,
|
||
"needs_causal_chain": false
|
||
},
|
||
"actual_requires": {
|
||
"needs_cross_entity_join": true,
|
||
"needs_causal_chain": true,
|
||
"needs_exact_object_trace": false,
|
||
"needs_ranking": false,
|
||
"needs_anomaly_summary": true,
|
||
"needs_runtime_truth": false,
|
||
"needs_period_cut": false,
|
||
"needs_evidence": false
|
||
},
|
||
"comment": "Both intent and route misclassified; likely lexical ambiguity in causal vs risk wording.",
|
||
"trace_id": "68el6FsqLTj0Wr"
|
||
},
|
||
{
|
||
"case_id": "V1121-B4-02",
|
||
"expected_intent_class": "anomaly_probe",
|
||
"actual_intent_class": "anomaly_probe",
|
||
"expected_route_hint": "store_feature_risk",
|
||
"actual_route_hint": "store_feature_risk",
|
||
"expected_requires": {
|
||
"needs_cross_entity_join": false,
|
||
"needs_causal_chain": false
|
||
},
|
||
"actual_requires": {
|
||
"needs_cross_entity_join": true,
|
||
"needs_causal_chain": true,
|
||
"needs_exact_object_trace": false,
|
||
"needs_ranking": false,
|
||
"needs_anomaly_summary": true,
|
||
"needs_runtime_truth": false,
|
||
"needs_period_cut": false,
|
||
"needs_evidence": false
|
||
},
|
||
"comment": "Causal flags are inconsistent with expected relationship depth.",
|
||
"trace_id": "OeCwt50KwUWf0j"
|
||
},
|
||
{
|
||
"case_id": "V1121-B5-01",
|
||
"expected_intent_class": "rule_based_account_control",
|
||
"actual_intent_class": "cross_entity",
|
||
"expected_route_hint": "store_feature_risk",
|
||
"actual_route_hint": "store_feature_risk",
|
||
"expected_requires": {
|
||
"needs_cross_entity_join": false,
|
||
"needs_causal_chain": false
|
||
},
|
||
"actual_requires": {
|
||
"needs_cross_entity_join": true,
|
||
"needs_causal_chain": true,
|
||
"needs_exact_object_trace": false,
|
||
"needs_ranking": false,
|
||
"needs_anomaly_summary": false,
|
||
"needs_runtime_truth": false,
|
||
"needs_period_cut": false,
|
||
"needs_evidence": false
|
||
},
|
||
"comment": "Route chosen correctly, but intent_class drifted into a neighboring taxonomy bucket.",
|
||
"trace_id": "6QkkgoRcMhK0Gi"
|
||
},
|
||
{
|
||
"case_id": "V1121-B5-02",
|
||
"expected_intent_class": "anomaly_probe",
|
||
"actual_intent_class": "anomaly_probe",
|
||
"expected_route_hint": "store_feature_risk",
|
||
"actual_route_hint": "store_feature_risk",
|
||
"expected_requires": {
|
||
"needs_cross_entity_join": false,
|
||
"needs_causal_chain": false
|
||
},
|
||
"actual_requires": {
|
||
"needs_cross_entity_join": true,
|
||
"needs_causal_chain": true,
|
||
"needs_exact_object_trace": false,
|
||
"needs_ranking": false,
|
||
"needs_anomaly_summary": true,
|
||
"needs_runtime_truth": false,
|
||
"needs_period_cut": false,
|
||
"needs_evidence": false
|
||
},
|
||
"comment": "Causal flags are inconsistent with expected relationship depth.",
|
||
"trace_id": "0lcF5KNdyHtHss"
|
||
},
|
||
{
|
||
"case_id": "V1121-B5-03",
|
||
"expected_intent_class": "anomaly_probe",
|
||
"actual_intent_class": "cross_entity",
|
||
"expected_route_hint": "store_feature_risk",
|
||
"actual_route_hint": "store_feature_risk",
|
||
"expected_requires": {
|
||
"needs_cross_entity_join": false,
|
||
"needs_causal_chain": false
|
||
},
|
||
"actual_requires": {
|
||
"needs_cross_entity_join": true,
|
||
"needs_causal_chain": true,
|
||
"needs_exact_object_trace": false,
|
||
"needs_ranking": false,
|
||
"needs_anomaly_summary": true,
|
||
"needs_runtime_truth": false,
|
||
"needs_period_cut": false,
|
||
"needs_evidence": false
|
||
},
|
||
"comment": "Route chosen correctly, but intent_class drifted into a neighboring taxonomy bucket.",
|
||
"trace_id": "dEMSXmj7nUFesp"
|
||
},
|
||
{
|
||
"case_id": "V1121-B6-02",
|
||
"expected_intent_class": "anomaly_probe",
|
||
"actual_intent_class": "cross_entity",
|
||
"expected_route_hint": "store_feature_risk",
|
||
"actual_route_hint": "store_feature_risk",
|
||
"expected_requires": {
|
||
"needs_cross_entity_join": false,
|
||
"needs_causal_chain": false
|
||
},
|
||
"actual_requires": {
|
||
"needs_cross_entity_join": true,
|
||
"needs_causal_chain": true,
|
||
"needs_exact_object_trace": false,
|
||
"needs_ranking": false,
|
||
"needs_anomaly_summary": true,
|
||
"needs_runtime_truth": false,
|
||
"needs_period_cut": false,
|
||
"needs_evidence": false
|
||
},
|
||
"comment": "Route chosen correctly, but intent_class drifted into a neighboring taxonomy bucket.",
|
||
"trace_id": "gZT6epMO1Vz9Yq"
|
||
},
|
||
{
|
||
"case_id": "V1121-B7-02",
|
||
"expected_intent_class": "rule_based_account_control",
|
||
"actual_intent_class": "anomaly_probe",
|
||
"expected_route_hint": "store_feature_risk",
|
||
"actual_route_hint": "store_feature_risk",
|
||
"expected_requires": {
|
||
"needs_cross_entity_join": false,
|
||
"needs_causal_chain": false
|
||
},
|
||
"actual_requires": {
|
||
"needs_cross_entity_join": false,
|
||
"needs_causal_chain": true,
|
||
"needs_exact_object_trace": false,
|
||
"needs_ranking": false,
|
||
"needs_anomaly_summary": true,
|
||
"needs_runtime_truth": false,
|
||
"needs_period_cut": false,
|
||
"needs_evidence": false
|
||
},
|
||
"comment": "Route chosen correctly, but intent_class drifted into a neighboring taxonomy bucket.",
|
||
"trace_id": "0lNcvDtvYn5C63"
|
||
}
|
||
],
|
||
"bad_confidence_cases": [
|
||
{
|
||
"case_id": "V1121-B2-06",
|
||
"confidence_overall": "high",
|
||
"intent_match": false,
|
||
"route_match": false,
|
||
"causal_match": false,
|
||
"trace_id": "vfAte4AVljlOJD"
|
||
}
|
||
],
|
||
"results": [
|
||
{
|
||
"case_id": "V1121-B1-01",
|
||
"raw_question": "По каким поставщикам у нас на конец месяца остались хвосты, которые уже не похожи на обычную задержку документов, а выглядят как реальная проблема в цепочке?",
|
||
"validation_passed": true,
|
||
"intent_match": true,
|
||
"route_match": true,
|
||
"causal_flags_match": true,
|
||
"expected_intent_class": "cross_entity",
|
||
"actual_intent_class": "cross_entity",
|
||
"expected_route_hint": "hybrid_store_plus_live",
|
||
"actual_route_hint": "hybrid_store_plus_live",
|
||
"expected_requires": {
|
||
"needs_cross_entity_join": true,
|
||
"needs_causal_chain": true
|
||
},
|
||
"actual_requires": {
|
||
"needs_cross_entity_join": true,
|
||
"needs_causal_chain": true,
|
||
"needs_exact_object_trace": false,
|
||
"needs_ranking": false,
|
||
"needs_anomaly_summary": false,
|
||
"needs_runtime_truth": false,
|
||
"needs_period_cut": false,
|
||
"needs_evidence": false
|
||
},
|
||
"confidence_overall": "medium",
|
||
"trace_id": "7L4ARgMdcABam4",
|
||
"request_count_for_case": 1
|
||
},
|
||
{
|
||
"case_id": "V1121-B1-02",
|
||
"raw_question": "Где по покупателям у нас висит история \"отгрузили - денег нет - закрытия нет\", и по каким контрагентам это уже требует ручной проверки?",
|
||
"validation_passed": true,
|
||
"intent_match": true,
|
||
"route_match": true,
|
||
"causal_flags_match": true,
|
||
"expected_intent_class": "cross_entity",
|
||
"actual_intent_class": "cross_entity",
|
||
"expected_route_hint": "hybrid_store_plus_live",
|
||
"actual_route_hint": "hybrid_store_plus_live",
|
||
"expected_requires": {
|
||
"needs_cross_entity_join": true,
|
||
"needs_causal_chain": true
|
||
},
|
||
"actual_requires": {
|
||
"needs_cross_entity_join": true,
|
||
"needs_causal_chain": true,
|
||
"needs_exact_object_trace": false,
|
||
"needs_ranking": false,
|
||
"needs_anomaly_summary": true,
|
||
"needs_runtime_truth": false,
|
||
"needs_period_cut": false,
|
||
"needs_evidence": false
|
||
},
|
||
"confidence_overall": "medium",
|
||
"trace_id": "fzUNOFmwmU2v_A",
|
||
"request_count_for_case": 1
|
||
},
|
||
{
|
||
"case_id": "V1121-B1-03",
|
||
"raw_question": "Покажи контрагентов, по которым сальдо у нас, скорее всего, не совпадет с их актом сверки, если его запросить прямо сейчас.",
|
||
"validation_passed": true,
|
||
"intent_match": true,
|
||
"route_match": true,
|
||
"causal_flags_match": true,
|
||
"expected_intent_class": "cross_entity",
|
||
"actual_intent_class": "cross_entity",
|
||
"expected_route_hint": "hybrid_store_plus_live",
|
||
"actual_route_hint": "hybrid_store_plus_live",
|
||
"expected_requires": {
|
||
"needs_cross_entity_join": true,
|
||
"needs_causal_chain": true
|
||
},
|
||
"actual_requires": {
|
||
"needs_cross_entity_join": true,
|
||
"needs_causal_chain": true,
|
||
"needs_exact_object_trace": false,
|
||
"needs_ranking": false,
|
||
"needs_anomaly_summary": false,
|
||
"needs_runtime_truth": false,
|
||
"needs_period_cut": false,
|
||
"needs_evidence": false
|
||
},
|
||
"confidence_overall": "medium",
|
||
"trace_id": "clytLLPPo6L-ZM",
|
||
"request_count_for_case": 1
|
||
},
|
||
{
|
||
"case_id": "V1121-B1-04",
|
||
"raw_question": "Где у нас есть оплаты, но не хватает документов, которые должны были закрыть взаиморасчеты?",
|
||
"validation_passed": true,
|
||
"intent_match": true,
|
||
"route_match": true,
|
||
"causal_flags_match": true,
|
||
"expected_intent_class": "cross_entity",
|
||
"actual_intent_class": "cross_entity",
|
||
"expected_route_hint": "hybrid_store_plus_live",
|
||
"actual_route_hint": "hybrid_store_plus_live",
|
||
"expected_requires": {
|
||
"needs_cross_entity_join": true,
|
||
"needs_causal_chain": true
|
||
},
|
||
"actual_requires": {
|
||
"needs_cross_entity_join": true,
|
||
"needs_causal_chain": true,
|
||
"needs_exact_object_trace": false,
|
||
"needs_ranking": false,
|
||
"needs_anomaly_summary": false,
|
||
"needs_runtime_truth": false,
|
||
"needs_period_cut": false,
|
||
"needs_evidence": false
|
||
},
|
||
"confidence_overall": "medium",
|
||
"trace_id": "2EPBctOTQBi61R",
|
||
"request_count_for_case": 1
|
||
},
|
||
{
|
||
"case_id": "V1121-B1-05",
|
||
"raw_question": "По каким контрагентам, наоборот, документы есть, а нормального закрытия оплатами не видно?",
|
||
"validation_passed": true,
|
||
"intent_match": true,
|
||
"route_match": true,
|
||
"causal_flags_match": true,
|
||
"expected_intent_class": "cross_entity",
|
||
"actual_intent_class": "cross_entity",
|
||
"expected_route_hint": "hybrid_store_plus_live",
|
||
"actual_route_hint": "hybrid_store_plus_live",
|
||
"expected_requires": {
|
||
"needs_cross_entity_join": true,
|
||
"needs_causal_chain": true
|
||
},
|
||
"actual_requires": {
|
||
"needs_cross_entity_join": true,
|
||
"needs_causal_chain": true,
|
||
"needs_exact_object_trace": false,
|
||
"needs_ranking": false,
|
||
"needs_anomaly_summary": false,
|
||
"needs_runtime_truth": false,
|
||
"needs_period_cut": false,
|
||
"needs_evidence": false
|
||
},
|
||
"confidence_overall": "medium",
|
||
"trace_id": "pH8V994Y8zzGru",
|
||
"request_count_for_case": 1
|
||
},
|
||
{
|
||
"case_id": "V1121-B1-06",
|
||
"raw_question": "Есть ли такие зависшие авансы, которые уже давно надо было либо закрыть, либо хотя бы перепроверить руками?",
|
||
"validation_passed": true,
|
||
"intent_match": false,
|
||
"route_match": true,
|
||
"causal_flags_match": false,
|
||
"expected_intent_class": "anomaly_probe",
|
||
"actual_intent_class": "cross_entity",
|
||
"expected_route_hint": "store_feature_risk",
|
||
"actual_route_hint": "store_feature_risk",
|
||
"expected_requires": {
|
||
"needs_cross_entity_join": false,
|
||
"needs_causal_chain": false
|
||
},
|
||
"actual_requires": {
|
||
"needs_cross_entity_join": true,
|
||
"needs_causal_chain": true,
|
||
"needs_exact_object_trace": false,
|
||
"needs_ranking": false,
|
||
"needs_anomaly_summary": true,
|
||
"needs_runtime_truth": false,
|
||
"needs_period_cut": false,
|
||
"needs_evidence": false
|
||
},
|
||
"confidence_overall": "medium",
|
||
"trace_id": "UKCM7zvsU6WUSG",
|
||
"request_count_for_case": 1
|
||
},
|
||
{
|
||
"case_id": "V1121-B2-01",
|
||
"raw_question": "Какие реализации на конец периода выглядят так, будто они зависли и будут портить картину по выручке, если их не проверить заранее?",
|
||
"validation_passed": true,
|
||
"intent_match": false,
|
||
"route_match": false,
|
||
"causal_flags_match": false,
|
||
"expected_intent_class": "heavy_analytical",
|
||
"actual_intent_class": "cross_entity",
|
||
"expected_route_hint": "batch_refresh_then_store",
|
||
"actual_route_hint": "hybrid_store_plus_live",
|
||
"expected_requires": {
|
||
"needs_cross_entity_join": false,
|
||
"needs_causal_chain": false
|
||
},
|
||
"actual_requires": {
|
||
"needs_cross_entity_join": true,
|
||
"needs_causal_chain": true,
|
||
"needs_exact_object_trace": false,
|
||
"needs_ranking": false,
|
||
"needs_anomaly_summary": true,
|
||
"needs_runtime_truth": false,
|
||
"needs_period_cut": true,
|
||
"needs_evidence": false
|
||
},
|
||
"confidence_overall": "medium",
|
||
"trace_id": "DjcpTNQM8KgQCi",
|
||
"request_count_for_case": 1
|
||
},
|
||
{
|
||
"case_id": "V1121-B2-02",
|
||
"raw_question": "По каким отгрузкам видно, что проблема не просто в том, что клиент не оплатил, а в том, что сама связка документов собрана криво?",
|
||
"validation_passed": true,
|
||
"intent_match": true,
|
||
"route_match": true,
|
||
"causal_flags_match": true,
|
||
"expected_intent_class": "cross_entity",
|
||
"actual_intent_class": "cross_entity",
|
||
"expected_route_hint": "hybrid_store_plus_live",
|
||
"actual_route_hint": "hybrid_store_plus_live",
|
||
"expected_requires": {
|
||
"needs_cross_entity_join": true,
|
||
"needs_causal_chain": true
|
||
},
|
||
"actual_requires": {
|
||
"needs_cross_entity_join": true,
|
||
"needs_causal_chain": true,
|
||
"needs_exact_object_trace": false,
|
||
"needs_ranking": false,
|
||
"needs_anomaly_summary": false,
|
||
"needs_runtime_truth": false,
|
||
"needs_period_cut": false,
|
||
"needs_evidence": false
|
||
},
|
||
"confidence_overall": "medium",
|
||
"trace_id": "b8JTHbxO6YuBij",
|
||
"request_count_for_case": 1
|
||
},
|
||
{
|
||
"case_id": "V1121-B2-03",
|
||
"raw_question": "Покажи реализации, где хвост выглядит особенно неприятно: сумма не маленькая, возраст хвоста уже заметный, и при этом не видно нормального завершения цепочки.",
|
||
"validation_passed": true,
|
||
"intent_match": false,
|
||
"route_match": false,
|
||
"causal_flags_match": false,
|
||
"expected_intent_class": "heavy_analytical",
|
||
"actual_intent_class": "cross_entity",
|
||
"expected_route_hint": "batch_refresh_then_store",
|
||
"actual_route_hint": "store_feature_risk",
|
||
"expected_requires": {
|
||
"needs_cross_entity_join": false,
|
||
"needs_causal_chain": false
|
||
},
|
||
"actual_requires": {
|
||
"needs_cross_entity_join": true,
|
||
"needs_causal_chain": true,
|
||
"needs_exact_object_trace": false,
|
||
"needs_ranking": true,
|
||
"needs_anomaly_summary": false,
|
||
"needs_runtime_truth": false,
|
||
"needs_period_cut": false,
|
||
"needs_evidence": false
|
||
},
|
||
"confidence_overall": "medium",
|
||
"trace_id": "_ghrU-zDCd7_58",
|
||
"request_count_for_case": 1
|
||
},
|
||
{
|
||
"case_id": "V1121-B2-04",
|
||
"raw_question": "Где по 90/62 история похожа на \"вроде все проведено, но если копнуть, закрытие держится на кривой связке\"?",
|
||
"validation_passed": true,
|
||
"intent_match": true,
|
||
"route_match": true,
|
||
"causal_flags_match": true,
|
||
"expected_intent_class": "cross_entity",
|
||
"actual_intent_class": "cross_entity",
|
||
"expected_route_hint": "hybrid_store_plus_live",
|
||
"actual_route_hint": "hybrid_store_plus_live",
|
||
"expected_requires": {
|
||
"needs_cross_entity_join": true,
|
||
"needs_causal_chain": true
|
||
},
|
||
"actual_requires": {
|
||
"needs_cross_entity_join": true,
|
||
"needs_causal_chain": true,
|
||
"needs_exact_object_trace": false,
|
||
"needs_ranking": false,
|
||
"needs_anomaly_summary": false,
|
||
"needs_runtime_truth": false,
|
||
"needs_period_cut": false,
|
||
"needs_evidence": false
|
||
},
|
||
"confidence_overall": "medium",
|
||
"trace_id": "SBMs3pLfp4FCdz",
|
||
"request_count_for_case": 1
|
||
},
|
||
{
|
||
"case_id": "V1121-B2-05",
|
||
"raw_question": "Есть ли случаи, где реализация попала в период, а подтверждающие документы или оплата до сих пор живут в какой-то полуразобранной логике?",
|
||
"validation_passed": true,
|
||
"intent_match": true,
|
||
"route_match": true,
|
||
"causal_flags_match": true,
|
||
"expected_intent_class": "cross_entity",
|
||
"actual_intent_class": "cross_entity",
|
||
"expected_route_hint": "hybrid_store_plus_live",
|
||
"actual_route_hint": "hybrid_store_plus_live",
|
||
"expected_requires": {
|
||
"needs_cross_entity_join": true,
|
||
"needs_causal_chain": true
|
||
},
|
||
"actual_requires": {
|
||
"needs_cross_entity_join": true,
|
||
"needs_causal_chain": true,
|
||
"needs_exact_object_trace": false,
|
||
"needs_ranking": false,
|
||
"needs_anomaly_summary": false,
|
||
"needs_runtime_truth": false,
|
||
"needs_period_cut": false,
|
||
"needs_evidence": false
|
||
},
|
||
"confidence_overall": "medium",
|
||
"trace_id": "elTaYImxTBzijg",
|
||
"request_count_for_case": 1
|
||
},
|
||
{
|
||
"case_id": "V1121-B2-06",
|
||
"raw_question": "По каким продажам на конец месяца видно, что бухгалтер потом будет долго распутывать, почему все это не сошлось нормально?",
|
||
"validation_passed": true,
|
||
"intent_match": false,
|
||
"route_match": false,
|
||
"causal_flags_match": false,
|
||
"expected_intent_class": "heavy_analytical",
|
||
"actual_intent_class": "cross_entity",
|
||
"expected_route_hint": "batch_refresh_then_store",
|
||
"actual_route_hint": "hybrid_store_plus_live",
|
||
"expected_requires": {
|
||
"needs_cross_entity_join": false,
|
||
"needs_causal_chain": false
|
||
},
|
||
"actual_requires": {
|
||
"needs_cross_entity_join": true,
|
||
"needs_causal_chain": true,
|
||
"needs_exact_object_trace": false,
|
||
"needs_ranking": false,
|
||
"needs_anomaly_summary": false,
|
||
"needs_runtime_truth": false,
|
||
"needs_period_cut": false,
|
||
"needs_evidence": false
|
||
},
|
||
"confidence_overall": "high",
|
||
"trace_id": "vfAte4AVljlOJD",
|
||
"request_count_for_case": 1
|
||
},
|
||
{
|
||
"case_id": "V1121-B3-01",
|
||
"raw_question": "Какие банковские движения выглядят так, будто выписка есть, а нормального отражения в учете под ней не хватает?",
|
||
"validation_passed": true,
|
||
"intent_match": true,
|
||
"route_match": false,
|
||
"causal_flags_match": true,
|
||
"expected_intent_class": "cross_entity",
|
||
"actual_intent_class": "cross_entity",
|
||
"expected_route_hint": "hybrid_store_plus_live",
|
||
"actual_route_hint": "store_feature_risk",
|
||
"expected_requires": {
|
||
"needs_cross_entity_join": true,
|
||
"needs_causal_chain": true
|
||
},
|
||
"actual_requires": {
|
||
"needs_cross_entity_join": true,
|
||
"needs_causal_chain": true,
|
||
"needs_exact_object_trace": false,
|
||
"needs_ranking": false,
|
||
"needs_anomaly_summary": true,
|
||
"needs_runtime_truth": false,
|
||
"needs_period_cut": false,
|
||
"needs_evidence": false
|
||
},
|
||
"confidence_overall": "medium",
|
||
"trace_id": "DKB3P_fnbRPEQx",
|
||
"request_count_for_case": 1
|
||
},
|
||
{
|
||
"case_id": "V1121-B3-02",
|
||
"raw_question": "Где по банку можно заподозрить, что документ и проводка вроде есть, но логика операции все равно не собрана в нормальную цепочку?",
|
||
"validation_passed": true,
|
||
"intent_match": true,
|
||
"route_match": false,
|
||
"causal_flags_match": true,
|
||
"expected_intent_class": "cross_entity",
|
||
"actual_intent_class": "cross_entity",
|
||
"expected_route_hint": "hybrid_store_plus_live",
|
||
"actual_route_hint": "store_feature_risk",
|
||
"expected_requires": {
|
||
"needs_cross_entity_join": true,
|
||
"needs_causal_chain": true
|
||
},
|
||
"actual_requires": {
|
||
"needs_cross_entity_join": true,
|
||
"needs_causal_chain": true,
|
||
"needs_exact_object_trace": false,
|
||
"needs_ranking": false,
|
||
"needs_anomaly_summary": false,
|
||
"needs_runtime_truth": false,
|
||
"needs_period_cut": false,
|
||
"needs_evidence": false
|
||
},
|
||
"confidence_overall": "medium",
|
||
"trace_id": "dG5iqslpfsCAs-",
|
||
"request_count_for_case": 1
|
||
},
|
||
{
|
||
"case_id": "V1121-B3-03",
|
||
"raw_question": "Есть ли движения по счету 51, которые выглядят корректно по сумме, но по смыслу оставляют после себя подозрительный хвост?",
|
||
"validation_passed": true,
|
||
"intent_match": true,
|
||
"route_match": true,
|
||
"causal_flags_match": false,
|
||
"expected_intent_class": "anomaly_probe",
|
||
"actual_intent_class": "anomaly_probe",
|
||
"expected_route_hint": "store_feature_risk",
|
||
"actual_route_hint": "store_feature_risk",
|
||
"expected_requires": {
|
||
"needs_cross_entity_join": false,
|
||
"needs_causal_chain": false
|
||
},
|
||
"actual_requires": {
|
||
"needs_cross_entity_join": false,
|
||
"needs_causal_chain": true,
|
||
"needs_exact_object_trace": false,
|
||
"needs_ranking": false,
|
||
"needs_anomaly_summary": true,
|
||
"needs_runtime_truth": false,
|
||
"needs_period_cut": false,
|
||
"needs_evidence": false
|
||
},
|
||
"confidence_overall": "medium",
|
||
"trace_id": "R7Qmh0qYqvLtFM",
|
||
"request_count_for_case": 1
|
||
},
|
||
{
|
||
"case_id": "V1121-B3-04",
|
||
"raw_question": "Покажи банковские кейсы, где, скорее всего, проблема не в платеже как таковом, а в том, что он не туда лег или не тем документом закрылся.",
|
||
"validation_passed": true,
|
||
"intent_match": true,
|
||
"route_match": true,
|
||
"causal_flags_match": true,
|
||
"expected_intent_class": "cross_entity",
|
||
"actual_intent_class": "cross_entity",
|
||
"expected_route_hint": "hybrid_store_plus_live",
|
||
"actual_route_hint": "hybrid_store_plus_live",
|
||
"expected_requires": {
|
||
"needs_cross_entity_join": true,
|
||
"needs_causal_chain": true
|
||
},
|
||
"actual_requires": {
|
||
"needs_cross_entity_join": true,
|
||
"needs_causal_chain": true,
|
||
"needs_exact_object_trace": false,
|
||
"needs_ranking": false,
|
||
"needs_anomaly_summary": false,
|
||
"needs_runtime_truth": false,
|
||
"needs_period_cut": false,
|
||
"needs_evidence": false
|
||
},
|
||
"confidence_overall": "medium",
|
||
"trace_id": "ZcOCCDGyPv7OXQ",
|
||
"request_count_for_case": 1
|
||
},
|
||
{
|
||
"case_id": "V1121-B3-05",
|
||
"raw_question": "Где банк и бухгалтерский контур, скорее всего, расходятся не по одной строке, а по паттерну, который уже начинает повторяться?",
|
||
"validation_passed": true,
|
||
"intent_match": false,
|
||
"route_match": false,
|
||
"causal_flags_match": false,
|
||
"expected_intent_class": "heavy_analytical",
|
||
"actual_intent_class": "cross_entity",
|
||
"expected_route_hint": "batch_refresh_then_store",
|
||
"actual_route_hint": "hybrid_store_plus_live",
|
||
"expected_requires": {
|
||
"needs_cross_entity_join": false,
|
||
"needs_causal_chain": false
|
||
},
|
||
"actual_requires": {
|
||
"needs_cross_entity_join": true,
|
||
"needs_causal_chain": true,
|
||
"needs_exact_object_trace": false,
|
||
"needs_ranking": false,
|
||
"needs_anomaly_summary": true,
|
||
"needs_runtime_truth": false,
|
||
"needs_period_cut": false,
|
||
"needs_evidence": false
|
||
},
|
||
"confidence_overall": "medium",
|
||
"trace_id": "68el6FsqLTj0Wr",
|
||
"request_count_for_case": 1
|
||
},
|
||
{
|
||
"case_id": "V1121-B4-01",
|
||
"raw_question": "Какие товарные позиции выглядят так, будто их уже продавали, а нормального прихода под них в базе не видно?",
|
||
"validation_passed": true,
|
||
"intent_match": true,
|
||
"route_match": true,
|
||
"causal_flags_match": true,
|
||
"expected_intent_class": "cross_entity",
|
||
"actual_intent_class": "cross_entity",
|
||
"expected_route_hint": "hybrid_store_plus_live",
|
||
"actual_route_hint": "hybrid_store_plus_live",
|
||
"expected_requires": {
|
||
"needs_cross_entity_join": true,
|
||
"needs_causal_chain": true
|
||
},
|
||
"actual_requires": {
|
||
"needs_cross_entity_join": true,
|
||
"needs_causal_chain": true,
|
||
"needs_exact_object_trace": false,
|
||
"needs_ranking": false,
|
||
"needs_anomaly_summary": false,
|
||
"needs_runtime_truth": false,
|
||
"needs_period_cut": false,
|
||
"needs_evidence": false
|
||
},
|
||
"confidence_overall": "medium",
|
||
"trace_id": "QjumNdr-CETXfz",
|
||
"request_count_for_case": 1
|
||
},
|
||
{
|
||
"case_id": "V1121-B4-02",
|
||
"raw_question": "Где по товарам у нас отрицательные или подозрительные остатки, которые, скорее всего, связаны не с жизнью, а с ошибкой в учете?",
|
||
"validation_passed": true,
|
||
"intent_match": true,
|
||
"route_match": true,
|
||
"causal_flags_match": false,
|
||
"expected_intent_class": "anomaly_probe",
|
||
"actual_intent_class": "anomaly_probe",
|
||
"expected_route_hint": "store_feature_risk",
|
||
"actual_route_hint": "store_feature_risk",
|
||
"expected_requires": {
|
||
"needs_cross_entity_join": false,
|
||
"needs_causal_chain": false
|
||
},
|
||
"actual_requires": {
|
||
"needs_cross_entity_join": true,
|
||
"needs_causal_chain": true,
|
||
"needs_exact_object_trace": false,
|
||
"needs_ranking": false,
|
||
"needs_anomaly_summary": true,
|
||
"needs_runtime_truth": false,
|
||
"needs_period_cut": false,
|
||
"needs_evidence": false
|
||
},
|
||
"confidence_overall": "medium",
|
||
"trace_id": "OeCwt50KwUWf0j",
|
||
"request_count_for_case": 1
|
||
},
|
||
{
|
||
"case_id": "V1121-B4-03",
|
||
"raw_question": "Есть ли случаи, где приход и реализация вроде есть оба, но даты между ними выглядят так, будто кто-то завел документы задним числом или с ошибкой?",
|
||
"validation_passed": true,
|
||
"intent_match": true,
|
||
"route_match": true,
|
||
"causal_flags_match": true,
|
||
"expected_intent_class": "cross_entity",
|
||
"actual_intent_class": "cross_entity",
|
||
"expected_route_hint": "hybrid_store_plus_live",
|
||
"actual_route_hint": "hybrid_store_plus_live",
|
||
"expected_requires": {
|
||
"needs_cross_entity_join": true,
|
||
"needs_causal_chain": true
|
||
},
|
||
"actual_requires": {
|
||
"needs_cross_entity_join": true,
|
||
"needs_causal_chain": true,
|
||
"needs_exact_object_trace": false,
|
||
"needs_ranking": false,
|
||
"needs_anomaly_summary": false,
|
||
"needs_runtime_truth": false,
|
||
"needs_period_cut": false,
|
||
"needs_evidence": false
|
||
},
|
||
"confidence_overall": "medium",
|
||
"trace_id": "57yG2XnBCG74ZO",
|
||
"request_count_for_case": 1
|
||
},
|
||
{
|
||
"case_id": "V1121-B4-04",
|
||
"raw_question": "Покажи товарные хвосты, которые сильнее всего искажают картину периода и требуют проверки до закрытия месяца.",
|
||
"validation_passed": true,
|
||
"intent_match": true,
|
||
"route_match": true,
|
||
"causal_flags_match": true,
|
||
"expected_intent_class": "heavy_analytical",
|
||
"actual_intent_class": "heavy_analytical",
|
||
"expected_route_hint": "batch_refresh_then_store",
|
||
"actual_route_hint": "batch_refresh_then_store",
|
||
"expected_requires": {
|
||
"needs_cross_entity_join": false,
|
||
"needs_causal_chain": false
|
||
},
|
||
"actual_requires": {
|
||
"needs_cross_entity_join": false,
|
||
"needs_causal_chain": false,
|
||
"needs_exact_object_trace": false,
|
||
"needs_ranking": true,
|
||
"needs_anomaly_summary": true,
|
||
"needs_runtime_truth": false,
|
||
"needs_period_cut": true,
|
||
"needs_evidence": false
|
||
},
|
||
"confidence_overall": "medium",
|
||
"trace_id": "AvpSzYOujxuRJZ",
|
||
"request_count_for_case": 1
|
||
},
|
||
{
|
||
"case_id": "V1121-B4-05",
|
||
"raw_question": "Где по складу и реализации видно, что себестоимость продажи подтверждена слабо или вообще опирается на кривую цепочку?",
|
||
"validation_passed": true,
|
||
"intent_match": true,
|
||
"route_match": true,
|
||
"causal_flags_match": true,
|
||
"expected_intent_class": "cross_entity",
|
||
"actual_intent_class": "cross_entity",
|
||
"expected_route_hint": "hybrid_store_plus_live",
|
||
"actual_route_hint": "hybrid_store_plus_live",
|
||
"expected_requires": {
|
||
"needs_cross_entity_join": true,
|
||
"needs_causal_chain": true
|
||
},
|
||
"actual_requires": {
|
||
"needs_cross_entity_join": true,
|
||
"needs_causal_chain": true,
|
||
"needs_exact_object_trace": false,
|
||
"needs_ranking": false,
|
||
"needs_anomaly_summary": false,
|
||
"needs_runtime_truth": false,
|
||
"needs_period_cut": false,
|
||
"needs_evidence": false
|
||
},
|
||
"confidence_overall": "medium",
|
||
"trace_id": "n7E9BbeQif1ag5",
|
||
"request_count_for_case": 1
|
||
},
|
||
{
|
||
"case_id": "V1121-B5-01",
|
||
"raw_question": "Что сейчас лежит на 10 счете так, будто это уже давно надо было либо списать, либо хотя бы проверить, почему оно до сих пор висит?",
|
||
"validation_passed": true,
|
||
"intent_match": false,
|
||
"route_match": true,
|
||
"causal_flags_match": false,
|
||
"expected_intent_class": "rule_based_account_control",
|
||
"actual_intent_class": "cross_entity",
|
||
"expected_route_hint": "store_feature_risk",
|
||
"actual_route_hint": "store_feature_risk",
|
||
"expected_requires": {
|
||
"needs_cross_entity_join": false,
|
||
"needs_causal_chain": false
|
||
},
|
||
"actual_requires": {
|
||
"needs_cross_entity_join": true,
|
||
"needs_causal_chain": true,
|
||
"needs_exact_object_trace": false,
|
||
"needs_ranking": false,
|
||
"needs_anomaly_summary": false,
|
||
"needs_runtime_truth": false,
|
||
"needs_period_cut": false,
|
||
"needs_evidence": false
|
||
},
|
||
"confidence_overall": "medium",
|
||
"trace_id": "6QkkgoRcMhK0Gi",
|
||
"request_count_for_case": 1
|
||
},
|
||
{
|
||
"case_id": "V1121-B5-02",
|
||
"raw_question": "Есть ли материалы, по которым остаток выглядит нелогично: движения были, хозяйственная логика слабая, а в учете все еще что-то торчит?",
|
||
"validation_passed": true,
|
||
"intent_match": true,
|
||
"route_match": true,
|
||
"causal_flags_match": false,
|
||
"expected_intent_class": "anomaly_probe",
|
||
"actual_intent_class": "anomaly_probe",
|
||
"expected_route_hint": "store_feature_risk",
|
||
"actual_route_hint": "store_feature_risk",
|
||
"expected_requires": {
|
||
"needs_cross_entity_join": false,
|
||
"needs_causal_chain": false
|
||
},
|
||
"actual_requires": {
|
||
"needs_cross_entity_join": true,
|
||
"needs_causal_chain": true,
|
||
"needs_exact_object_trace": false,
|
||
"needs_ranking": false,
|
||
"needs_anomaly_summary": true,
|
||
"needs_runtime_truth": false,
|
||
"needs_period_cut": false,
|
||
"needs_evidence": false
|
||
},
|
||
"confidence_overall": "medium",
|
||
"trace_id": "0lcF5KNdyHtHss",
|
||
"request_count_for_case": 1
|
||
},
|
||
{
|
||
"case_id": "V1121-B5-03",
|
||
"raw_question": "Покажи позиции по материалам, где возможен эффект \"вроде сумма не огромная, но учетная логика выглядит криво\".",
|
||
"validation_passed": true,
|
||
"intent_match": false,
|
||
"route_match": true,
|
||
"causal_flags_match": false,
|
||
"expected_intent_class": "anomaly_probe",
|
||
"actual_intent_class": "cross_entity",
|
||
"expected_route_hint": "store_feature_risk",
|
||
"actual_route_hint": "store_feature_risk",
|
||
"expected_requires": {
|
||
"needs_cross_entity_join": false,
|
||
"needs_causal_chain": false
|
||
},
|
||
"actual_requires": {
|
||
"needs_cross_entity_join": true,
|
||
"needs_causal_chain": true,
|
||
"needs_exact_object_trace": false,
|
||
"needs_ranking": false,
|
||
"needs_anomaly_summary": true,
|
||
"needs_runtime_truth": false,
|
||
"needs_period_cut": false,
|
||
"needs_evidence": false
|
||
},
|
||
"confidence_overall": "medium",
|
||
"trace_id": "dEMSXmj7nUFesp",
|
||
"request_count_for_case": 1
|
||
},
|
||
{
|
||
"case_id": "V1121-B6-01",
|
||
"raw_question": "Какие записи на 97 счете больше всего похожи на ошибку в датах начала, конца или самом сроке списания?",
|
||
"validation_passed": true,
|
||
"intent_match": true,
|
||
"route_match": true,
|
||
"causal_flags_match": true,
|
||
"expected_intent_class": "rule_based_account_control",
|
||
"actual_intent_class": "rule_based_account_control",
|
||
"expected_route_hint": "store_feature_risk",
|
||
"actual_route_hint": "store_feature_risk",
|
||
"expected_requires": {
|
||
"needs_cross_entity_join": false,
|
||
"needs_causal_chain": false
|
||
},
|
||
"actual_requires": {
|
||
"needs_cross_entity_join": false,
|
||
"needs_causal_chain": false,
|
||
"needs_exact_object_trace": false,
|
||
"needs_ranking": false,
|
||
"needs_anomaly_summary": false,
|
||
"needs_runtime_truth": false,
|
||
"needs_period_cut": false,
|
||
"needs_evidence": false
|
||
},
|
||
"confidence_overall": "medium",
|
||
"trace_id": "CWLmVHuj0_EbgR",
|
||
"request_count_for_case": 1
|
||
},
|
||
{
|
||
"case_id": "V1121-B6-02",
|
||
"raw_question": "Есть ли такие расходы будущих периодов, которые заведены, но по ним не видно нормальной ежемесячной жизни, как будто запись повисла сама по себе?",
|
||
"validation_passed": true,
|
||
"intent_match": false,
|
||
"route_match": true,
|
||
"causal_flags_match": false,
|
||
"expected_intent_class": "anomaly_probe",
|
||
"actual_intent_class": "cross_entity",
|
||
"expected_route_hint": "store_feature_risk",
|
||
"actual_route_hint": "store_feature_risk",
|
||
"expected_requires": {
|
||
"needs_cross_entity_join": false,
|
||
"needs_causal_chain": false
|
||
},
|
||
"actual_requires": {
|
||
"needs_cross_entity_join": true,
|
||
"needs_causal_chain": true,
|
||
"needs_exact_object_trace": false,
|
||
"needs_ranking": false,
|
||
"needs_anomaly_summary": true,
|
||
"needs_runtime_truth": false,
|
||
"needs_period_cut": false,
|
||
"needs_evidence": false
|
||
},
|
||
"confidence_overall": "medium",
|
||
"trace_id": "gZT6epMO1Vz9Yq",
|
||
"request_count_for_case": 1
|
||
},
|
||
{
|
||
"case_id": "V1121-B6-03",
|
||
"raw_question": "Покажи кейсы по 97 счету, где срок документа и срок списания визуально противоречат друг другу.",
|
||
"validation_passed": true,
|
||
"intent_match": true,
|
||
"route_match": true,
|
||
"causal_flags_match": true,
|
||
"expected_intent_class": "rule_based_account_control",
|
||
"actual_intent_class": "rule_based_account_control",
|
||
"expected_route_hint": "store_feature_risk",
|
||
"actual_route_hint": "store_feature_risk",
|
||
"expected_requires": {
|
||
"needs_cross_entity_join": false,
|
||
"needs_causal_chain": false
|
||
},
|
||
"actual_requires": {
|
||
"needs_cross_entity_join": false,
|
||
"needs_causal_chain": false,
|
||
"needs_exact_object_trace": false,
|
||
"needs_ranking": false,
|
||
"needs_anomaly_summary": false,
|
||
"needs_runtime_truth": false,
|
||
"needs_period_cut": false,
|
||
"needs_evidence": false
|
||
},
|
||
"confidence_overall": "medium",
|
||
"trace_id": "ELh5esCGqt7MjA",
|
||
"request_count_for_case": 1
|
||
},
|
||
{
|
||
"case_id": "V1121-B7-01",
|
||
"raw_question": "Есть ли основные средства, по которым параметры карточки выглядят так, будто амортизацию им задали не по логике объекта, а \"как получилось\"?",
|
||
"validation_passed": true,
|
||
"intent_match": true,
|
||
"route_match": true,
|
||
"causal_flags_match": true,
|
||
"expected_intent_class": "rule_based_account_control",
|
||
"actual_intent_class": "rule_based_account_control",
|
||
"expected_route_hint": "store_feature_risk",
|
||
"actual_route_hint": "store_feature_risk",
|
||
"expected_requires": {
|
||
"needs_cross_entity_join": false,
|
||
"needs_causal_chain": false
|
||
},
|
||
"actual_requires": {
|
||
"needs_cross_entity_join": false,
|
||
"needs_causal_chain": false,
|
||
"needs_exact_object_trace": false,
|
||
"needs_ranking": false,
|
||
"needs_anomaly_summary": false,
|
||
"needs_runtime_truth": false,
|
||
"needs_period_cut": false,
|
||
"needs_evidence": false
|
||
},
|
||
"confidence_overall": "medium",
|
||
"trace_id": "JalgiJUrUL9DHW",
|
||
"request_count_for_case": 1
|
||
},
|
||
{
|
||
"case_id": "V1121-B7-02",
|
||
"raw_question": "Покажи объекты ОС, где риск не в сумме, а в том, что карточка и логика начисления выглядят подозрительно и могут аукнуться позже.",
|
||
"validation_passed": true,
|
||
"intent_match": false,
|
||
"route_match": true,
|
||
"causal_flags_match": false,
|
||
"expected_intent_class": "rule_based_account_control",
|
||
"actual_intent_class": "anomaly_probe",
|
||
"expected_route_hint": "store_feature_risk",
|
||
"actual_route_hint": "store_feature_risk",
|
||
"expected_requires": {
|
||
"needs_cross_entity_join": false,
|
||
"needs_causal_chain": false
|
||
},
|
||
"actual_requires": {
|
||
"needs_cross_entity_join": false,
|
||
"needs_causal_chain": true,
|
||
"needs_exact_object_trace": false,
|
||
"needs_ranking": false,
|
||
"needs_anomaly_summary": true,
|
||
"needs_runtime_truth": false,
|
||
"needs_period_cut": false,
|
||
"needs_evidence": false
|
||
},
|
||
"confidence_overall": "medium",
|
||
"trace_id": "0lNcvDtvYn5C63",
|
||
"request_count_for_case": 1
|
||
}
|
||
]
|
||
}
|