From d323dcd509a3e190f29d0904bfa4dbd7b1930c4f Mon Sep 17 00:00:00 2001 From: dctouch Date: Tue, 21 Apr 2026 19:10:29 +0300 Subject: [PATCH] =?UTF-8?q?ARCH:=20=D1=80=D0=B0=D0=B7=D1=80=D0=B5=D1=88?= =?UTF-8?q?=D0=B8=D1=82=D1=8C=20net-flow=20discovery=20=D0=BF=D0=B5=D1=80?= =?UTF-8?q?=D0=B5=D0=BE=D0=BF=D1=80=D0=B5=D0=B4=D0=B5=D0=BB=D1=8F=D1=82?= =?UTF-8?q?=D1=8C=20stale=20lifecycle=20carryover?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ...phase21_net_followup_after_broad_eval.json | 71 +++++++++++ .../assistantMcpDiscoveryResponsePolicy.js | 94 ++++++++++++--- .../assistantMcpDiscoveryResponsePolicy.ts | 114 +++++++++++++++--- ...ssistantMcpDiscoveryResponsePolicy.test.ts | 66 +++++++++- 4 files changed, 304 insertions(+), 41 deletions(-) create mode 100644 docs/orchestration/address_truth_harness_phase21_net_followup_after_broad_eval.json diff --git a/docs/orchestration/address_truth_harness_phase21_net_followup_after_broad_eval.json b/docs/orchestration/address_truth_harness_phase21_net_followup_after_broad_eval.json new file mode 100644 index 0000000..9fbb0cf --- /dev/null +++ b/docs/orchestration/address_truth_harness_phase21_net_followup_after_broad_eval.json @@ -0,0 +1,71 @@ +{ + "schema_version": "domain_truth_harness_spec_v1", + "scenario_id": "address_truth_harness_phase21_net_followup_after_broad_eval", + "domain": "address_phase21_net_followup_after_broad_eval", + "title": "Phase 21 net-flow follow-up after broad evaluation replay", + "description": "Targeted AGENT replay for the assistant-stage1-LpuYsX0SRP regression where a net cash-flow question about Группа СВК inside an existing dialogue chain was wrongly kept on the counterparty lifecycle contour instead of applying the guarded MCP discovery answer.", + "bindings": {}, + "steps": [ + { + "step_id": "step_01_company_activity_lifecycle", + "title": "Activity lifecycle answer seeds broad counterparty context", + "question": "а по Альтернативе Плюс сколько лет активности в базе 1С?", + "allowed_reply_types": [ + "partial_coverage", + "factual", + "factual_with_explanation" + ], + "required_answer_patterns_any": [ + "(?i)лет", + "(?i)активност", + "(?i)1с", + "(?i)не получил|не подтвержден|проверил доступный контур" + ], + "criticality": "critical", + "semantic_tags": [ + "company_activity_lifecycle", + "context_seed" + ] + }, + { + "step_id": "step_02_broad_company_evaluation", + "title": "Broad evaluation sits between lifecycle and net-flow question", + "question": "Как ты оценишь деятельность компании?", + "required_answer_patterns_any": [ + "(?i)активн", + "(?i)заказчик|контрагент|деятельност|оценк" + ], + "criticality": "warning", + "semantic_tags": [ + "broad_evaluation_bridge" + ] + }, + { + "step_id": "step_03_net_flow_after_broad_eval", + "title": "Net-flow follow-up overrides stale lifecycle carryover and answers with inflow outflow and net", + "question": "какое нетто по деньгам с Группа СВК за 2020 год: сколько получили и сколько заплатили?", + "allowed_reply_types": [ + "partial_coverage", + "factual_with_explanation" + ], + "required_answer_patterns_all": [ + "(?i)свк", + "(?i)получил|входящ|поступ", + "(?i)заплат|исходящ|списан|плат[её]ж", + "(?i)нетто|сальдо|разниц", + "(?i)2020|период", + "(?i)руб" + ], + "forbidden_answer_patterns": [ + "(?i)активных заказчиков", + "(?i)лет в базе", + "(?i)последняя активность" + ], + "criticality": "critical", + "semantic_tags": [ + "counterparty_net_cash_flow", + "stale_lifecycle_override" + ] + } + ] +} diff --git a/llm_normalizer/backend/dist/services/assistantMcpDiscoveryResponsePolicy.js b/llm_normalizer/backend/dist/services/assistantMcpDiscoveryResponsePolicy.js index df26456..a017356 100644 --- a/llm_normalizer/backend/dist/services/assistantMcpDiscoveryResponsePolicy.js +++ b/llm_normalizer/backend/dist/services/assistantMcpDiscoveryResponsePolicy.js @@ -89,6 +89,51 @@ function isDiscoveryReadyAddressCandidate(input, entryPoint) { turnInput?.should_run_discovery === true && (source === "address_lane" || source === "address_exact" || source === "address_query_runtime_v1")); } +function isDetectedIntentAlignedWithTurnMeaning(detectedIntent, turnMeaning) { + const normalizedIntent = String(detectedIntent ?? "").trim().toLowerCase(); + if (!normalizedIntent) { + return false; + } + const askedDomain = String(toNonEmptyString(turnMeaning?.asked_domain_family) ?? "").trim().toLowerCase(); + const askedAction = String(toNonEmptyString(turnMeaning?.asked_action_family) ?? "").trim().toLowerCase(); + if (normalizedIntent === "counterparty_activity_lifecycle") { + return (askedDomain === "counterparty_lifecycle" || + askedAction === "activity_duration" || + askedAction === "age_or_activity_duration"); + } + if (normalizedIntent === "supplier_payouts_profile") { + return askedDomain === "counterparty_value" && askedAction === "payout"; + } + if (normalizedIntent === "customer_revenue_and_payments") { + return askedDomain === "counterparty_value" && (askedAction === "turnover" || askedAction === "counterparty_value_or_turnover"); + } + if (normalizedIntent === "receivables_confirmed_as_of_date") { + return askedDomain === "receivables" || askedAction === "confirmed_snapshot"; + } + if (normalizedIntent === "payables_confirmed_as_of_date") { + return askedDomain === "payables" || askedAction === "confirmed_snapshot"; + } + if (normalizedIntent === "vat_liability_confirmed_for_tax_period") { + return askedDomain === "vat" && askedAction === "confirmed_tax_period"; + } + if (normalizedIntent === "vat_payable_confirmed_as_of_date") { + return askedDomain === "vat" && askedAction === "confirmed_snapshot"; + } + if (normalizedIntent === "vat_payable_forecast") { + return askedDomain === "vat" && askedAction === "forecast"; + } + if (normalizedIntent === "list_documents_by_counterparty") { + return askedAction === "list_documents" || askedDomain === "counterparty_documents" || askedDomain === "counterparty"; + } + if (normalizedIntent === "inventory_on_hand_as_of_date" || normalizedIntent === "inventory_aging_by_purchase_date") { + return askedDomain === "inventory" && askedAction === "confirmed_snapshot"; + } + return false; +} +function readDiscoveryTurnMeaning(entryPoint) { + const turnInput = toRecordObject(entryPoint?.turn_input); + return toRecordObject(turnInput?.turn_meaning_ref); +} function hasAlignedFactualAddressReply(input, entryPoint) { if (!isDiscoveryReadyAddressCandidate(input, entryPoint)) { return false; @@ -97,27 +142,35 @@ function hasAlignedFactualAddressReply(input, entryPoint) { return false; } const detectedIntent = toNonEmptyString(input.addressRuntimeMeta?.detected_intent); - const turnInput = toRecordObject(entryPoint?.turn_input); - const turnMeaning = toRecordObject(turnInput?.turn_meaning_ref); - const askedDomain = toNonEmptyString(turnMeaning?.asked_domain_family); - const askedAction = toNonEmptyString(turnMeaning?.asked_action_family); - if (detectedIntent === "counterparty_activity_lifecycle") { - return askedDomain === "counterparty_lifecycle" || askedAction === "activity_duration"; - } - if (detectedIntent === "supplier_payouts_profile") { - return askedDomain === "counterparty_value" && askedAction === "payout"; - } - if (detectedIntent === "customer_revenue_and_payments") { - return askedDomain === "counterparty_value" && askedAction === "turnover"; - } - return false; + return isDetectedIntentAlignedWithTurnMeaning(detectedIntent, readDiscoveryTurnMeaning(entryPoint)); } -function hasMatchedFactualAddressContinuationTarget(input) { +function hasSemanticConflictWithDiscoveryTurnMeaning(input, entryPoint) { + if (!isDiscoveryReadyAddressCandidate(input, entryPoint)) { + return false; + } if (toNonEmptyString(input.currentReplyType) !== "factual") { return false; } const detectedIntent = toNonEmptyString(input.addressRuntimeMeta?.detected_intent); - const dialogContinuationContract = toRecordObject(input.addressRuntimeMeta?.dialogContinuationContract); + const turnMeaning = readDiscoveryTurnMeaning(entryPoint); + const askedDomain = toNonEmptyString(turnMeaning?.asked_domain_family); + const askedAction = toNonEmptyString(turnMeaning?.asked_action_family); + const unsupportedFamily = toNonEmptyString(turnMeaning?.unsupported_but_understood_family); + if (!detectedIntent || (!askedDomain && !askedAction && !unsupportedFamily)) { + return false; + } + return !isDetectedIntentAlignedWithTurnMeaning(detectedIntent, turnMeaning); +} +function hasMatchedFactualAddressContinuationTarget(input, entryPoint) { + if (toNonEmptyString(input.currentReplyType) !== "factual") { + return false; + } + if (hasSemanticConflictWithDiscoveryTurnMeaning(input, entryPoint)) { + return false; + } + const detectedIntent = toNonEmptyString(input.addressRuntimeMeta?.detected_intent); + const dialogContinuationContract = toRecordObject(input.addressRuntimeMeta?.dialogContinuationContract) ?? + toRecordObject(input.addressRuntimeMeta?.dialog_continuation_contract_v2); const targetIntent = toNonEmptyString(dialogContinuationContract?.target_intent); return Boolean(detectedIntent && targetIntent && detectedIntent === targetIntent); } @@ -128,6 +181,9 @@ function hasFullConfirmedFactualAddressReply(input, entryPoint) { if (toNonEmptyString(input.currentReplyType) !== "factual") { return false; } + if (hasSemanticConflictWithDiscoveryTurnMeaning(input, entryPoint)) { + return false; + } const truthGateStatus = toNonEmptyString(input.addressRuntimeMeta?.truth_gate_contract_status); if (truthGateStatus === "full_confirmed") { return true; @@ -150,7 +206,8 @@ function applyAssistantMcpDiscoveryResponsePolicy(input) { const discoveryReadyDeepCandidate = isDiscoveryReadyDeepCandidate(input, entryPoint); const discoveryReadyAddressCandidate = isDiscoveryReadyAddressCandidate(input, entryPoint); const alignedFactualAddressReply = hasAlignedFactualAddressReply(input, entryPoint); - const matchedFactualAddressContinuationTarget = hasMatchedFactualAddressContinuationTarget(input); + const semanticConflictWithDiscoveryTurnMeaning = hasSemanticConflictWithDiscoveryTurnMeaning(input, entryPoint); + const matchedFactualAddressContinuationTarget = hasMatchedFactualAddressContinuationTarget(input, entryPoint); const fullConfirmedFactualAddressReply = hasFullConfirmedFactualAddressReply(input, entryPoint); if (!entryPoint) { pushReason(reasonCodes, "mcp_discovery_response_policy_no_entry_point"); @@ -170,6 +227,9 @@ function applyAssistantMcpDiscoveryResponsePolicy(input) { if (alignedFactualAddressReply) { pushReason(reasonCodes, "mcp_discovery_response_policy_keep_aligned_factual_address_reply"); } + if (semanticConflictWithDiscoveryTurnMeaning) { + pushReason(reasonCodes, "mcp_discovery_response_policy_semantic_conflict_allows_candidate_override"); + } if (matchedFactualAddressContinuationTarget) { pushReason(reasonCodes, "mcp_discovery_response_policy_keep_factual_address_continuation_target"); } diff --git a/llm_normalizer/backend/src/services/assistantMcpDiscoveryResponsePolicy.ts b/llm_normalizer/backend/src/services/assistantMcpDiscoveryResponsePolicy.ts index e1410b6..9b9e656 100644 --- a/llm_normalizer/backend/src/services/assistantMcpDiscoveryResponsePolicy.ts +++ b/llm_normalizer/backend/src/services/assistantMcpDiscoveryResponsePolicy.ts @@ -152,6 +152,61 @@ function isDiscoveryReadyAddressCandidate( ); } +function isDetectedIntentAlignedWithTurnMeaning( + detectedIntent: string | null, + turnMeaning: Record | null +): boolean { + const normalizedIntent = String(detectedIntent ?? "").trim().toLowerCase(); + if (!normalizedIntent) { + return false; + } + const askedDomain = String(toNonEmptyString(turnMeaning?.asked_domain_family) ?? "").trim().toLowerCase(); + const askedAction = String(toNonEmptyString(turnMeaning?.asked_action_family) ?? "").trim().toLowerCase(); + + if (normalizedIntent === "counterparty_activity_lifecycle") { + return ( + askedDomain === "counterparty_lifecycle" || + askedAction === "activity_duration" || + askedAction === "age_or_activity_duration" + ); + } + if (normalizedIntent === "supplier_payouts_profile") { + return askedDomain === "counterparty_value" && askedAction === "payout"; + } + if (normalizedIntent === "customer_revenue_and_payments") { + return askedDomain === "counterparty_value" && (askedAction === "turnover" || askedAction === "counterparty_value_or_turnover"); + } + if (normalizedIntent === "receivables_confirmed_as_of_date") { + return askedDomain === "receivables" || askedAction === "confirmed_snapshot"; + } + if (normalizedIntent === "payables_confirmed_as_of_date") { + return askedDomain === "payables" || askedAction === "confirmed_snapshot"; + } + if (normalizedIntent === "vat_liability_confirmed_for_tax_period") { + return askedDomain === "vat" && askedAction === "confirmed_tax_period"; + } + if (normalizedIntent === "vat_payable_confirmed_as_of_date") { + return askedDomain === "vat" && askedAction === "confirmed_snapshot"; + } + if (normalizedIntent === "vat_payable_forecast") { + return askedDomain === "vat" && askedAction === "forecast"; + } + if (normalizedIntent === "list_documents_by_counterparty") { + return askedAction === "list_documents" || askedDomain === "counterparty_documents" || askedDomain === "counterparty"; + } + if (normalizedIntent === "inventory_on_hand_as_of_date" || normalizedIntent === "inventory_aging_by_purchase_date") { + return askedDomain === "inventory" && askedAction === "confirmed_snapshot"; + } + return false; +} + +function readDiscoveryTurnMeaning( + entryPoint: AssistantMcpDiscoveryRuntimeEntryPointContract | null +): Record | null { + const turnInput = toRecordObject(entryPoint?.turn_input); + return toRecordObject(turnInput?.turn_meaning_ref); +} + function hasAlignedFactualAddressReply( input: ApplyAssistantMcpDiscoveryResponsePolicyInput, entryPoint: AssistantMcpDiscoveryRuntimeEntryPointContract | null @@ -162,33 +217,45 @@ function hasAlignedFactualAddressReply( if (toNonEmptyString(input.currentReplyType) !== "factual") { return false; } - const detectedIntent = toNonEmptyString(input.addressRuntimeMeta?.detected_intent); - const turnInput = toRecordObject(entryPoint?.turn_input); - const turnMeaning = toRecordObject(turnInput?.turn_meaning_ref); - const askedDomain = toNonEmptyString(turnMeaning?.asked_domain_family); - const askedAction = toNonEmptyString(turnMeaning?.asked_action_family); - - if (detectedIntent === "counterparty_activity_lifecycle") { - return askedDomain === "counterparty_lifecycle" || askedAction === "activity_duration"; - } - if (detectedIntent === "supplier_payouts_profile") { - return askedDomain === "counterparty_value" && askedAction === "payout"; - } - if (detectedIntent === "customer_revenue_and_payments") { - return askedDomain === "counterparty_value" && askedAction === "turnover"; - } - return false; + return isDetectedIntentAlignedWithTurnMeaning(detectedIntent, readDiscoveryTurnMeaning(entryPoint)); } -function hasMatchedFactualAddressContinuationTarget( - input: ApplyAssistantMcpDiscoveryResponsePolicyInput +function hasSemanticConflictWithDiscoveryTurnMeaning( + input: ApplyAssistantMcpDiscoveryResponsePolicyInput, + entryPoint: AssistantMcpDiscoveryRuntimeEntryPointContract | null ): boolean { + if (!isDiscoveryReadyAddressCandidate(input, entryPoint)) { + return false; + } if (toNonEmptyString(input.currentReplyType) !== "factual") { return false; } const detectedIntent = toNonEmptyString(input.addressRuntimeMeta?.detected_intent); - const dialogContinuationContract = toRecordObject(input.addressRuntimeMeta?.dialogContinuationContract); + const turnMeaning = readDiscoveryTurnMeaning(entryPoint); + const askedDomain = toNonEmptyString(turnMeaning?.asked_domain_family); + const askedAction = toNonEmptyString(turnMeaning?.asked_action_family); + const unsupportedFamily = toNonEmptyString(turnMeaning?.unsupported_but_understood_family); + if (!detectedIntent || (!askedDomain && !askedAction && !unsupportedFamily)) { + return false; + } + return !isDetectedIntentAlignedWithTurnMeaning(detectedIntent, turnMeaning); +} + +function hasMatchedFactualAddressContinuationTarget( + input: ApplyAssistantMcpDiscoveryResponsePolicyInput, + entryPoint: AssistantMcpDiscoveryRuntimeEntryPointContract | null +): boolean { + if (toNonEmptyString(input.currentReplyType) !== "factual") { + return false; + } + if (hasSemanticConflictWithDiscoveryTurnMeaning(input, entryPoint)) { + return false; + } + const detectedIntent = toNonEmptyString(input.addressRuntimeMeta?.detected_intent); + const dialogContinuationContract = + toRecordObject(input.addressRuntimeMeta?.dialogContinuationContract) ?? + toRecordObject(input.addressRuntimeMeta?.dialog_continuation_contract_v2); const targetIntent = toNonEmptyString(dialogContinuationContract?.target_intent); return Boolean(detectedIntent && targetIntent && detectedIntent === targetIntent); } @@ -203,6 +270,9 @@ function hasFullConfirmedFactualAddressReply( if (toNonEmptyString(input.currentReplyType) !== "factual") { return false; } + if (hasSemanticConflictWithDiscoveryTurnMeaning(input, entryPoint)) { + return false; + } const truthGateStatus = toNonEmptyString(input.addressRuntimeMeta?.truth_gate_contract_status); if (truthGateStatus === "full_confirmed") { return true; @@ -229,7 +299,8 @@ export function applyAssistantMcpDiscoveryResponsePolicy( const discoveryReadyDeepCandidate = isDiscoveryReadyDeepCandidate(input, entryPoint); const discoveryReadyAddressCandidate = isDiscoveryReadyAddressCandidate(input, entryPoint); const alignedFactualAddressReply = hasAlignedFactualAddressReply(input, entryPoint); - const matchedFactualAddressContinuationTarget = hasMatchedFactualAddressContinuationTarget(input); + const semanticConflictWithDiscoveryTurnMeaning = hasSemanticConflictWithDiscoveryTurnMeaning(input, entryPoint); + const matchedFactualAddressContinuationTarget = hasMatchedFactualAddressContinuationTarget(input, entryPoint); const fullConfirmedFactualAddressReply = hasFullConfirmedFactualAddressReply(input, entryPoint); if (!entryPoint) { @@ -250,6 +321,9 @@ export function applyAssistantMcpDiscoveryResponsePolicy( if (alignedFactualAddressReply) { pushReason(reasonCodes, "mcp_discovery_response_policy_keep_aligned_factual_address_reply"); } + if (semanticConflictWithDiscoveryTurnMeaning) { + pushReason(reasonCodes, "mcp_discovery_response_policy_semantic_conflict_allows_candidate_override"); + } if (matchedFactualAddressContinuationTarget) { pushReason(reasonCodes, "mcp_discovery_response_policy_keep_factual_address_continuation_target"); } diff --git a/llm_normalizer/backend/tests/assistantMcpDiscoveryResponsePolicy.test.ts b/llm_normalizer/backend/tests/assistantMcpDiscoveryResponsePolicy.test.ts index 8dfecd3..cdaa1bb 100644 --- a/llm_normalizer/backend/tests/assistantMcpDiscoveryResponsePolicy.test.ts +++ b/llm_normalizer/backend/tests/assistantMcpDiscoveryResponsePolicy.test.ts @@ -176,8 +176,8 @@ describe("assistant MCP discovery response policy", () => { adapter_status: "ready", should_run_discovery: true, turn_meaning_ref: { - asked_domain_family: "counterparty_lifecycle", - asked_action_family: "activity_duration" + asked_domain_family: "counterparty_value", + asked_action_family: "turnover" } } }) @@ -209,8 +209,8 @@ describe("assistant MCP discovery response policy", () => { adapter_status: "ready", should_run_discovery: true, turn_meaning_ref: { - asked_domain_family: "counterparty_value", - asked_action_family: "turnover" + asked_domain_family: "receivables", + asked_action_family: "confirmed_snapshot" } } }) @@ -223,6 +223,64 @@ describe("assistant MCP discovery response policy", () => { expect(result.reason_codes).toContain("mcp_discovery_response_policy_keep_full_confirmed_factual_address_reply"); }); + it("overrides a stale full-confirmed lifecycle reply when discovery proves a different net-flow question", () => { + const result = applyAssistantMcpDiscoveryResponsePolicy({ + currentReply: "Коротко: активных заказчиков в 2020 году — 1.", + currentReplySource: "address_query_runtime_v1", + currentReplyType: "factual", + addressRuntimeMeta: { + detected_intent: "counterparty_activity_lifecycle", + truth_gate_contract_status: "full_confirmed", + assistant_truth_answer_policy_v1: { + truth_gate: { + coverage_status: "full", + grounding_status: "grounded", + source_truth_gate_status: "full_confirmed" + } + }, + dialog_continuation_contract_v2: { + target_intent: "counterparty_activity_lifecycle" + }, + assistant_mcp_discovery_entry_point_v1: entryPoint({ + turn_input: { + adapter_status: "ready", + should_run_discovery: true, + turn_meaning_ref: { + asked_domain_family: "counterparty_value", + asked_action_family: "net_value_flow", + explicit_entity_candidates: ["Группа СВК"], + explicit_organization_scope: "ООО Альтернатива Плюс", + explicit_date_scope: "2020", + unsupported_but_understood_family: "counterparty_bidirectional_value_flow_or_netting" + } + }, + bridge: { + bridge_status: "answer_draft_ready", + user_facing_response_allowed: true, + business_fact_answer_allowed: true, + requires_user_clarification: false, + answer_draft: { + answer_mode: "confirmed_with_bounded_inference", + headline: "По данным 1С найдены строки входящих и исходящих денежных движений.", + confirmed_lines: ["Получили 47 628 853,03 руб.; заплатили 43 763 351,53 руб.; нетто 3 865 501,50 руб."], + inference_lines: [], + unknown_lines: ["Полное сальдо вне проверенного окна не подтверждено."], + limitation_lines: [], + next_step_line: null + } + } + }) + } + }); + + expect(result.applied).toBe(true); + expect(result.decision).toBe("apply_candidate"); + expect(result.reply_source).toBe("mcp_discovery_response_candidate_guarded"); + expect(result.reply_text).toContain("47 628 853,03"); + expect(result.reason_codes).toContain("mcp_discovery_response_policy_semantic_conflict_allows_candidate_override"); + expect(result.reason_codes).not.toContain("mcp_discovery_response_policy_keep_full_confirmed_factual_address_reply"); + }); + it("keeps address lane answers when discovery was not requested for the current turn", () => { const result = applyAssistantMcpDiscoveryResponsePolicy({ currentReply: "supported exact route answer",