/**
 * Request words that may introduce a document-kind noun:
 * "pokazhi" (show), "kakie" (which), "spisok" (list of), "dai" (give), "ishchi" (search).
 */
const DOCUMENT_REQUEST_VERBS =
  /(?:\u043f\u043e\u043a\u0430\u0436\u0438|\u043a\u0430\u043a\u0438\u0435|\u0441\u043f\u0438\u0441\u043e\u043a|\u0434\u0430\u0439|\u0438\u0449\u0438)/;

/**
 * Document-kind nouns accepted after a request word. All entries are matched as
 * prefixes (the pattern is unanchored), so further case endings are tolerated.
 */
const DOCUMENT_KIND_NOUNS: readonly RegExp[] = [
  // "schet-faktur…" (tax invoice). Accepts Cyrillic "е", Cyrillic "ё" and the Latin
  // look-alike "e"; the first word may be inflected ("scheta-", "schetov-") so that
  // the regular plural and the genitive plural used after "spisok" are recognised.
  // Separator: hyphen, non-breaking hyphen (U+2011), space, or nothing.
  /\u0441\u0447[\u0435\u0451e]\u0442(?:\u0430|\u043e\u0432)?[-\u2011 ]?\u0444\u0430\u043a\u0442\u0443\u0440/,
  // "nakladn…" (delivery note)
  /\u043d\u0430\u043a\u043b\u0430\u0434\u043d/,
  // "akt…" (act)
  /\u0430\u043a\u0442/,
  // "realizatsi" + "i" / "yu" / "y" (sales documents; "-y" is the genitive plural)
  /\u0440\u0435\u0430\u043b\u0438\u0437\u0430\u0446\u0438[\u0438\u044e\u0439]/,
  /invoice/,
  /bill/,
  /waybill/
];

/**
 * Alternatives that mark a message as asking for document evidence.
 * Compiled once at module load; no `g`/`y` flag, so `.test` is stateless.
 */
const DOCUMENT_EVIDENCE_FOLLOWUP_PATTERN = new RegExp(
  [
    // "po dokument…" (by documents), any ending
    /\u043f\u043e\s+\u0434\u043e\u043a\u0443\u043c\u0435\u043d\u0442/.source,
    // "davai | ishchi | pokazhi" + "dokument…"
    /(?:\u0434\u0430\u0432\u0430\u0439|\u0438\u0449\u0438|\u043f\u043e\u043a\u0430\u0436\u0438)\s+\u0434\u043e\u043a\u0443\u043c\u0435\u043d\u0442/
      .source,
    // request word + concrete document kind
    `${DOCUMENT_REQUEST_VERBS.source}\\s+(?:${DOCUMENT_KIND_NOUNS.map((noun) => noun.source).join("|")})`,
    // NOTE: the trailing (then|next) group is optional, so "document(s)" followed by
    // any whitespace already matches. Kept as-is to preserve existing routing.
    /documents?\s+(?:then|next)?/.source,
    /(?:then|next)\s+documents?/.source,
    /go\s+to\s+documents?/.source
  ].join("|"),
  "iu"
);

/**
 * Tells whether the raw user text contains a phrase asking for document evidence
 * (Russian or English), e.g. "by documents", "show documents", "show invoices".
 *
 * Matching is case-insensitive, Unicode-aware and unanchored: the phrase may appear
 * anywhere in the text.
 *
 * @param text Raw user message.
 * @returns `true` when at least one document-evidence phrase is present.
 */
function hasDocumentEvidenceFollowupSignal(text: string): boolean {
  return DOCUMENT_EVIDENCE_FOLLOWUP_PATTERN.test(text);
}
/**
 * Request words that may introduce a movement-kind noun:
 * "pokazhi" (show), "kakie" (which), "spisok" (list of), "dai" (give), "ishchi" (search).
 */
const MOVEMENT_REQUEST_VERBS =
  /(?:\u043f\u043e\u043a\u0430\u0436\u0438|\u043a\u0430\u043a\u0438\u0435|\u0441\u043f\u0438\u0441\u043e\u043a|\u0434\u0430\u0439|\u0438\u0449\u0438)/;

/**
 * Movement-kind nouns accepted after a request word. Entries are matched as prefixes
 * (the pattern is unanchored). The "-y" / "-ok" endings are the genitive plural forms
 * that follow "spisok" ("list of ...").
 */
const MOVEMENT_KIND_NOUNS: readonly RegExp[] = [
  // "platezh…" (payment); accepts Cyrillic "е", Cyrillic "ё" and the Latin look-alike "e"
  /\u043f\u043b\u0430\u0442[\u0435\u0451e]\u0436/,
  // "operatsi" + "i" / "yu" / "y" (operations)
  /\u043e\u043f\u0435\u0440\u0430\u0446\u0438[\u0438\u044e\u0439]/,
  // "provodk" + "i" / "a", or "provodok" (postings)
  /\u043f\u0440\u043e\u0432\u043e\u0434(?:\u043a[\u0438\u0430]|\u043e\u043a)/,
  // "spisani" + "ya" / "e" / "y" (write-offs / debits)
  /\u0441\u043f\u0438\u0441\u0430\u043d\u0438[\u044f\u0435\u0439]/,
  // "postupleni" + "ya" / "e" / "y" (receipts / incoming)
  /\u043f\u043e\u0441\u0442\u0443\u043f\u043b\u0435\u043d\u0438[\u044f\u0435\u0439]/,
  /payment/,
  /transaction/,
  /operation/,
  /posting/,
  /bank\s+operation/
];

/**
 * Alternatives that mark a message as asking for movement evidence.
 * Compiled once at module load; no `g`/`y` flag, so `.test` is stateless.
 */
const MOVEMENT_EVIDENCE_FOLLOWUP_PATTERN = new RegExp(
  [
    // "po dvizhen…" (by movements), any ending
    /\u043f\u043e\s+\u0434\u0432\u0438\u0436\u0435\u043d/.source,
    // "davai | ishchi | pokazhi" + "dvizheniya / dvizhenie"
    /(?:\u0434\u0430\u0432\u0430\u0439|\u0438\u0449\u0438|\u043f\u043e\u043a\u0430\u0436\u0438)\s+\u0434\u0432\u0438\u0436\u0435\u043d\u0438[\u044f\u0435]/
      .source,
    // "bankovskie / bankovskiy" + "dvizheniya / dvizhenie" (bank movements)
    /\u0431\u0430\u043d\u043a\u043e\u0432\u0441\u043a\u0438[\u0435\u0439]\s+\u0434\u0432\u0438\u0436\u0435\u043d\u0438[\u044f\u0435]/
      .source,
    // request word + concrete movement kind
    `${MOVEMENT_REQUEST_VERBS.source}\\s+(?:${MOVEMENT_KIND_NOUNS.map((noun) => noun.source).join("|")})`,
    // NOTE: the trailing (then|next) group is optional, so "movement(s)" followed by
    // any whitespace already matches. Kept as-is to preserve existing routing.
    /movements?\s+(?:then|next)?/.source,
    /(?:then|next)\s+movements?/.source,
    /go\s+to\s+movements?/.source
  ].join("|"),
  "iu"
);

/**
 * Tells whether the raw user text contains a phrase asking for movement evidence
 * (Russian or English), e.g. "by movements", "show payments", "list of operations".
 *
 * Matching is case-insensitive, Unicode-aware and unanchored: the phrase may appear
 * anywhere in the text.
 *
 * @param text Raw user message.
 * @returns `true` when at least one movement-evidence phrase is present.
 */
function hasMovementEvidenceFollowupSignal(text: string): boolean {
  return MOVEMENT_EVIDENCE_FOLLOWUP_PATTERN.test(text);
}
explicitDateScopeLiteralDetected = hasExplicitDateScopeLiteral(rawText); const rawDateScope = collectDateScopeFromRawText(rawText); + const metadataDocumentHintSignal = hasDocumentEvidenceFollowupSignal(rawText); + const metadataMovementHintSignal = hasMovementEvidenceFollowupSignal(rawText); const rawDomain = toNonEmptyString(assistantTurnMeaning?.asked_domain_family); const rawAction = toNonEmptyString(assistantTurnMeaning?.asked_action_family); @@ -513,7 +515,7 @@ export function buildAssistantMcpDiscoveryTurnInput( followupSeed.counterparty && !rawLifecycleSignal && !rawValueFlowSignal && - hasDocumentEvidenceFollowupSignal(rawText) + metadataDocumentHintSignal ); const metadataAmbiguityResolvedDocumentFollowupApplicable = Boolean( followupSeed.pilotScope === "metadata_inspection_v1" && @@ -521,7 +523,7 @@ export function buildAssistantMcpDiscoveryTurnInput( followupSeed.counterparty && !rawLifecycleSignal && !rawValueFlowSignal && - hasDocumentEvidenceFollowupSignal(rawText) + metadataDocumentHintSignal ); const metadataGroundedMovementFollowupApplicable = Boolean( followupSeed.pilotScope === "metadata_inspection_v1" && @@ -529,16 +531,14 @@ export function buildAssistantMcpDiscoveryTurnInput( !followupSeed.metadataAmbiguityDetected && followupSeed.counterparty && !rawLifecycleSignal && - !rawValueFlowSignal && - hasMovementEvidenceFollowupSignal(rawText) + metadataMovementHintSignal ); const metadataAmbiguityResolvedMovementFollowupApplicable = Boolean( followupSeed.pilotScope === "metadata_inspection_v1" && followupSeed.metadataAmbiguityDetected && followupSeed.counterparty && !rawLifecycleSignal && - !rawValueFlowSignal && - hasMovementEvidenceFollowupSignal(rawText) + metadataMovementHintSignal ); const metadataGroundedLaneContinuationApplicable = Boolean( followupSeed.pilotScope === "metadata_inspection_v1" && @@ -549,8 +549,8 @@ export function buildAssistantMcpDiscoveryTurnInput( !rawLifecycleSignal && !rawValueFlowSignal && !rawMetadataSignal && - 
!hasDocumentEvidenceFollowupSignal(rawText) && - !hasMovementEvidenceFollowupSignal(rawText) && + !metadataDocumentHintSignal && + !metadataMovementHintSignal && hasMetadataDownstreamContinuationSignal(rawText) ); const metadataGroundedDocumentLaneApplicable = @@ -592,7 +592,9 @@ export function buildAssistantMcpDiscoveryTurnInput( !lifecycleSignal && (rawBidirectionalValueFlowSignal || seededAction === "net_value_flow"); const valueFlowSignal = - !lifecycleSignal && (rawValueFlowSignal || seededDomain === "counterparty_value"); + !lifecycleSignal && + !metadataGroundedMovementLaneApplicable && + (rawValueFlowSignal || seededDomain === "counterparty_value"); const payoutSignal = valueFlowSignal && !bidirectionalValueFlowSignal && diff --git a/llm_normalizer/backend/tests/assistantMcpDiscoveryTurnInputAdapter.test.ts b/llm_normalizer/backend/tests/assistantMcpDiscoveryTurnInputAdapter.test.ts index ba08c19..7b14b2f 100644 --- a/llm_normalizer/backend/tests/assistantMcpDiscoveryTurnInputAdapter.test.ts +++ b/llm_normalizer/backend/tests/assistantMcpDiscoveryTurnInputAdapter.test.ts @@ -423,6 +423,67 @@ describe("assistant MCP discovery turn input adapter", () => { expect(result.reason_codes).toContain("mcp_discovery_metadata_ambiguity_resolved_to_movement_lane"); }); + it("resolves ambiguous metadata surface into document lane from semantic document hints", () => { + const result = buildAssistantMcpDiscoveryTurnInput({ + userMessage: "покажи счет-фактуры", + followupContext: { + previous_discovery_pilot_scope: "metadata_inspection_v1", + previous_discovery_metadata_ambiguity_detected: true, + previous_filters: { + counterparty: "SVK", + period_from: "2020-01-01", + period_to: "2020-12-31" + }, + previous_anchor_type: "counterparty", + previous_anchor_value: "SVK" + } + }); + + expect(result.adapter_status).toBe("ready"); + expect(result.should_run_discovery).toBe(true); + expect(result.semantic_data_need).toBe("document evidence"); + 
expect(result.turn_meaning_ref).toMatchObject({ + asked_domain_family: "documents", + asked_action_family: "list_documents", + explicit_entity_candidates: ["SVK"], + explicit_date_scope: "2020", + unsupported_but_understood_family: "document_evidence", + stale_replay_forbidden: true + }); + expect(result.reason_codes).toContain("mcp_discovery_metadata_ambiguity_resolved_to_document_lane"); + }); + + it("resolves ambiguous metadata surface into movement lane from semantic movement hints without turning it into value-flow", () => { + const result = buildAssistantMcpDiscoveryTurnInput({ + userMessage: "покажи платежи", + followupContext: { + previous_discovery_pilot_scope: "metadata_inspection_v1", + previous_discovery_metadata_ambiguity_detected: true, + previous_filters: { + counterparty: "SVK", + period_from: "2020-01-01", + period_to: "2020-12-31" + }, + previous_anchor_type: "counterparty", + previous_anchor_value: "SVK" + } + }); + + expect(result.adapter_status).toBe("ready"); + expect(result.should_run_discovery).toBe(true); + expect(result.semantic_data_need).toBe("movement evidence"); + expect(result.turn_meaning_ref).toMatchObject({ + asked_domain_family: "movements", + asked_action_family: "list_movements", + explicit_entity_candidates: ["SVK"], + explicit_date_scope: "2020", + unsupported_but_understood_family: "movement_evidence", + stale_replay_forbidden: true + }); + expect(result.reason_codes).toContain("mcp_discovery_metadata_ambiguity_resolved_to_movement_lane"); + expect(result.reason_codes).not.toContain("mcp_discovery_value_flow_signal_detected"); + }); + it("switches the checked year on a short payout follow-up while keeping prior discovery counterparty", () => { const result = buildAssistantMcpDiscoveryTurnInput({ userMessage: "а теперь за 2021?",