diff --git a/llm_normalizer/backend/dist/services/assistantEvidencePlanner.js b/llm_normalizer/backend/dist/services/assistantEvidencePlanner.js index d0b2a0d..e1a5951 100644 --- a/llm_normalizer/backend/dist/services/assistantEvidencePlanner.js +++ b/llm_normalizer/backend/dist/services/assistantEvidencePlanner.js @@ -37,12 +37,29 @@ function uniqueStrings(values) { function isExplicitDate(value) { return Boolean(value && /^\d{4}-\d{2}-\d{2}$/.test(value)); } -function providedAxesFromMeaning(meaning, graph) { +const SUBJECT_AXIS_SET = new Set([ + "counterparty", + "business_entity", + "item", + "supplier", + "buyer", + "document", + "contract" +]); +function providedAxesFromMeaning(meaning, graph, requiredAxes) { const result = []; if ((meaning?.explicit_entity_candidates?.length ?? 0) > 0) { result.push("counterparty"); result.push("business_entity"); } + if ((graph?.subject_candidates.length ?? 0) > 0) { + result.push("business_entity"); + for (const axis of requiredAxes) { + if (SUBJECT_AXIS_SET.has(axis)) { + result.push(axis); + } + } + } if (toNonEmptyString(meaning?.explicit_organization_scope)) { result.push("organization"); } @@ -126,7 +143,7 @@ function buildAssistantEvidencePlanner(input) { const plan = input.discoveryPlan; const turnMeaning = plan.turn_meaning_ref; const requiredAxes = uniqueStrings(plan.required_axes); - const providedAxes = providedAxesFromMeaning(turnMeaning, graph); + const providedAxes = providedAxesFromMeaning(turnMeaning, graph, requiredAxes); const graphClarificationGaps = uniqueStrings(graph?.clarification_gaps ?? []); const additionalAxisGaps = uniqueStrings(input.additionalMissingAxes ?? []).filter((axis) => !providedAxes.includes(axis) && (requiredAxes.includes(axis) || USER_ACTIONABLE_AXIS_SET.has(axis))); const axisGaps = uniqueStrings([...additionalAxisGaps, ...missingAxes(requiredAxes, providedAxes)]); diff --git a/llm_normalizer/backend/dist/services/assistantMcpDiscoveryDataNeedGraph.js b/llm_normalizer/backend/dist/services/assistantMcpDiscoveryDataNeedGraph.js index 4be636e..3799cfa 100644 --- a/llm_normalizer/backend/dist/services/assistantMcpDiscoveryDataNeedGraph.js +++ b/llm_normalizer/backend/dist/services/assistantMcpDiscoveryDataNeedGraph.js @@ -178,6 +178,23 @@ function hasOpenScopeOneSidedValueTotalHintUtf8Safe(rawUtterance, action) { } return false; } +function inferredOpenScopeOneSidedValueAction(rawUtterance) { + if (hasOpenScopeOneSidedValueTotalHintUtf8Safe(rawUtterance, "turnover") || + hasOpenScopeOneSidedValueTotalHint(rawUtterance, "turnover")) { + return "turnover"; + } + if (hasOpenScopeOneSidedValueTotalHintUtf8Safe(rawUtterance, "payout") || + hasOpenScopeOneSidedValueTotalHint(rawUtterance, "payout")) { + return "payout"; + } + return null; +} +function hasDetailedBusinessOverviewMoneyBreakdownHint(rawUtterance) { + if (!rawUtterance) { + return false; + } + return /(?:\u0440\u0430\u0441\u043a\u0440\u043e\p{L}*|\u043f\u043e\u0434\u0440\u043e\u0431\p{L}*|\u0440\u0430\u0437\u0431\u0435\p{L}*|\u0440\u0430\u0437\u0432\u0435\u0440\u043d\p{L}*|\u043a\u0442\u043e\s+\u0433\u043b\u0430\u0432\p{L}*|\u0433\u043b\u0430\u0432\p{L}*\s+\u043a\u043b\u0438\u0435\u043d\p{L}*|\u0433\u043b\u0430\u0432\p{L}*\s+\u043f\u043e\u0441\u0442\u0430\u0432\p{L}*|\u0447\u0438\u0441\u0442\p{L}*\s+\u0434\u0435\u043d\u0435\u0436\p{L}*\s+\u043f\u043e\u0442\u043e\p{L}*|\u043f\u043e\u043b\u0443\u0447\p{L}*[\s\S]{0,80}\u0437\u0430\u043f\u043b\u0430\u0442\p{L}*|\u0432\u0445\u043e\u0434\p{L}*[\s\S]{0,80}\u0438\u0441\u0445\u043e\u0434\p{L}*|detail|breakdown|drill\s*down|main\s+customer|main\s+supplier)/iu.test(rawUtterance); +} function supportsOrganizationScopedOpenTotal(action) { return action === "turnover" || action === "payout"; } @@ -393,7 +410,7 @@ function buildAssistantMcpDiscoveryDataNeedGraph(input) { const semanticDataNeed = lower(input.semanticDataNeed); const turnMeaning = input.turnMeaning ?? null; const domain = lower(turnMeaning?.asked_domain_family); - const action = lower(turnMeaning?.asked_action_family); + const rawAction = lower(turnMeaning?.asked_action_family); const unsupported = lower(turnMeaning?.unsupported_but_understood_family); const rawUtterance = lower(input.rawUtterance); const rawQuestionSignal = lower([input.rawUtterance, turnMeaning?.raw_message, turnMeaning?.effective_message].join(" ")); @@ -406,12 +423,19 @@ function buildAssistantMcpDiscoveryDataNeedGraph(input) { const subjectCandidates = (turnMeaning?.explicit_entity_candidates ?? []) .map((item) => toNonEmptyString(item)) .filter((item) => Boolean(item)); - const businessFactFamily = businessFactFamilyFor({ + const initialBusinessFactFamily = businessFactFamilyFor({ semanticDataNeed, domain, - action, + action: rawAction, unsupported }); + const oneSidedValueAction = inferredOpenScopeOneSidedValueAction(rawQuestionSignal); + const detailedBusinessOverviewMoneyBreakdownHint = hasDetailedBusinessOverviewMoneyBreakdownHint(rawQuestionSignal); + const oneSidedBusinessOverviewValueFlowOverride = initialBusinessFactFamily === "business_overview" && + Boolean(oneSidedValueAction) && + !detailedBusinessOverviewMoneyBreakdownHint; + const action = oneSidedBusinessOverviewValueFlowOverride ? oneSidedValueAction : rawAction; + const businessFactFamily = oneSidedBusinessOverviewValueFlowOverride ? "value_flow" : initialBusinessFactFamily; const aggregationNeed = aggregationNeedFor(aggregationAxis); const comparisonNeed = comparisonNeedFor(action); const allTimeScopeHint = hasAllTimeScopeHint(rawUtterance); @@ -529,6 +553,9 @@ function buildAssistantMcpDiscoveryDataNeedGraph(input) { if (directBusinessOverviewMoneyAnswerHint) { pushReason(reasonCodes, "data_need_graph_business_overview_direct_money_answer"); } + if (oneSidedBusinessOverviewValueFlowOverride) { + pushReason(reasonCodes, "data_need_graph_business_overview_one_sided_money_total_routed_to_value_flow"); + } if (clarificationGaps.includes("organization")) { pushReason(reasonCodes, "data_need_graph_open_scope_total_needs_organization"); } @@ -542,7 +569,7 @@ function buildAssistantMcpDiscoveryDataNeedGraph(input) { metadata_scope_hint: metadataScopeHint, subject_resolution_optional: subjectResolutionOptional || undefined, business_fact_family: businessFactFamily, - action_family: toNonEmptyString(turnMeaning?.asked_action_family), + action_family: toNonEmptyString(action), aggregation_need: aggregationNeed, time_scope_need: timeScopeNeed, comparison_need: comparisonNeed, diff --git a/llm_normalizer/backend/dist/services/assistantMcpDiscoveryPlanner.js b/llm_normalizer/backend/dist/services/assistantMcpDiscoveryPlanner.js index dd9e348..38a35b2 100644 --- a/llm_normalizer/backend/dist/services/assistantMcpDiscoveryPlanner.js +++ b/llm_normalizer/backend/dist/services/assistantMcpDiscoveryPlanner.js @@ -109,10 +109,16 @@ function hasReasonCode(graph, reasonCode) { function aggregationAxis(meaning) { return toNonEmptyString(meaning?.asked_aggregation_axis)?.toLowerCase() ?? null; } -function addScopeAxes(axes, meaning) { +function addScopeAxes(axes, meaning, graph) { if (hasEntity(meaning)) { pushUnique(axes, "counterparty"); } + if (hasSubjectCandidates(graph)) { + const factFamily = lower(graph?.business_fact_family); + if (factFamily === "value_flow" || factFamily === "activity_lifecycle") { + pushUnique(axes, "counterparty"); + } + } if (toNonEmptyString(meaning?.explicit_organization_scope)) { pushUnique(axes, "organization"); } @@ -498,7 +504,7 @@ function recipeFor(input) { const combined = `${domain} ${action} ${unsupported}`.trim(); const axes = []; const requestedAggregationAxis = aggregationAxis(meaning); - addScopeAxes(axes, meaning); + addScopeAxes(axes, meaning, dataNeedGraph); addMetadataScopeAxis(axes, meaning); addTimeScopeAxes(axes, dataNeedGraph); if (graphClarificationGaps.includes("lane_family_choice")) { diff --git a/llm_normalizer/backend/src/services/assistantEvidencePlanner.ts b/llm_normalizer/backend/src/services/assistantEvidencePlanner.ts index a853c27..564617a 100644 --- a/llm_normalizer/backend/src/services/assistantEvidencePlanner.ts +++ b/llm_normalizer/backend/src/services/assistantEvidencePlanner.ts @@ -117,15 +117,34 @@ function isExplicitDate(value: string | null): boolean { return Boolean(value && /^\d{4}-\d{2}-\d{2}$/.test(value)); } +const SUBJECT_AXIS_SET = new Set([ + "counterparty", + "business_entity", + "item", + "supplier", + "buyer", + "document", + "contract" +]); + function providedAxesFromMeaning( meaning: AssistantMcpDiscoveryTurnMeaningRef | null, - graph: AssistantMcpDiscoveryDataNeedGraphContract | null + graph: AssistantMcpDiscoveryDataNeedGraphContract | null, + requiredAxes: string[] ): string[] { const result: string[] = []; if ((meaning?.explicit_entity_candidates?.length ?? 0) > 0) { result.push("counterparty"); result.push("business_entity"); } + if ((graph?.subject_candidates.length ?? 0) > 0) { + result.push("business_entity"); + for (const axis of requiredAxes) { + if (SUBJECT_AXIS_SET.has(axis)) { + result.push(axis); + } + } + } if (toNonEmptyString(meaning?.explicit_organization_scope)) { result.push("organization"); } @@ -223,7 +242,7 @@ export function buildAssistantEvidencePlanner( const plan = input.discoveryPlan; const turnMeaning = plan.turn_meaning_ref; const requiredAxes = uniqueStrings(plan.required_axes); - const providedAxes = providedAxesFromMeaning(turnMeaning, graph); + const providedAxes = providedAxesFromMeaning(turnMeaning, graph, requiredAxes); const graphClarificationGaps = uniqueStrings(graph?.clarification_gaps ?? []); const additionalAxisGaps = uniqueStrings(input.additionalMissingAxes ?? []).filter( (axis) => !providedAxes.includes(axis) && (requiredAxes.includes(axis) || USER_ACTIONABLE_AXIS_SET.has(axis)), diff --git a/llm_normalizer/backend/src/services/assistantMcpDiscoveryDataNeedGraph.ts b/llm_normalizer/backend/src/services/assistantMcpDiscoveryDataNeedGraph.ts index c832651..3e9c52d 100644 --- a/llm_normalizer/backend/src/services/assistantMcpDiscoveryDataNeedGraph.ts +++ b/llm_normalizer/backend/src/services/assistantMcpDiscoveryDataNeedGraph.ts @@ -267,6 +267,31 @@ function hasOpenScopeOneSidedValueTotalHintUtf8Safe(rawUtterance: string, action return false; } +function inferredOpenScopeOneSidedValueAction(rawUtterance: string): "turnover" | "payout" | null { + if ( + hasOpenScopeOneSidedValueTotalHintUtf8Safe(rawUtterance, "turnover") || + hasOpenScopeOneSidedValueTotalHint(rawUtterance, "turnover") + ) { + return "turnover"; + } + if ( + hasOpenScopeOneSidedValueTotalHintUtf8Safe(rawUtterance, "payout") || + hasOpenScopeOneSidedValueTotalHint(rawUtterance, "payout") + ) { + return "payout"; + } + return null; +} + +function hasDetailedBusinessOverviewMoneyBreakdownHint(rawUtterance: string): boolean { + if (!rawUtterance) { + return false; + } + return /(?:\u0440\u0430\u0441\u043a\u0440\u043e\p{L}*|\u043f\u043e\u0434\u0440\u043e\u0431\p{L}*|\u0440\u0430\u0437\u0431\u0435\p{L}*|\u0440\u0430\u0437\u0432\u0435\u0440\u043d\p{L}*|\u043a\u0442\u043e\s+\u0433\u043b\u0430\u0432\p{L}*|\u0433\u043b\u0430\u0432\p{L}*\s+\u043a\u043b\u0438\u0435\u043d\p{L}*|\u0433\u043b\u0430\u0432\p{L}*\s+\u043f\u043e\u0441\u0442\u0430\u0432\p{L}*|\u0447\u0438\u0441\u0442\p{L}*\s+\u0434\u0435\u043d\u0435\u0436\p{L}*\s+\u043f\u043e\u0442\u043e\p{L}*|\u043f\u043e\u043b\u0443\u0447\p{L}*[\s\S]{0,80}\u0437\u0430\u043f\u043b\u0430\u0442\p{L}*|\u0432\u0445\u043e\u0434\p{L}*[\s\S]{0,80}\u0438\u0441\u0445\u043e\u0434\p{L}*|detail|breakdown|drill\s*down|main\s+customer|main\s+supplier)/iu.test( + rawUtterance + ); +} + function supportsOrganizationScopedOpenTotal(action: string): boolean { return action === "turnover" || action === "payout"; } @@ -522,7 +547,7 @@ export function buildAssistantMcpDiscoveryDataNeedGraph( const semanticDataNeed = lower(input.semanticDataNeed); const turnMeaning = input.turnMeaning ?? null; const domain = lower(turnMeaning?.asked_domain_family); - const action = lower(turnMeaning?.asked_action_family); + const rawAction = lower(turnMeaning?.asked_action_family); const unsupported = lower(turnMeaning?.unsupported_but_understood_family); const rawUtterance = lower(input.rawUtterance); const rawQuestionSignal = lower([input.rawUtterance, turnMeaning?.raw_message, turnMeaning?.effective_message].join(" ")); @@ -535,12 +560,20 @@ export function buildAssistantMcpDiscoveryDataNeedGraph( const subjectCandidates = (turnMeaning?.explicit_entity_candidates ?? []) .map((item) => toNonEmptyString(item)) .filter((item): item is string => Boolean(item)); - const businessFactFamily = businessFactFamilyFor({ + const initialBusinessFactFamily = businessFactFamilyFor({ semanticDataNeed, domain, - action, + action: rawAction, unsupported }); + const oneSidedValueAction = inferredOpenScopeOneSidedValueAction(rawQuestionSignal); + const detailedBusinessOverviewMoneyBreakdownHint = hasDetailedBusinessOverviewMoneyBreakdownHint(rawQuestionSignal); + const oneSidedBusinessOverviewValueFlowOverride = + initialBusinessFactFamily === "business_overview" && + Boolean(oneSidedValueAction) && + !detailedBusinessOverviewMoneyBreakdownHint; + const action = oneSidedBusinessOverviewValueFlowOverride ? oneSidedValueAction! : rawAction; + const businessFactFamily = oneSidedBusinessOverviewValueFlowOverride ? "value_flow" : initialBusinessFactFamily; const aggregationNeed = aggregationNeedFor(aggregationAxis); const comparisonNeed = comparisonNeedFor(action); const allTimeScopeHint = hasAllTimeScopeHint(rawUtterance); @@ -669,6 +702,9 @@ export function buildAssistantMcpDiscoveryDataNeedGraph( if (directBusinessOverviewMoneyAnswerHint) { pushReason(reasonCodes, "data_need_graph_business_overview_direct_money_answer"); } + if (oneSidedBusinessOverviewValueFlowOverride) { + pushReason(reasonCodes, "data_need_graph_business_overview_one_sided_money_total_routed_to_value_flow"); + } if (clarificationGaps.includes("organization")) { pushReason(reasonCodes, "data_need_graph_open_scope_total_needs_organization"); } @@ -683,7 +719,7 @@ export function buildAssistantMcpDiscoveryDataNeedGraph( metadata_scope_hint: metadataScopeHint, subject_resolution_optional: subjectResolutionOptional || undefined, business_fact_family: businessFactFamily, - action_family: toNonEmptyString(turnMeaning?.asked_action_family), + action_family: toNonEmptyString(action), aggregation_need: aggregationNeed, time_scope_need: timeScopeNeed, comparison_need: comparisonNeed, diff --git a/llm_normalizer/backend/src/services/assistantMcpDiscoveryPlanner.ts b/llm_normalizer/backend/src/services/assistantMcpDiscoveryPlanner.ts index 55d5908..9903752 100644 --- a/llm_normalizer/backend/src/services/assistantMcpDiscoveryPlanner.ts +++ b/llm_normalizer/backend/src/services/assistantMcpDiscoveryPlanner.ts @@ -253,10 +253,20 @@ function aggregationAxis(meaning: AssistantMcpDiscoveryTurnMeaningRef | null | u return toNonEmptyString(meaning?.asked_aggregation_axis)?.toLowerCase() ?? null; } -function addScopeAxes(axes: string[], meaning: AssistantMcpDiscoveryTurnMeaningRef | null | undefined): void { +function addScopeAxes( + axes: string[], + meaning: AssistantMcpDiscoveryTurnMeaningRef | null | undefined, + graph: AssistantMcpDiscoveryDataNeedGraphContract | null | undefined +): void { if (hasEntity(meaning)) { pushUnique(axes, "counterparty"); } + if (hasSubjectCandidates(graph)) { + const factFamily = lower(graph?.business_fact_family); + if (factFamily === "value_flow" || factFamily === "activity_lifecycle") { + pushUnique(axes, "counterparty"); + } + } if (toNonEmptyString(meaning?.explicit_organization_scope)) { pushUnique(axes, "organization"); } @@ -741,7 +751,7 @@ function recipeFor(input: AssistantMcpDiscoveryPlannerInput): PlannerRecipe { const combined = `${domain} ${action} ${unsupported}`.trim(); const axes: string[] = []; const requestedAggregationAxis = aggregationAxis(meaning); - addScopeAxes(axes, meaning); + addScopeAxes(axes, meaning, dataNeedGraph); addMetadataScopeAxis(axes, meaning); addTimeScopeAxes(axes, dataNeedGraph); diff --git a/llm_normalizer/backend/tests/assistantMcpDiscoveryDataNeedGraph.test.ts b/llm_normalizer/backend/tests/assistantMcpDiscoveryDataNeedGraph.test.ts index bbb6ad4..3a6471b 100644 --- a/llm_normalizer/backend/tests/assistantMcpDiscoveryDataNeedGraph.test.ts +++ b/llm_normalizer/backend/tests/assistantMcpDiscoveryDataNeedGraph.test.ts @@ -336,6 +336,37 @@ describe("assistant MCP discovery data need graph", () => { ]); expect(result.reason_codes).toContain("data_need_graph_open_scope_total_without_subject"); }); + + it("routes broad-overview one-sided incoming totals to value-flow instead of company overview", () => { + const result = buildAssistantMcpDiscoveryDataNeedGraph({ + semanticDataNeed: "business overview evidence with bounded analyst interpretation", + rawUtterance: + "А всего сколько денег пришло в ООО Альтернатива Плюс за 2020, без топов и без контрагентов?", + turnMeaning: { + asked_domain_family: "business_overview", + asked_action_family: "broad_evaluation", + unsupported_but_understood_family: "broad_business_evaluation", + explicit_date_scope: "2020", + explicit_organization_scope: "ООО Альтернатива Плюс" + } + }); + + expect(result.business_fact_family).toBe("value_flow"); + expect(result.action_family).toBe("turnover"); + expect(result.comparison_need).toBeNull(); + expect(result.ranking_need).toBeNull(); + expect(result.clarification_gaps).toEqual([]); + expect(result.decomposition_candidates).toEqual([ + "collect_scoped_movements", + "aggregate_checked_amounts", + "probe_coverage" + ]); + expect(result.reason_codes).toContain("data_need_graph_open_scope_total_without_subject"); + expect(result.reason_codes).toContain( + "data_need_graph_business_overview_one_sided_money_total_routed_to_value_flow" + ); + }); + it("treats a generic incoming total as an understood open-scope ask that still needs organization", () => { const result = buildAssistantMcpDiscoveryDataNeedGraph({ semanticDataNeed: "counterparty value-flow evidence", diff --git a/llm_normalizer/backend/tests/assistantMcpDiscoveryPlanner.test.ts b/llm_normalizer/backend/tests/assistantMcpDiscoveryPlanner.test.ts index 6343c05..f6acb6e 100644 --- a/llm_normalizer/backend/tests/assistantMcpDiscoveryPlanner.test.ts +++ b/llm_normalizer/backend/tests/assistantMcpDiscoveryPlanner.test.ts @@ -157,6 +157,68 @@ describe("assistant MCP discovery planner", () => { expect(result.evidence_plan.evidence_axes.missing_axes).not.toContain("all_time_scope"); }); + it("uses graph subject candidates to satisfy the selected subject evidence axis", () => { + const result = planAssistantMcpDiscovery({ + dataNeedGraph: { + schema_version: "assistant_data_need_graph_v1", + policy_owner: "assistantMcpDiscoveryDataNeedGraph", + subject_candidates: ["SVK"], + business_fact_family: "value_flow", + action_family: "net_value_flow", + aggregation_need: null, + time_scope_need: "explicit_period", + comparison_need: "incoming_vs_outgoing", + ranking_need: null, + proof_expectation: "coverage_checked_fact", + clarification_gaps: [], + decomposition_candidates: [], + forbidden_overclaim_flags: ["no_raw_model_claims"], + reason_codes: ["data_need_graph_built"] + }, + turnMeaning: { + asked_action_family: "net_value_flow", + explicit_date_scope: "2020" + } + }); + + expect(result.selected_chain_id).toBe("value_flow_comparison"); + expect(result.evidence_plan.evidence_axes.required_axes).toContain("counterparty"); + expect(result.evidence_plan.evidence_axes.provided_axes).toContain("business_entity"); + expect(result.evidence_plan.evidence_axes.provided_axes).toContain("counterparty"); + expect(result.evidence_plan.evidence_axes.missing_axes).not.toContain("counterparty"); + }); + + it("does not coerce inventory item subjects into counterparty evidence axes", () => { + const result = planAssistantMcpDiscovery({ + dataNeedGraph: { + schema_version: "assistant_data_need_graph_v1", + policy_owner: "assistantMcpDiscoveryDataNeedGraph", + subject_candidates: ["Столешница 600"], + business_fact_family: "inventory_purchase_provenance", + action_family: "purchase_provenance", + aggregation_need: null, + time_scope_need: null, + comparison_need: null, + ranking_need: null, + proof_expectation: "coverage_checked_fact", + clarification_gaps: [], + decomposition_candidates: [], + forbidden_overclaim_flags: ["no_raw_model_claims"], + reason_codes: ["data_need_graph_built"] + }, + turnMeaning: { + asked_action_family: "purchase_provenance" + } + }); + + expect(result.selected_chain_id).toBe("inventory_purchase_provenance"); + expect(result.evidence_plan.evidence_axes.required_axes).toContain("item"); + expect(result.evidence_plan.evidence_axes.provided_axes).toContain("business_entity"); + expect(result.evidence_plan.evidence_axes.provided_axes).toContain("item"); + expect(result.evidence_plan.evidence_axes.provided_axes).not.toContain("counterparty"); + expect(result.evidence_plan.evidence_axes.missing_axes).not.toContain("item"); + }); + it("keeps representative graph-selected chains aligned with top catalog template matches", () => { const graph = ( businessFactFamily: string,