diff --git a/docs/orchestration/address_truth_harness_phase40_open_scope_all_time_followup.json b/docs/orchestration/address_truth_harness_phase40_open_scope_all_time_followup.json new file mode 100644 index 0000000..5e7c935 --- /dev/null +++ b/docs/orchestration/address_truth_harness_phase40_open_scope_all_time_followup.json @@ -0,0 +1,78 @@ +{ + "schema_version": "domain_truth_harness_spec_v1", + "scenario_id": "address_truth_harness_phase40_open_scope_all_time_followup", + "domain": "address_phase40_open_scope_all_time_followup", + "title": "Phase 40 open-scope all-time follow-up after year total", + "description": "Targeted AGENT replay for Big Block D where a bounded organization-scoped yearly incoming total must pivot into an all-time open-scope total without carrying a stale follow-up date from the previous turn.", + "bindings": {}, + "steps": [ + { + "step_id": "step_01_year_total_for_org", + "title": "Organization-scoped yearly total returns a bounded incoming amount for 2017", + "question": "Сколько входящих денег за 2017 год по ООО Альтернатива Плюс?", + "allowed_reply_types": ["factual", "factual_with_explanation", "partial_coverage"], + "required_answer_patterns_all": [ + "(?i)2017", + "(?i)входящ|получ|поступ", + "(?i)руб|₽" + ], + "required_answer_patterns_any": [ + "(?i)альтернатива", + "(?i)проверенн|найденн" + ], + "forbidden_answer_patterns": [ + "(?i)уточните организацию", + "(?i)уточните контрагента", + "(?i)не найден контрагент", + "(?i)по какому контрагенту", + "(?i)не найдено контрагента" + ], + "criticality": "critical", + "semantic_tags": [ + "value_flow_total", + "incoming", + "organization_scoped", + "year_specific", + "bounded_autonomy" + ] + }, + { + "step_id": "step_02_all_time_followup_reuses_org_without_stale_year", + "title": "All-time follow-up keeps the organization but drops the stale year filter", + "question": "сколько вообще денег мы заработали за все время?", + "allowed_reply_types": ["factual", "factual_with_explanation", "partial_coverage"], + "required_answer_patterns_all": [ + "(?i)входящ|получ|поступ|заработ", + "(?i)руб|₽" + ], + "required_answer_patterns_any": [ + "(?i)за все время", + "(?i)за все доступное время", + "(?i)весь проверенн", + "(?i)подтвержден", + "(?i)проверенн|найденн", + "(?i)по данным 1с" + ], + "forbidden_answer_patterns": [ + "(?i)уточните организацию", + "(?i)уточните контрагента", + "(?i)не найден контрагент", + "(?i)по какому контрагенту", + "(?i)не найдено контрагента", + "(?i)не получил", + "(?i)не смог", + "(?i)не удалось", + "(?i)не подтвержденного факта" + ], + "criticality": "critical", + "semantic_tags": [ + "value_flow_total", + "incoming", + "open_scope", + "all_time_scope", + "organization_followup_reuse", + "bounded_autonomy" + ] + } + ] +} diff --git a/docs/orchestration/address_truth_harness_phase41_saved_chain_all_time_revenue_followup.json b/docs/orchestration/address_truth_harness_phase41_saved_chain_all_time_revenue_followup.json new file mode 100644 index 0000000..80a0f73 --- /dev/null +++ b/docs/orchestration/address_truth_harness_phase41_saved_chain_all_time_revenue_followup.json @@ -0,0 +1,104 @@ +{ + "schema_version": "domain_truth_harness_spec_v1", + "scenario_id": "address_truth_harness_phase41_saved_chain_all_time_revenue_followup", + "domain": "address_phase41_saved_chain_all_time_revenue_followup", + "title": "Phase 41 saved-chain all-time revenue follow-up", + "description": "Targeted AGENT replay for the saved-session seam from assistant-stage1-RFWTAQ-aXR where the assistant must survive the natural chain 'самый доходный год -> а за 2017 мы скок заработали -> сколько вообще денег мы заработали за все время' without collapsing into an empty partial answer.", + "bindings": {}, + "steps": [ + { + "step_id": "step_01_top_revenue_year", + "title": "The assistant identifies the top confirmed revenue year in the active self-scope contour", + "question": "какой у нас самый доходный год", + "allowed_reply_types": ["factual", "factual_with_explanation", "partial_coverage"], + "required_answer_patterns_all": [ + "(?i)20\\d\\d", + "(?i)денежн|поступ|доходн|выручк", + "(?i)руб|₽" + ], + "required_answer_patterns_any": [ + "(?i)самый доходный год", + "(?i)топ-?3", + "(?i)подтвержден" + ], + "forbidden_answer_patterns": [ + "(?i)уточните организацию", + "(?i)уточните контрагента", + "(?i)не найден контрагент" + ], + "criticality": "critical", + "semantic_tags": [ + "value_flow_ranking", + "self_scope", + "saved_chain", + "bounded_autonomy" + ] + }, + { + "step_id": "step_02_year_followup_2017", + "title": "Short year follow-up reuses the same contour and returns a bounded 2017 incoming total", + "question": "а за 2017 мы скок заработали?", + "allowed_reply_types": ["factual", "factual_with_explanation", "partial_coverage"], + "required_answer_patterns_all": [ + "(?i)2017", + "(?i)входящ|получ|поступ|заработ", + "(?i)руб|₽" + ], + "required_answer_patterns_any": [ + "(?i)подтвержден", + "(?i)проверенн|найденн", + "(?i)по данным 1с", + "(?i)в рамках проверенного периода" + ], + "forbidden_answer_patterns": [ + "(?i)уточните организацию", + "(?i)уточните контрагента", + "(?i)не найден контрагент", + "(?i)не получил", + "(?i)не смог", + "(?i)не удалось" + ], + "criticality": "critical", + "semantic_tags": [ + "value_flow_total", + "year_switch", + "self_scope", + "saved_chain", + "bounded_autonomy" + ] + }, + { + "step_id": "step_03_all_time_followup", + "title": "All-time follow-up stays in the same money contour and does not fall into an empty partial answer", + "question": "сколько вообще денег мы заработали за все время?", + "allowed_reply_types": ["factual", "factual_with_explanation", "partial_coverage"], + "required_answer_patterns_all": [ + "(?i)входящ|получ|поступ|заработ", + "(?i)руб|₽" + ], + "required_answer_patterns_any": [ + "(?i)за все время", + "(?i)за все доступное время", + "(?i)подтвержден", + "(?i)по данным 1с" + ], + "forbidden_answer_patterns": [ + "(?i)уточните организацию", + "(?i)уточните контрагента", + "(?i)не найден контрагент", + "(?i)не получил", + "(?i)не смог", + "(?i)не удалось", + "(?i)подтвержденного факта для ответа не получил" + ], + "criticality": "critical", + "semantic_tags": [ + "value_flow_total", + "all_time_scope", + "self_scope", + "saved_chain", + "bounded_autonomy" + ] + } + ] +} diff --git a/llm_normalizer/backend/dist/services/assistantContinuityPolicy.js b/llm_normalizer/backend/dist/services/assistantContinuityPolicy.js index 1337180..f88e6f5 100644 --- a/llm_normalizer/backend/dist/services/assistantContinuityPolicy.js +++ b/llm_normalizer/backend/dist/services/assistantContinuityPolicy.js @@ -6,7 +6,10 @@ exports.readAssistantMcpDiscoveryEntityCandidates = readAssistantMcpDiscoveryEnt exports.readAssistantMcpDiscoveryPilotScope = readAssistantMcpDiscoveryPilotScope; exports.readAssistantMcpDiscoveryRankingNeed = readAssistantMcpDiscoveryRankingNeed; exports.readAssistantMcpDiscoveryMetadataRouteFamily = readAssistantMcpDiscoveryMetadataRouteFamily; +exports.readAssistantMcpDiscoveryMetadataRouteFamilySelectionBasis = readAssistantMcpDiscoveryMetadataRouteFamilySelectionBasis; exports.readAssistantMcpDiscoveryMetadataSelectedEntitySet = readAssistantMcpDiscoveryMetadataSelectedEntitySet; +exports.readAssistantMcpDiscoveryMetadataSelectedSurfaceObjects = readAssistantMcpDiscoveryMetadataSelectedSurfaceObjects; +exports.readAssistantMcpDiscoveryMetadataRecommendedNextPrimitive = readAssistantMcpDiscoveryMetadataRecommendedNextPrimitive; exports.readAssistantMcpDiscoveryMetadataAmbiguityDetected = readAssistantMcpDiscoveryMetadataAmbiguityDetected; exports.readAssistantMcpDiscoveryMetadataAmbiguityEntitySets = readAssistantMcpDiscoveryMetadataAmbiguityEntitySets; exports.formatIsoDateForReply = formatIsoDateForReply; @@ -154,9 +157,22 @@ function readAssistantMcpDiscoveryRankingNeed(debug, toNonEmptyString = fallback function readAssistantMcpDiscoveryMetadataRouteFamily(debug, toNonEmptyString = fallbackToNonEmptyString) { return toNonEmptyString(readAssistantMcpDiscoveryDerivedMetadataSurface(debug)?.downstream_route_family); } +function readAssistantMcpDiscoveryMetadataRouteFamilySelectionBasis(debug, toNonEmptyString = fallbackToNonEmptyString) { + return toNonEmptyString(readAssistantMcpDiscoveryDerivedMetadataSurface(debug)?.route_family_selection_basis); +} function readAssistantMcpDiscoveryMetadataSelectedEntitySet(debug, toNonEmptyString = fallbackToNonEmptyString) { return toNonEmptyString(readAssistantMcpDiscoveryDerivedMetadataSurface(debug)?.selected_entity_set); } +function readAssistantMcpDiscoveryMetadataSelectedSurfaceObjects(debug, toNonEmptyString = fallbackToNonEmptyString) { + const values = readAssistantMcpDiscoveryDerivedMetadataSurface(debug)?.selected_surface_objects; + if (!Array.isArray(values)) { + return []; + } + return values.map((item) => toNonEmptyString(item)).filter((item) => Boolean(item)); +} +function readAssistantMcpDiscoveryMetadataRecommendedNextPrimitive(debug, toNonEmptyString = fallbackToNonEmptyString) { + return toNonEmptyString(readAssistantMcpDiscoveryDerivedMetadataSurface(debug)?.recommended_next_primitive); +} function readAssistantMcpDiscoveryMetadataAmbiguityDetected(debug) { return (readAssistantMcpDiscoveryDerivedMetadataSurface(debug)?.ambiguity_detected === true || readAssistantMcpDiscoveryTurnMeaningMetadataAmbiguityEntitySets(debug).length > 0); diff --git a/llm_normalizer/backend/dist/services/assistantMcpCatalogIndex.js b/llm_normalizer/backend/dist/services/assistantMcpCatalogIndex.js index f2d5c9e..924a05f 100644 --- a/llm_normalizer/backend/dist/services/assistantMcpCatalogIndex.js +++ b/llm_normalizer/backend/dist/services/assistantMcpCatalogIndex.js @@ -2,6 +2,8 @@ Object.defineProperty(exports, "__esModule", { value: true }); exports.ASSISTANT_MCP_CATALOG_PLAN_REVIEW_SCHEMA_VERSION = exports.ASSISTANT_MCP_CATALOG_INDEX_SCHEMA_VERSION = void 0; exports.searchAssistantMcpCatalogPrimitivesByDecompositionCandidates = searchAssistantMcpCatalogPrimitivesByDecompositionCandidates; +exports.searchAssistantMcpCatalogPrimitivesByFactAxis = searchAssistantMcpCatalogPrimitivesByFactAxis; +exports.searchAssistantMcpCatalogPrimitivesByMetadataSurface = searchAssistantMcpCatalogPrimitivesByMetadataSurface; exports.buildAssistantMcpCatalogIndex = buildAssistantMcpCatalogIndex; exports.getAssistantMcpCatalogPrimitive = getAssistantMcpCatalogPrimitive; exports.reviewAssistantMcpDiscoveryPlanAgainstCatalog = reviewAssistantMcpDiscoveryPlanAgainstCatalog; @@ -13,6 +15,9 @@ const PRIMITIVE_CONTRACTS = [ primitive_id: "search_business_entity", purpose: "Find candidate 1C business entities by user wording before a fact query is executed.", decomposition_hints: ["search_business_entity"], + supported_fact_families: ["entity_grounding"], + supported_action_families: ["search_business_entity"], + planning_tags: ["subject_resolution"], required_axes_any_of: [["business_entity"], ["counterparty"], ["organization"], ["contract"], ["item"]], optional_axes: ["period", "document", "account"], output_fact_kinds: ["entity_candidates", "entity_ambiguity"], @@ -24,6 +29,9 @@ const PRIMITIVE_CONTRACTS = [ primitive_id: "inspect_1c_metadata", purpose: "Inspect available 1C schema/catalog/document/register surface before selecting a query lane.", decomposition_hints: ["inspect_metadata_surface"], + supported_fact_families: ["schema_surface"], + supported_action_families: ["inspect_catalog", "inspect_documents", "inspect_registers", "inspect_fields", "inspect_surface"], + planning_tags: ["metadata", "surface_inspection"], required_axes_any_of: [["metadata_scope"], ["domain_family"], ["document"], ["register"]], optional_axes: ["business_entity", "account", "counterparty"], output_fact_kinds: ["available_fields", "available_entity_sets", "known_limitations"], @@ -35,6 +43,9 @@ const PRIMITIVE_CONTRACTS = [ primitive_id: "resolve_entity_reference", purpose: "Resolve a user-visible entity name to a concrete 1C reference candidate.", decomposition_hints: ["resolve_entity_reference"], + supported_fact_families: ["entity_grounding", "value_flow", "document_evidence", "movement_evidence", "activity_lifecycle"], + supported_action_families: ["search_business_entity", "turnover", "payout", "net_value_flow", "list_documents", "list_movements", "activity_duration"], + planning_tags: ["subject_resolution"], required_axes_any_of: [["business_entity"], ["counterparty"], ["organization"], ["contract"], ["item"]], optional_axes: ["period", "inn", "document"], output_fact_kinds: ["resolved_entity_ref", "entity_conflict"], @@ -51,6 +62,9 @@ const PRIMITIVE_CONTRACTS = [ "collect_outgoing_movements", "fetch_scoped_movements" ], + supported_fact_families: ["value_flow", "movement_evidence"], + supported_action_families: ["turnover", "payout", "net_value_flow", "list_movements"], + planning_tags: ["movement", "comparison", "ranking", "aggregation", "monthly_aggregation"], required_axes_any_of: [["period", "account"], ["period", "counterparty"], ["period", "organization"]], optional_axes: ["contract", "document", "amount", "item", "warehouse"], output_fact_kinds: ["movement_rows", "turnover", "balance_delta"], @@ -62,6 +76,9 @@ const PRIMITIVE_CONTRACTS = [ primitive_id: "query_documents", purpose: "Fetch documents related to a scoped entity, period, contract, or movement explanation.", decomposition_hints: ["fetch_scoped_documents", "fetch_supporting_documents"], + supported_fact_families: ["document_evidence", "activity_lifecycle"], + supported_action_families: ["list_documents", "activity_duration"], + planning_tags: ["document"], required_axes_any_of: [["document"], ["counterparty"], ["contract"], ["period", "organization"]], optional_axes: ["account", "amount", "item", "warehouse"], output_fact_kinds: ["document_rows", "document_dates", "document_amounts"], @@ -73,6 +90,9 @@ const PRIMITIVE_CONTRACTS = [ primitive_id: "aggregate_by_axis", purpose: "Aggregate already-scoped 1C evidence by a business axis such as counterparty, contract, or period.", decomposition_hints: ["aggregate_checked_amounts", "aggregate_ranked_axis_values", "aggregate_by_month"], + supported_fact_families: ["value_flow"], + supported_action_families: ["turnover", "payout", "net_value_flow"], + planning_tags: ["aggregation", "ranking", "monthly_aggregation"], required_axes_any_of: [["aggregate_axis", "period"], ["aggregate_axis", "counterparty"], ["aggregate_axis", "account"]], optional_axes: ["organization", "contract", "document", "amount"], output_fact_kinds: ["aggregate_totals", "ranked_axis_values"], @@ -84,6 +104,9 @@ const PRIMITIVE_CONTRACTS = [ primitive_id: "drilldown_related_objects", purpose: "Drill from a known entity or document into related contracts, documents, movements, or payments.", decomposition_hints: ["drilldown_related_objects"], + supported_fact_families: [], + supported_action_families: [], + planning_tags: ["drilldown"], required_axes_any_of: [["business_entity"], ["document"], ["contract"], ["counterparty"]], optional_axes: ["period", "account", "amount"], output_fact_kinds: ["related_objects", "relationship_edges"], @@ -95,6 +118,29 @@ const PRIMITIVE_CONTRACTS = [ primitive_id: "probe_coverage", purpose: "Check whether the selected MCP/schema route can prove the requested fact or only support a bounded inference.", decomposition_hints: ["probe_coverage"], + supported_fact_families: [ + "schema_surface", + "entity_grounding", + "value_flow", + "document_evidence", + "movement_evidence", + "activity_lifecycle" + ], + supported_action_families: [ + "inspect_catalog", + "inspect_documents", + "inspect_registers", + "inspect_fields", + "inspect_surface", + "search_business_entity", + "turnover", + "payout", + "net_value_flow", + "list_documents", + "list_movements", + "activity_duration" + ], + planning_tags: ["coverage"], required_axes_any_of: [["coverage_target"], ["domain_family"], ["primitive_id"]], optional_axes: ["period", "organization", "counterparty", "document", "account"], output_fact_kinds: ["coverage_status", "known_gaps"], @@ -106,6 +152,9 @@ const PRIMITIVE_CONTRACTS = [ primitive_id: "explain_evidence_basis", purpose: "Produce a machine-readable explanation of which checked MCP evidence supports, limits, or fails the answer.", decomposition_hints: ["explain_evidence_basis"], + supported_fact_families: ["activity_lifecycle"], + supported_action_families: ["activity_duration"], + planning_tags: ["explanation"], required_axes_any_of: [["evidence_basis"], ["primitive_id"], ["source_rows_summary"]], optional_axes: ["coverage_target", "domain_family"], output_fact_kinds: ["confirmed_facts", "inferred_facts", "unknown_facts"], @@ -141,6 +190,146 @@ function missingAxisGroups(axisSet, groups) { function normalizeDecompositionStep(value) { return value.trim().toLowerCase(); } +function normalizePlanningToken(value) { + return value.trim().toLowerCase(); +} +function countAxisOverlap(axisSet, groups) { + let best = 0; + for (const group of groups) { + const overlap = group.filter((axis) => axisSet.has(axis)).length; + if (overlap > best) { + best = overlap; + } + } + return best; +} +function tagSetFromFactAxisInput(input) { + const tags = new Set(); + const requiredAxes = input.required_axes ?? []; + if (input.business_fact_family === "schema_surface") { + tags.add("metadata"); + tags.add("surface_inspection"); + } + if (input.business_fact_family === "entity_grounding" || + input.has_subject_candidates || + requiredAxes.some((axis) => ["business_entity", "counterparty", "contract", "item"].includes(axis))) { + tags.add("subject_resolution"); + } + if (input.business_fact_family === "document_evidence") { + tags.add("document"); + } + if (input.business_fact_family === "movement_evidence") { + tags.add("movement"); + } + if (input.business_fact_family === "value_flow") { + tags.add("movement"); + } + if (input.comparison_need) { + tags.add("comparison"); + } + if (input.ranking_need) { + tags.add("ranking"); + tags.add("aggregation"); + } + if ((input.aggregation_need ?? "") === "by_month" || requiredAxes.includes("calendar_month")) { + tags.add("monthly_aggregation"); + tags.add("aggregation"); + } + if (requiredAxes.includes("aggregate_axis")) { + tags.add("aggregation"); + } + if (requiredAxes.includes("coverage_target")) { + tags.add("coverage"); + } + if (input.business_fact_family === "activity_lifecycle" || requiredAxes.includes("evidence_basis")) { + tags.add("explanation"); + } + return tags; +} +function matchesPlanningToken(value, candidates) { + const normalizedValue = normalizePlanningToken(value ?? ""); + return normalizedValue.length > 0 && candidates.some((candidate) => normalizePlanningToken(candidate) === normalizedValue); +} +function normalizeMetadataSurfaceToken(value) { + return value.trim().toLowerCase().replace(/[\s_.-]+/g, ""); +} +function metadataSurfaceSuggestsDocument(value) { + const token = normalizeMetadataSurfaceToken(value); + return (token.includes("документ") || + token.includes("document") || + token.includes("invoice") || + token.includes("waybill") || + token.includes("накладн") || + token.includes("счетфактур") || + token.includes("счётфактур") || + token.includes("акт")); +} +function metadataSurfaceSuggestsMovement(value) { + const token = normalizeMetadataSurfaceToken(value); + return (token.includes("регистр") || + token.includes("register") || + token.includes("movement") || + token.includes("движени") || + token.includes("операц") || + token.includes("проводк") || + token.includes("bank")); +} +function metadataSurfaceSuggestsCatalog(value) { + const token = normalizeMetadataSurfaceToken(value); + return (token.includes("справочник") || + token.includes("catalog") || + token.includes("directory")); +} +function tagSetFromMetadataSurfaceInput(input) { + const tags = new Set(); + const routeFamily = normalizePlanningToken(input.downstream_route_family ?? ""); + const recommendedPrimitive = normalizePlanningToken(input.recommended_next_primitive ?? ""); + const surfaceValues = [ + input.selected_entity_set ?? "", + ...(input.selected_surface_objects ?? []) + ]; + if (routeFamily === "document_evidence" || recommendedPrimitive === "query_documents") { + tags.add("document"); + } + if (routeFamily === "movement_evidence" || recommendedPrimitive === "query_movements") { + tags.add("movement"); + } + if (routeFamily === "catalog_drilldown" || recommendedPrimitive === "drilldown_related_objects") { + tags.add("drilldown"); + } + for (const value of surfaceValues) { + if (metadataSurfaceSuggestsDocument(value)) { + tags.add("document"); + } + if (metadataSurfaceSuggestsMovement(value)) { + tags.add("movement"); + } + if (metadataSurfaceSuggestsCatalog(value)) { + tags.add("drilldown"); + } + } + const requiredAxisSet = toStringSet(input.required_axes ?? []); + if (requiredAxisSet.has("coverage_target")) { + tags.add("coverage"); + } + if (requiredAxisSet.has("counterparty") || + requiredAxisSet.has("business_entity") || + requiredAxisSet.has("contract")) { + tags.add("subject_resolution"); + } + return tags; +} +function factFamiliesFromMetadataSurfaceInput(input) { + const families = new Set(); + const routeFamily = normalizePlanningToken(input.downstream_route_family ?? ""); + if (routeFamily === "document_evidence") { + families.add("document_evidence"); + } + if (routeFamily === "movement_evidence") { + families.add("movement_evidence"); + } + return families; +} function searchAssistantMcpCatalogPrimitivesByDecompositionCandidates(input) { const allowAggregateByAxis = input.allow_aggregate_by_axis !== false; const result = []; @@ -165,6 +354,121 @@ function searchAssistantMcpCatalogPrimitivesByDecompositionCandidates(input) { } return result; } +function searchAssistantMcpCatalogPrimitivesByFactAxis(input) { + const allowAggregateByAxis = input.allow_aggregate_by_axis !== false; + const requiredAxisSet = toStringSet(input.required_axes ?? []); + const desiredTags = tagSetFromFactAxisInput(input); + const scored = []; + for (const contract of PRIMITIVE_CONTRACTS) { + if (contract.primitive_id === "aggregate_by_axis" && !allowAggregateByAxis) { + continue; + } + if (contract.primitive_id === "search_business_entity" && input.business_fact_family !== "entity_grounding") { + continue; + } + if (input.business_fact_family === "schema_surface" && contract.primitive_id !== "inspect_1c_metadata") { + continue; + } + const factMatch = matchesPlanningToken(input.business_fact_family, contract.supported_fact_families); + const actionMatch = matchesPlanningToken(input.action_family, contract.supported_action_families); + const tagMatches = contract.planning_tags.filter((tag) => desiredTags.has(normalizePlanningToken(tag))); + if (contract.primitive_id === "search_business_entity" && !desiredTags.has("subject_resolution")) { + continue; + } + if (contract.primitive_id === "resolve_entity_reference" && !desiredTags.has("subject_resolution")) { + continue; + } + if (contract.primitive_id === "aggregate_by_axis" && !desiredTags.has("aggregation")) { + continue; + } + if (contract.primitive_id === "explain_evidence_basis" && !desiredTags.has("explanation")) { + continue; + } + if (!factMatch && !actionMatch && tagMatches.length <= 0) { + continue; + } + const hasCompatibleAxisGroup = requiredAxisSet.size > 0 && hasAnyAxisGroup(requiredAxisSet, contract.required_axes_any_of); + const axisOverlap = requiredAxisSet.size > 0 ? countAxisOverlap(requiredAxisSet, contract.required_axes_any_of) : 0; + let score = 0; + if (factMatch) { + score += 5; + } + if (actionMatch) { + score += 3; + } + score += tagMatches.length * 2; + if (hasCompatibleAxisGroup) { + score += 2; + } + else if (axisOverlap > 0) { + score += 1; + } + if (score <= 0) { + continue; + } + scored.push({ + primitive: contract.primitive_id, + score + }); + } + return scored + .sort((left, right) => right.score - left.score) + .map((item) => item.primitive); +} +function searchAssistantMcpCatalogPrimitivesByMetadataSurface(input) { + const allowAggregateByAxis = input.allow_aggregate_by_axis !== false; + const requiredAxisSet = toStringSet(input.required_axes ?? []); + const desiredTags = tagSetFromMetadataSurfaceInput(input); + const desiredFactFamilies = factFamiliesFromMetadataSurfaceInput(input); + const recommendedPrimitive = normalizePlanningToken(input.recommended_next_primitive ?? ""); + const scored = []; + for (const contract of PRIMITIVE_CONTRACTS) { + if (contract.primitive_id === "aggregate_by_axis" && !allowAggregateByAxis) { + continue; + } + if (contract.primitive_id === "search_business_entity") { + continue; + } + const tagMatches = contract.planning_tags.filter((tag) => desiredTags.has(normalizePlanningToken(tag))); + const factMatch = contract.supported_fact_families.some((family) => desiredFactFamilies.has(normalizePlanningToken(family))); + const primitiveRecommended = normalizePlanningToken(contract.primitive_id) === recommendedPrimitive; + if (contract.primitive_id === "resolve_entity_reference" && !desiredTags.has("subject_resolution")) { + continue; + } + if (contract.primitive_id === "aggregate_by_axis" && !desiredTags.has("aggregation")) { + continue; + } + if (!primitiveRecommended && !factMatch && tagMatches.length <= 0) { + continue; + } + const hasCompatibleAxisGroup = requiredAxisSet.size > 0 && hasAnyAxisGroup(requiredAxisSet, contract.required_axes_any_of); + const axisOverlap = requiredAxisSet.size > 0 ? countAxisOverlap(requiredAxisSet, contract.required_axes_any_of) : 0; + let score = 0; + if (primitiveRecommended) { + score += 6; + } + if (factMatch) { + score += 5; + } + score += tagMatches.length * 2; + if (hasCompatibleAxisGroup) { + score += 2; + } + else if (axisOverlap > 0) { + score += 1; + } + if (score <= 0) { + continue; + } + scored.push({ + primitive: contract.primitive_id, + score + }); + } + return scored + .sort((left, right) => right.score - left.score) + .map((item) => item.primitive); +} function buildAssistantMcpCatalogIndex() { const reasonCodes = []; const missingContracts = assistantMcpDiscoveryPolicy_1.ASSISTANT_MCP_DISCOVERY_PRIMITIVES.filter((primitive) => !PRIMITIVE_CONTRACT_MAP.has(primitive)); diff --git a/llm_normalizer/backend/dist/services/assistantMcpDiscoveryAnswerAdapter.js b/llm_normalizer/backend/dist/services/assistantMcpDiscoveryAnswerAdapter.js index 639e0d7..1f983e2 100644 --- a/llm_normalizer/backend/dist/services/assistantMcpDiscoveryAnswerAdapter.js +++ b/llm_normalizer/backend/dist/services/assistantMcpDiscoveryAnswerAdapter.js @@ -89,7 +89,9 @@ function isEntityResolutionPilot(pilot) { } function isMetadataLaneChoiceClarification(pilot) { return (pilot.reason_codes.includes("planner_selected_metadata_lane_clarification_recipe") || - pilot.dry_run.reason_codes.includes("planner_selected_metadata_lane_clarification_recipe")); + pilot.reason_codes.includes("planner_selected_metadata_lane_clarification_from_data_need_graph") || + pilot.dry_run.reason_codes.includes("planner_selected_metadata_lane_clarification_recipe") || + pilot.dry_run.reason_codes.includes("planner_selected_metadata_lane_clarification_from_data_need_graph")); } function askedActionFamily(pilot) { const action = pilot.evidence.query_plan.turn_meaning_ref?.asked_action_family; diff --git a/llm_normalizer/backend/dist/services/assistantMcpDiscoveryDataNeedGraph.js b/llm_normalizer/backend/dist/services/assistantMcpDiscoveryDataNeedGraph.js index e4257c4..47419cf 100644 --- a/llm_normalizer/backend/dist/services/assistantMcpDiscoveryDataNeedGraph.js +++ b/llm_normalizer/backend/dist/services/assistantMcpDiscoveryDataNeedGraph.js @@ -67,10 +67,20 @@ function aggregationNeedFor(axis) { } return `by_${axis}`; } +function hasAllTimeScopeHint(rawUtterance) { + if (!rawUtterance) { + return false; + } + return /(?:\u0437\u0430\s+\u0432\u0441[\u0435\u0451]\s+\u0432\u0440\u0435\u043c\u044f|\u0437\u0430\s+\u0432\u0435\u0441\u044c\s+\u043f\u0435\u0440\u0438\u043e\u0434|\u0437\u0430\s+\u0432\u0441\u044e\s+\u0438\u0441\u0442\u043e\u0440\u0438(?:\u044e|\u0438)|\u0437\u0430\s+\u043b\u044e\u0431\u043e\u0439\s+\u043f\u0435\u0440\u0438\u043e\u0434|for\s+all\s+time|all\s+time|entire\s+period|full\s+history|any\s+period)/iu.test(rawUtterance); +} function timeScopeNeedFor(input) { if (input.explicitDateScope) { return "explicit_period"; } + if (input.allTimeScopeHint && + (input.family === "value_flow" || input.family === "movement_evidence" || input.family === "document_evidence")) { + return "all_time_scope"; + } if (input.family === "value_flow" || input.family === "movement_evidence" || input.family === "document_evidence") { return "period_required"; } @@ -254,6 +264,7 @@ function buildAssistantMcpDiscoveryDataNeedGraph(input) { const aggregationNeed = aggregationNeedFor(aggregationAxis); const comparisonNeed = comparisonNeedFor(action); const rankingNeed = rankingNeedFromRawUtterance(rawUtterance) ?? seededRankingNeed; + const allTimeScopeHint = hasAllTimeScopeHint(rawUtterance); const oneSidedOpenScopeTotalHint = hasOpenScopeOneSidedValueTotalHintUtf8Safe(rawUtterance, action); const openScopeWithoutSubject = subjectCandidates.length === 0 && allowsOpenScopeWithoutSubject({ @@ -279,7 +290,8 @@ function buildAssistantMcpDiscoveryDataNeedGraph(input) { } const timeScopeNeed = timeScopeNeedFor({ family: businessFactFamily, - explicitDateScope + explicitDateScope, + allTimeScopeHint }); if (timeScopeNeed === "period_required" && !explicitDateScope) { pushUnique(clarificationGaps, "period"); @@ -312,6 +324,9 @@ function buildAssistantMcpDiscoveryDataNeedGraph(input) { if (openScopeWithoutSubject && !rankingNeed && !comparisonNeed) { pushReason(reasonCodes, "data_need_graph_open_scope_total_without_subject"); } + if (allTimeScopeHint) { + pushReason(reasonCodes, "data_need_graph_all_time_scope_hint"); + } if (clarificationGaps.includes("organization")) { pushReason(reasonCodes, "data_need_graph_open_scope_total_needs_organization"); } diff --git a/llm_normalizer/backend/dist/services/assistantMcpDiscoveryPilotExecutor.js b/llm_normalizer/backend/dist/services/assistantMcpDiscoveryPilotExecutor.js index dbc44cf..e166ecc 100644 --- a/llm_normalizer/backend/dist/services/assistantMcpDiscoveryPilotExecutor.js +++ b/llm_normalizer/backend/dist/services/assistantMcpDiscoveryPilotExecutor.js @@ -824,6 +824,29 @@ function metadataRouteFamilyForEntitySet(entitySet) { } return null; } +function metadataRouteFamilyForEntitySetRelaxed(entitySet) { + const strict = metadataRouteFamilyForEntitySet(entitySet); + if (strict) { + return strict; + } + const raw = String(entitySet ?? "").trim(); + if (!raw) { + return null; + } + if (raw.includes("Документ") || raw.includes("Документ")) { + return "document_evidence"; + } + if (raw.includes("РегистрНакопления") || + raw.includes("РегистрСведений") || + raw.includes("РегистрНакопления") || + raw.includes("РегистрСведений")) { + return "movement_evidence"; + } + if (raw.includes("Справочник") || raw.includes("Справочник")) { + return "catalog_drilldown"; + } + return null; +} function metadataNextPrimitiveForRouteFamily(routeFamily) { if (routeFamily === "document_evidence") { return "query_documents"; @@ -871,6 +894,50 @@ function metadataObjectsForEntitySet(entitySet, matchedObjects) { } return matchedObjects.filter((item) => item.startsWith(`${entitySet}.`) || item.includes(entitySet)); } +function emptyMetadataSurfaceFamilyScores() { + return { + document_evidence: 0, + movement_evidence: 0, + catalog_drilldown: 0 + }; +} +function metadataSurfaceFamilyScores(matchedObjects) { + const scores = emptyMetadataSurfaceFamilyScores(); + for (const objectName of matchedObjects) { + const entitySet = inferMetadataEntitySetFromObjectName(objectName); + const routeFamily = entitySet ? metadataRouteFamilyForEntitySetRelaxed(entitySet) : null; + if (routeFamily) { + scores[routeFamily] += 1; + } + } + return scores; +} +function metadataObjectsForRouteFamily(routeFamily, matchedObjects) { + if (!routeFamily) { + return []; + } + return matchedObjects.filter((objectName) => { + const entitySet = inferMetadataEntitySetFromObjectName(objectName); + return entitySet ? metadataRouteFamilyForEntitySetRelaxed(entitySet) === routeFamily : false; + }); +} +function selectMetadataRouteFamilyFromSurfaceScores(scores) { + const ranked = Object.entries(scores) + .filter(([, score]) => score > 0) + .sort((left, right) => right[1] - left[1]); + const top = ranked[0]; + const second = ranked[1]; + if (!top) { + return null; + } + if (!second) { + return top[0]; + } + const absoluteMargin = top[1] - second[1]; + const relativeRatio = second[1] > 0 ? top[1] / second[1] : Number.POSITIVE_INFINITY; + const clearlyDominant = absoluteMargin >= 2 || relativeRatio >= 1.5; + return clearlyDominant ? top[0] : null; +} function deriveMetadataSurface(result, metadataScope, requestedMetaTypes) { if (!result || result.error || result.rows.length <= 0) { return null; @@ -888,13 +955,28 @@ function deriveMetadataSurface(result, metadataScope, requestedMetaTypes) { } } const grounding = selectMetadataEntityGrounding(availableEntitySets, requestedMetaTypes); - const downstreamRouteFamily = grounding.selectedEntitySet - ? metadataRouteFamilyForEntitySet(grounding.selectedEntitySet) + const surfaceFamilyScores = metadataSurfaceFamilyScores(matchedObjects); + const selectedEntitySetRouteFamily = grounding.selectedEntitySet + ? metadataRouteFamilyForEntitySetRelaxed(grounding.selectedEntitySet) : null; + const scoredRouteFamily = selectedEntitySetRouteFamily === null ? selectMetadataRouteFamilyFromSurfaceScores(surfaceFamilyScores) : null; + const downstreamRouteFamily = selectedEntitySetRouteFamily ?? scoredRouteFamily; + const routeFamilySelectionBasis = selectedEntitySetRouteFamily + ? "selected_entity_set" + : scoredRouteFamily + ? "dominant_surface_objects" + : null; + const selectedSurfaceObjects = grounding.selectedEntitySet !== null + ? metadataObjectsForEntitySet(grounding.selectedEntitySet, matchedObjects) + : metadataObjectsForRouteFamily(downstreamRouteFamily, matchedObjects); const knownLimitations = []; - if (grounding.ambiguityDetected && grounding.ambiguityEntitySets.length > 0) { + const ambiguityRemainsUnresolved = grounding.ambiguityDetected && !downstreamRouteFamily; + if (ambiguityRemainsUnresolved && grounding.ambiguityEntitySets.length > 0) { knownLimitations.push(`Exact downstream metadata surface remains ambiguous across: ${grounding.ambiguityEntitySets.join(", ")}`); } + if (grounding.ambiguityDetected && downstreamRouteFamily && routeFamilySelectionBasis === "dominant_surface_objects") { + knownLimitations.push(`Metadata surface spans multiple object sets, but dominant confirmed objects point to ${downstreamRouteFamily}`); + } return { metadata_scope: metadataScope, requested_meta_types: requestedMetaTypes, @@ -902,11 +984,13 @@ function deriveMetadataSurface(result, metadataScope, requestedMetaTypes) { available_entity_sets: availableEntitySets, matched_objects: matchedObjects, selected_entity_set: grounding.selectedEntitySet, - selected_surface_objects: metadataObjectsForEntitySet(grounding.selectedEntitySet, matchedObjects), + selected_surface_objects: selectedSurfaceObjects, + surface_family_scores: surfaceFamilyScores, downstream_route_family: downstreamRouteFamily, + route_family_selection_basis: routeFamilySelectionBasis, recommended_next_primitive: metadataNextPrimitiveForRouteFamily(downstreamRouteFamily), - ambiguity_detected: grounding.ambiguityDetected, - ambiguity_entity_sets: grounding.ambiguityEntitySets, + ambiguity_detected: ambiguityRemainsUnresolved, + ambiguity_entity_sets: ambiguityRemainsUnresolved ? grounding.ambiguityEntitySets : [], available_fields: metadataAvailableFields(result.rows), known_limitations: knownLimitations, inference_basis: "confirmed_1c_metadata_surface_rows" @@ -928,13 +1012,18 @@ function buildMetadataConfirmedFacts(surface) { if (surface.selected_surface_objects.length > 0) { facts.push(`Selected metadata objects: ${surface.selected_surface_objects.slice(0, 8).join(", ")}`); } + if (surface.surface_family_scores.document_evidence > 0 || + surface.surface_family_scores.movement_evidence > 0 || + surface.surface_family_scores.catalog_drilldown > 0) { + facts.push(`Metadata surface family scores: document=${surface.surface_family_scores.document_evidence}, movement=${surface.surface_family_scores.movement_evidence}, catalog=${surface.surface_family_scores.catalog_drilldown}`); + } if (surface.available_fields.length > 0) { facts.push(`Available metadata fields/sections: ${surface.available_fields.slice(0, 12).join(", ")}`); } return facts; } function buildMetadataInferredFacts(surface) { - if (!surface || !surface.selected_entity_set || !surface.downstream_route_family || !surface.recommended_next_primitive) { + if (!surface || !surface.downstream_route_family || !surface.recommended_next_primitive) { return []; } return [ @@ -1737,6 +1826,9 @@ async function executeAssistantMcpDiscoveryPilot(planner, deps = DEFAULT_DEPS) { const derivedMetadataSurface = deriveMetadataSurface(metadataResult, metadataScope, requestedMetaTypes); if (derivedMetadataSurface) { pushReason(reasonCodes, "pilot_derived_metadata_surface_from_confirmed_rows"); + if (derivedMetadataSurface.route_family_selection_basis === "dominant_surface_objects") { + pushReason(reasonCodes, "pilot_selected_metadata_route_family_from_dominant_surface_objects"); + } } const evidence = (0, assistantMcpDiscoveryPolicy_1.resolveAssistantMcpDiscoveryEvidence)({ plan: planner.discovery_plan, diff --git a/llm_normalizer/backend/dist/services/assistantMcpDiscoveryPlanner.js b/llm_normalizer/backend/dist/services/assistantMcpDiscoveryPlanner.js index bff526a..113e372 100644 --- a/llm_normalizer/backend/dist/services/assistantMcpDiscoveryPlanner.js +++ b/llm_normalizer/backend/dist/services/assistantMcpDiscoveryPlanner.js @@ -64,35 +64,148 @@ function includesAny(text, tokens) { function isYearDateScope(meaning) { return /^\d{4}$/.test(toNonEmptyString(meaning?.explicit_date_scope) ?? ""); } -function primitivesFromGraphDecomposition(input) { - const decompositionCandidates = input.dataNeedGraph?.decomposition_candidates ?? []; - if (decompositionCandidates.length <= 0) { - return { primitives: input.fallbackPrimitives, reasonCodes: [] }; +function mergeCatalogPrimitivesWithFallback(catalogPrimitives, fallbackPrimitives) { + const result = []; + for (const primitive of fallbackPrimitives) { + if (catalogPrimitives.includes(primitive) && !result.includes(primitive)) { + result.push(primitive); + } } - const searchedPrimitives = (0, assistantMcpCatalogIndex_1.searchAssistantMcpCatalogPrimitivesByDecompositionCandidates)({ - decomposition_candidates: decompositionCandidates, - allow_aggregate_by_axis: input.allowAggregateByAxis + for (const primitive of catalogPrimitives) { + if (!result.includes(primitive)) { + result.push(primitive); + } + } + for (const primitive of fallbackPrimitives) { + if (!result.includes(primitive)) { + result.push(primitive); + } + } + return result; +} +function preferredPrimitiveFromMetadataSurface(surface) { + const recommendedPrimitive = surface?.recommended_next_primitive ?? null; + if (recommendedPrimitive) { + return recommendedPrimitive; + } + if (surface?.ambiguity_detected) { + return null; + } + if (surface?.downstream_route_family === "document_evidence") { + return "query_documents"; + } + if (surface?.downstream_route_family === "movement_evidence") { + return "query_movements"; + } + if (surface?.downstream_route_family === "catalog_drilldown") { + return "drilldown_related_objects"; + } + return null; +} +function filterCatalogPrimitivesByMetadataSurface(input) { + const preferredPrimitive = preferredPrimitiveFromMetadataSurface(input.metadataSurface); + const reasonCodes = []; + if (!preferredPrimitive || + input.metadataSurface?.ambiguity_detected || + !input.fallbackPrimitives.includes(preferredPrimitive)) { + return { + primitives: input.catalogPrimitives, + reasonCodes + }; + } + const laneSensitivePrimitives = new Set([ + "query_documents", + "query_movements", + "drilldown_related_objects" + ]); + const filteredPrimitives = input.catalogPrimitives.filter((primitive) => !laneSensitivePrimitives.has(primitive) || primitive === preferredPrimitive); + if (filteredPrimitives.length !== input.catalogPrimitives.length) { + reasonCodes.push("planner_filtered_catalog_primitives_by_confirmed_metadata_surface"); + } + if ((filteredPrimitives.includes(preferredPrimitive) || input.fallbackPrimitives.includes(preferredPrimitive)) && + input.metadataSurface?.selected_surface_objects.length) { + reasonCodes.push("planner_surface_aware_next_lane_from_confirmed_metadata_objects"); + } + return { + primitives: filteredPrimitives, + reasonCodes + }; +} +function selectPrimitivesFromGraphAndCatalog(input) { + const reasonCodes = []; + const decompositionCandidates = input.dataNeedGraph?.decomposition_candidates ?? []; + const decompositionPrimitives = decompositionCandidates.length > 0 + ? (0, assistantMcpCatalogIndex_1.searchAssistantMcpCatalogPrimitivesByDecompositionCandidates)({ + decomposition_candidates: decompositionCandidates, + allow_aggregate_by_axis: input.allowAggregateByAxis + }) + : []; + if (decompositionPrimitives.length > 0) { + reasonCodes.push("planner_selected_catalog_primitives_from_decomposition_candidates"); + } + const metadataSurfacePrimitives = input.metadataSurface + ? (0, assistantMcpCatalogIndex_1.searchAssistantMcpCatalogPrimitivesByMetadataSurface)({ + downstream_route_family: input.metadataSurface.downstream_route_family, + selected_entity_set: input.metadataSurface.selected_entity_set, + selected_surface_objects: input.metadataSurface.selected_surface_objects, + recommended_next_primitive: input.metadataSurface.recommended_next_primitive, + required_axes: input.requiredAxes, + allow_aggregate_by_axis: input.allowAggregateByAxis + }) + : []; + if (metadataSurfacePrimitives.length > 0) { + reasonCodes.push("planner_selected_catalog_primitives_from_metadata_surface_search"); + } + const factAxisPrimitives = input.dataNeedGraph + ? (0, assistantMcpCatalogIndex_1.searchAssistantMcpCatalogPrimitivesByFactAxis)({ + business_fact_family: input.dataNeedGraph.business_fact_family, + action_family: input.actionFamily ?? input.dataNeedGraph.action_family, + required_axes: input.requiredAxes, + comparison_need: input.dataNeedGraph.comparison_need, + ranking_need: input.dataNeedGraph.ranking_need, + aggregation_need: input.dataNeedGraph.aggregation_need, + has_subject_candidates: hasSubjectCandidates(input.dataNeedGraph), + allow_aggregate_by_axis: input.allowAggregateByAxis + }) + : []; + if (factAxisPrimitives.length > 0) { + reasonCodes.push("planner_selected_catalog_primitives_from_fact_axis_search"); + } + const combinedCatalogPrimitives = []; + for (const primitive of decompositionPrimitives) { + if (!combinedCatalogPrimitives.includes(primitive)) { + combinedCatalogPrimitives.push(primitive); + } + } + for (const primitive of metadataSurfacePrimitives) { + if (!combinedCatalogPrimitives.includes(primitive)) { + combinedCatalogPrimitives.push(primitive); + } + } + for (const primitive of factAxisPrimitives) { + if (!combinedCatalogPrimitives.includes(primitive)) { + combinedCatalogPrimitives.push(primitive); + } + } + const filteredCatalogPrimitives = filterCatalogPrimitivesByMetadataSurface({ + catalogPrimitives: combinedCatalogPrimitives, + fallbackPrimitives: input.fallbackPrimitives, + metadataSurface: input.metadataSurface }); - if (searchedPrimitives.length <= 0) { + reasonCodes.push(...filteredCatalogPrimitives.reasonCodes); + if (filteredCatalogPrimitives.primitives.length <= 0) { return { primitives: input.fallbackPrimitives, reasonCodes: ["planner_fell_back_to_recipe_primitives_after_empty_catalog_search"] }; } - const mergedPrimitives = [...searchedPrimitives]; - for (const primitive of input.fallbackPrimitives) { - if (!mergedPrimitives.includes(primitive)) { - mergedPrimitives.push(primitive); - } + const mergedPrimitives = mergeCatalogPrimitivesWithFallback(filteredCatalogPrimitives.primitives, input.fallbackPrimitives); + if (input.fallbackPrimitives.some((primitive) => !filteredCatalogPrimitives.primitives.includes(primitive))) { + reasonCodes.push("planner_completed_catalog_searched_chain_with_recipe_primitives"); } return { primitives: mergedPrimitives, - reasonCodes: mergedPrimitives.length === searchedPrimitives.length - ? ["planner_selected_catalog_primitives_from_decomposition_candidates"] - : [ - "planner_selected_catalog_primitives_from_decomposition_candidates", - "planner_completed_catalog_searched_chain_with_recipe_primitives" - ] + reasonCodes }; } function budgetOverrideFor(input, recipe) { @@ -110,6 +223,36 @@ function budgetOverrideFor(input, recipe) { } return {}; } +function routeFamilyFromThinMetadataSurfaceInput(input) { + const surface = input.metadataSurface ?? null; + if (!surface || surface.ambiguity_detected || !surface.downstream_route_family || !surface.recommended_next_primitive) { + return null; + } + const meaning = input.turnMeaning ?? null; + const dataNeedGraph = input.dataNeedGraph ?? null; + const graphFactFamily = lower(dataNeedGraph?.business_fact_family); + const domain = lower(meaning?.asked_domain_family); + const action = lower(meaning?.asked_action_family); + const unsupported = lower(meaning?.unsupported_but_understood_family); + const semanticNeed = lower(input.semanticDataNeed); + const combined = `${domain} ${action} ${unsupported} ${semanticNeed}`.trim(); + const explicitlyOtherFamily = includesAny(combined, ["value_flow", "turnover", "revenue", "payment", "payout", "net", "lifecycle", "activity", "duration", "metadata lane clarification"]); + if (explicitlyOtherFamily) { + return null; + } + if (graphFactFamily === "document_evidence" || includesAny(combined, ["document", "documents", "list_documents"])) { + return surface.downstream_route_family === "document_evidence" ? "document_evidence" : null; + } + if (graphFactFamily === "movement_evidence" || includesAny(combined, ["movement", "movements", "list_movements", "bank_operations"])) { + return surface.downstream_route_family === "movement_evidence" ? "movement_evidence" : null; + } + if (!graphFactFamily && !domain && !action) { + if (surface.downstream_route_family === "document_evidence" || surface.downstream_route_family === "movement_evidence") { + return surface.downstream_route_family; + } + } + return null; +} function recipeFor(input) { const meaning = input.turnMeaning ?? null; const dataNeedGraph = input.dataNeedGraph ?? null; @@ -139,18 +282,60 @@ function recipeFor(input) { reason: "planner_selected_metadata_lane_clarification_from_data_need_graph" }; } + const thinSurfaceRouteFamily = routeFamilyFromThinMetadataSurfaceInput(input); + if (thinSurfaceRouteFamily === "document_evidence") { + pushUnique(axes, "coverage_target"); + const primitiveSelection = selectPrimitivesFromGraphAndCatalog({ + dataNeedGraph, + fallbackPrimitives: ["resolve_entity_reference", "query_documents", "probe_coverage"], + requiredAxes: axes, + metadataSurface: input.metadataSurface, + actionFamily: action + }); + return { + semanticDataNeed: "document evidence", + chainId: "document_evidence", + chainSummary: "Ground the next checked document lane from the confirmed metadata surface, then fetch scoped document rows and probe coverage before answering.", + primitives: primitiveSelection.primitives, + axes, + reason: "planner_selected_document_from_confirmed_metadata_surface_ref", + extraReasons: primitiveSelection.reasonCodes + }; + } + if (thinSurfaceRouteFamily === "movement_evidence") { + pushUnique(axes, "coverage_target"); + const primitiveSelection = selectPrimitivesFromGraphAndCatalog({ + dataNeedGraph, + fallbackPrimitives: ["resolve_entity_reference", "query_movements", "probe_coverage"], + requiredAxes: axes, + metadataSurface: input.metadataSurface, + actionFamily: action + }); + return { + semanticDataNeed: "movement evidence", + chainId: "movement_evidence", + chainSummary: "Ground the next checked movement lane from the confirmed metadata surface, then fetch scoped movement rows and probe coverage before answering.", + primitives: primitiveSelection.primitives, + axes, + reason: "planner_selected_movement_from_confirmed_metadata_surface_ref", + extraReasons: primitiveSelection.reasonCodes + }; + } if (graphFactFamily === "value_flow") { if (dataNeedGraph?.comparison_need === "incoming_vs_outgoing" && !hasSubjectCandidates(dataNeedGraph)) { - const primitiveSelection = primitivesFromGraphDecomposition({ - dataNeedGraph, - fallbackPrimitives: ["query_movements", "probe_coverage"], - allowAggregateByAxis: false - }); pushUnique(axes, "amount"); pushUnique(axes, "coverage_target"); if (requestedAggregationAxis === "month" || graphAggregation === "by_month") { pushUnique(axes, "calendar_month"); } + const primitiveSelection = selectPrimitivesFromGraphAndCatalog({ + dataNeedGraph, + fallbackPrimitives: ["query_movements", "probe_coverage"], + requiredAxes: axes, + metadataSurface: input.metadataSurface, + actionFamily: action, + allowAggregateByAxis: false + }); return { semanticDataNeed: "bidirectional value-flow comparison evidence", chainId: "value_flow_comparison", @@ -162,14 +347,17 @@ function recipeFor(input) { }; } if (dataNeedGraph?.ranking_need && !hasSubjectCandidates(dataNeedGraph)) { - const primitiveSelection = primitivesFromGraphDecomposition({ - dataNeedGraph, - fallbackPrimitives: ["query_movements", "aggregate_by_axis", "probe_coverage"], - allowAggregateByAxis: true - }); pushUnique(axes, "aggregate_axis"); pushUnique(axes, "amount"); pushUnique(axes, "coverage_target"); + const primitiveSelection = selectPrimitivesFromGraphAndCatalog({ + dataNeedGraph, + fallbackPrimitives: ["query_movements", "aggregate_by_axis", "probe_coverage"], + requiredAxes: axes, + metadataSurface: input.metadataSurface, + actionFamily: action, + allowAggregateByAxis: true + }); return { semanticDataNeed: "ranked value-flow evidence", chainId: "value_flow_ranking", @@ -183,11 +371,6 @@ function recipeFor(input) { }; } if (openScopeTotalWithoutSubject) { - const primitiveSelection = primitivesFromGraphDecomposition({ - dataNeedGraph, - fallbackPrimitives: ["query_movements", "aggregate_by_axis", "probe_coverage"], - allowAggregateByAxis: true - }); pushUnique(axes, "organization"); pushUnique(axes, "aggregate_axis"); pushUnique(axes, "amount"); @@ -195,6 +378,13 @@ function recipeFor(input) { if (requestedAggregationAxis === "month" || graphAggregation === "by_month") { pushUnique(axes, "calendar_month"); } + const primitiveSelection = selectPrimitivesFromGraphAndCatalog({ + dataNeedGraph, + fallbackPrimitives: ["query_movements", "aggregate_by_axis", "probe_coverage"], + requiredAxes: axes, + actionFamily: action, + allowAggregateByAxis: true + }); return { semanticDataNeed: "organization-scoped value-flow evidence", chainId: "value_flow", @@ -207,17 +397,20 @@ function recipeFor(input) { extraReasons: primitiveSelection.reasonCodes }; } - const primitiveSelection = primitivesFromGraphDecomposition({ - dataNeedGraph, - fallbackPrimitives: ["resolve_entity_reference", "query_movements", "aggregate_by_axis", "probe_coverage"], - allowAggregateByAxis: true - }); pushUnique(axes, "aggregate_axis"); pushUnique(axes, "amount"); pushUnique(axes, "coverage_target"); if (requestedAggregationAxis === "month" || graphAggregation === "by_month") { pushUnique(axes, "calendar_month"); } + const primitiveSelection = selectPrimitivesFromGraphAndCatalog({ + dataNeedGraph, + fallbackPrimitives: ["resolve_entity_reference", "query_movements", "aggregate_by_axis", "probe_coverage"], + requiredAxes: axes, + metadataSurface: input.metadataSurface, + actionFamily: action, + allowAggregateByAxis: true + }); return { semanticDataNeed: "counterparty value-flow evidence", chainId: "value_flow", @@ -231,13 +424,16 @@ function recipeFor(input) { }; } if (graphFactFamily === "activity_lifecycle") { - const primitiveSelection = primitivesFromGraphDecomposition({ - dataNeedGraph, - fallbackPrimitives: ["resolve_entity_reference", "query_documents", "probe_coverage", "explain_evidence_basis"] - }); pushUnique(axes, "document_date"); pushUnique(axes, "coverage_target"); pushUnique(axes, "evidence_basis"); + const primitiveSelection = selectPrimitivesFromGraphAndCatalog({ + dataNeedGraph, + fallbackPrimitives: ["resolve_entity_reference", "query_documents", "probe_coverage", "explain_evidence_basis"], + requiredAxes: axes, + metadataSurface: input.metadataSurface, + actionFamily: action + }); return { semanticDataNeed: "counterparty lifecycle evidence", chainId: "lifecycle", @@ -249,11 +445,14 @@ function recipeFor(input) { }; } if (graphFactFamily === "schema_surface") { - const primitiveSelection = primitivesFromGraphDecomposition({ - dataNeedGraph, - fallbackPrimitives: ["inspect_1c_metadata"] - }); pushUnique(axes, "metadata_scope"); + const primitiveSelection = selectPrimitivesFromGraphAndCatalog({ + dataNeedGraph, + fallbackPrimitives: ["inspect_1c_metadata"], + requiredAxes: axes, + metadataSurface: input.metadataSurface, + actionFamily: action + }); return { semanticDataNeed: "1C metadata evidence", chainId: "metadata_inspection", @@ -265,11 +464,14 @@ function recipeFor(input) { }; } if (graphFactFamily === "movement_evidence") { - const primitiveSelection = primitivesFromGraphDecomposition({ - dataNeedGraph, - fallbackPrimitives: ["resolve_entity_reference", "query_movements", "probe_coverage"] - }); pushUnique(axes, "coverage_target"); + const primitiveSelection = selectPrimitivesFromGraphAndCatalog({ + dataNeedGraph, + fallbackPrimitives: ["resolve_entity_reference", "query_movements", "probe_coverage"], + requiredAxes: axes, + metadataSurface: input.metadataSurface, + actionFamily: action + }); return { semanticDataNeed: "movement evidence", chainId: "movement_evidence", @@ -281,11 +483,14 @@ function recipeFor(input) { }; } if (graphFactFamily === "document_evidence") { - const primitiveSelection = primitivesFromGraphDecomposition({ - dataNeedGraph, - fallbackPrimitives: ["resolve_entity_reference", "query_documents", "probe_coverage"] - }); pushUnique(axes, "coverage_target"); + const primitiveSelection = selectPrimitivesFromGraphAndCatalog({ + dataNeedGraph, + fallbackPrimitives: ["resolve_entity_reference", "query_documents", "probe_coverage"], + requiredAxes: axes, + metadataSurface: input.metadataSurface, + actionFamily: action + }); return { semanticDataNeed: "document evidence", chainId: "document_evidence", @@ -297,12 +502,15 @@ function recipeFor(input) { }; } if (graphFactFamily === "entity_grounding" || (!graphFactFamily && (dataNeedGraph?.subject_candidates.length ?? 0) > 0)) { - const primitiveSelection = primitivesFromGraphDecomposition({ - dataNeedGraph, - fallbackPrimitives: ["search_business_entity", "resolve_entity_reference", "probe_coverage"] - }); pushUnique(axes, "business_entity"); pushUnique(axes, "coverage_target"); + const primitiveSelection = selectPrimitivesFromGraphAndCatalog({ + dataNeedGraph, + fallbackPrimitives: ["search_business_entity", "resolve_entity_reference", "probe_coverage"], + requiredAxes: axes, + metadataSurface: input.metadataSurface, + actionFamily: action + }); return { semanticDataNeed: "entity discovery evidence", chainId: "entity_resolution", @@ -425,6 +633,7 @@ function planAssistantMcpDiscovery(input) { const budgetOverride = budgetOverrideFor(input, recipe); const semanticDataNeed = toNonEmptyString(input.semanticDataNeed) ?? recipe.semanticDataNeed; const dataNeedGraph = input.dataNeedGraph ?? null; + const metadataSurface = input.metadataSurface ?? null; const reasonCodes = []; pushReason(reasonCodes, recipe.reason); for (const reason of recipe.extraReasons ?? []) { @@ -433,6 +642,9 @@ function planAssistantMcpDiscovery(input) { if (dataNeedGraph) { pushReason(reasonCodes, "planner_consumed_data_need_graph_v1"); } + if (metadataSurface) { + pushReason(reasonCodes, "planner_consumed_metadata_surface_ref_v1"); + } if (budgetOverride.maxProbeCount) { pushReason(reasonCodes, "planner_enabled_chunked_coverage_probe_budget"); } @@ -480,6 +692,7 @@ function planAssistantMcpDiscovery(input) { planner_status: plannerStatus, semantic_data_need: semanticDataNeed, data_need_graph: dataNeedGraph, + metadata_surface_ref: metadataSurface, selected_chain_id: recipe.chainId, selected_chain_summary: recipe.chainSummary, proposed_primitives: recipe.primitives, diff --git a/llm_normalizer/backend/dist/services/assistantMcpDiscoveryRuntimeBridge.js b/llm_normalizer/backend/dist/services/assistantMcpDiscoveryRuntimeBridge.js index 5a4bcec..164f41e 100644 --- a/llm_normalizer/backend/dist/services/assistantMcpDiscoveryRuntimeBridge.js +++ b/llm_normalizer/backend/dist/services/assistantMcpDiscoveryRuntimeBridge.js @@ -52,6 +52,7 @@ async function runAssistantMcpDiscoveryRuntimeBridge(input) { const planner = (0, assistantMcpDiscoveryPlanner_1.planAssistantMcpDiscovery)({ semanticDataNeed: input.semanticDataNeed, dataNeedGraph: input.dataNeedGraph, + metadataSurface: input.metadataSurface, turnMeaning: input.turnMeaning }); const pilot = await (0, assistantMcpDiscoveryPilotExecutor_1.executeAssistantMcpDiscoveryPilot)(planner, input.deps); diff --git a/llm_normalizer/backend/dist/services/assistantMcpDiscoveryRuntimeEntryPoint.js b/llm_normalizer/backend/dist/services/assistantMcpDiscoveryRuntimeEntryPoint.js index ec1f90e..f04d016 100644 --- a/llm_normalizer/backend/dist/services/assistantMcpDiscoveryRuntimeEntryPoint.js +++ b/llm_normalizer/backend/dist/services/assistantMcpDiscoveryRuntimeEntryPoint.js @@ -63,6 +63,7 @@ async function runAssistantMcpDiscoveryRuntimeEntryPoint(input) { const bridge = await (0, assistantMcpDiscoveryRuntimeBridge_1.runAssistantMcpDiscoveryRuntimeBridge)({ semanticDataNeed: turnInput.semantic_data_need, dataNeedGraph: turnInput.data_need_graph, + metadataSurface: turnInput.metadata_surface_ref, turnMeaning: turnInput.turn_meaning_ref, deps: input.deps }); diff --git a/llm_normalizer/backend/dist/services/assistantMcpDiscoveryTurnInputAdapter.js b/llm_normalizer/backend/dist/services/assistantMcpDiscoveryTurnInputAdapter.js index f37eb9f..d33f624 100644 --- a/llm_normalizer/backend/dist/services/assistantMcpDiscoveryTurnInputAdapter.js +++ b/llm_normalizer/backend/dist/services/assistantMcpDiscoveryTurnInputAdapter.js @@ -145,6 +145,24 @@ function collectDateScopeFromFilters(filters) { } return periodFrom ?? periodTo ?? null; } +function normalizeMetadataRouteFamily(value) { + const text = toNonEmptyString(value); + if (text === "document_evidence" || text === "movement_evidence" || text === "catalog_drilldown") { + return text; + } + return null; +} +function normalizeMetadataRecommendedPrimitive(value) { + const text = toNonEmptyString(value); + if (text === "query_documents" || text === "query_movements" || text === "drilldown_related_objects") { + return text; + } + return null; +} +function normalizeMetadataRouteFamilySelectionBasis(value) { + const text = toNonEmptyString(value); + return text === "selected_entity_set" || text === "dominant_surface_objects" ? text : null; +} function mapPilotScopeToFollowupMeaning(pilotScope) { if (pilotScope === "counterparty_lifecycle_query_documents_v1") { return { @@ -273,12 +291,39 @@ function collectFollowupDiscoverySeed(followupContext) { rankingNeed: toNonEmptyString(followupContext?.previous_discovery_ranking_need), organization, dateScope, - metadataRouteFamily: toNonEmptyString(followupContext?.previous_discovery_metadata_route_family), + metadataRouteFamily: normalizeMetadataRouteFamily(followupContext?.previous_discovery_metadata_route_family), + metadataRouteFamilySelectionBasis: normalizeMetadataRouteFamilySelectionBasis(followupContext?.previous_discovery_metadata_route_family_selection_basis), metadataSelectedEntitySet: toNonEmptyString(followupContext?.previous_discovery_metadata_selected_entity_set), + metadataSelectedSurfaceObjects: collectEntityCandidates(followupContext?.previous_discovery_metadata_selected_surface_objects), + metadataRecommendedNextPrimitive: normalizeMetadataRecommendedPrimitive(followupContext?.previous_discovery_metadata_recommended_next_primitive), metadataAmbiguityDetected: followupContext?.previous_discovery_metadata_ambiguity_detected === true, metadataAmbiguityEntitySets: collectEntityCandidates(followupContext?.previous_discovery_metadata_ambiguity_entity_sets) }; } +function buildMetadataSurfaceRef(followupSeed) { + if (followupSeed.pilotScope !== "metadata_inspection_v1") { + return null; + } + const hasPayload = Boolean(followupSeed.metadataRouteFamily || + followupSeed.metadataSelectedEntitySet || + followupSeed.metadataRecommendedNextPrimitive || + followupSeed.metadataRouteFamilySelectionBasis) || + followupSeed.metadataSelectedSurfaceObjects.length > 0 || + followupSeed.metadataAmbiguityDetected || + followupSeed.metadataAmbiguityEntitySets.length > 0; + if (!hasPayload) { + return null; + } + return { + selected_entity_set: followupSeed.metadataSelectedEntitySet, + selected_surface_objects: followupSeed.metadataSelectedSurfaceObjects, + downstream_route_family: followupSeed.metadataRouteFamily, + route_family_selection_basis: followupSeed.metadataRouteFamilySelectionBasis, + recommended_next_primitive: followupSeed.metadataRecommendedNextPrimitive, + ambiguity_detected: followupSeed.metadataAmbiguityDetected, + ambiguity_entity_sets: followupSeed.metadataAmbiguityEntitySets + }; +} function metadataEntitySetsSuggestDocumentLane(values) { return values.some((value) => /(?:документ|document|invoice|waybill|накладн|счет[- ]?фактур|акт)/iu.test(value)); } @@ -295,7 +340,7 @@ function hasLifecycleSignal(text) { return /(?:сколько\s+лет|как\s+давно|давно\s+ли|возраст|перв(?:ая|ый)\s+актив|когда\s+начал|когда\s+появ|lifecycle|activity\s+duration|business\s+age|how\s+long)/iu.test(text); } function hasValueFlowSignal(text) { - return /(?:оборот|выручк|оплат|плат[её]ж|заплат|перечисл|списан|расход|исходящ|входящ|получ(?:ил|ено|ен)|поступил|поступлен|денежн[а-яёa-z0-9_-]*\s+поток|supplier|value[-\s]?flow|turnover|revenue|payment|payout|outflow|cash\s+flow)/iu.test(text); + return /(?:оборот|выручк|оплат|плат[её]ж|заплат|перечисл|списан|расход|исходящ|входящ|получ(?:ил|ено|ен)|поступил|поступлен|денежн[а-яёa-z0-9_-]*\s+поток|(? 0) { pushReason(reasonCodes, "mcp_discovery_entity_scope_available"); } @@ -1168,6 +1234,7 @@ function buildAssistantMcpDiscoveryTurnInput(input) { should_run_discovery: runDiscovery, semantic_data_need: runDiscovery ? semanticDataNeed : null, data_need_graph: dataNeedGraph, + metadata_surface_ref: runDiscovery ? metadataSurfaceRef : null, turn_meaning_ref: runDiscovery && hasTurnMeaning ? cleanTurnMeaning : null, source_signal: sourceSignal, reason_codes: reasonCodes diff --git a/llm_normalizer/backend/dist/services/assistantTransitionPolicy.js b/llm_normalizer/backend/dist/services/assistantTransitionPolicy.js index 631cae6..bdebb69 100644 --- a/llm_normalizer/backend/dist/services/assistantTransitionPolicy.js +++ b/llm_normalizer/backend/dist/services/assistantTransitionPolicy.js @@ -500,7 +500,10 @@ function createAssistantTransitionPolicy(deps) { const sourceIntent = (0, assistantContinuityPolicy_1.readAddressDebugIntent)(carryoverSourceDebug, deps.toNonEmptyString); const sourceDiscoveryPilotScope = sourceDiscoveryPilotScopeHint; const sourceDiscoveryMetadataRouteFamily = (0, assistantContinuityPolicy_1.readAssistantMcpDiscoveryMetadataRouteFamily)(carryoverSourceDebug, deps.toNonEmptyString); + const sourceDiscoveryMetadataRouteFamilySelectionBasis = (0, assistantContinuityPolicy_1.readAssistantMcpDiscoveryMetadataRouteFamilySelectionBasis)(carryoverSourceDebug, deps.toNonEmptyString); const sourceDiscoveryMetadataSelectedEntitySet = (0, assistantContinuityPolicy_1.readAssistantMcpDiscoveryMetadataSelectedEntitySet)(carryoverSourceDebug, deps.toNonEmptyString); + const sourceDiscoveryMetadataSelectedSurfaceObjects = (0, assistantContinuityPolicy_1.readAssistantMcpDiscoveryMetadataSelectedSurfaceObjects)(carryoverSourceDebug, deps.toNonEmptyString); + const sourceDiscoveryMetadataRecommendedNextPrimitive = (0, assistantContinuityPolicy_1.readAssistantMcpDiscoveryMetadataRecommendedNextPrimitive)(carryoverSourceDebug, deps.toNonEmptyString); const sourceDiscoveryMetadataAmbiguityDetected = (0, assistantContinuityPolicy_1.readAssistantMcpDiscoveryMetadataAmbiguityDetected)(carryoverSourceDebug); const sourceDiscoveryMetadataAmbiguityEntitySets = (0, assistantContinuityPolicy_1.readAssistantMcpDiscoveryMetadataAmbiguityEntitySets)(carryoverSourceDebug, deps.toNonEmptyString); const sourceDiscoveryEntityResolutionStatus = (0, assistantContinuityPolicy_1.readAssistantMcpDiscoveryEntityResolutionStatus)(carryoverSourceDebug, deps.toNonEmptyString); @@ -748,7 +751,12 @@ function createAssistantTransitionPolicy(deps) { ? sourceDiscoveryEntityAmbiguityCandidates : undefined, previous_discovery_metadata_route_family: sourceDiscoveryMetadataRouteFamily ?? undefined, + previous_discovery_metadata_route_family_selection_basis: sourceDiscoveryMetadataRouteFamilySelectionBasis ?? undefined, previous_discovery_metadata_selected_entity_set: sourceDiscoveryMetadataSelectedEntitySet ?? undefined, + previous_discovery_metadata_selected_surface_objects: sourceDiscoveryMetadataSelectedSurfaceObjects.length > 0 + ? sourceDiscoveryMetadataSelectedSurfaceObjects + : undefined, + previous_discovery_metadata_recommended_next_primitive: sourceDiscoveryMetadataRecommendedNextPrimitive ?? undefined, previous_discovery_metadata_ambiguity_detected: sourceDiscoveryMetadataAmbiguityDetected || undefined, previous_discovery_metadata_ambiguity_entity_sets: sourceDiscoveryMetadataAmbiguityEntitySets.length > 0 ? sourceDiscoveryMetadataAmbiguityEntitySets : undefined, resolved_counterparty_from_display: resolvedCounterpartyFromDisplay || undefined, diff --git a/llm_normalizer/backend/src/services/assistantContinuityPolicy.ts b/llm_normalizer/backend/src/services/assistantContinuityPolicy.ts index 0158a07..d20f7d8 100644 --- a/llm_normalizer/backend/src/services/assistantContinuityPolicy.ts +++ b/llm_normalizer/backend/src/services/assistantContinuityPolicy.ts @@ -270,6 +270,13 @@ export function readAssistantMcpDiscoveryMetadataRouteFamily( return toNonEmptyString(readAssistantMcpDiscoveryDerivedMetadataSurface(debug)?.downstream_route_family); } +export function readAssistantMcpDiscoveryMetadataRouteFamilySelectionBasis( + debug: Record | null, + toNonEmptyString: (value: unknown) => string | null = fallbackToNonEmptyString +): string | null { + return toNonEmptyString(readAssistantMcpDiscoveryDerivedMetadataSurface(debug)?.route_family_selection_basis); +} + export function readAssistantMcpDiscoveryMetadataSelectedEntitySet( debug: Record | null, toNonEmptyString: (value: unknown) => string | null = fallbackToNonEmptyString @@ -277,6 +284,24 @@ export function readAssistantMcpDiscoveryMetadataSelectedEntitySet( return toNonEmptyString(readAssistantMcpDiscoveryDerivedMetadataSurface(debug)?.selected_entity_set); } +export function readAssistantMcpDiscoveryMetadataSelectedSurfaceObjects( + debug: Record | null, + toNonEmptyString: (value: unknown) => string | null = fallbackToNonEmptyString +): string[] { + const values = readAssistantMcpDiscoveryDerivedMetadataSurface(debug)?.selected_surface_objects; + if (!Array.isArray(values)) { + return []; + } + return values.map((item) => toNonEmptyString(item)).filter((item): item is string => Boolean(item)); +} + +export function readAssistantMcpDiscoveryMetadataRecommendedNextPrimitive( + debug: Record | null, + toNonEmptyString: (value: unknown) => string | null = fallbackToNonEmptyString +): string | null { + return toNonEmptyString(readAssistantMcpDiscoveryDerivedMetadataSurface(debug)?.recommended_next_primitive); +} + export function readAssistantMcpDiscoveryMetadataAmbiguityDetected( debug: Record | null ): boolean { diff --git a/llm_normalizer/backend/src/services/assistantMcpCatalogIndex.ts b/llm_normalizer/backend/src/services/assistantMcpCatalogIndex.ts index c85aad2..0c3e4b2 100644 --- a/llm_normalizer/backend/src/services/assistantMcpCatalogIndex.ts +++ b/llm_normalizer/backend/src/services/assistantMcpCatalogIndex.ts @@ -14,6 +14,9 @@ export interface AssistantMcpCatalogPrimitiveContract { primitive_id: AssistantMcpDiscoveryPrimitive; purpose: string; decomposition_hints: string[]; + supported_fact_families: string[]; + supported_action_families: string[]; + planning_tags: string[]; required_axes_any_of: string[][]; optional_axes: string[]; output_fact_kinds: string[]; @@ -45,6 +48,9 @@ const PRIMITIVE_CONTRACTS: AssistantMcpCatalogPrimitiveContract[] = [ primitive_id: "search_business_entity", purpose: "Find candidate 1C business entities by user wording before a fact query is executed.", decomposition_hints: ["search_business_entity"], + supported_fact_families: ["entity_grounding"], + supported_action_families: ["search_business_entity"], + planning_tags: ["subject_resolution"], required_axes_any_of: [["business_entity"], ["counterparty"], ["organization"], ["contract"], ["item"]], optional_axes: ["period", "document", "account"], output_fact_kinds: ["entity_candidates", "entity_ambiguity"], @@ -56,6 +62,9 @@ const PRIMITIVE_CONTRACTS: AssistantMcpCatalogPrimitiveContract[] = [ primitive_id: "inspect_1c_metadata", purpose: "Inspect available 1C schema/catalog/document/register surface before selecting a query lane.", decomposition_hints: ["inspect_metadata_surface"], + supported_fact_families: ["schema_surface"], + supported_action_families: ["inspect_catalog", "inspect_documents", "inspect_registers", "inspect_fields", "inspect_surface"], + planning_tags: ["metadata", "surface_inspection"], required_axes_any_of: [["metadata_scope"], ["domain_family"], ["document"], ["register"]], optional_axes: ["business_entity", "account", "counterparty"], output_fact_kinds: ["available_fields", "available_entity_sets", "known_limitations"], @@ -67,6 +76,9 @@ const PRIMITIVE_CONTRACTS: AssistantMcpCatalogPrimitiveContract[] = [ primitive_id: "resolve_entity_reference", purpose: "Resolve a user-visible entity name to a concrete 1C reference candidate.", decomposition_hints: ["resolve_entity_reference"], + supported_fact_families: ["entity_grounding", "value_flow", "document_evidence", "movement_evidence", "activity_lifecycle"], + supported_action_families: ["search_business_entity", "turnover", "payout", "net_value_flow", "list_documents", "list_movements", "activity_duration"], + planning_tags: ["subject_resolution"], required_axes_any_of: [["business_entity"], ["counterparty"], ["organization"], ["contract"], ["item"]], optional_axes: ["period", "inn", "document"], output_fact_kinds: ["resolved_entity_ref", "entity_conflict"], @@ -83,6 +95,9 @@ const PRIMITIVE_CONTRACTS: AssistantMcpCatalogPrimitiveContract[] = [ "collect_outgoing_movements", "fetch_scoped_movements" ], + supported_fact_families: ["value_flow", "movement_evidence"], + supported_action_families: ["turnover", "payout", "net_value_flow", "list_movements"], + planning_tags: ["movement", "comparison", "ranking", "aggregation", "monthly_aggregation"], required_axes_any_of: [["period", "account"], ["period", "counterparty"], ["period", "organization"]], optional_axes: ["contract", "document", "amount", "item", "warehouse"], output_fact_kinds: ["movement_rows", "turnover", "balance_delta"], @@ -94,6 +109,9 @@ const PRIMITIVE_CONTRACTS: AssistantMcpCatalogPrimitiveContract[] = [ primitive_id: "query_documents", purpose: "Fetch documents related to a scoped entity, period, contract, or movement explanation.", decomposition_hints: ["fetch_scoped_documents", "fetch_supporting_documents"], + supported_fact_families: ["document_evidence", "activity_lifecycle"], + supported_action_families: ["list_documents", "activity_duration"], + planning_tags: ["document"], required_axes_any_of: [["document"], ["counterparty"], ["contract"], ["period", "organization"]], optional_axes: ["account", "amount", "item", "warehouse"], output_fact_kinds: ["document_rows", "document_dates", "document_amounts"], @@ -105,6 +123,9 @@ const PRIMITIVE_CONTRACTS: AssistantMcpCatalogPrimitiveContract[] = [ primitive_id: "aggregate_by_axis", purpose: "Aggregate already-scoped 1C evidence by a business axis such as counterparty, contract, or period.", decomposition_hints: ["aggregate_checked_amounts", "aggregate_ranked_axis_values", "aggregate_by_month"], + supported_fact_families: ["value_flow"], + supported_action_families: ["turnover", "payout", "net_value_flow"], + planning_tags: ["aggregation", "ranking", "monthly_aggregation"], required_axes_any_of: [["aggregate_axis", "period"], ["aggregate_axis", "counterparty"], ["aggregate_axis", "account"]], optional_axes: ["organization", "contract", "document", "amount"], output_fact_kinds: ["aggregate_totals", "ranked_axis_values"], @@ -116,6 +137,9 @@ const PRIMITIVE_CONTRACTS: AssistantMcpCatalogPrimitiveContract[] = [ primitive_id: "drilldown_related_objects", purpose: "Drill from a known entity or document into related contracts, documents, movements, or payments.", decomposition_hints: ["drilldown_related_objects"], + supported_fact_families: [], + supported_action_families: [], + planning_tags: ["drilldown"], required_axes_any_of: [["business_entity"], ["document"], ["contract"], ["counterparty"]], optional_axes: ["period", "account", "amount"], output_fact_kinds: ["related_objects", "relationship_edges"], @@ -127,6 +151,29 @@ const PRIMITIVE_CONTRACTS: AssistantMcpCatalogPrimitiveContract[] = [ primitive_id: "probe_coverage", purpose: "Check whether the selected MCP/schema route can prove the requested fact or only support a bounded inference.", decomposition_hints: ["probe_coverage"], + supported_fact_families: [ + "schema_surface", + "entity_grounding", + "value_flow", + "document_evidence", + "movement_evidence", + "activity_lifecycle" + ], + supported_action_families: [ + "inspect_catalog", + "inspect_documents", + "inspect_registers", + "inspect_fields", + "inspect_surface", + "search_business_entity", + "turnover", + "payout", + "net_value_flow", + "list_documents", + "list_movements", + "activity_duration" + ], + planning_tags: ["coverage"], required_axes_any_of: [["coverage_target"], ["domain_family"], ["primitive_id"]], optional_axes: ["period", "organization", "counterparty", "document", "account"], output_fact_kinds: ["coverage_status", "known_gaps"], @@ -138,6 +185,9 @@ const PRIMITIVE_CONTRACTS: AssistantMcpCatalogPrimitiveContract[] = [ primitive_id: "explain_evidence_basis", purpose: "Produce a machine-readable explanation of which checked MCP evidence supports, limits, or fails the answer.", decomposition_hints: ["explain_evidence_basis"], + supported_fact_families: ["activity_lifecycle"], + supported_action_families: ["activity_duration"], + planning_tags: ["explanation"], required_axes_any_of: [["evidence_basis"], ["primitive_id"], ["source_rows_summary"]], optional_axes: ["coverage_target", "domain_family"], output_fact_kinds: ["confirmed_facts", "inferred_facts", "unknown_facts"], @@ -183,11 +233,192 @@ function normalizeDecompositionStep(value: string): string { return value.trim().toLowerCase(); } +function normalizePlanningToken(value: string): string { + return value.trim().toLowerCase(); +} + +function countAxisOverlap(axisSet: Set, groups: string[][]): number { + let best = 0; + for (const group of groups) { + const overlap = group.filter((axis) => axisSet.has(axis)).length; + if (overlap > best) { + best = overlap; + } + } + return best; +} + +function tagSetFromFactAxisInput(input: AssistantMcpCatalogFactAxisSearchInput): Set { + const tags = new Set(); + const requiredAxes = input.required_axes ?? []; + if (input.business_fact_family === "schema_surface") { + tags.add("metadata"); + tags.add("surface_inspection"); + } + if ( + input.business_fact_family === "entity_grounding" || + input.has_subject_candidates || + requiredAxes.some((axis) => ["business_entity", "counterparty", "contract", "item"].includes(axis)) + ) { + tags.add("subject_resolution"); + } + if (input.business_fact_family === "document_evidence") { + tags.add("document"); + } + if (input.business_fact_family === "movement_evidence") { + tags.add("movement"); + } + if (input.business_fact_family === "value_flow") { + tags.add("movement"); + } + if (input.comparison_need) { + tags.add("comparison"); + } + if (input.ranking_need) { + tags.add("ranking"); + tags.add("aggregation"); + } + if ((input.aggregation_need ?? "") === "by_month" || requiredAxes.includes("calendar_month")) { + tags.add("monthly_aggregation"); + tags.add("aggregation"); + } + if (requiredAxes.includes("aggregate_axis")) { + tags.add("aggregation"); + } + if (requiredAxes.includes("coverage_target")) { + tags.add("coverage"); + } + if (input.business_fact_family === "activity_lifecycle" || requiredAxes.includes("evidence_basis")) { + tags.add("explanation"); + } + return tags; +} + +function matchesPlanningToken(value: string | null | undefined, candidates: string[]): boolean { + const normalizedValue = normalizePlanningToken(value ?? ""); + return normalizedValue.length > 0 && candidates.some((candidate) => normalizePlanningToken(candidate) === normalizedValue); +} + +export interface AssistantMcpCatalogFactAxisSearchInput { + business_fact_family?: string | null; + action_family?: string | null; + required_axes?: string[]; + comparison_need?: string | null; + ranking_need?: string | null; + aggregation_need?: string | null; + has_subject_candidates?: boolean; + allow_aggregate_by_axis?: boolean; +} + export interface AssistantMcpCatalogPrimitiveSearchInput { decomposition_candidates: string[]; allow_aggregate_by_axis?: boolean; } +export interface AssistantMcpCatalogMetadataSurfaceSearchInput { + downstream_route_family?: string | null; + selected_entity_set?: string | null; + selected_surface_objects?: string[]; + recommended_next_primitive?: string | null; + required_axes?: string[]; + allow_aggregate_by_axis?: boolean; +} + +function normalizeMetadataSurfaceToken(value: string): string { + return value.trim().toLowerCase().replace(/[\s_.-]+/g, ""); +} + +function metadataSurfaceSuggestsDocument(value: string): boolean { + const token = normalizeMetadataSurfaceToken(value); + return ( + token.includes("документ") || + token.includes("document") || + token.includes("invoice") || + token.includes("waybill") || + token.includes("накладн") || + token.includes("счетфактур") || + token.includes("счётфактур") || + token.includes("акт") + ); +} + +function metadataSurfaceSuggestsMovement(value: string): boolean { + const token = normalizeMetadataSurfaceToken(value); + return ( + token.includes("регистр") || + token.includes("register") || + token.includes("movement") || + token.includes("движени") || + token.includes("операц") || + token.includes("проводк") || + token.includes("bank") + ); +} + +function metadataSurfaceSuggestsCatalog(value: string): boolean { + const token = normalizeMetadataSurfaceToken(value); + return ( + token.includes("справочник") || + token.includes("catalog") || + token.includes("directory") + ); +} + +function tagSetFromMetadataSurfaceInput(input: AssistantMcpCatalogMetadataSurfaceSearchInput): Set { + const tags = new Set(); + const routeFamily = normalizePlanningToken(input.downstream_route_family ?? ""); + const recommendedPrimitive = normalizePlanningToken(input.recommended_next_primitive ?? ""); + const surfaceValues = [ + input.selected_entity_set ?? "", + ...(input.selected_surface_objects ?? []) + ]; + + if (routeFamily === "document_evidence" || recommendedPrimitive === "query_documents") { + tags.add("document"); + } + if (routeFamily === "movement_evidence" || recommendedPrimitive === "query_movements") { + tags.add("movement"); + } + if (routeFamily === "catalog_drilldown" || recommendedPrimitive === "drilldown_related_objects") { + tags.add("drilldown"); + } + for (const value of surfaceValues) { + if (metadataSurfaceSuggestsDocument(value)) { + tags.add("document"); + } + if (metadataSurfaceSuggestsMovement(value)) { + tags.add("movement"); + } + if (metadataSurfaceSuggestsCatalog(value)) { + tags.add("drilldown"); + } + } + const requiredAxisSet = toStringSet(input.required_axes ?? []); + if (requiredAxisSet.has("coverage_target")) { + tags.add("coverage"); + } + if ( + requiredAxisSet.has("counterparty") || + requiredAxisSet.has("business_entity") || + requiredAxisSet.has("contract") + ) { + tags.add("subject_resolution"); + } + return tags; +} + +function factFamiliesFromMetadataSurfaceInput(input: AssistantMcpCatalogMetadataSurfaceSearchInput): Set { + const families = new Set(); + const routeFamily = normalizePlanningToken(input.downstream_route_family ?? ""); + if (routeFamily === "document_evidence") { + families.add("document_evidence"); + } + if (routeFamily === "movement_evidence") { + families.add("movement_evidence"); + } + return families; +} + export function searchAssistantMcpCatalogPrimitivesByDecompositionCandidates( input: AssistantMcpCatalogPrimitiveSearchInput ): AssistantMcpDiscoveryPrimitive[] { @@ -220,6 +451,135 @@ export function searchAssistantMcpCatalogPrimitivesByDecompositionCandidates( return result; } +export function searchAssistantMcpCatalogPrimitivesByFactAxis( + input: AssistantMcpCatalogFactAxisSearchInput +): AssistantMcpDiscoveryPrimitive[] { + const allowAggregateByAxis = input.allow_aggregate_by_axis !== false; + const requiredAxisSet = toStringSet(input.required_axes ?? []); + const desiredTags = tagSetFromFactAxisInput(input); + const scored: Array<{ primitive: AssistantMcpDiscoveryPrimitive; score: number }> = []; + + for (const contract of PRIMITIVE_CONTRACTS) { + if (contract.primitive_id === "aggregate_by_axis" && !allowAggregateByAxis) { + continue; + } + if (contract.primitive_id === "search_business_entity" && input.business_fact_family !== "entity_grounding") { + continue; + } + if (input.business_fact_family === "schema_surface" && contract.primitive_id !== "inspect_1c_metadata") { + continue; + } + + const factMatch = matchesPlanningToken(input.business_fact_family, contract.supported_fact_families); + const actionMatch = matchesPlanningToken(input.action_family, contract.supported_action_families); + const tagMatches = contract.planning_tags.filter((tag) => desiredTags.has(normalizePlanningToken(tag))); + if (contract.primitive_id === "search_business_entity" && !desiredTags.has("subject_resolution")) { + continue; + } + if (contract.primitive_id === "resolve_entity_reference" && !desiredTags.has("subject_resolution")) { + continue; + } + if (contract.primitive_id === "aggregate_by_axis" && !desiredTags.has("aggregation")) { + continue; + } + if (contract.primitive_id === "explain_evidence_basis" && !desiredTags.has("explanation")) { + continue; + } + if (!factMatch && !actionMatch && tagMatches.length <= 0) { + continue; + } + const hasCompatibleAxisGroup = requiredAxisSet.size > 0 && hasAnyAxisGroup(requiredAxisSet, contract.required_axes_any_of); + const axisOverlap = requiredAxisSet.size > 0 ? countAxisOverlap(requiredAxisSet, contract.required_axes_any_of) : 0; + + let score = 0; + if (factMatch) { + score += 5; + } + if (actionMatch) { + score += 3; + } + score += tagMatches.length * 2; + if (hasCompatibleAxisGroup) { + score += 2; + } else if (axisOverlap > 0) { + score += 1; + } + + if (score <= 0) { + continue; + } + scored.push({ + primitive: contract.primitive_id, + score + }); + } + + return scored + .sort((left, right) => right.score - left.score) + .map((item) => item.primitive); +} + +export function searchAssistantMcpCatalogPrimitivesByMetadataSurface( + input: AssistantMcpCatalogMetadataSurfaceSearchInput +): AssistantMcpDiscoveryPrimitive[] { + const allowAggregateByAxis = input.allow_aggregate_by_axis !== false; + const requiredAxisSet = toStringSet(input.required_axes ?? []); + const desiredTags = tagSetFromMetadataSurfaceInput(input); + const desiredFactFamilies = factFamiliesFromMetadataSurfaceInput(input); + const recommendedPrimitive = normalizePlanningToken(input.recommended_next_primitive ?? ""); + const scored: Array<{ primitive: AssistantMcpDiscoveryPrimitive; score: number }> = []; + + for (const contract of PRIMITIVE_CONTRACTS) { + if (contract.primitive_id === "aggregate_by_axis" && !allowAggregateByAxis) { + continue; + } + if (contract.primitive_id === "search_business_entity") { + continue; + } + const tagMatches = contract.planning_tags.filter((tag) => desiredTags.has(normalizePlanningToken(tag))); + const factMatch = contract.supported_fact_families.some((family) => + desiredFactFamilies.has(normalizePlanningToken(family)) + ); + const primitiveRecommended = normalizePlanningToken(contract.primitive_id) === recommendedPrimitive; + if (contract.primitive_id === "resolve_entity_reference" && !desiredTags.has("subject_resolution")) { + continue; + } + if (contract.primitive_id === "aggregate_by_axis" && !desiredTags.has("aggregation")) { + continue; + } + if (!primitiveRecommended && !factMatch && tagMatches.length <= 0) { + continue; + } + const hasCompatibleAxisGroup = requiredAxisSet.size > 0 && hasAnyAxisGroup(requiredAxisSet, contract.required_axes_any_of); + const axisOverlap = requiredAxisSet.size > 0 ? countAxisOverlap(requiredAxisSet, contract.required_axes_any_of) : 0; + let score = 0; + + if (primitiveRecommended) { + score += 6; + } + if (factMatch) { + score += 5; + } + score += tagMatches.length * 2; + if (hasCompatibleAxisGroup) { + score += 2; + } else if (axisOverlap > 0) { + score += 1; + } + if (score <= 0) { + continue; + } + scored.push({ + primitive: contract.primitive_id, + score + }); + } + + return scored + .sort((left, right) => right.score - left.score) + .map((item) => item.primitive); +} + export function buildAssistantMcpCatalogIndex(): AssistantMcpCatalogIndexContract { const reasonCodes: string[] = []; const missingContracts = ASSISTANT_MCP_DISCOVERY_PRIMITIVES.filter((primitive) => !PRIMITIVE_CONTRACT_MAP.has(primitive)); diff --git a/llm_normalizer/backend/src/services/assistantMcpDiscoveryAnswerAdapter.ts b/llm_normalizer/backend/src/services/assistantMcpDiscoveryAnswerAdapter.ts index 84205d5..1921355 100644 --- a/llm_normalizer/backend/src/services/assistantMcpDiscoveryAnswerAdapter.ts +++ b/llm_normalizer/backend/src/services/assistantMcpDiscoveryAnswerAdapter.ts @@ -130,7 +130,9 @@ function isEntityResolutionPilot(pilot: AssistantMcpDiscoveryPilotExecutionContr function isMetadataLaneChoiceClarification(pilot: AssistantMcpDiscoveryPilotExecutionContract): boolean { return ( pilot.reason_codes.includes("planner_selected_metadata_lane_clarification_recipe") || - pilot.dry_run.reason_codes.includes("planner_selected_metadata_lane_clarification_recipe") + pilot.reason_codes.includes("planner_selected_metadata_lane_clarification_from_data_need_graph") || + pilot.dry_run.reason_codes.includes("planner_selected_metadata_lane_clarification_recipe") || + pilot.dry_run.reason_codes.includes("planner_selected_metadata_lane_clarification_from_data_need_graph") ); } diff --git a/llm_normalizer/backend/src/services/assistantMcpDiscoveryDataNeedGraph.ts b/llm_normalizer/backend/src/services/assistantMcpDiscoveryDataNeedGraph.ts index 0cda799..afa72fa 100644 --- a/llm_normalizer/backend/src/services/assistantMcpDiscoveryDataNeedGraph.ts +++ b/llm_normalizer/backend/src/services/assistantMcpDiscoveryDataNeedGraph.ts @@ -109,13 +109,29 @@ function aggregationNeedFor(axis: string): string | null { return `by_${axis}`; } +function hasAllTimeScopeHint(rawUtterance: string): boolean { + if (!rawUtterance) { + return false; + } + return /(?:\u0437\u0430\s+\u0432\u0441[\u0435\u0451]\s+\u0432\u0440\u0435\u043c\u044f|\u0437\u0430\s+\u0432\u0435\u0441\u044c\s+\u043f\u0435\u0440\u0438\u043e\u0434|\u0437\u0430\s+\u0432\u0441\u044e\s+\u0438\u0441\u0442\u043e\u0440\u0438(?:\u044e|\u0438)|\u0437\u0430\s+\u043b\u044e\u0431\u043e\u0439\s+\u043f\u0435\u0440\u0438\u043e\u0434|for\s+all\s+time|all\s+time|entire\s+period|full\s+history|any\s+period)/iu.test( + rawUtterance + ); +} + function timeScopeNeedFor(input: { family: string | null; explicitDateScope: string | null; + allTimeScopeHint: boolean; }): string | null { if (input.explicitDateScope) { return "explicit_period"; } + if ( + input.allTimeScopeHint && + (input.family === "value_flow" || input.family === "movement_evidence" || input.family === "document_evidence") + ) { + return "all_time_scope"; + } if (input.family === "value_flow" || input.family === "movement_evidence" || input.family === "document_evidence") { return "period_required"; } @@ -341,6 +357,7 @@ export function buildAssistantMcpDiscoveryDataNeedGraph( const aggregationNeed = aggregationNeedFor(aggregationAxis); const comparisonNeed = comparisonNeedFor(action); const rankingNeed = rankingNeedFromRawUtterance(rawUtterance) ?? seededRankingNeed; + const allTimeScopeHint = hasAllTimeScopeHint(rawUtterance); const oneSidedOpenScopeTotalHint = hasOpenScopeOneSidedValueTotalHintUtf8Safe(rawUtterance, action); const openScopeWithoutSubject = subjectCandidates.length === 0 && @@ -368,7 +385,8 @@ export function buildAssistantMcpDiscoveryDataNeedGraph( } const timeScopeNeed = timeScopeNeedFor({ family: businessFactFamily, - explicitDateScope + explicitDateScope, + allTimeScopeHint }); if (timeScopeNeed === "period_required" && !explicitDateScope) { pushUnique(clarificationGaps, "period"); @@ -400,6 +418,9 @@ export function buildAssistantMcpDiscoveryDataNeedGraph( if (openScopeWithoutSubject && !rankingNeed && !comparisonNeed) { pushReason(reasonCodes, "data_need_graph_open_scope_total_without_subject"); } + if (allTimeScopeHint) { + pushReason(reasonCodes, "data_need_graph_all_time_scope_hint"); + } if (clarificationGaps.includes("organization")) { pushReason(reasonCodes, "data_need_graph_open_scope_total_needs_organization"); } diff --git a/llm_normalizer/backend/src/services/assistantMcpDiscoveryPilotExecutor.ts b/llm_normalizer/backend/src/services/assistantMcpDiscoveryPilotExecutor.ts index 3e3cf5a..148e29d 100644 --- a/llm_normalizer/backend/src/services/assistantMcpDiscoveryPilotExecutor.ts +++ b/llm_normalizer/backend/src/services/assistantMcpDiscoveryPilotExecutor.ts @@ -149,7 +149,13 @@ export interface AssistantMcpDiscoveryDerivedMetadataSurface { matched_objects: string[]; selected_entity_set: string | null; selected_surface_objects: string[]; + surface_family_scores: { + document_evidence: number; + movement_evidence: number; + catalog_drilldown: number; + }; downstream_route_family: "document_evidence" | "movement_evidence" | "catalog_drilldown" | null; + route_family_selection_basis: "selected_entity_set" | "dominant_surface_objects" | null; recommended_next_primitive: "query_documents" | "query_movements" | "drilldown_related_objects" | null; ambiguity_detected: boolean; ambiguity_entity_sets: string[]; @@ -1193,6 +1199,34 @@ function metadataRouteFamilyForEntitySet( return null; } +function metadataRouteFamilyForEntitySetRelaxed( + entitySet: string +): "document_evidence" | "movement_evidence" | "catalog_drilldown" | null { + const strict = metadataRouteFamilyForEntitySet(entitySet); + if (strict) { + return strict; + } + const raw = String(entitySet ?? "").trim(); + if (!raw) { + return null; + } + if (raw.includes("Документ") || raw.includes("Документ")) { + return "document_evidence"; + } + if ( + raw.includes("РегистрНакопления") || + raw.includes("РегистрСведений") || + raw.includes("РегистрНакопления") || + raw.includes("РегистрСведений") + ) { + return "movement_evidence"; + } + if (raw.includes("Справочник") || raw.includes("Справочник")) { + return "catalog_drilldown"; + } + return null; +} + function metadataNextPrimitiveForRouteFamily( routeFamily: "document_evidence" | "movement_evidence" | "catalog_drilldown" | null ): "query_documents" | "query_movements" | "drilldown_related_objects" | null { @@ -1254,6 +1288,63 @@ function metadataObjectsForEntitySet(entitySet: string | null, matchedObjects: s return matchedObjects.filter((item) => item.startsWith(`${entitySet}.`) || item.includes(entitySet)); } +function emptyMetadataSurfaceFamilyScores(): AssistantMcpDiscoveryDerivedMetadataSurface["surface_family_scores"] { + return { + document_evidence: 0, + movement_evidence: 0, + catalog_drilldown: 0 + }; +} + +function metadataSurfaceFamilyScores( + matchedObjects: string[] +): AssistantMcpDiscoveryDerivedMetadataSurface["surface_family_scores"] { + const scores = emptyMetadataSurfaceFamilyScores(); + for (const objectName of matchedObjects) { + const entitySet = inferMetadataEntitySetFromObjectName(objectName); + const routeFamily = entitySet ? metadataRouteFamilyForEntitySetRelaxed(entitySet) : null; + if (routeFamily) { + scores[routeFamily] += 1; + } + } + return scores; +} + +function metadataObjectsForRouteFamily( + routeFamily: "document_evidence" | "movement_evidence" | "catalog_drilldown" | null, + matchedObjects: string[] +): string[] { + if (!routeFamily) { + return []; + } + return matchedObjects.filter((objectName) => { + const entitySet = inferMetadataEntitySetFromObjectName(objectName); + return entitySet ? metadataRouteFamilyForEntitySetRelaxed(entitySet) === routeFamily : false; + }); +} + +function selectMetadataRouteFamilyFromSurfaceScores( + scores: AssistantMcpDiscoveryDerivedMetadataSurface["surface_family_scores"] +): "document_evidence" | "movement_evidence" | "catalog_drilldown" | null { + const ranked = (Object.entries(scores) as Array< + ["document_evidence" | "movement_evidence" | "catalog_drilldown", number] + >) + .filter(([, score]) => score > 0) + .sort((left, right) => right[1] - left[1]); + const top = ranked[0]; + const second = ranked[1]; + if (!top) { + return null; + } + if (!second) { + return top[0]; + } + const absoluteMargin = top[1] - second[1]; + const relativeRatio = second[1] > 0 ? top[1] / second[1] : Number.POSITIVE_INFINITY; + const clearlyDominant = absoluteMargin >= 2 || relativeRatio >= 1.5; + return clearlyDominant ? top[0] : null; +} + function deriveMetadataSurface( result: AddressMcpMetadataRowsResult | null, metadataScope: string | null, @@ -1275,15 +1366,34 @@ function deriveMetadataSurface( } } const grounding = selectMetadataEntityGrounding(availableEntitySets, requestedMetaTypes); - const downstreamRouteFamily = grounding.selectedEntitySet - ? metadataRouteFamilyForEntitySet(grounding.selectedEntitySet) + const surfaceFamilyScores = metadataSurfaceFamilyScores(matchedObjects); + const selectedEntitySetRouteFamily = grounding.selectedEntitySet + ? metadataRouteFamilyForEntitySetRelaxed(grounding.selectedEntitySet) : null; + const scoredRouteFamily = + selectedEntitySetRouteFamily === null ? selectMetadataRouteFamilyFromSurfaceScores(surfaceFamilyScores) : null; + const downstreamRouteFamily = selectedEntitySetRouteFamily ?? scoredRouteFamily; + const routeFamilySelectionBasis = selectedEntitySetRouteFamily + ? "selected_entity_set" + : scoredRouteFamily + ? "dominant_surface_objects" + : null; + const selectedSurfaceObjects = + grounding.selectedEntitySet !== null + ? metadataObjectsForEntitySet(grounding.selectedEntitySet, matchedObjects) + : metadataObjectsForRouteFamily(downstreamRouteFamily, matchedObjects); const knownLimitations: string[] = []; - if (grounding.ambiguityDetected && grounding.ambiguityEntitySets.length > 0) { + const ambiguityRemainsUnresolved = grounding.ambiguityDetected && !downstreamRouteFamily; + if (ambiguityRemainsUnresolved && grounding.ambiguityEntitySets.length > 0) { knownLimitations.push( `Exact downstream metadata surface remains ambiguous across: ${grounding.ambiguityEntitySets.join(", ")}` ); } + if (grounding.ambiguityDetected && downstreamRouteFamily && routeFamilySelectionBasis === "dominant_surface_objects") { + knownLimitations.push( + `Metadata surface spans multiple object sets, but dominant confirmed objects point to ${downstreamRouteFamily}` + ); + } return { metadata_scope: metadataScope, requested_meta_types: requestedMetaTypes, @@ -1291,11 +1401,13 @@ function deriveMetadataSurface( available_entity_sets: availableEntitySets, matched_objects: matchedObjects, selected_entity_set: grounding.selectedEntitySet, - selected_surface_objects: metadataObjectsForEntitySet(grounding.selectedEntitySet, matchedObjects), + selected_surface_objects: selectedSurfaceObjects, + surface_family_scores: surfaceFamilyScores, downstream_route_family: downstreamRouteFamily, + route_family_selection_basis: routeFamilySelectionBasis, recommended_next_primitive: metadataNextPrimitiveForRouteFamily(downstreamRouteFamily), - ambiguity_detected: grounding.ambiguityDetected, - ambiguity_entity_sets: grounding.ambiguityEntitySets, + ambiguity_detected: ambiguityRemainsUnresolved, + ambiguity_entity_sets: ambiguityRemainsUnresolved ? grounding.ambiguityEntitySets : [], available_fields: metadataAvailableFields(result.rows), known_limitations: knownLimitations, inference_basis: "confirmed_1c_metadata_surface_rows" @@ -1322,6 +1434,15 @@ function buildMetadataConfirmedFacts( if (surface.selected_surface_objects.length > 0) { facts.push(`Selected metadata objects: ${surface.selected_surface_objects.slice(0, 8).join(", ")}`); } + if ( + surface.surface_family_scores.document_evidence > 0 || + surface.surface_family_scores.movement_evidence > 0 || + surface.surface_family_scores.catalog_drilldown > 0 + ) { + facts.push( + `Metadata surface family scores: document=${surface.surface_family_scores.document_evidence}, movement=${surface.surface_family_scores.movement_evidence}, catalog=${surface.surface_family_scores.catalog_drilldown}` + ); + } if (surface.available_fields.length > 0) { facts.push(`Available metadata fields/sections: ${surface.available_fields.slice(0, 12).join(", ")}`); } @@ -1331,7 +1452,7 @@ function buildMetadataConfirmedFacts( function buildMetadataInferredFacts( surface: AssistantMcpDiscoveryDerivedMetadataSurface | null ): string[] { - if (!surface || !surface.selected_entity_set || !surface.downstream_route_family || !surface.recommended_next_primitive) { + if (!surface || !surface.downstream_route_family || !surface.recommended_next_primitive) { return []; } return [ @@ -2304,6 +2425,9 @@ export async function executeAssistantMcpDiscoveryPilot( const derivedMetadataSurface = deriveMetadataSurface(metadataResult, metadataScope, requestedMetaTypes); if (derivedMetadataSurface) { pushReason(reasonCodes, "pilot_derived_metadata_surface_from_confirmed_rows"); + if (derivedMetadataSurface.route_family_selection_basis === "dominant_surface_objects") { + pushReason(reasonCodes, "pilot_selected_metadata_route_family_from_dominant_surface_objects"); + } } const evidence = resolveAssistantMcpDiscoveryEvidence({ plan: planner.discovery_plan, diff --git a/llm_normalizer/backend/src/services/assistantMcpDiscoveryPlanner.ts b/llm_normalizer/backend/src/services/assistantMcpDiscoveryPlanner.ts index b3a35aa..0b82e73 100644 --- a/llm_normalizer/backend/src/services/assistantMcpDiscoveryPlanner.ts +++ b/llm_normalizer/backend/src/services/assistantMcpDiscoveryPlanner.ts @@ -6,6 +6,8 @@ import { } from "./assistantMcpDiscoveryPolicy"; import { searchAssistantMcpCatalogPrimitivesByDecompositionCandidates, + searchAssistantMcpCatalogPrimitivesByFactAxis, + searchAssistantMcpCatalogPrimitivesByMetadataSurface, reviewAssistantMcpDiscoveryPlanAgainstCatalog, type AssistantMcpCatalogPlanReview } from "./assistantMcpCatalogIndex"; @@ -14,6 +16,24 @@ import type { AssistantMcpDiscoveryDataNeedGraphContract } from "./assistantMcpD export const ASSISTANT_MCP_DISCOVERY_PLANNER_SCHEMA_VERSION = "assistant_mcp_discovery_planner_v1" as const; export type AssistantMcpDiscoveryPlannerStatus = "ready_for_execution" | "needs_clarification" | "blocked"; +export type AssistantMcpDiscoveryMetadataRouteFamily = + | "document_evidence" + | "movement_evidence" + | "catalog_drilldown"; +export type AssistantMcpDiscoveryMetadataRecommendedPrimitive = + | "query_documents" + | "query_movements" + | "drilldown_related_objects"; + +export interface AssistantMcpDiscoveryMetadataSurfaceRef { + selected_entity_set: string | null; + selected_surface_objects: string[]; + downstream_route_family: AssistantMcpDiscoveryMetadataRouteFamily | null; + route_family_selection_basis: "selected_entity_set" | "dominant_surface_objects" | null; + recommended_next_primitive: AssistantMcpDiscoveryMetadataRecommendedPrimitive | null; + ambiguity_detected: boolean; + ambiguity_entity_sets: string[]; +} export type AssistantMcpDiscoveryChainId = | "metadata_inspection" @@ -29,6 +49,7 @@ export type AssistantMcpDiscoveryChainId = export interface AssistantMcpDiscoveryPlannerInput { semanticDataNeed?: string | null; dataNeedGraph?: AssistantMcpDiscoveryDataNeedGraphContract | null; + metadataSurface?: AssistantMcpDiscoveryMetadataSurfaceRef | null; turnMeaning?: AssistantMcpDiscoveryTurnMeaningRef | null; } @@ -38,6 +59,7 @@ export interface AssistantMcpDiscoveryPlannerContract { planner_status: AssistantMcpDiscoveryPlannerStatus; semantic_data_need: string | null; data_need_graph: AssistantMcpDiscoveryDataNeedGraphContract | null; + metadata_surface_ref: AssistantMcpDiscoveryMetadataSurfaceRef | null; selected_chain_id: AssistantMcpDiscoveryChainId; selected_chain_summary: string; proposed_primitives: AssistantMcpDiscoveryPrimitive[]; @@ -135,43 +157,185 @@ function isYearDateScope(meaning: AssistantMcpDiscoveryTurnMeaningRef | null | u return /^\d{4}$/.test(toNonEmptyString(meaning?.explicit_date_scope) ?? ""); } -function primitivesFromGraphDecomposition(input: { - dataNeedGraph: AssistantMcpDiscoveryDataNeedGraphContract | null; +function mergeCatalogPrimitivesWithFallback( + catalogPrimitives: AssistantMcpDiscoveryPrimitive[], + fallbackPrimitives: AssistantMcpDiscoveryPrimitive[] +): AssistantMcpDiscoveryPrimitive[] { + const result: AssistantMcpDiscoveryPrimitive[] = []; + for (const primitive of fallbackPrimitives) { + if (catalogPrimitives.includes(primitive) && !result.includes(primitive)) { + result.push(primitive); + } + } + for (const primitive of catalogPrimitives) { + if (!result.includes(primitive)) { + result.push(primitive); + } + } + for (const primitive of fallbackPrimitives) { + if (!result.includes(primitive)) { + result.push(primitive); + } + } + return result; +} + +function preferredPrimitiveFromMetadataSurface( + surface: AssistantMcpDiscoveryMetadataSurfaceRef | null | undefined +): AssistantMcpDiscoveryPrimitive | null { + const recommendedPrimitive = surface?.recommended_next_primitive ?? null; + if (recommendedPrimitive) { + return recommendedPrimitive; + } + if (surface?.ambiguity_detected) { + return null; + } + if (surface?.downstream_route_family === "document_evidence") { + return "query_documents"; + } + if (surface?.downstream_route_family === "movement_evidence") { + return "query_movements"; + } + if (surface?.downstream_route_family === "catalog_drilldown") { + return "drilldown_related_objects"; + } + return null; +} + +function filterCatalogPrimitivesByMetadataSurface(input: { + catalogPrimitives: AssistantMcpDiscoveryPrimitive[]; fallbackPrimitives: AssistantMcpDiscoveryPrimitive[]; - allowAggregateByAxis?: boolean; + metadataSurface?: AssistantMcpDiscoveryMetadataSurfaceRef | null; }): { primitives: AssistantMcpDiscoveryPrimitive[]; reasonCodes: string[] } { - const decompositionCandidates = input.dataNeedGraph?.decomposition_candidates ?? []; - if (decompositionCandidates.length <= 0) { - return { primitives: input.fallbackPrimitives, reasonCodes: [] }; + const preferredPrimitive = preferredPrimitiveFromMetadataSurface(input.metadataSurface); + const reasonCodes: string[] = []; + if ( + !preferredPrimitive || + input.metadataSurface?.ambiguity_detected || + !input.fallbackPrimitives.includes(preferredPrimitive) + ) { + return { + primitives: input.catalogPrimitives, + reasonCodes + }; } - const searchedPrimitives = searchAssistantMcpCatalogPrimitivesByDecompositionCandidates({ - decomposition_candidates: decompositionCandidates, - allow_aggregate_by_axis: input.allowAggregateByAxis + const laneSensitivePrimitives = new Set([ + "query_documents", + "query_movements", + "drilldown_related_objects" + ]); + const filteredPrimitives = input.catalogPrimitives.filter( + (primitive) => !laneSensitivePrimitives.has(primitive) || primitive === preferredPrimitive + ); + if (filteredPrimitives.length !== input.catalogPrimitives.length) { + reasonCodes.push("planner_filtered_catalog_primitives_by_confirmed_metadata_surface"); + } + if ( + (filteredPrimitives.includes(preferredPrimitive) || input.fallbackPrimitives.includes(preferredPrimitive)) && + input.metadataSurface?.selected_surface_objects.length + ) { + reasonCodes.push("planner_surface_aware_next_lane_from_confirmed_metadata_objects"); + } + return { + primitives: filteredPrimitives, + reasonCodes + }; +} + +function selectPrimitivesFromGraphAndCatalog(input: { + dataNeedGraph: AssistantMcpDiscoveryDataNeedGraphContract | null; + fallbackPrimitives: AssistantMcpDiscoveryPrimitive[]; + requiredAxes: string[]; + metadataSurface?: AssistantMcpDiscoveryMetadataSurfaceRef | null; + actionFamily?: string | null; + allowAggregateByAxis?: boolean; +}): { primitives: AssistantMcpDiscoveryPrimitive[]; reasonCodes: string[] } { + const reasonCodes: string[] = []; + const decompositionCandidates = input.dataNeedGraph?.decomposition_candidates ?? []; + const decompositionPrimitives = + decompositionCandidates.length > 0 + ? searchAssistantMcpCatalogPrimitivesByDecompositionCandidates({ + decomposition_candidates: decompositionCandidates, + allow_aggregate_by_axis: input.allowAggregateByAxis + }) + : []; + if (decompositionPrimitives.length > 0) { + reasonCodes.push("planner_selected_catalog_primitives_from_decomposition_candidates"); + } + + const metadataSurfacePrimitives = input.metadataSurface + ? searchAssistantMcpCatalogPrimitivesByMetadataSurface({ + downstream_route_family: input.metadataSurface.downstream_route_family, + selected_entity_set: input.metadataSurface.selected_entity_set, + selected_surface_objects: input.metadataSurface.selected_surface_objects, + recommended_next_primitive: input.metadataSurface.recommended_next_primitive, + required_axes: input.requiredAxes, + allow_aggregate_by_axis: input.allowAggregateByAxis + }) + : []; + if (metadataSurfacePrimitives.length > 0) { + reasonCodes.push("planner_selected_catalog_primitives_from_metadata_surface_search"); + } + + const factAxisPrimitives = input.dataNeedGraph + ? searchAssistantMcpCatalogPrimitivesByFactAxis({ + business_fact_family: input.dataNeedGraph.business_fact_family, + action_family: input.actionFamily ?? input.dataNeedGraph.action_family, + required_axes: input.requiredAxes, + comparison_need: input.dataNeedGraph.comparison_need, + ranking_need: input.dataNeedGraph.ranking_need, + aggregation_need: input.dataNeedGraph.aggregation_need, + has_subject_candidates: hasSubjectCandidates(input.dataNeedGraph), + allow_aggregate_by_axis: input.allowAggregateByAxis + }) + : []; + if (factAxisPrimitives.length > 0) { + reasonCodes.push("planner_selected_catalog_primitives_from_fact_axis_search"); + } + + const combinedCatalogPrimitives: AssistantMcpDiscoveryPrimitive[] = []; + for (const primitive of decompositionPrimitives) { + if (!combinedCatalogPrimitives.includes(primitive)) { + combinedCatalogPrimitives.push(primitive); + } + } + for (const primitive of metadataSurfacePrimitives) { + if (!combinedCatalogPrimitives.includes(primitive)) { + combinedCatalogPrimitives.push(primitive); + } + } + for (const primitive of factAxisPrimitives) { + if (!combinedCatalogPrimitives.includes(primitive)) { + combinedCatalogPrimitives.push(primitive); + } + } + + const filteredCatalogPrimitives = filterCatalogPrimitivesByMetadataSurface({ + catalogPrimitives: combinedCatalogPrimitives, + fallbackPrimitives: input.fallbackPrimitives, + metadataSurface: input.metadataSurface }); - if (searchedPrimitives.length <= 0) { + reasonCodes.push(...filteredCatalogPrimitives.reasonCodes); + + if (filteredCatalogPrimitives.primitives.length <= 0) { return { primitives: input.fallbackPrimitives, reasonCodes: ["planner_fell_back_to_recipe_primitives_after_empty_catalog_search"] }; } - const mergedPrimitives = [...searchedPrimitives]; - for (const primitive of input.fallbackPrimitives) { - if (!mergedPrimitives.includes(primitive)) { - mergedPrimitives.push(primitive); - } + const mergedPrimitives = mergeCatalogPrimitivesWithFallback( + filteredCatalogPrimitives.primitives, + input.fallbackPrimitives + ); + if (input.fallbackPrimitives.some((primitive) => !filteredCatalogPrimitives.primitives.includes(primitive))) { + reasonCodes.push("planner_completed_catalog_searched_chain_with_recipe_primitives"); } return { primitives: mergedPrimitives, - reasonCodes: - mergedPrimitives.length === searchedPrimitives.length - ? ["planner_selected_catalog_primitives_from_decomposition_candidates"] - : [ - "planner_selected_catalog_primitives_from_decomposition_candidates", - "planner_completed_catalog_searched_chain_with_recipe_primitives" - ] + reasonCodes }; } @@ -192,6 +356,40 @@ function budgetOverrideFor(input: AssistantMcpDiscoveryPlannerInput, recipe: Pla return {}; } +function routeFamilyFromThinMetadataSurfaceInput( + input: AssistantMcpDiscoveryPlannerInput +): AssistantMcpDiscoveryMetadataRouteFamily | null { + const surface = input.metadataSurface ?? null; + if (!surface || surface.ambiguity_detected || !surface.downstream_route_family || !surface.recommended_next_primitive) { + return null; + } + const meaning = input.turnMeaning ?? null; + const dataNeedGraph = input.dataNeedGraph ?? null; + const graphFactFamily = lower(dataNeedGraph?.business_fact_family); + const domain = lower(meaning?.asked_domain_family); + const action = lower(meaning?.asked_action_family); + const unsupported = lower(meaning?.unsupported_but_understood_family); + const semanticNeed = lower(input.semanticDataNeed); + const combined = `${domain} ${action} ${unsupported} ${semanticNeed}`.trim(); + const explicitlyOtherFamily = + includesAny(combined, ["value_flow", "turnover", "revenue", "payment", "payout", "net", "lifecycle", "activity", "duration", "metadata lane clarification"]); + if (explicitlyOtherFamily) { + return null; + } + if (graphFactFamily === "document_evidence" || includesAny(combined, ["document", "documents", "list_documents"])) { + return surface.downstream_route_family === "document_evidence" ? "document_evidence" : null; + } + if (graphFactFamily === "movement_evidence" || includesAny(combined, ["movement", "movements", "list_movements", "bank_operations"])) { + return surface.downstream_route_family === "movement_evidence" ? "movement_evidence" : null; + } + if (!graphFactFamily && !domain && !action) { + if (surface.downstream_route_family === "document_evidence" || surface.downstream_route_family === "movement_evidence") { + return surface.downstream_route_family; + } + } + return null; +} + function recipeFor(input: AssistantMcpDiscoveryPlannerInput): PlannerRecipe { const meaning = input.turnMeaning ?? null; const dataNeedGraph = input.dataNeedGraph ?? null; @@ -224,18 +422,63 @@ function recipeFor(input: AssistantMcpDiscoveryPlannerInput): PlannerRecipe { }; } + const thinSurfaceRouteFamily = routeFamilyFromThinMetadataSurfaceInput(input); + if (thinSurfaceRouteFamily === "document_evidence") { + pushUnique(axes, "coverage_target"); + const primitiveSelection = selectPrimitivesFromGraphAndCatalog({ + dataNeedGraph, + fallbackPrimitives: ["resolve_entity_reference", "query_documents", "probe_coverage"], + requiredAxes: axes, + metadataSurface: input.metadataSurface, + actionFamily: action + }); + return { + semanticDataNeed: "document evidence", + chainId: "document_evidence", + chainSummary: + "Ground the next checked document lane from the confirmed metadata surface, then fetch scoped document rows and probe coverage before answering.", + primitives: primitiveSelection.primitives, + axes, + reason: "planner_selected_document_from_confirmed_metadata_surface_ref", + extraReasons: primitiveSelection.reasonCodes + }; + } + if (thinSurfaceRouteFamily === "movement_evidence") { + pushUnique(axes, "coverage_target"); + const primitiveSelection = selectPrimitivesFromGraphAndCatalog({ + dataNeedGraph, + fallbackPrimitives: ["resolve_entity_reference", "query_movements", "probe_coverage"], + requiredAxes: axes, + metadataSurface: input.metadataSurface, + actionFamily: action + }); + return { + semanticDataNeed: "movement evidence", + chainId: "movement_evidence", + chainSummary: + "Ground the next checked movement lane from the confirmed metadata surface, then fetch scoped movement rows and probe coverage before answering.", + primitives: primitiveSelection.primitives, + axes, + reason: "planner_selected_movement_from_confirmed_metadata_surface_ref", + extraReasons: primitiveSelection.reasonCodes + }; + } + if (graphFactFamily === "value_flow") { if (dataNeedGraph?.comparison_need === "incoming_vs_outgoing" && !hasSubjectCandidates(dataNeedGraph)) { - const primitiveSelection = primitivesFromGraphDecomposition({ - dataNeedGraph, - fallbackPrimitives: ["query_movements", "probe_coverage"], - allowAggregateByAxis: false - }); pushUnique(axes, "amount"); pushUnique(axes, "coverage_target"); if (requestedAggregationAxis === "month" || graphAggregation === "by_month") { pushUnique(axes, "calendar_month"); } + const primitiveSelection = selectPrimitivesFromGraphAndCatalog({ + dataNeedGraph, + fallbackPrimitives: ["query_movements", "probe_coverage"], + requiredAxes: axes, + metadataSurface: input.metadataSurface, + actionFamily: action, + allowAggregateByAxis: false + }); return { semanticDataNeed: "bidirectional value-flow comparison evidence", chainId: "value_flow_comparison", @@ -248,14 +491,17 @@ function recipeFor(input: AssistantMcpDiscoveryPlannerInput): PlannerRecipe { }; } if (dataNeedGraph?.ranking_need && !hasSubjectCandidates(dataNeedGraph)) { - const primitiveSelection = primitivesFromGraphDecomposition({ - dataNeedGraph, - fallbackPrimitives: ["query_movements", "aggregate_by_axis", "probe_coverage"], - allowAggregateByAxis: true - }); pushUnique(axes, "aggregate_axis"); pushUnique(axes, "amount"); pushUnique(axes, "coverage_target"); + const primitiveSelection = selectPrimitivesFromGraphAndCatalog({ + dataNeedGraph, + fallbackPrimitives: ["query_movements", "aggregate_by_axis", "probe_coverage"], + requiredAxes: axes, + metadataSurface: input.metadataSurface, + actionFamily: action, + allowAggregateByAxis: true + }); return { semanticDataNeed: "ranked value-flow evidence", chainId: "value_flow_ranking", @@ -271,11 +517,6 @@ function recipeFor(input: AssistantMcpDiscoveryPlannerInput): PlannerRecipe { }; } if (openScopeTotalWithoutSubject) { - const primitiveSelection = primitivesFromGraphDecomposition({ - dataNeedGraph, - fallbackPrimitives: ["query_movements", "aggregate_by_axis", "probe_coverage"], - allowAggregateByAxis: true - }); pushUnique(axes, "organization"); pushUnique(axes, "aggregate_axis"); pushUnique(axes, "amount"); @@ -283,6 +524,13 @@ function recipeFor(input: AssistantMcpDiscoveryPlannerInput): PlannerRecipe { if (requestedAggregationAxis === "month" || graphAggregation === "by_month") { pushUnique(axes, "calendar_month"); } + const primitiveSelection = selectPrimitivesFromGraphAndCatalog({ + dataNeedGraph, + fallbackPrimitives: ["query_movements", "aggregate_by_axis", "probe_coverage"], + requiredAxes: axes, + actionFamily: action, + allowAggregateByAxis: true + }); return { semanticDataNeed: "organization-scoped value-flow evidence", chainId: "value_flow", @@ -297,17 +545,20 @@ function recipeFor(input: AssistantMcpDiscoveryPlannerInput): PlannerRecipe { extraReasons: primitiveSelection.reasonCodes }; } - const primitiveSelection = primitivesFromGraphDecomposition({ - dataNeedGraph, - fallbackPrimitives: ["resolve_entity_reference", "query_movements", "aggregate_by_axis", "probe_coverage"], - allowAggregateByAxis: true - }); pushUnique(axes, "aggregate_axis"); pushUnique(axes, "amount"); pushUnique(axes, "coverage_target"); if (requestedAggregationAxis === "month" || graphAggregation === "by_month") { pushUnique(axes, "calendar_month"); } + const primitiveSelection = selectPrimitivesFromGraphAndCatalog({ + dataNeedGraph, + fallbackPrimitives: ["resolve_entity_reference", "query_movements", "aggregate_by_axis", "probe_coverage"], + requiredAxes: axes, + metadataSurface: input.metadataSurface, + actionFamily: action, + allowAggregateByAxis: true + }); return { semanticDataNeed: "counterparty value-flow evidence", chainId: "value_flow", @@ -323,13 +574,16 @@ function recipeFor(input: AssistantMcpDiscoveryPlannerInput): PlannerRecipe { } if (graphFactFamily === "activity_lifecycle") { - const primitiveSelection = primitivesFromGraphDecomposition({ - dataNeedGraph, - fallbackPrimitives: ["resolve_entity_reference", "query_documents", "probe_coverage", "explain_evidence_basis"] - }); pushUnique(axes, "document_date"); pushUnique(axes, "coverage_target"); pushUnique(axes, "evidence_basis"); + const primitiveSelection = selectPrimitivesFromGraphAndCatalog({ + dataNeedGraph, + fallbackPrimitives: ["resolve_entity_reference", "query_documents", "probe_coverage", "explain_evidence_basis"], + requiredAxes: axes, + metadataSurface: input.metadataSurface, + actionFamily: action + }); return { semanticDataNeed: "counterparty lifecycle evidence", chainId: "lifecycle", @@ -342,11 +596,14 @@ function recipeFor(input: AssistantMcpDiscoveryPlannerInput): PlannerRecipe { } if (graphFactFamily === "schema_surface") { - const primitiveSelection = primitivesFromGraphDecomposition({ - dataNeedGraph, - fallbackPrimitives: ["inspect_1c_metadata"] - }); pushUnique(axes, "metadata_scope"); + const primitiveSelection = selectPrimitivesFromGraphAndCatalog({ + dataNeedGraph, + fallbackPrimitives: ["inspect_1c_metadata"], + requiredAxes: axes, + metadataSurface: input.metadataSurface, + actionFamily: action + }); return { semanticDataNeed: "1C metadata evidence", chainId: "metadata_inspection", @@ -359,11 +616,14 @@ function recipeFor(input: AssistantMcpDiscoveryPlannerInput): PlannerRecipe { } if (graphFactFamily === "movement_evidence") { - const primitiveSelection = primitivesFromGraphDecomposition({ - dataNeedGraph, - fallbackPrimitives: ["resolve_entity_reference", "query_movements", "probe_coverage"] - }); pushUnique(axes, "coverage_target"); + const primitiveSelection = selectPrimitivesFromGraphAndCatalog({ + dataNeedGraph, + fallbackPrimitives: ["resolve_entity_reference", "query_movements", "probe_coverage"], + requiredAxes: axes, + metadataSurface: input.metadataSurface, + actionFamily: action + }); return { semanticDataNeed: "movement evidence", chainId: "movement_evidence", @@ -376,11 +636,14 @@ function recipeFor(input: AssistantMcpDiscoveryPlannerInput): PlannerRecipe { } if (graphFactFamily === "document_evidence") { - const primitiveSelection = primitivesFromGraphDecomposition({ - dataNeedGraph, - fallbackPrimitives: ["resolve_entity_reference", "query_documents", "probe_coverage"] - }); pushUnique(axes, "coverage_target"); + const primitiveSelection = selectPrimitivesFromGraphAndCatalog({ + dataNeedGraph, + fallbackPrimitives: ["resolve_entity_reference", "query_documents", "probe_coverage"], + requiredAxes: axes, + metadataSurface: input.metadataSurface, + actionFamily: action + }); return { semanticDataNeed: "document evidence", chainId: "document_evidence", @@ -393,12 +656,15 @@ function recipeFor(input: AssistantMcpDiscoveryPlannerInput): PlannerRecipe { } if (graphFactFamily === "entity_grounding" || (!graphFactFamily && (dataNeedGraph?.subject_candidates.length ?? 0) > 0)) { - const primitiveSelection = primitivesFromGraphDecomposition({ - dataNeedGraph, - fallbackPrimitives: ["search_business_entity", "resolve_entity_reference", "probe_coverage"] - }); pushUnique(axes, "business_entity"); pushUnique(axes, "coverage_target"); + const primitiveSelection = selectPrimitivesFromGraphAndCatalog({ + dataNeedGraph, + fallbackPrimitives: ["search_business_entity", "resolve_entity_reference", "probe_coverage"], + requiredAxes: axes, + metadataSurface: input.metadataSurface, + actionFamily: action + }); return { semanticDataNeed: "entity discovery evidence", chainId: "entity_resolution", @@ -537,6 +803,7 @@ export function planAssistantMcpDiscovery( const budgetOverride = budgetOverrideFor(input, recipe); const semanticDataNeed = toNonEmptyString(input.semanticDataNeed) ?? recipe.semanticDataNeed; const dataNeedGraph = input.dataNeedGraph ?? null; + const metadataSurface = input.metadataSurface ?? null; const reasonCodes: string[] = []; pushReason(reasonCodes, recipe.reason); for (const reason of recipe.extraReasons ?? []) { @@ -545,6 +812,9 @@ export function planAssistantMcpDiscovery( if (dataNeedGraph) { pushReason(reasonCodes, "planner_consumed_data_need_graph_v1"); } + if (metadataSurface) { + pushReason(reasonCodes, "planner_consumed_metadata_surface_ref_v1"); + } if (budgetOverride.maxProbeCount) { pushReason(reasonCodes, "planner_enabled_chunked_coverage_probe_budget"); } @@ -596,12 +866,13 @@ export function planAssistantMcpDiscovery( planner_status: plannerStatus, semantic_data_need: semanticDataNeed, data_need_graph: dataNeedGraph, - selected_chain_id: recipe.chainId, - selected_chain_summary: recipe.chainSummary, - proposed_primitives: recipe.primitives, - required_axes: recipe.axes, - discovery_plan: plan, - catalog_review: adjustedReview, - reason_codes: reasonCodes - }; + metadata_surface_ref: metadataSurface, + selected_chain_id: recipe.chainId, + selected_chain_summary: recipe.chainSummary, + proposed_primitives: recipe.primitives, + required_axes: recipe.axes, + discovery_plan: plan, + catalog_review: adjustedReview, + reason_codes: reasonCodes + }; } diff --git a/llm_normalizer/backend/src/services/assistantMcpDiscoveryRuntimeBridge.ts b/llm_normalizer/backend/src/services/assistantMcpDiscoveryRuntimeBridge.ts index 3819f7c..48ca87f 100644 --- a/llm_normalizer/backend/src/services/assistantMcpDiscoveryRuntimeBridge.ts +++ b/llm_normalizer/backend/src/services/assistantMcpDiscoveryRuntimeBridge.ts @@ -9,6 +9,7 @@ import { } from "./assistantMcpDiscoveryPilotExecutor"; import { planAssistantMcpDiscovery, + type AssistantMcpDiscoveryMetadataSurfaceRef, type AssistantMcpDiscoveryPlannerContract } from "./assistantMcpDiscoveryPlanner"; import type { AssistantMcpDiscoveryDataNeedGraphContract } from "./assistantMcpDiscoveryDataNeedGraph"; @@ -27,6 +28,7 @@ export type AssistantMcpDiscoveryRuntimeBridgeStatus = export interface AssistantMcpDiscoveryRuntimeBridgeInput { semanticDataNeed?: string | null; dataNeedGraph?: AssistantMcpDiscoveryDataNeedGraphContract | null; + metadataSurface?: AssistantMcpDiscoveryMetadataSurfaceRef | null; turnMeaning?: AssistantMcpDiscoveryTurnMeaningRef | null; deps?: AssistantMcpDiscoveryPilotExecutorDeps; } @@ -101,6 +103,7 @@ export async function runAssistantMcpDiscoveryRuntimeBridge( const planner = planAssistantMcpDiscovery({ semanticDataNeed: input.semanticDataNeed, dataNeedGraph: input.dataNeedGraph, + metadataSurface: input.metadataSurface, turnMeaning: input.turnMeaning }); const pilot = await executeAssistantMcpDiscoveryPilot(planner, input.deps); diff --git a/llm_normalizer/backend/src/services/assistantMcpDiscoveryRuntimeEntryPoint.ts b/llm_normalizer/backend/src/services/assistantMcpDiscoveryRuntimeEntryPoint.ts index 44c438c..ca8b2db 100644 --- a/llm_normalizer/backend/src/services/assistantMcpDiscoveryRuntimeEntryPoint.ts +++ b/llm_normalizer/backend/src/services/assistantMcpDiscoveryRuntimeEntryPoint.ts @@ -102,6 +102,7 @@ export async function runAssistantMcpDiscoveryRuntimeEntryPoint( const bridge = await runAssistantMcpDiscoveryRuntimeBridge({ semanticDataNeed: turnInput.semantic_data_need, dataNeedGraph: turnInput.data_need_graph, + metadataSurface: turnInput.metadata_surface_ref, turnMeaning: turnInput.turn_meaning_ref, deps: input.deps }); diff --git a/llm_normalizer/backend/src/services/assistantMcpDiscoveryTurnInputAdapter.ts b/llm_normalizer/backend/src/services/assistantMcpDiscoveryTurnInputAdapter.ts index 69c036b..d64c06f 100644 --- a/llm_normalizer/backend/src/services/assistantMcpDiscoveryTurnInputAdapter.ts +++ b/llm_normalizer/backend/src/services/assistantMcpDiscoveryTurnInputAdapter.ts @@ -3,6 +3,11 @@ import { buildAssistantMcpDiscoveryDataNeedGraph, type AssistantMcpDiscoveryDataNeedGraphContract } from "./assistantMcpDiscoveryDataNeedGraph"; +import type { + AssistantMcpDiscoveryMetadataRecommendedPrimitive, + AssistantMcpDiscoveryMetadataRouteFamily, + AssistantMcpDiscoveryMetadataSurfaceRef +} from "./assistantMcpDiscoveryPlanner"; export const ASSISTANT_MCP_DISCOVERY_TURN_INPUT_SCHEMA_VERSION = "assistant_mcp_discovery_turn_input_v1" as const; @@ -30,6 +35,7 @@ export interface AssistantMcpDiscoveryTurnInputContract { should_run_discovery: boolean; semantic_data_need: string | null; data_need_graph: AssistantMcpDiscoveryDataNeedGraphContract | null; + metadata_surface_ref: AssistantMcpDiscoveryMetadataSurfaceRef | null; turn_meaning_ref: AssistantMcpDiscoveryTurnMeaningRef | null; source_signal: AssistantMcpDiscoveryTurnInputSource; reason_codes: string[]; @@ -203,6 +209,31 @@ function collectDateScopeFromFilters(filters: Record | null): s return periodFrom ?? periodTo ?? null; } +function normalizeMetadataRouteFamily(value: unknown): AssistantMcpDiscoveryMetadataRouteFamily | null { + const text = toNonEmptyString(value); + if (text === "document_evidence" || text === "movement_evidence" || text === "catalog_drilldown") { + return text; + } + return null; +} + +function normalizeMetadataRecommendedPrimitive( + value: unknown +): AssistantMcpDiscoveryMetadataRecommendedPrimitive | null { + const text = toNonEmptyString(value); + if (text === "query_documents" || text === "query_movements" || text === "drilldown_related_objects") { + return text; + } + return null; +} + +function normalizeMetadataRouteFamilySelectionBasis( + value: unknown +): AssistantMcpDiscoveryMetadataSurfaceRef["route_family_selection_basis"] { + const text = toNonEmptyString(value); + return text === "selected_entity_set" || text === "dominant_surface_objects" ? text : null; +} + function mapPilotScopeToFollowupMeaning( pilotScope: string | null ): { @@ -320,8 +351,11 @@ function collectFollowupDiscoverySeed(followupContext: Record | rankingNeed: string | null; organization: string | null; dateScope: string | null; - metadataRouteFamily: string | null; + metadataRouteFamily: AssistantMcpDiscoveryMetadataRouteFamily | null; + metadataRouteFamilySelectionBasis: AssistantMcpDiscoveryMetadataSurfaceRef["route_family_selection_basis"]; metadataSelectedEntitySet: string | null; + metadataSelectedSurfaceObjects: string[]; + metadataRecommendedNextPrimitive: AssistantMcpDiscoveryMetadataRecommendedPrimitive | null; metadataAmbiguityDetected: boolean; metadataAmbiguityEntitySets: string[]; } { @@ -369,13 +403,52 @@ function collectFollowupDiscoverySeed(followupContext: Record | rankingNeed: toNonEmptyString(followupContext?.previous_discovery_ranking_need), organization, dateScope, - metadataRouteFamily: toNonEmptyString(followupContext?.previous_discovery_metadata_route_family), + metadataRouteFamily: normalizeMetadataRouteFamily(followupContext?.previous_discovery_metadata_route_family), + metadataRouteFamilySelectionBasis: normalizeMetadataRouteFamilySelectionBasis( + followupContext?.previous_discovery_metadata_route_family_selection_basis + ), metadataSelectedEntitySet: toNonEmptyString(followupContext?.previous_discovery_metadata_selected_entity_set), + metadataSelectedSurfaceObjects: collectEntityCandidates( + followupContext?.previous_discovery_metadata_selected_surface_objects + ), + metadataRecommendedNextPrimitive: normalizeMetadataRecommendedPrimitive( + followupContext?.previous_discovery_metadata_recommended_next_primitive + ), metadataAmbiguityDetected: followupContext?.previous_discovery_metadata_ambiguity_detected === true, metadataAmbiguityEntitySets: collectEntityCandidates(followupContext?.previous_discovery_metadata_ambiguity_entity_sets) }; } +function buildMetadataSurfaceRef( + followupSeed: ReturnType +): AssistantMcpDiscoveryMetadataSurfaceRef | null { + if (followupSeed.pilotScope !== "metadata_inspection_v1") { + return null; + } + const hasPayload = + Boolean( + followupSeed.metadataRouteFamily || + followupSeed.metadataSelectedEntitySet || + followupSeed.metadataRecommendedNextPrimitive || + followupSeed.metadataRouteFamilySelectionBasis + ) || + followupSeed.metadataSelectedSurfaceObjects.length > 0 || + followupSeed.metadataAmbiguityDetected || + followupSeed.metadataAmbiguityEntitySets.length > 0; + if (!hasPayload) { + return null; + } + return { + selected_entity_set: followupSeed.metadataSelectedEntitySet, + selected_surface_objects: followupSeed.metadataSelectedSurfaceObjects, + downstream_route_family: followupSeed.metadataRouteFamily, + route_family_selection_basis: followupSeed.metadataRouteFamilySelectionBasis, + recommended_next_primitive: followupSeed.metadataRecommendedNextPrimitive, + ambiguity_detected: followupSeed.metadataAmbiguityDetected, + ambiguity_entity_sets: followupSeed.metadataAmbiguityEntitySets + }; +} + function metadataEntitySetsSuggestDocumentLane(values: string[]): boolean { return values.some((value) => /(?:документ|document|invoice|waybill|накладн|счет[- ]?фактур|акт)/iu.test(value)); } @@ -401,7 +474,7 @@ function hasLifecycleSignal(text: string): boolean { } function hasValueFlowSignal(text: string): boolean { - return /(?:оборот|выручк|оплат|плат[её]ж|заплат|перечисл|списан|расход|исходящ|входящ|получ(?:ил|ено|ен)|поступил|поступлен|денежн[а-яёa-z0-9_-]*\s+поток|supplier|value[-\s]?flow|turnover|revenue|payment|payout|outflow|cash\s+flow)/iu.test( + return /(?:оборот|выручк|оплат|плат[её]ж|заплат|перечисл|списан|расход|исходящ|входящ|получ(?:ил|ено|ен)|поступил|поступлен|денежн[а-яёa-z0-9_-]*\s+поток|(? 0) { pushReason(reasonCodes, "mcp_discovery_entity_scope_available"); } @@ -1463,6 +1563,7 @@ export function buildAssistantMcpDiscoveryTurnInput( should_run_discovery: runDiscovery, semantic_data_need: runDiscovery ? semanticDataNeed : null, data_need_graph: dataNeedGraph, + metadata_surface_ref: runDiscovery ? metadataSurfaceRef : null, turn_meaning_ref: runDiscovery && hasTurnMeaning ? cleanTurnMeaning : null, source_signal: sourceSignal, reason_codes: reasonCodes diff --git a/llm_normalizer/backend/src/services/assistantTransitionPolicy.ts b/llm_normalizer/backend/src/services/assistantTransitionPolicy.ts index b157852..e370198 100644 --- a/llm_normalizer/backend/src/services/assistantTransitionPolicy.ts +++ b/llm_normalizer/backend/src/services/assistantTransitionPolicy.ts @@ -16,7 +16,10 @@ import { readAssistantMcpDiscoveryEntityAmbiguityCandidates, readAssistantMcpDiscoveryEntityResolutionStatus, readAssistantMcpDiscoveryMetadataRouteFamily, + readAssistantMcpDiscoveryMetadataRouteFamilySelectionBasis, readAssistantMcpDiscoveryMetadataSelectedEntitySet, + readAssistantMcpDiscoveryMetadataSelectedSurfaceObjects, + readAssistantMcpDiscoveryMetadataRecommendedNextPrimitive, readAssistantMcpDiscoveryRankingNeed, readAddressDebugTemporalScope, readAssistantMcpDiscoveryPilotScope, @@ -665,10 +668,18 @@ export function createAssistantTransitionPolicy(deps) { carryoverSourceDebug, deps.toNonEmptyString ); + const sourceDiscoveryMetadataRouteFamilySelectionBasis = + readAssistantMcpDiscoveryMetadataRouteFamilySelectionBasis(carryoverSourceDebug, deps.toNonEmptyString); const sourceDiscoveryMetadataSelectedEntitySet = readAssistantMcpDiscoveryMetadataSelectedEntitySet( carryoverSourceDebug, deps.toNonEmptyString ); + const sourceDiscoveryMetadataSelectedSurfaceObjects = readAssistantMcpDiscoveryMetadataSelectedSurfaceObjects( + carryoverSourceDebug, + deps.toNonEmptyString + ); + const sourceDiscoveryMetadataRecommendedNextPrimitive = + readAssistantMcpDiscoveryMetadataRecommendedNextPrimitive(carryoverSourceDebug, deps.toNonEmptyString); const sourceDiscoveryMetadataAmbiguityDetected = readAssistantMcpDiscoveryMetadataAmbiguityDetected( carryoverSourceDebug ); @@ -1037,7 +1048,15 @@ export function createAssistantTransitionPolicy(deps) { ? sourceDiscoveryEntityAmbiguityCandidates : undefined, previous_discovery_metadata_route_family: sourceDiscoveryMetadataRouteFamily ?? undefined, + previous_discovery_metadata_route_family_selection_basis: + sourceDiscoveryMetadataRouteFamilySelectionBasis ?? undefined, previous_discovery_metadata_selected_entity_set: sourceDiscoveryMetadataSelectedEntitySet ?? undefined, + previous_discovery_metadata_selected_surface_objects: + sourceDiscoveryMetadataSelectedSurfaceObjects.length > 0 + ? sourceDiscoveryMetadataSelectedSurfaceObjects + : undefined, + previous_discovery_metadata_recommended_next_primitive: + sourceDiscoveryMetadataRecommendedNextPrimitive ?? undefined, previous_discovery_metadata_ambiguity_detected: sourceDiscoveryMetadataAmbiguityDetected || undefined, previous_discovery_metadata_ambiguity_entity_sets: sourceDiscoveryMetadataAmbiguityEntitySets.length > 0 ? sourceDiscoveryMetadataAmbiguityEntitySets : undefined, diff --git a/llm_normalizer/backend/tests/assistantMcpCatalogIndex.test.ts b/llm_normalizer/backend/tests/assistantMcpCatalogIndex.test.ts index fad98e1..adfec43 100644 --- a/llm_normalizer/backend/tests/assistantMcpCatalogIndex.test.ts +++ b/llm_normalizer/backend/tests/assistantMcpCatalogIndex.test.ts @@ -4,7 +4,9 @@ import { buildAssistantMcpCatalogIndex, getAssistantMcpCatalogPrimitive, reviewAssistantMcpDiscoveryPlanAgainstCatalog, - searchAssistantMcpCatalogPrimitivesByDecompositionCandidates + searchAssistantMcpCatalogPrimitivesByDecompositionCandidates, + searchAssistantMcpCatalogPrimitivesByFactAxis, + searchAssistantMcpCatalogPrimitivesByMetadataSurface } from "../src/services/assistantMcpCatalogIndex"; describe("assistant MCP catalog index", () => { @@ -18,6 +20,9 @@ describe("assistant MCP catalog index", () => { expect(entry.safe_for_model_planning).toBe(true); expect(entry.runtime_must_execute).toBe(true); expect(entry.decomposition_hints.length).toBeGreaterThan(0); + expect(Array.isArray(entry.supported_fact_families)).toBe(true); + expect(Array.isArray(entry.supported_action_families)).toBe(true); + expect(Array.isArray(entry.planning_tags)).toBe(true); expect(entry.required_axes_any_of.length).toBeGreaterThan(0); expect(entry.output_fact_kinds.length).toBeGreaterThan(0); } @@ -55,6 +60,41 @@ describe("assistant MCP catalog index", () => { expect(primitives).toEqual(["query_movements", "probe_coverage"]); }); + it("can search reviewed primitives directly from fact family and required axes", () => { + const primitives = searchAssistantMcpCatalogPrimitivesByFactAxis({ + business_fact_family: "document_evidence", + action_family: "list_documents", + has_subject_candidates: true, + required_axes: ["counterparty", "period", "coverage_target"] + }); + + expect(primitives).toEqual(["resolve_entity_reference", "query_documents", "probe_coverage"]); + }); + + it("can search reviewed primitives directly from a confirmed document metadata surface", () => { + const primitives = searchAssistantMcpCatalogPrimitivesByMetadataSurface({ + downstream_route_family: "document_evidence", + selected_entity_set: "Document", + selected_surface_objects: ["Document.InvoiceIssued"], + recommended_next_primitive: "query_documents", + required_axes: ["counterparty", "period", "coverage_target"] + }); + + expect(primitives).toEqual(["query_documents", "resolve_entity_reference", "probe_coverage"]); + }); + + it("can search reviewed primitives directly from a confirmed movement metadata surface", () => { + const primitives = searchAssistantMcpCatalogPrimitivesByMetadataSurface({ + downstream_route_family: "movement_evidence", + selected_entity_set: "AccumulationRegister", + selected_surface_objects: ["Register.BankOperations"], + recommended_next_primitive: "query_movements", + required_axes: ["counterparty", "period", "coverage_target"] + }); + + expect(primitives).toEqual(["query_movements", "resolve_entity_reference", "probe_coverage"]); + }); + it("marks a counterparty turnover discovery plan as catalog-compatible when required axes exist", () => { const plan = buildAssistantMcpDiscoveryPlan({ semanticDataNeed: "counterparty turnover evidence", diff --git a/llm_normalizer/backend/tests/assistantMcpDiscoveryAnswerAdapter.test.ts b/llm_normalizer/backend/tests/assistantMcpDiscoveryAnswerAdapter.test.ts index 1f22844..bcece6b 100644 --- a/llm_normalizer/backend/tests/assistantMcpDiscoveryAnswerAdapter.test.ts +++ b/llm_normalizer/backend/tests/assistantMcpDiscoveryAnswerAdapter.test.ts @@ -311,6 +311,42 @@ describe("assistant MCP discovery answer adapter", () => { expect(draft.must_not_claim).toContain("Do not claim rows were checked when mcp_execution_performed=false."); }); + it("keeps metadata lane-choice clarification human-facing when planner selects it from data-need graph", async () => { + const planner = planAssistantMcpDiscovery({ + dataNeedGraph: { + schema_version: "assistant_data_need_graph_v1", + policy_owner: "assistantMcpDiscoveryDataNeedGraph", + subject_candidates: ["НДС"], + business_fact_family: "metadata_surface", + action_family: "resolve_next_lane", + aggregation_need: null, + time_scope_need: null, + comparison_need: null, + ranking_need: null, + proof_expectation: "supporting_evidence", + clarification_gaps: ["lane_family_choice"], + decomposition_candidates: [], + forbidden_overclaim_flags: ["no_raw_model_claims"], + reason_codes: ["data_need_graph_built", "data_need_graph_requires_lane_family_choice"] + }, + turnMeaning: { + asked_domain_family: "metadata", + asked_action_family: "resolve_next_lane", + explicit_entity_candidates: ["НДС"], + unsupported_but_understood_family: "metadata_lane_choice_clarification" + } + }); + const pilot = await executeAssistantMcpDiscoveryPilot(planner, buildDeps([])); + + const draft = buildAssistantMcpDiscoveryAnswerDraft(pilot); + + expect(draft.answer_mode).toBe("needs_clarification"); + expect(draft.headline).toContain("data-lane"); + expect(draft.next_step_line).toContain("по документам"); + expect(draft.next_step_line).toContain("по движениям/регистрам"); + expect(draft.next_step_line).not.toContain("Уточните контрагента"); + }); + it("keeps movement clarification anchored to the chosen lane after metadata ambiguity was resolved", async () => { const planner = planAssistantMcpDiscovery({ turnMeaning: { diff --git a/llm_normalizer/backend/tests/assistantMcpDiscoveryDataNeedGraph.test.ts b/llm_normalizer/backend/tests/assistantMcpDiscoveryDataNeedGraph.test.ts index 1ed8fd1..b94835a 100644 --- a/llm_normalizer/backend/tests/assistantMcpDiscoveryDataNeedGraph.test.ts +++ b/llm_normalizer/backend/tests/assistantMcpDiscoveryDataNeedGraph.test.ts @@ -181,4 +181,29 @@ describe("assistant MCP discovery data need graph", () => { expect(result.reason_codes).toContain("data_need_graph_open_scope_total_without_subject"); expect(result.reason_codes).toContain("data_need_graph_open_scope_total_needs_organization"); }); + + it("treats all-time open-scope totals as an open-ended period rather than a missing period", () => { + const result = buildAssistantMcpDiscoveryDataNeedGraph({ + semanticDataNeed: "counterparty value-flow evidence", + rawUtterance: "сколько вообще денег мы заработали за все время?", + turnMeaning: { + asked_domain_family: "counterparty_value", + asked_action_family: "turnover", + explicit_organization_scope: "ООО Альтернатива Плюс" + } + }); + + expect(result.business_fact_family).toBe("value_flow"); + expect(result.subject_candidates).toEqual([]); + expect(result.time_scope_need).toBe("all_time_scope"); + expect(result.clarification_gaps).toEqual([]); + expect(result.proof_expectation).toBe("coverage_checked_fact"); + expect(result.decomposition_candidates).toEqual([ + "collect_scoped_movements", + "aggregate_checked_amounts", + "probe_coverage" + ]); + expect(result.reason_codes).toContain("data_need_graph_open_scope_total_without_subject"); + expect(result.reason_codes).toContain("data_need_graph_all_time_scope_hint"); + }); }); diff --git a/llm_normalizer/backend/tests/assistantMcpDiscoveryPilotExecutor.test.ts b/llm_normalizer/backend/tests/assistantMcpDiscoveryPilotExecutor.test.ts index 2a8f437..13d3dcd 100644 --- a/llm_normalizer/backend/tests/assistantMcpDiscoveryPilotExecutor.test.ts +++ b/llm_normalizer/backend/tests/assistantMcpDiscoveryPilotExecutor.test.ts @@ -378,6 +378,105 @@ describe("assistant MCP discovery pilot executor", () => { ); }); + it("selects a downstream lane from dominant metadata surface objects when one family clearly prevails", async () => { + const planner = planAssistantMcpDiscovery({ + turnMeaning: { + asked_domain_family: "metadata", + asked_action_family: "inspect_surface", + explicit_entity_candidates: ["НДС"] + } + }); + const deps = buildMetadataDeps([ + { + FullName: "Документ.СчетФактураВыданный", + MetaType: "Документ", + attributes: [{ Name: "Дата" }] + }, + { + FullName: "Документ.СчетФактураПолученный", + MetaType: "Документ", + attributes: [{ Name: "Контрагент" }] + }, + { + FullName: "РегистрНакопления.НДСПокупок", + MetaType: "РегистрНакопления", + resources: [{ Name: "СуммаНДС" }] + } + ]); + + const result = await executeAssistantMcpDiscoveryPilot(planner, deps); + + expect(result.pilot_status).toBe("executed"); + expect(result.derived_metadata_surface).toMatchObject({ + metadata_scope: "НДС", + available_entity_sets: ["Документ", "РегистрНакопления"], + selected_entity_set: null, + selected_surface_objects: [ + "Документ.СчетФактураВыданный", + "Документ.СчетФактураПолученный" + ], + surface_family_scores: { + document_evidence: 2, + movement_evidence: 1, + catalog_drilldown: 0 + }, + downstream_route_family: "document_evidence", + route_family_selection_basis: "dominant_surface_objects", + recommended_next_primitive: "query_documents", + ambiguity_detected: false, + ambiguity_entity_sets: [] + }); + expect(result.reason_codes).toContain("pilot_selected_metadata_route_family_from_dominant_surface_objects"); + expect(result.evidence.inferred_facts).toContain( + "A likely next checked lane may be inferred as document_evidence from the confirmed metadata surface" + ); + expect(result.evidence.unknown_facts).not.toContain( + "Exact downstream metadata surface remains ambiguous across: Документ, РегистрНакопления" + ); + }); + + it("keeps metadata ambiguity unresolved when surface-family scores are nearly tied", async () => { + const planner = planAssistantMcpDiscovery({ + turnMeaning: { + asked_domain_family: "metadata", + asked_action_family: "inspect_surface", + explicit_entity_candidates: ["НДС"] + } + }); + const deps = buildMetadataDeps([ + { FullName: "Документ.A", MetaType: "Документ" }, + { FullName: "Документ.B", MetaType: "Документ" }, + { FullName: "Документ.C", MetaType: "Документ" }, + { FullName: "Документ.D", MetaType: "Документ" }, + { FullName: "РегистрНакопления.A", MetaType: "РегистрНакопления" }, + { FullName: "РегистрНакопления.B", MetaType: "РегистрНакопления" }, + { FullName: "РегистрНакопления.C", MetaType: "РегистрНакопления" } + ]); + + const result = await executeAssistantMcpDiscoveryPilot(planner, deps); + + expect(result.pilot_status).toBe("executed"); + expect(result.derived_metadata_surface).toMatchObject({ + metadata_scope: "НДС", + selected_entity_set: null, + surface_family_scores: { + document_evidence: 4, + movement_evidence: 3, + catalog_drilldown: 0 + }, + downstream_route_family: null, + route_family_selection_basis: null, + recommended_next_primitive: null, + ambiguity_detected: true + }); + expect(result.derived_metadata_surface?.ambiguity_entity_sets).toContain("Документ"); + expect(result.derived_metadata_surface?.ambiguity_entity_sets).toContain("РегистрНакопления"); + expect(result.reason_codes).not.toContain("pilot_selected_metadata_route_family_from_dominant_surface_objects"); + expect(result.evidence.unknown_facts).toContain( + "Exact downstream metadata surface remains ambiguous across: Документ, РегистрНакопления" + ); + }); + it("infers metadata entity-set families from object names when meta type columns are absent", async () => { const planner = planAssistantMcpDiscovery({ turnMeaning: { diff --git a/llm_normalizer/backend/tests/assistantMcpDiscoveryPlanner.test.ts b/llm_normalizer/backend/tests/assistantMcpDiscoveryPlanner.test.ts index 60087b0..e9b7c37 100644 --- a/llm_normalizer/backend/tests/assistantMcpDiscoveryPlanner.test.ts +++ b/llm_normalizer/backend/tests/assistantMcpDiscoveryPlanner.test.ts @@ -113,6 +113,110 @@ describe("assistant MCP discovery planner", () => { expect(result.required_axes).toEqual(["counterparty", "coverage_target"]); }); + it("expands a document evidence chain from catalog fact-axis search when decomposition hints are absent", () => { + const result = planAssistantMcpDiscovery({ + dataNeedGraph: { + schema_version: "assistant_data_need_graph_v1", + policy_owner: "assistantMcpDiscoveryDataNeedGraph", + subject_candidates: ["SVK"], + business_fact_family: "document_evidence", + action_family: "list_documents", + aggregation_need: null, + time_scope_need: "explicit_period", + comparison_need: null, + ranking_need: null, + proof_expectation: "coverage_checked_fact", + clarification_gaps: [], + decomposition_candidates: [], + forbidden_overclaim_flags: ["no_raw_model_claims", "no_unchecked_fact_totals"], + reason_codes: ["data_need_graph_built"] + }, + turnMeaning: { + asked_domain_family: "documents", + asked_action_family: "list_documents", + explicit_entity_candidates: ["SVK"], + explicit_date_scope: "2020", + unsupported_but_understood_family: "document_evidence" + } + }); + + expect(result.planner_status).toBe("ready_for_execution"); + expect(result.selected_chain_id).toBe("document_evidence"); + expect(result.proposed_primitives).toEqual(["resolve_entity_reference", "query_documents", "probe_coverage"]); + expect(result.reason_codes).toContain("planner_selected_catalog_primitives_from_fact_axis_search"); + expect(result.reason_codes).not.toContain("planner_fell_back_to_recipe_primitives_after_empty_catalog_search"); + }); + + it("filters conflicting document-vs-movement primitives when confirmed metadata surface recommends documents", () => { + const result = planAssistantMcpDiscovery({ + dataNeedGraph: { + schema_version: "assistant_data_need_graph_v1", + policy_owner: "assistantMcpDiscoveryDataNeedGraph", + subject_candidates: ["SVK"], + business_fact_family: "document_evidence", + action_family: "list_documents", + aggregation_need: null, + time_scope_need: "explicit_period", + comparison_need: null, + ranking_need: null, + proof_expectation: "coverage_checked_fact", + clarification_gaps: [], + decomposition_candidates: ["fetch_scoped_documents", "fetch_scoped_movements", "probe_coverage"], + forbidden_overclaim_flags: ["no_raw_model_claims", "no_unchecked_fact_totals"], + reason_codes: ["data_need_graph_built"] + }, + metadataSurface: { + selected_entity_set: "Документ", + selected_surface_objects: ["Документ.СчетФактураВыданный"], + downstream_route_family: "document_evidence", + route_family_selection_basis: "selected_entity_set", + recommended_next_primitive: "query_documents", + ambiguity_detected: false, + ambiguity_entity_sets: [] + }, + turnMeaning: { + asked_domain_family: "documents", + asked_action_family: "list_documents", + explicit_entity_candidates: ["SVK"], + explicit_date_scope: "2020", + unsupported_but_understood_family: "document_evidence" + } + }); + + expect(result.planner_status).toBe("ready_for_execution"); + expect(result.metadata_surface_ref?.recommended_next_primitive).toBe("query_documents"); + expect(result.proposed_primitives).toEqual(["resolve_entity_reference", "query_documents", "probe_coverage"]); + expect(result.proposed_primitives).not.toContain("query_movements"); + expect(result.reason_codes).toContain("planner_consumed_metadata_surface_ref_v1"); + expect(result.reason_codes).toContain("planner_filtered_catalog_primitives_by_confirmed_metadata_surface"); + expect(result.reason_codes).toContain("planner_surface_aware_next_lane_from_confirmed_metadata_objects"); + }); + + it("can select document evidence directly from a confirmed metadata surface when the follow-up itself is thin", () => { + const result = planAssistantMcpDiscovery({ + metadataSurface: { + selected_entity_set: "Document", + selected_surface_objects: ["Document.InvoiceIssued"], + downstream_route_family: "document_evidence", + route_family_selection_basis: "selected_entity_set", + recommended_next_primitive: "query_documents", + ambiguity_detected: false, + ambiguity_entity_sets: [] + }, + turnMeaning: { + explicit_entity_candidates: ["SVK"], + explicit_date_scope: "2020" + } + }); + + expect(result.planner_status).toBe("ready_for_execution"); + expect(result.selected_chain_id).toBe("document_evidence"); + expect(result.proposed_primitives).toEqual(["resolve_entity_reference", "query_documents", "probe_coverage"]); + expect(result.required_axes).toEqual(["counterparty", "period", "coverage_target"]); + expect(result.reason_codes).toContain("planner_selected_document_from_confirmed_metadata_surface_ref"); + expect(result.reason_codes).toContain("planner_selected_catalog_primitives_from_metadata_surface_search"); + }); + it("builds a movement discovery plan without aggregating value-flow totals", () => { const result = planAssistantMcpDiscovery({ dataNeedGraph: { @@ -151,6 +255,141 @@ describe("assistant MCP discovery planner", () => { expect(result.reason_codes).toContain("planner_selected_catalog_primitives_from_decomposition_candidates"); }); + it("filters conflicting document-vs-movement primitives when confirmed metadata surface recommends movements", () => { + const result = planAssistantMcpDiscovery({ + dataNeedGraph: { + schema_version: "assistant_data_need_graph_v1", + policy_owner: "assistantMcpDiscoveryDataNeedGraph", + subject_candidates: ["SVK"], + business_fact_family: "movement_evidence", + action_family: "list_movements", + aggregation_need: null, + time_scope_need: "explicit_period", + comparison_need: null, + ranking_need: null, + proof_expectation: "coverage_checked_fact", + clarification_gaps: [], + decomposition_candidates: ["fetch_scoped_documents", "fetch_scoped_movements", "probe_coverage"], + forbidden_overclaim_flags: ["no_raw_model_claims", "no_unchecked_fact_totals"], + reason_codes: ["data_need_graph_built"] + }, + metadataSurface: { + selected_entity_set: "РегистрНакопления", + selected_surface_objects: ["РегистрНакопления.ДвиженияДенежныхСредств"], + downstream_route_family: "movement_evidence", + route_family_selection_basis: "dominant_surface_objects", + recommended_next_primitive: "query_movements", + ambiguity_detected: false, + ambiguity_entity_sets: [] + }, + turnMeaning: { + asked_domain_family: "movements", + asked_action_family: "list_movements", + explicit_entity_candidates: ["SVK"], + explicit_date_scope: "2020", + unsupported_but_understood_family: "movement_evidence" + } + }); + + expect(result.planner_status).toBe("ready_for_execution"); + expect(result.metadata_surface_ref?.recommended_next_primitive).toBe("query_movements"); + expect(result.proposed_primitives).toEqual(["resolve_entity_reference", "query_movements", "probe_coverage"]); + expect(result.proposed_primitives).not.toContain("query_documents"); + expect(result.reason_codes).toContain("planner_filtered_catalog_primitives_by_confirmed_metadata_surface"); + }); + + it("can select movement evidence directly from a confirmed metadata surface when the follow-up itself is thin", () => { + const result = planAssistantMcpDiscovery({ + metadataSurface: { + selected_entity_set: "AccumulationRegister", + selected_surface_objects: ["Register.BankOperations"], + downstream_route_family: "movement_evidence", + route_family_selection_basis: "dominant_surface_objects", + recommended_next_primitive: "query_movements", + ambiguity_detected: false, + ambiguity_entity_sets: [] + }, + turnMeaning: { + explicit_entity_candidates: ["SVK"], + explicit_date_scope: "2020" + } + }); + + expect(result.planner_status).toBe("ready_for_execution"); + expect(result.selected_chain_id).toBe("movement_evidence"); + expect(result.proposed_primitives).toEqual(["resolve_entity_reference", "query_movements", "probe_coverage"]); + expect(result.required_axes).toEqual(["counterparty", "period", "coverage_target"]); + expect(result.reason_codes).toContain("planner_selected_movement_from_confirmed_metadata_surface_ref"); + expect(result.reason_codes).toContain("planner_selected_catalog_primitives_from_metadata_surface_search"); + }); + + it("does not force a lane from ambiguous metadata surface even when decomposition hints mention both documents and movements", () => { + const result = planAssistantMcpDiscovery({ + dataNeedGraph: { + schema_version: "assistant_data_need_graph_v1", + policy_owner: "assistantMcpDiscoveryDataNeedGraph", + subject_candidates: ["SVK"], + business_fact_family: "document_evidence", + action_family: "list_documents", + aggregation_need: null, + time_scope_need: "explicit_period", + comparison_need: null, + ranking_need: null, + proof_expectation: "coverage_checked_fact", + clarification_gaps: [], + decomposition_candidates: ["fetch_scoped_documents", "fetch_scoped_movements", "probe_coverage"], + forbidden_overclaim_flags: ["no_raw_model_claims", "no_unchecked_fact_totals"], + reason_codes: ["data_need_graph_built"] + }, + metadataSurface: { + selected_entity_set: null, + selected_surface_objects: ["Документ.СчетФактураВыданный", "РегистрНакопления.ДвиженияДенежныхСредств"], + downstream_route_family: null, + route_family_selection_basis: null, + recommended_next_primitive: null, + ambiguity_detected: true, + ambiguity_entity_sets: ["Документ", "РегистрНакопления"] + }, + turnMeaning: { + asked_domain_family: "documents", + asked_action_family: "list_documents", + explicit_entity_candidates: ["SVK"], + explicit_date_scope: "2020", + unsupported_but_understood_family: "document_evidence" + } + }); + + expect(result.planner_status).toBe("ready_for_execution"); + expect(result.reason_codes).toContain("planner_consumed_metadata_surface_ref_v1"); + expect(result.reason_codes).not.toContain("planner_filtered_catalog_primitives_by_confirmed_metadata_surface"); + expect(result.proposed_primitives).toContain("query_documents"); + expect(result.proposed_primitives).toContain("query_movements"); + }); + + it("does not force a thin follow-up into a lane when the carried metadata surface is still ambiguous", () => { + const result = planAssistantMcpDiscovery({ + metadataSurface: { + selected_entity_set: null, + selected_surface_objects: ["Document.InvoiceIssued", "Register.BankOperations"], + downstream_route_family: null, + route_family_selection_basis: null, + recommended_next_primitive: null, + ambiguity_detected: true, + ambiguity_entity_sets: ["Document", "AccumulationRegister"] + }, + turnMeaning: { + explicit_entity_candidates: ["SVK"], + explicit_date_scope: "2020" + } + }); + + expect(result.planner_status).toBe("ready_for_execution"); + expect(result.selected_chain_id).toBe("entity_resolution"); + expect(result.reason_codes).toContain("planner_selected_entity_resolution_recipe"); + expect(result.reason_codes).not.toContain("planner_selected_document_from_confirmed_metadata_surface_ref"); + expect(result.reason_codes).not.toContain("planner_selected_movement_from_confirmed_metadata_surface_ref"); + }); + it("can select value-flow chain from data need graph even when turn meaning family is still under-specified", () => { const result = planAssistantMcpDiscovery({ dataNeedGraph: { diff --git a/llm_normalizer/backend/tests/assistantMcpDiscoveryRuntimeBridge.test.ts b/llm_normalizer/backend/tests/assistantMcpDiscoveryRuntimeBridge.test.ts index 27a223d..d4f1f42 100644 --- a/llm_normalizer/backend/tests/assistantMcpDiscoveryRuntimeBridge.test.ts +++ b/llm_normalizer/backend/tests/assistantMcpDiscoveryRuntimeBridge.test.ts @@ -253,6 +253,41 @@ describe("assistant MCP discovery runtime bridge", () => { expect(result.reason_codes).toContain("runtime_bridge_status_checked_sources_only"); }); + it("keeps document evidence executable when the planner expands primitives from fact-axis search", async () => { + const result = await runAssistantMcpDiscoveryRuntimeBridge({ + dataNeedGraph: { + schema_version: "assistant_data_need_graph_v1", + policy_owner: "assistantMcpDiscoveryDataNeedGraph", + subject_candidates: ["SVK"], + business_fact_family: "document_evidence", + action_family: "list_documents", + aggregation_need: null, + time_scope_need: "explicit_period", + comparison_need: null, + ranking_need: null, + proof_expectation: "coverage_checked_fact", + clarification_gaps: [], + decomposition_candidates: [], + forbidden_overclaim_flags: ["no_raw_model_claims", "no_unchecked_fact_totals"], + reason_codes: ["data_need_graph_built"] + }, + turnMeaning: { + asked_domain_family: "documents", + asked_action_family: "list_documents", + explicit_entity_candidates: ["SVK"], + explicit_date_scope: "2020", + unsupported_but_understood_family: "document_evidence" + }, + deps: buildDeps([{ Period: "2020-01-15T00:00:00", Registrator: "DOC-1", Counterparty: "SVK" }]) + }); + + expect(result.bridge_status).toBe("answer_draft_ready"); + expect(result.planner.selected_chain_id).toBe("document_evidence"); + expect(result.planner.proposed_primitives).toEqual(["resolve_entity_reference", "query_documents", "probe_coverage"]); + expect(result.planner.reason_codes).toContain("planner_selected_catalog_primitives_from_fact_axis_search"); + expect(result.business_fact_answer_allowed).toBe(true); + }); + it("preserves the answer adapter boundary against internal mechanics leakage", async () => { const result = await runAssistantMcpDiscoveryRuntimeBridge({ turnMeaning: { diff --git a/llm_normalizer/backend/tests/assistantMcpDiscoveryTurnInputAdapter.test.ts b/llm_normalizer/backend/tests/assistantMcpDiscoveryTurnInputAdapter.test.ts index 6e4a1d4..761b72a 100644 --- a/llm_normalizer/backend/tests/assistantMcpDiscoveryTurnInputAdapter.test.ts +++ b/llm_normalizer/backend/tests/assistantMcpDiscoveryTurnInputAdapter.test.ts @@ -801,7 +801,10 @@ describe("assistant MCP discovery turn input adapter", () => { followupContext: { previous_discovery_pilot_scope: "metadata_inspection_v1", previous_discovery_metadata_route_family: "document_evidence", + previous_discovery_metadata_route_family_selection_basis: "selected_entity_set", previous_discovery_metadata_selected_entity_set: "Документ", + previous_discovery_metadata_selected_surface_objects: ["Документ.СчетФактураВыданный"], + previous_discovery_metadata_recommended_next_primitive: "query_documents", previous_filters: { counterparty: "SVK", period_from: "2020-01-01", @@ -824,7 +827,18 @@ describe("assistant MCP discovery turn input adapter", () => { unsupported_but_understood_family: "document_evidence", stale_replay_forbidden: true }); + expect(result.metadata_surface_ref).toMatchObject({ + selected_entity_set: "Документ", + selected_surface_objects: ["Документ.СчетФактураВыданный"], + downstream_route_family: "document_evidence", + route_family_selection_basis: "selected_entity_set", + recommended_next_primitive: "query_documents", + ambiguity_detected: false, + ambiguity_entity_sets: [] + }); expect(result.reason_codes).toContain("mcp_discovery_metadata_grounded_document_followup"); + expect(result.reason_codes).toContain("mcp_discovery_metadata_surface_ref_from_followup_context"); + expect(result.reason_codes).toContain("mcp_discovery_metadata_next_primitive_from_followup_context"); expect(result.reason_codes).toContain("mcp_discovery_counterparty_from_followup_context"); }); @@ -1164,6 +1178,15 @@ describe("assistant MCP discovery turn input adapter", () => { unsupported_but_understood_family: "metadata_lane_choice_clarification", stale_replay_forbidden: true }); + expect(result.metadata_surface_ref).toMatchObject({ + selected_entity_set: null, + selected_surface_objects: [], + downstream_route_family: null, + route_family_selection_basis: null, + recommended_next_primitive: null, + ambiguity_detected: true, + ambiguity_entity_sets: ["Документ", "РегистрНакопления"] + }); expect(result.reason_codes).toContain("mcp_discovery_metadata_ambiguity_requires_lane_choice"); }); @@ -1477,6 +1500,37 @@ describe("assistant MCP discovery turn input adapter", () => { expect(result.data_need_graph?.clarification_gaps).toEqual([]); }); + it("does not keep a stale follow-up date when the user switches an open-scope total to all-time wording", () => { + const orgName = "ООО Альтернатива Плюс"; + const result = buildAssistantMcpDiscoveryTurnInput({ + userMessage: "сколько вообще денег мы заработали за все время?", + followupContext: { + previous_discovery_pilot_scope: "counterparty_value_flow_query_movements_v1", + previous_filters: { + organization: orgName, + as_of_date: "2026-04-23" + } + } + }); + + expect(result.adapter_status).toBe("ready"); + expect(result.should_run_discovery).toBe(true); + expect(result.semantic_data_need).toBe("counterparty value-flow evidence"); + expect(result.turn_meaning_ref).toMatchObject({ + asked_domain_family: "counterparty_value", + asked_action_family: "turnover", + explicit_organization_scope: orgName, + unsupported_but_understood_family: "counterparty_value_or_turnover", + stale_replay_forbidden: true + }); + expect(result.turn_meaning_ref?.explicit_date_scope).toBeUndefined(); + expect(result.reason_codes).toContain("mcp_discovery_all_time_scope_signal_detected"); + expect(result.reason_codes).not.toContain("mcp_discovery_date_scope_from_followup_context"); + expect(result.data_need_graph?.clarification_gaps).toEqual([]); + expect(result.data_need_graph?.time_scope_need).toBe("all_time_scope"); + expect(result.data_need_graph?.reason_codes).toContain("data_need_graph_all_time_scope_hint"); + }); + it("resumes an open-scope ranking from follow-up context when the user clarifies only the organization", () => { const orgName = "РћРћРћ Альтернатива Плюс"; const result = buildAssistantMcpDiscoveryTurnInput({ diff --git a/llm_normalizer/backend/tests/assistantTransitionPolicy.test.ts b/llm_normalizer/backend/tests/assistantTransitionPolicy.test.ts index c468fec..90b4c59 100644 --- a/llm_normalizer/backend/tests/assistantTransitionPolicy.test.ts +++ b/llm_normalizer/backend/tests/assistantTransitionPolicy.test.ts @@ -1365,7 +1365,10 @@ describe("assistantTransitionPolicy", () => { pilot_scope: "metadata_inspection_v1", derived_metadata_surface: { selected_entity_set: "Документ", + selected_surface_objects: ["Документ.СчетФактураВыданный"], downstream_route_family: "document_evidence", + route_family_selection_basis: "selected_entity_set", + recommended_next_primitive: "query_documents", ambiguity_detected: false } }, @@ -1384,7 +1387,12 @@ describe("assistantTransitionPolicy", () => { expect(carryover?.followupContext?.previous_discovery_pilot_scope).toBe("metadata_inspection_v1"); expect(carryover?.followupContext?.previous_discovery_metadata_route_family).toBe("document_evidence"); + expect(carryover?.followupContext?.previous_discovery_metadata_route_family_selection_basis).toBe("selected_entity_set"); expect(carryover?.followupContext?.previous_discovery_metadata_selected_entity_set).toBe("Документ"); + expect(carryover?.followupContext?.previous_discovery_metadata_selected_surface_objects).toEqual([ + "Документ.СчетФактураВыданный" + ]); + expect(carryover?.followupContext?.previous_discovery_metadata_recommended_next_primitive).toBe("query_documents"); expect(carryover?.followupContext?.previous_discovery_metadata_ambiguity_detected).toBeUndefined(); }); it("carries metadata ambiguity entity sets into follow-up context for downstream lane arbitration", () => {