From 6d9c1568c3daceaf7ce2c819c603d19158d685fb Mon Sep 17 00:00:00 2001 From: dctouch Date: Tue, 21 Apr 2026 22:33:46 +0300 Subject: [PATCH] =?UTF-8?q?ARCH:=20=D0=B7=D0=B0=D0=B7=D0=B5=D0=BC=D0=BB?= =?UTF-8?q?=D0=B8=D1=82=D1=8C=20metadata=20surface=20=D0=B2=20=D1=81=D0=BB?= =?UTF-8?q?=D0=B5=D0=B4=D1=83=D1=8E=D1=89=D0=B8=D0=B9=20MCP=20lane?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../assistantMcpDiscoveryAnswerAdapter.js | 51 ++++++- .../assistantMcpDiscoveryPilotExecutor.js | 108 +++++++++++++- .../assistantMcpDiscoveryResponseCandidate.js | 21 +++ .../assistantMcpDiscoveryAnswerAdapter.ts | 56 ++++++- .../assistantMcpDiscoveryPilotExecutor.ts | 140 +++++++++++++++++- .../assistantMcpDiscoveryResponseCandidate.ts | 24 +++ ...assistantMcpDiscoveryAnswerAdapter.test.ts | 40 ++++- ...assistantMcpDiscoveryPilotExecutor.test.ts | 45 ++++++ ...stantMcpDiscoveryResponseCandidate.test.ts | 15 +- 9 files changed, 491 insertions(+), 9 deletions(-) diff --git a/llm_normalizer/backend/dist/services/assistantMcpDiscoveryAnswerAdapter.js b/llm_normalizer/backend/dist/services/assistantMcpDiscoveryAnswerAdapter.js index 3d65e09..f779584 100644 --- a/llm_normalizer/backend/dist/services/assistantMcpDiscoveryAnswerAdapter.js +++ b/llm_normalizer/backend/dist/services/assistantMcpDiscoveryAnswerAdapter.js @@ -67,10 +67,28 @@ function isValueFlowPilot(pilot) { function isMetadataPilot(pilot) { return pilot.pilot_scope === "metadata_inspection_v1"; } +function metadataRouteFamilyLabelRu(routeFamily) { + if (routeFamily === "document_evidence") { + return "контур документов"; + } + if (routeFamily === "movement_evidence") { + return "контур движений/регистров"; + } + if (routeFamily === "catalog_drilldown") { + return "контур справочников и связанных объектов"; + } + return null; +} function headlineFor(mode, pilot) { const askedMonthlyBreakdown = pilot.derived_bidirectional_value_flow?.aggregation_axis === "month" || pilot.derived_value_flow?.aggregation_axis === "month"; if (pilot.derived_metadata_surface && mode === "confirmed_with_bounded_inference") { + if (pilot.derived_metadata_surface.ambiguity_detected) { + return "По метаданным 1С найдены конкурирующие schema-поверхности; перед следующим шагом нужно удержать неоднозначность явно."; + } + if (pilot.derived_metadata_surface.downstream_route_family) { + return "По метаданным 1С найдена схема и заземлена вероятная поверхность для следующего безопасного шага."; + } return "По метаданным 1С найдена доступная схема для дальнейшего безопасного поиска."; } if (askedMonthlyBreakdown && pilot.derived_bidirectional_value_flow && mode === "confirmed_with_bounded_inference") { @@ -109,6 +127,16 @@ function nextStepFor(mode, pilot) { if (mode === "needs_clarification") { return "Уточните контрагента, период или организацию, и я смогу выполнить проверку по 1С."; } + if (mode === "confirmed_with_bounded_inference" && pilot.derived_metadata_surface) { + const surface = pilot.derived_metadata_surface; + if (surface.ambiguity_detected && surface.ambiguity_entity_sets.length > 0) { + return `Следующим шагом лучше сузить surface до одного семейства: ${surface.ambiguity_entity_sets.join(", ")}.`; + } + const routeLabel = metadataRouteFamilyLabelRu(surface.downstream_route_family); + if (surface.selected_entity_set && routeLabel) { + return `Следующим шагом могу пойти в ${routeLabel} по surface «${surface.selected_entity_set}» и уже искать подтвержденные данные, а не только схему.`; + } + } if (mode === "checked_sources_only" && pilot.query_limitations.length > 0) { return "Можно повторить проверку после восстановления MCP-доступа или сузить вопрос до конкретного контрагента/периода."; } @@ -133,6 +161,7 @@ function buildMustNotClaim(pilot) { if (isMetadataPilot(pilot)) { claims.push("Do not present metadata surface as confirmed business data rows."); claims.push("Do not claim a document/register exists outside the checked metadata probe results."); + claims.push("Do not present the inferred next checked lane as already executed data retrieval."); } if (pilot.evidence.confirmed_facts.length === 0) { claims.push("Do not claim a confirmed business fact when confirmed_facts is empty."); @@ -194,10 +223,28 @@ function derivedMetadataConfirmedLine(pilot) { const objects = surface.matched_objects.length > 0 ? ` Найденные объекты: ${surface.matched_objects.slice(0, 8).join(", ")}.` : ""; + const selectedEntitySet = surface.selected_entity_set ? ` Выбранное family: ${surface.selected_entity_set}.` : ""; + const selectedObjects = surface.selected_surface_objects.length > 0 + ? ` Выбранные surface-объекты: ${surface.selected_surface_objects.slice(0, 6).join(", ")}.` + : ""; const fields = surface.available_fields.length > 0 ? ` Доступные поля/секции: ${surface.available_fields.slice(0, 12).join(", ")}.` : ""; - return `Подтвержденная metadata-поверхность 1С${scope}: ${surface.matched_rows} строк metadata-ответа.${entitySets}${objects}${fields}`.replace(/\s+/g, " ").trim(); + return `Подтвержденная metadata-поверхность 1С${scope}: ${surface.matched_rows} строк metadata-ответа.${entitySets}${objects}${selectedEntitySet}${selectedObjects}${fields}`.replace(/\s+/g, " ").trim(); +} +function derivedMetadataInferenceLine(pilot) { + const surface = pilot.derived_metadata_surface; + if (!surface) { + return null; + } + if (surface.ambiguity_detected && surface.ambiguity_entity_sets.length > 0) { + return `По подтвержденной metadata-поверхности видно несколько конкурирующих family: ${surface.ambiguity_entity_sets.join(", ")}. Следующий data-lane пока нельзя выбрать без явного сужения.`; + } + const routeLabel = metadataRouteFamilyLabelRu(surface.downstream_route_family); + if (!surface.selected_entity_set || !routeLabel) { + return null; + } + return `По подтвержденной metadata-поверхности следующий проверяемый шаг можно ограниченно оценить как ${routeLabel} через family «${surface.selected_entity_set}». Это еще не выполненный data-fetch, а только grounded выбор следующего контура.`; } function derivedValueFlowConfirmedLine(pilot) { const flow = pilot.derived_value_flow; @@ -285,7 +332,7 @@ function buildAssistantMcpDiscoveryAnswerDraft(pilot) { if (pilot.evidence.inferred_facts.length > 0) { pushReason(reasonCodes, "answer_contains_bounded_inference"); } - const derivedInferenceLine = derivedActivityInferenceLine(pilot); + const derivedInferenceLine = derivedActivityInferenceLine(pilot) ?? derivedMetadataInferenceLine(pilot); const inferenceLines = derivedInferenceLine ? [derivedInferenceLine] : pilot.evidence.inferred_facts; diff --git a/llm_normalizer/backend/dist/services/assistantMcpDiscoveryPilotExecutor.js b/llm_normalizer/backend/dist/services/assistantMcpDiscoveryPilotExecutor.js index 99beeb1..a7b233b 100644 --- a/llm_normalizer/backend/dist/services/assistantMcpDiscoveryPilotExecutor.js +++ b/llm_normalizer/backend/dist/services/assistantMcpDiscoveryPilotExecutor.js @@ -478,6 +478,80 @@ function metadataAvailableFields(rows) { } return result; } +function normalizeMetadataEntitySetToken(value) { + return String(value ?? "") + .toLowerCase() + .replace(/[\s_.-]+/g, ""); +} +function metadataMatchesRequestedType(entitySet, requestedMetaType) { + const entityToken = normalizeMetadataEntitySetToken(entitySet); + const requestedToken = normalizeMetadataEntitySetToken(requestedMetaType); + return entityToken.includes(requestedToken) || requestedToken.includes(entityToken); +} +function metadataRouteFamilyForEntitySet(entitySet) { + const token = normalizeMetadataEntitySetToken(entitySet); + if (token.includes("документ") || token.includes("document")) { + return "document_evidence"; + } + if (token.includes("регистрнакопления") || + token.includes("регистсведений") || + token.includes("регистрсведений") || + token.includes("accumulationregister") || + token.includes("informationregister")) { + return "movement_evidence"; + } + if (token.includes("справочник") || token.includes("catalog") || token.includes("directory")) { + return "catalog_drilldown"; + } + return null; +} +function metadataNextPrimitiveForRouteFamily(routeFamily) { + if (routeFamily === "document_evidence") { + return "query_documents"; + } + if (routeFamily === "movement_evidence") { + return "query_movements"; + } + if (routeFamily === "catalog_drilldown") { + return "drilldown_related_objects"; + } + return null; +} +function selectMetadataEntityGrounding(availableEntitySets, requestedMetaTypes) { + const requestedMatches = availableEntitySets.filter((entitySet) => requestedMetaTypes.some((requestedMetaType) => metadataMatchesRequestedType(entitySet, requestedMetaType))); + if (requestedMatches.length === 1) { + return { + selectedEntitySet: requestedMatches[0] ?? null, + ambiguityDetected: false, + ambiguityEntitySets: [] + }; + } + if (requestedMatches.length > 1) { + return { + selectedEntitySet: null, + ambiguityDetected: true, + ambiguityEntitySets: requestedMatches + }; + } + if (availableEntitySets.length === 1) { + return { + selectedEntitySet: availableEntitySets[0] ?? null, + ambiguityDetected: false, + ambiguityEntitySets: [] + }; + } + return { + selectedEntitySet: null, + ambiguityDetected: availableEntitySets.length > 1, + ambiguityEntitySets: availableEntitySets + }; +} +function metadataObjectsForEntitySet(entitySet, matchedObjects) { + if (!entitySet) { + return []; + } + return matchedObjects.filter((item) => item.startsWith(`${entitySet}.`) || item.includes(entitySet)); +} function deriveMetadataSurface(result, metadataScope, requestedMetaTypes) { if (!result || result.error || result.rows.length <= 0) { return null; @@ -494,14 +568,28 @@ function deriveMetadataSurface(result, metadataScope, requestedMetaTypes) { pushUnique(availableEntitySets, entitySet); } } + const grounding = selectMetadataEntityGrounding(availableEntitySets, requestedMetaTypes); + const downstreamRouteFamily = grounding.selectedEntitySet + ? metadataRouteFamilyForEntitySet(grounding.selectedEntitySet) + : null; + const knownLimitations = []; + if (grounding.ambiguityDetected && grounding.ambiguityEntitySets.length > 0) { + knownLimitations.push(`Exact downstream metadata surface remains ambiguous across: ${grounding.ambiguityEntitySets.join(", ")}`); + } return { metadata_scope: metadataScope, requested_meta_types: requestedMetaTypes, matched_rows: result.rows.length, available_entity_sets: availableEntitySets, matched_objects: matchedObjects, + selected_entity_set: grounding.selectedEntitySet, + selected_surface_objects: metadataObjectsForEntitySet(grounding.selectedEntitySet, matchedObjects), + downstream_route_family: downstreamRouteFamily, + recommended_next_primitive: metadataNextPrimitiveForRouteFamily(downstreamRouteFamily), + ambiguity_detected: grounding.ambiguityDetected, + ambiguity_entity_sets: grounding.ambiguityEntitySets, available_fields: metadataAvailableFields(result.rows), - known_limitations: [], + known_limitations: knownLimitations, inference_basis: "confirmed_1c_metadata_surface_rows" }; } @@ -515,13 +603,30 @@ function buildMetadataConfirmedFacts(surface) { if (surface.available_entity_sets.length > 0) { facts.push(`Available metadata object sets: ${surface.available_entity_sets.join(", ")}`); } + if (surface.selected_entity_set) { + facts.push(`Selected metadata entity set: ${surface.selected_entity_set}`); + } + if (surface.selected_surface_objects.length > 0) { + facts.push(`Selected metadata objects: ${surface.selected_surface_objects.slice(0, 8).join(", ")}`); + } if (surface.available_fields.length > 0) { facts.push(`Available metadata fields/sections: ${surface.available_fields.slice(0, 12).join(", ")}`); } return facts; } +function buildMetadataInferredFacts(surface) { + if (!surface || !surface.selected_entity_set || !surface.downstream_route_family || !surface.recommended_next_primitive) { + return []; + } + return [ + `A likely next checked lane may be inferred as ${surface.downstream_route_family} from the confirmed metadata surface` + ]; +} function buildMetadataUnknownFacts(surface, metadataScope) { if (surface) { + if (surface.ambiguity_detected && surface.ambiguity_entity_sets.length > 0) { + return [...surface.known_limitations]; + } if (surface.available_fields.length > 0) { return []; } @@ -1090,6 +1195,7 @@ async function executeAssistantMcpDiscoveryPilot(planner, deps = DEFAULT_DEPS) { plan: planner.discovery_plan, probeResults, confirmedFacts: buildMetadataConfirmedFacts(derivedMetadataSurface), + inferredFacts: buildMetadataInferredFacts(derivedMetadataSurface), unknownFacts: buildMetadataUnknownFacts(derivedMetadataSurface, metadataScope), sourceRowsSummary, queryLimitations, diff --git a/llm_normalizer/backend/dist/services/assistantMcpDiscoveryResponseCandidate.js b/llm_normalizer/backend/dist/services/assistantMcpDiscoveryResponseCandidate.js index c79d387..f2f3a53 100644 --- a/llm_normalizer/backend/dist/services/assistantMcpDiscoveryResponseCandidate.js +++ b/llm_normalizer/backend/dist/services/assistantMcpDiscoveryResponseCandidate.js @@ -121,13 +121,34 @@ function localizeLine(value) { if (metadataObjectSetsMatch) { return `Доступные типы metadata-объектов: ${metadataObjectSetsMatch[1]}.`; } + const selectedMetadataEntitySetMatch = value.match(/^Selected metadata entity set: (.+)$/i); + if (selectedMetadataEntitySetMatch) { + return `Выбранное семейство metadata-объектов: ${selectedMetadataEntitySetMatch[1]}.`; + } + const selectedMetadataObjectsMatch = value.match(/^Selected metadata objects: (.+)$/i); + if (selectedMetadataObjectsMatch) { + return `Выбранные metadata-объекты для следующего шага: ${selectedMetadataObjectsMatch[1]}.`; + } const metadataFieldsMatch = value.match(/^Available metadata fields\/sections: (.+)$/i); if (metadataFieldsMatch) { return `Доступные metadata-поля/секции: ${metadataFieldsMatch[1]}.`; } + const metadataLaneInferenceMatch = value.match(/^A likely next checked lane may be inferred as (document_evidence|movement_evidence|catalog_drilldown) from the confirmed metadata surface$/i); + if (metadataLaneInferenceMatch) { + const routeLabel = metadataLaneInferenceMatch[1] === "document_evidence" + ? "контур документов" + : metadataLaneInferenceMatch[1] === "movement_evidence" + ? "контур движений/регистров" + : "контур справочников и связанных объектов"; + return `Следующий проверяемый контур по этой metadata-поверхности можно ограниченно оценить как ${routeLabel}.`; + } if (/^Detailed metadata fields were not returned by this MCP metadata probe$/i.test(value)) { return "Эта MCP-проверка metadata не вернула детальный список полей."; } + const metadataAmbiguityMatch = value.match(/^Exact downstream metadata surface remains ambiguous across: (.+)$/i); + if (metadataAmbiguityMatch) { + return `Точная downstream metadata-поверхность пока неоднозначна между family: ${metadataAmbiguityMatch[1]}.`; + } const noMatchingMetadataScopeMatch = value.match(/^No matching 1C metadata objects were confirmed for scope "([^"]+)"$/i); if (noMatchingMetadataScopeMatch) { return `В 1С не подтверждены metadata-объекты по области "${noMatchingMetadataScopeMatch[1]}".`; diff --git a/llm_normalizer/backend/src/services/assistantMcpDiscoveryAnswerAdapter.ts b/llm_normalizer/backend/src/services/assistantMcpDiscoveryAnswerAdapter.ts index 0c09124..b822c2a 100644 --- a/llm_normalizer/backend/src/services/assistantMcpDiscoveryAnswerAdapter.ts +++ b/llm_normalizer/backend/src/services/assistantMcpDiscoveryAnswerAdapter.ts @@ -101,11 +101,32 @@ function isMetadataPilot(pilot: AssistantMcpDiscoveryPilotExecutionContract): bo return pilot.pilot_scope === "metadata_inspection_v1"; } +function metadataRouteFamilyLabelRu( + routeFamily: "document_evidence" | "movement_evidence" | "catalog_drilldown" | null +): string | null { + if (routeFamily === "document_evidence") { + return "контур документов"; + } + if (routeFamily === "movement_evidence") { + return "контур движений/регистров"; + } + if (routeFamily === "catalog_drilldown") { + return "контур справочников и связанных объектов"; + } + return null; +} + function headlineFor(mode: AssistantMcpDiscoveryAnswerMode, pilot: AssistantMcpDiscoveryPilotExecutionContract): string { const askedMonthlyBreakdown = pilot.derived_bidirectional_value_flow?.aggregation_axis === "month" || pilot.derived_value_flow?.aggregation_axis === "month"; if (pilot.derived_metadata_surface && mode === "confirmed_with_bounded_inference") { + if (pilot.derived_metadata_surface.ambiguity_detected) { + return "По метаданным 1С найдены конкурирующие schema-поверхности; перед следующим шагом нужно удержать неоднозначность явно."; + } + if (pilot.derived_metadata_surface.downstream_route_family) { + return "По метаданным 1С найдена схема и заземлена вероятная поверхность для следующего безопасного шага."; + } return "По метаданным 1С найдена доступная схема для дальнейшего безопасного поиска."; } if (askedMonthlyBreakdown && pilot.derived_bidirectional_value_flow && mode === "confirmed_with_bounded_inference") { @@ -145,6 +166,16 @@ function nextStepFor(mode: AssistantMcpDiscoveryAnswerMode, pilot: AssistantMcpD if (mode === "needs_clarification") { return "Уточните контрагента, период или организацию, и я смогу выполнить проверку по 1С."; } + if (mode === "confirmed_with_bounded_inference" && pilot.derived_metadata_surface) { + const surface = pilot.derived_metadata_surface; + if (surface.ambiguity_detected && surface.ambiguity_entity_sets.length > 0) { + return `Следующим шагом лучше сузить surface до одного семейства: ${surface.ambiguity_entity_sets.join(", ")}.`; + } + const routeLabel = metadataRouteFamilyLabelRu(surface.downstream_route_family); + if (surface.selected_entity_set && routeLabel) { + return `Следующим шагом могу пойти в ${routeLabel} по surface «${surface.selected_entity_set}» и уже искать подтвержденные данные, а не только схему.`; + } + } if (mode === "checked_sources_only" && pilot.query_limitations.length > 0) { return "Можно повторить проверку после восстановления MCP-доступа или сузить вопрос до конкретного контрагента/периода."; } @@ -170,6 +201,7 @@ function buildMustNotClaim(pilot: AssistantMcpDiscoveryPilotExecutionContract): if (isMetadataPilot(pilot)) { claims.push("Do not present metadata surface as confirmed business data rows."); claims.push("Do not claim a document/register exists outside the checked metadata probe results."); + claims.push("Do not present the inferred next checked lane as already executed data retrieval."); } if (pilot.evidence.confirmed_facts.length === 0) { claims.push("Do not claim a confirmed business fact when confirmed_facts is empty."); @@ -238,11 +270,31 @@ function derivedMetadataConfirmedLine(pilot: AssistantMcpDiscoveryPilotExecution surface.matched_objects.length > 0 ? ` Найденные объекты: ${surface.matched_objects.slice(0, 8).join(", ")}.` : ""; + const selectedEntitySet = surface.selected_entity_set ? ` Выбранное family: ${surface.selected_entity_set}.` : ""; + const selectedObjects = + surface.selected_surface_objects.length > 0 + ? ` Выбранные surface-объекты: ${surface.selected_surface_objects.slice(0, 6).join(", ")}.` + : ""; const fields = surface.available_fields.length > 0 ? ` Доступные поля/секции: ${surface.available_fields.slice(0, 12).join(", ")}.` : ""; - return `Подтвержденная metadata-поверхность 1С${scope}: ${surface.matched_rows} строк metadata-ответа.${entitySets}${objects}${fields}`.replace(/\s+/g, " ").trim(); + return `Подтвержденная metadata-поверхность 1С${scope}: ${surface.matched_rows} строк metadata-ответа.${entitySets}${objects}${selectedEntitySet}${selectedObjects}${fields}`.replace(/\s+/g, " ").trim(); +} + +function derivedMetadataInferenceLine(pilot: AssistantMcpDiscoveryPilotExecutionContract): string | null { + const surface = pilot.derived_metadata_surface; + if (!surface) { + return null; + } + if (surface.ambiguity_detected && surface.ambiguity_entity_sets.length > 0) { + return `По подтвержденной metadata-поверхности видно несколько конкурирующих family: ${surface.ambiguity_entity_sets.join(", ")}. Следующий data-lane пока нельзя выбрать без явного сужения.`; + } + const routeLabel = metadataRouteFamilyLabelRu(surface.downstream_route_family); + if (!surface.selected_entity_set || !routeLabel) { + return null; + } + return `По подтвержденной metadata-поверхности следующий проверяемый шаг можно ограниченно оценить как ${routeLabel} через family «${surface.selected_entity_set}». Это еще не выполненный data-fetch, а только grounded выбор следующего контура.`; } function derivedValueFlowConfirmedLine(pilot: AssistantMcpDiscoveryPilotExecutionContract): string | null { @@ -346,7 +398,7 @@ export function buildAssistantMcpDiscoveryAnswerDraft( if (pilot.evidence.inferred_facts.length > 0) { pushReason(reasonCodes, "answer_contains_bounded_inference"); } - const derivedInferenceLine = derivedActivityInferenceLine(pilot); + const derivedInferenceLine = derivedActivityInferenceLine(pilot) ?? derivedMetadataInferenceLine(pilot); const inferenceLines = derivedInferenceLine ? [derivedInferenceLine] : pilot.evidence.inferred_facts; diff --git a/llm_normalizer/backend/src/services/assistantMcpDiscoveryPilotExecutor.ts b/llm_normalizer/backend/src/services/assistantMcpDiscoveryPilotExecutor.ts index 8851f3d..c4d6618 100644 --- a/llm_normalizer/backend/src/services/assistantMcpDiscoveryPilotExecutor.ts +++ b/llm_normalizer/backend/src/services/assistantMcpDiscoveryPilotExecutor.ts @@ -122,6 +122,12 @@ export interface AssistantMcpDiscoveryDerivedMetadataSurface { matched_rows: number; available_entity_sets: string[]; matched_objects: string[]; + selected_entity_set: string | null; + selected_surface_objects: string[]; + downstream_route_family: "document_evidence" | "movement_evidence" | "catalog_drilldown" | null; + recommended_next_primitive: "query_documents" | "query_movements" | "drilldown_related_objects" | null; + ambiguity_detected: boolean; + ambiguity_entity_sets: string[]; available_fields: string[]; known_limitations: string[]; inference_basis: "confirmed_1c_metadata_surface_rows"; @@ -741,6 +747,101 @@ function metadataAvailableFields(rows: Array>): string[] return result; } +function normalizeMetadataEntitySetToken(value: string): string { + return String(value ?? "") + .toLowerCase() + .replace(/[\s_.-]+/g, ""); +} + +function metadataMatchesRequestedType(entitySet: string, requestedMetaType: string): boolean { + const entityToken = normalizeMetadataEntitySetToken(entitySet); + const requestedToken = normalizeMetadataEntitySetToken(requestedMetaType); + return entityToken.includes(requestedToken) || requestedToken.includes(entityToken); +} + +function metadataRouteFamilyForEntitySet( + entitySet: string +): "document_evidence" | "movement_evidence" | "catalog_drilldown" | null { + const token = normalizeMetadataEntitySetToken(entitySet); + if (token.includes("документ") || token.includes("document")) { + return "document_evidence"; + } + if ( + token.includes("регистрнакопления") || + token.includes("регистсведений") || + token.includes("регистрсведений") || + token.includes("accumulationregister") || + token.includes("informationregister") + ) { + return "movement_evidence"; + } + if (token.includes("справочник") || token.includes("catalog") || token.includes("directory")) { + return "catalog_drilldown"; + } + return null; +} + +function metadataNextPrimitiveForRouteFamily( + routeFamily: "document_evidence" | "movement_evidence" | "catalog_drilldown" | null +): "query_documents" | "query_movements" | "drilldown_related_objects" | null { + if (routeFamily === "document_evidence") { + return "query_documents"; + } + if (routeFamily === "movement_evidence") { + return "query_movements"; + } + if (routeFamily === "catalog_drilldown") { + return "drilldown_related_objects"; + } + return null; +} + +function selectMetadataEntityGrounding( + availableEntitySets: string[], + requestedMetaTypes: string[] +): { + selectedEntitySet: string | null; + ambiguityDetected: boolean; + ambiguityEntitySets: string[]; +} { + const requestedMatches = availableEntitySets.filter((entitySet) => + requestedMetaTypes.some((requestedMetaType) => metadataMatchesRequestedType(entitySet, requestedMetaType)) + ); + if (requestedMatches.length === 1) { + return { + selectedEntitySet: requestedMatches[0] ?? null, + ambiguityDetected: false, + ambiguityEntitySets: [] + }; + } + if (requestedMatches.length > 1) { + return { + selectedEntitySet: null, + ambiguityDetected: true, + ambiguityEntitySets: requestedMatches + }; + } + if (availableEntitySets.length === 1) { + return { + selectedEntitySet: availableEntitySets[0] ?? null, + ambiguityDetected: false, + ambiguityEntitySets: [] + }; + } + return { + selectedEntitySet: null, + ambiguityDetected: availableEntitySets.length > 1, + ambiguityEntitySets: availableEntitySets + }; +} + +function metadataObjectsForEntitySet(entitySet: string | null, matchedObjects: string[]): string[] { + if (!entitySet) { + return []; + } + return matchedObjects.filter((item) => item.startsWith(`${entitySet}.`) || item.includes(entitySet)); +} + function deriveMetadataSurface( result: AddressMcpMetadataRowsResult | null, metadataScope: string | null, @@ -761,14 +862,30 @@ function deriveMetadataSurface( pushUnique(availableEntitySets, entitySet); } } + const grounding = selectMetadataEntityGrounding(availableEntitySets, requestedMetaTypes); + const downstreamRouteFamily = grounding.selectedEntitySet + ? metadataRouteFamilyForEntitySet(grounding.selectedEntitySet) + : null; + const knownLimitations: string[] = []; + if (grounding.ambiguityDetected && grounding.ambiguityEntitySets.length > 0) { + knownLimitations.push( + `Exact downstream metadata surface remains ambiguous across: ${grounding.ambiguityEntitySets.join(", ")}` + ); + } return { metadata_scope: metadataScope, requested_meta_types: requestedMetaTypes, matched_rows: result.rows.length, available_entity_sets: availableEntitySets, matched_objects: matchedObjects, + selected_entity_set: grounding.selectedEntitySet, + selected_surface_objects: metadataObjectsForEntitySet(grounding.selectedEntitySet, matchedObjects), + downstream_route_family: downstreamRouteFamily, + recommended_next_primitive: metadataNextPrimitiveForRouteFamily(downstreamRouteFamily), + ambiguity_detected: grounding.ambiguityDetected, + ambiguity_entity_sets: grounding.ambiguityEntitySets, available_fields: metadataAvailableFields(result.rows), - known_limitations: [], + known_limitations: knownLimitations, inference_basis: "confirmed_1c_metadata_surface_rows" }; } @@ -787,17 +904,37 @@ function buildMetadataConfirmedFacts( if (surface.available_entity_sets.length > 0) { facts.push(`Available metadata object sets: ${surface.available_entity_sets.join(", ")}`); } + if (surface.selected_entity_set) { + facts.push(`Selected metadata entity set: ${surface.selected_entity_set}`); + } + if (surface.selected_surface_objects.length > 0) { + facts.push(`Selected metadata objects: ${surface.selected_surface_objects.slice(0, 8).join(", ")}`); + } if (surface.available_fields.length > 0) { facts.push(`Available metadata fields/sections: ${surface.available_fields.slice(0, 12).join(", ")}`); } return facts; } +function buildMetadataInferredFacts( + surface: AssistantMcpDiscoveryDerivedMetadataSurface | null +): string[] { + if (!surface || !surface.selected_entity_set || !surface.downstream_route_family || !surface.recommended_next_primitive) { + return []; + } + return [ + `A likely next checked lane may be inferred as ${surface.downstream_route_family} from the confirmed metadata surface` + ]; +} + function buildMetadataUnknownFacts( surface: AssistantMcpDiscoveryDerivedMetadataSurface | null, metadataScope: string | null ): string[] { if (surface) { + if (surface.ambiguity_detected && surface.ambiguity_entity_sets.length > 0) { + return [...surface.known_limitations]; + } if (surface.available_fields.length > 0) { return []; } @@ -1466,6 +1603,7 @@ export async function executeAssistantMcpDiscoveryPilot( plan: planner.discovery_plan, probeResults, confirmedFacts: buildMetadataConfirmedFacts(derivedMetadataSurface), + inferredFacts: buildMetadataInferredFacts(derivedMetadataSurface), unknownFacts: buildMetadataUnknownFacts(derivedMetadataSurface, metadataScope), sourceRowsSummary, queryLimitations, diff --git a/llm_normalizer/backend/src/services/assistantMcpDiscoveryResponseCandidate.ts b/llm_normalizer/backend/src/services/assistantMcpDiscoveryResponseCandidate.ts index 68fc41f..51f4bb1 100644 --- a/llm_normalizer/backend/src/services/assistantMcpDiscoveryResponseCandidate.ts +++ b/llm_normalizer/backend/src/services/assistantMcpDiscoveryResponseCandidate.ts @@ -157,13 +157,37 @@ function localizeLine(value: string): string { if (metadataObjectSetsMatch) { return `Доступные типы metadata-объектов: ${metadataObjectSetsMatch[1]}.`; } + const selectedMetadataEntitySetMatch = value.match(/^Selected metadata entity set: (.+)$/i); + if (selectedMetadataEntitySetMatch) { + return `Выбранное семейство metadata-объектов: ${selectedMetadataEntitySetMatch[1]}.`; + } + const selectedMetadataObjectsMatch = value.match(/^Selected metadata objects: (.+)$/i); + if (selectedMetadataObjectsMatch) { + return `Выбранные metadata-объекты для следующего шага: ${selectedMetadataObjectsMatch[1]}.`; + } const metadataFieldsMatch = value.match(/^Available metadata fields\/sections: (.+)$/i); if (metadataFieldsMatch) { return `Доступные metadata-поля/секции: ${metadataFieldsMatch[1]}.`; } + const metadataLaneInferenceMatch = value.match( + /^A likely next checked lane may be inferred as (document_evidence|movement_evidence|catalog_drilldown) from the confirmed metadata surface$/i + ); + if (metadataLaneInferenceMatch) { + const routeLabel = + metadataLaneInferenceMatch[1] === "document_evidence" + ? "контур документов" + : metadataLaneInferenceMatch[1] === "movement_evidence" + ? "контур движений/регистров" + : "контур справочников и связанных объектов"; + return `Следующий проверяемый контур по этой metadata-поверхности можно ограниченно оценить как ${routeLabel}.`; + } if (/^Detailed metadata fields were not returned by this MCP metadata probe$/i.test(value)) { return "Эта MCP-проверка metadata не вернула детальный список полей."; } + const metadataAmbiguityMatch = value.match(/^Exact downstream metadata surface remains ambiguous across: (.+)$/i); + if (metadataAmbiguityMatch) { + return `Точная downstream metadata-поверхность пока неоднозначна между family: ${metadataAmbiguityMatch[1]}.`; + } const noMatchingMetadataScopeMatch = value.match(/^No matching 1C metadata objects were confirmed for scope "([^"]+)"$/i); if (noMatchingMetadataScopeMatch) { return `В 1С не подтверждены metadata-объекты по области "${noMatchingMetadataScopeMatch[1]}".`; diff --git a/llm_normalizer/backend/tests/assistantMcpDiscoveryAnswerAdapter.test.ts b/llm_normalizer/backend/tests/assistantMcpDiscoveryAnswerAdapter.test.ts index 7e5e342..5611c01 100644 --- a/llm_normalizer/backend/tests/assistantMcpDiscoveryAnswerAdapter.test.ts +++ b/llm_normalizer/backend/tests/assistantMcpDiscoveryAnswerAdapter.test.ts @@ -130,11 +130,49 @@ describe("assistant MCP discovery answer adapter", () => { const confirmedText = draft.confirmed_lines.join("\n"); expect(draft.answer_mode).toBe("confirmed_with_bounded_inference"); - expect(draft.headline).toContain("метаданным 1С"); + expect(draft.headline).toContain("заземлена вероятная поверхность"); expect(confirmedText).toContain("Подтвержденная metadata-поверхность 1С"); expect(confirmedText).toContain("Документ.СчетФактураВыданный"); + expect(confirmedText).toContain("Выбранное family: Документ"); expect(confirmedText).toContain("Дата"); + expect(draft.inference_lines.join("\n")).toContain("контур документов"); + expect(draft.next_step_line).toContain("surface «Документ»"); expect(draft.must_not_claim).toContain("Do not present metadata surface as confirmed business data rows."); + expect(draft.must_not_claim).toContain("Do not present the inferred next checked lane as already executed data retrieval."); + }); + + it("keeps metadata answer honest when schema surface stays ambiguous", async () => { + const planner = planAssistantMcpDiscovery({ + turnMeaning: { + asked_domain_family: "metadata", + asked_action_family: "inspect_fields", + explicit_entity_candidates: ["НДС"] + } + }); + const pilot = await executeAssistantMcpDiscoveryPilot( + planner, + buildMetadataDeps([ + { + FullName: "Документ.СчетФактураВыданный", + MetaType: "Документ", + attributes: [{ Name: "Дата" }] + }, + { + FullName: "РегистрНакопления.НДСПокупок", + MetaType: "РегистрНакопления", + resources: [{ Name: "СуммаНДС" }] + } + ]) + ); + + const draft = buildAssistantMcpDiscoveryAnswerDraft(pilot); + + expect(draft.headline).toContain("конкурирующие schema-поверхности"); + expect(draft.inference_lines.join("\n")).toContain("несколько конкурирующих family"); + expect(draft.unknown_lines).toContain( + "Exact downstream metadata surface remains ambiguous across: Документ, РегистрНакопления" + ); + expect(draft.next_step_line).toContain("Документ, РегистрНакопления"); }); it("turns value-flow evidence into a bounded turnover answer draft", async () => { diff --git a/llm_normalizer/backend/tests/assistantMcpDiscoveryPilotExecutor.test.ts b/llm_normalizer/backend/tests/assistantMcpDiscoveryPilotExecutor.test.ts index e79b08c..948ced9 100644 --- a/llm_normalizer/backend/tests/assistantMcpDiscoveryPilotExecutor.test.ts +++ b/llm_normalizer/backend/tests/assistantMcpDiscoveryPilotExecutor.test.ts @@ -138,6 +138,12 @@ describe("assistant MCP discovery pilot executor", () => { matched_rows: 2, available_entity_sets: ["Документ"], matched_objects: ["Документ.СчетФактураВыданный", "Документ.СчетФактураПолученный"], + selected_entity_set: "Документ", + selected_surface_objects: ["Документ.СчетФактураВыданный", "Документ.СчетФактураПолученный"], + downstream_route_family: "document_evidence", + recommended_next_primitive: "query_documents", + ambiguity_detected: false, + ambiguity_entity_sets: [], available_fields: ["Дата", "Организация", "Контрагент"], inference_basis: "confirmed_1c_metadata_surface_rows" }); @@ -150,6 +156,45 @@ describe("assistant MCP discovery pilot executor", () => { }); }); + it("keeps metadata grounding ambiguous when several surface families compete", async () => { + const planner = planAssistantMcpDiscovery({ + turnMeaning: { + asked_domain_family: "metadata", + asked_action_family: "inspect_fields", + explicit_entity_candidates: ["НДС"] + } + }); + const deps = buildMetadataDeps([ + { + FullName: "Документ.СчетФактураВыданный", + MetaType: "Документ", + attributes: [{ Name: "Дата" }] + }, + { + FullName: "РегистрНакопления.НДСПокупок", + MetaType: "РегистрНакопления", + resources: [{ Name: "СуммаНДС" }] + } + ]); + + const result = await executeAssistantMcpDiscoveryPilot(planner, deps); + + expect(result.pilot_status).toBe("executed"); + expect(result.derived_metadata_surface).toMatchObject({ + metadata_scope: "НДС", + available_entity_sets: ["Документ", "РегистрНакопления"], + selected_entity_set: null, + downstream_route_family: null, + recommended_next_primitive: null, + ambiguity_detected: true, + ambiguity_entity_sets: ["Документ", "РегистрНакопления"] + }); + expect(result.evidence.inferred_facts).toEqual([]); + expect(result.evidence.unknown_facts).toContain( + "Exact downstream metadata surface remains ambiguous across: Документ, РегистрНакопления" + ); + }); + it("executes value-flow query_movements and derives a guarded turnover sum", async () => { const planner = planAssistantMcpDiscovery({ turnMeaning: { diff --git a/llm_normalizer/backend/tests/assistantMcpDiscoveryResponseCandidate.test.ts b/llm_normalizer/backend/tests/assistantMcpDiscoveryResponseCandidate.test.ts index 1ca71db..8693e7e 100644 --- a/llm_normalizer/backend/tests/assistantMcpDiscoveryResponseCandidate.test.ts +++ b/llm_normalizer/backend/tests/assistantMcpDiscoveryResponseCandidate.test.ts @@ -223,10 +223,17 @@ describe("assistant MCP discovery response candidate", () => { confirmed_lines: [ 'Confirmed 1C metadata surface for scope "НДС": 7 rows and 3 matching objects', "Available metadata object sets: accumulation_register, document", + "Selected metadata entity set: Документ", + "Selected metadata objects: Документ.СчетФактураВыданный", "Available metadata fields/sections: amount, vat_rate, organization" ], - inference_lines: [], - unknown_lines: ['No matching 1C metadata objects were confirmed for scope "Прибыль"'], + inference_lines: [ + "A likely next checked lane may be inferred as document_evidence from the confirmed metadata surface" + ], + unknown_lines: [ + 'No matching 1C metadata objects were confirmed for scope "Прибыль"', + "Exact downstream metadata surface remains ambiguous across: Документ, РегистрНакопления" + ], limitation_lines: ["Detailed metadata fields were not returned by this MCP metadata probe"], next_step_line: null } @@ -236,8 +243,12 @@ describe("assistant MCP discovery response candidate", () => { expect(candidate.reply_text).toContain('В 1С подтверждена metadata-поверхность по области "НДС"'); expect(candidate.reply_text).toContain("Доступные типы metadata-объектов"); + expect(candidate.reply_text).toContain("Выбранное семейство metadata-объектов: Документ"); + expect(candidate.reply_text).toContain("Выбранные metadata-объекты для следующего шага"); expect(candidate.reply_text).toContain("Доступные metadata-поля/секции"); + expect(candidate.reply_text).toContain("контур документов"); expect(candidate.reply_text).toContain('В 1С не подтверждены metadata-объекты по области "Прибыль"'); + expect(candidate.reply_text).toContain("неоднозначна между family"); expect(candidate.reply_text).toContain("Эта MCP-проверка metadata не вернула детальный список полей"); expect(candidate.reply_text).not.toContain("Confirmed 1C metadata surface"); });