diff --git a/llm_normalizer/backend/dist/services/assistantMcpDiscoveryPilotExecutor.js b/llm_normalizer/backend/dist/services/assistantMcpDiscoveryPilotExecutor.js index e166ecc..c615964 100644 --- a/llm_normalizer/backend/dist/services/assistantMcpDiscoveryPilotExecutor.js +++ b/llm_normalizer/backend/dist/services/assistantMcpDiscoveryPilotExecutor.js @@ -409,6 +409,10 @@ function metadataTypesForPlanner(planner) { } return ["Документ", "РегистрНакопления", "РегистрСведений", "Справочник"]; } +function metadataScopeRankingAllowedForPlanner(planner) { + const action = String(planner.discovery_plan.turn_meaning_ref?.asked_action_family ?? "").toLowerCase().trim(); + return action === "inspect_surface"; +} function valueFlowPilotProfile(planner) { const meaning = planner.discovery_plan.turn_meaning_ref; const action = String(meaning?.asked_action_family ?? "").toLowerCase(); @@ -912,16 +916,73 @@ function metadataSurfaceFamilyScores(matchedObjects) { } return scores; } -function metadataObjectsForRouteFamily(routeFamily, matchedObjects) { +function normalizeMetadataObjectRankingToken(value) { + return String(value ?? "") + .toLowerCase() + .replace(/[^\p{L}\p{N}]+/gu, ""); +} +function metadataScopeRankingTokens(metadataScope) { + const scope = String(metadataScope ?? "").trim(); + if (!scope) { + return []; + } + const condensed = normalizeMetadataObjectRankingToken(scope); + const result = []; + if (condensed.length >= 2) { + pushUnique(result, condensed); + } + for (const token of scope.toLowerCase().split(/[^\p{L}\p{N}]+/gu)) { + const normalized = normalizeMetadataObjectRankingToken(token); + if (normalized.length >= 2) { + pushUnique(result, normalized); + } + } + return result; +} +function metadataObjectRelevanceScore(metadataScope, objectName) { + const objectToken = normalizeMetadataObjectRankingToken(objectName); + if (!objectToken) { + return 1; + } + let score = 1; + for (const token of metadataScopeRankingTokens(metadataScope)) { + if (objectToken.includes(token)) { + score += token.length >= 6 ? 4 : 3; + } + } + return score; +} +function metadataWeightedSurfaceFamilyScores(matchedObjects, metadataScope) { + const scores = emptyMetadataSurfaceFamilyScores(); + for (const objectName of matchedObjects) { + const entitySet = inferMetadataEntitySetFromObjectName(objectName); + const routeFamily = entitySet ? metadataRouteFamilyForEntitySetRelaxed(entitySet) : null; + if (routeFamily) { + scores[routeFamily] += metadataObjectRelevanceScore(metadataScope, objectName); + } + } + return scores; +} +function sortMetadataObjectsByRelevance(matchedObjects, metadataScope) { + return [...matchedObjects].sort((left, right) => { + const scoreDelta = metadataObjectRelevanceScore(metadataScope, right) - metadataObjectRelevanceScore(metadataScope, left); + if (scoreDelta !== 0) { + return scoreDelta; + } + return left.localeCompare(right, "ru"); + }); +} +function metadataObjectsForRouteFamily(routeFamily, matchedObjects, metadataScope) { if (!routeFamily) { return []; } - return matchedObjects.filter((objectName) => { + const filtered = matchedObjects.filter((objectName) => { const entitySet = inferMetadataEntitySetFromObjectName(objectName); return entitySet ? metadataRouteFamilyForEntitySetRelaxed(entitySet) === routeFamily : false; }); + return sortMetadataObjectsByRelevance(filtered, metadataScope); } -function selectMetadataRouteFamilyFromSurfaceScores(scores) { +function selectDominantMetadataRouteFamilyFromScores(scores) { const ranked = Object.entries(scores) .filter(([, score]) => score > 0) .sort((left, right) => right[1] - left[1]); @@ -938,7 +999,39 @@ function selectMetadataRouteFamilyFromSurfaceScores(scores) { const clearlyDominant = absoluteMargin >= 2 || relativeRatio >= 1.5; return clearlyDominant ? top[0] : null; } -function deriveMetadataSurface(result, metadataScope, requestedMetaTypes) { +function selectMetadataRouteFamilyFromSurfaceScores(input) { + const countDominant = selectDominantMetadataRouteFamilyFromScores(input.countScores); + if (countDominant) { + return { + routeFamily: countDominant, + rankingApplied: false + }; + } + if (!input.allowScopeRanking) { + return { + routeFamily: null, + rankingApplied: false + }; + } + const rankedCounts = Object.entries(input.countScores) + .filter(([, score]) => score > 0) + .sort((left, right) => right[1] - left[1]); + const topCount = rankedCounts[0]?.[1] ?? 0; + const secondCount = rankedCounts[1]?.[1] ?? 0; + if (topCount <= 0 || topCount !== secondCount) { + return { + routeFamily: null, + rankingApplied: false + }; + } + const weightedScores = metadataWeightedSurfaceFamilyScores(input.matchedObjects, input.metadataScope); + const weightedDominant = selectDominantMetadataRouteFamilyFromScores(weightedScores); + return { + routeFamily: weightedDominant, + rankingApplied: Boolean(weightedDominant) + }; +} +function deriveMetadataSurface(result, metadataScope, requestedMetaTypes, allowScopeRanking) { if (!result || result.error || result.rows.length <= 0) { return null; } @@ -959,7 +1052,15 @@ function deriveMetadataSurface(result, metadataScope, requestedMetaTypes) { const selectedEntitySetRouteFamily = grounding.selectedEntitySet ? metadataRouteFamilyForEntitySetRelaxed(grounding.selectedEntitySet) : null; - const scoredRouteFamily = selectedEntitySetRouteFamily === null ? selectMetadataRouteFamilyFromSurfaceScores(surfaceFamilyScores) : null; + const scoredRouteSelection = selectedEntitySetRouteFamily === null + ? selectMetadataRouteFamilyFromSurfaceScores({ + matchedObjects, + metadataScope, + countScores: surfaceFamilyScores, + allowScopeRanking + }) + : { routeFamily: null, rankingApplied: false }; + const scoredRouteFamily = scoredRouteSelection.routeFamily; const downstreamRouteFamily = selectedEntitySetRouteFamily ?? scoredRouteFamily; const routeFamilySelectionBasis = selectedEntitySetRouteFamily ? "selected_entity_set" @@ -967,8 +1068,8 @@ function deriveMetadataSurface(result, metadataScope, requestedMetaTypes) { ? "dominant_surface_objects" : null; const selectedSurfaceObjects = grounding.selectedEntitySet !== null - ? metadataObjectsForEntitySet(grounding.selectedEntitySet, matchedObjects) - : metadataObjectsForRouteFamily(downstreamRouteFamily, matchedObjects); + ? sortMetadataObjectsByRelevance(metadataObjectsForEntitySet(grounding.selectedEntitySet, matchedObjects), metadataScope) + : metadataObjectsForRouteFamily(downstreamRouteFamily, matchedObjects, metadataScope); const knownLimitations = []; const ambiguityRemainsUnresolved = grounding.ambiguityDetected && !downstreamRouteFamily; if (ambiguityRemainsUnresolved && grounding.ambiguityEntitySets.length > 0) { @@ -991,6 +1092,7 @@ function deriveMetadataSurface(result, metadataScope, requestedMetaTypes) { recommended_next_primitive: metadataNextPrimitiveForRouteFamily(downstreamRouteFamily), ambiguity_detected: ambiguityRemainsUnresolved, ambiguity_entity_sets: ambiguityRemainsUnresolved ? grounding.ambiguityEntitySets : [], + surface_object_ranking_applied: scoredRouteSelection.rankingApplied, available_fields: metadataAvailableFields(result.rows), known_limitations: knownLimitations, inference_basis: "confirmed_1c_metadata_surface_rows" @@ -1823,12 +1925,15 @@ async function executeAssistantMcpDiscoveryPilot(planner, deps = DEFAULT_DEPS) { } } const sourceRowsSummary = metadataResult ? summarizeMetadataRows(metadataResult) : null; - const derivedMetadataSurface = deriveMetadataSurface(metadataResult, metadataScope, requestedMetaTypes); + const derivedMetadataSurface = deriveMetadataSurface(metadataResult, metadataScope, requestedMetaTypes, metadataScopeRankingAllowedForPlanner(planner)); if (derivedMetadataSurface) { pushReason(reasonCodes, "pilot_derived_metadata_surface_from_confirmed_rows"); if (derivedMetadataSurface.route_family_selection_basis === "dominant_surface_objects") { pushReason(reasonCodes, "pilot_selected_metadata_route_family_from_dominant_surface_objects"); } + if (derivedMetadataSurface.surface_object_ranking_applied) { + pushReason(reasonCodes, "pilot_selected_metadata_route_family_from_surface_object_ranking"); + } } const evidence = (0, assistantMcpDiscoveryPolicy_1.resolveAssistantMcpDiscoveryEvidence)({ plan: planner.discovery_plan, diff --git a/llm_normalizer/backend/src/services/assistantMcpDiscoveryPilotExecutor.ts b/llm_normalizer/backend/src/services/assistantMcpDiscoveryPilotExecutor.ts index 148e29d..3e4434e 100644 --- a/llm_normalizer/backend/src/services/assistantMcpDiscoveryPilotExecutor.ts +++ b/llm_normalizer/backend/src/services/assistantMcpDiscoveryPilotExecutor.ts @@ -159,6 +159,7 @@ export interface AssistantMcpDiscoveryDerivedMetadataSurface { recommended_next_primitive: "query_documents" | "query_movements" | "drilldown_related_objects" | null; ambiguity_detected: boolean; ambiguity_entity_sets: string[]; + surface_object_ranking_applied?: boolean; available_fields: string[]; known_limitations: string[]; inference_basis: "confirmed_1c_metadata_surface_rows"; @@ -688,6 +689,11 @@ function metadataTypesForPlanner(planner: AssistantMcpDiscoveryPlannerContract): return ["Документ", "РегистрНакопления", "РегистрСведений", "Справочник"]; } +function metadataScopeRankingAllowedForPlanner(planner: AssistantMcpDiscoveryPlannerContract): boolean { + const action = String(planner.discovery_plan.turn_meaning_ref?.asked_action_family ?? "").toLowerCase().trim(); + return action === "inspect_surface"; +} + interface ValueFlowPilotProfile { scope: Extract< AssistantMcpDiscoveryPilotScope, @@ -1310,20 +1316,86 @@ function metadataSurfaceFamilyScores( return scores; } +function normalizeMetadataObjectRankingToken(value: string): string { + return String(value ?? "") + .toLowerCase() + .replace(/[^\p{L}\p{N}]+/gu, ""); +} + +function metadataScopeRankingTokens(metadataScope: string | null): string[] { + const scope = String(metadataScope ?? "").trim(); + if (!scope) { + return []; + } + const condensed = normalizeMetadataObjectRankingToken(scope); + const result: string[] = []; + if (condensed.length >= 2) { + pushUnique(result, condensed); + } + for (const token of scope.toLowerCase().split(/[^\p{L}\p{N}]+/gu)) { + const normalized = normalizeMetadataObjectRankingToken(token); + if (normalized.length >= 2) { + pushUnique(result, normalized); + } + } + return result; +} + +function metadataObjectRelevanceScore(metadataScope: string | null, objectName: string): number { + const objectToken = normalizeMetadataObjectRankingToken(objectName); + if (!objectToken) { + return 1; + } + let score = 1; + for (const token of metadataScopeRankingTokens(metadataScope)) { + if (objectToken.includes(token)) { + score += token.length >= 6 ? 4 : 3; + } + } + return score; +} + +function metadataWeightedSurfaceFamilyScores( + matchedObjects: string[], + metadataScope: string | null +): AssistantMcpDiscoveryDerivedMetadataSurface["surface_family_scores"] { + const scores = emptyMetadataSurfaceFamilyScores(); + for (const objectName of matchedObjects) { + const entitySet = inferMetadataEntitySetFromObjectName(objectName); + const routeFamily = entitySet ? metadataRouteFamilyForEntitySetRelaxed(entitySet) : null; + if (routeFamily) { + scores[routeFamily] += metadataObjectRelevanceScore(metadataScope, objectName); + } + } + return scores; +} + +function sortMetadataObjectsByRelevance(matchedObjects: string[], metadataScope: string | null): string[] { + return [...matchedObjects].sort((left, right) => { + const scoreDelta = metadataObjectRelevanceScore(metadataScope, right) - metadataObjectRelevanceScore(metadataScope, left); + if (scoreDelta !== 0) { + return scoreDelta; + } + return left.localeCompare(right, "ru"); + }); +} + function metadataObjectsForRouteFamily( routeFamily: "document_evidence" | "movement_evidence" | "catalog_drilldown" | null, - matchedObjects: string[] + matchedObjects: string[], + metadataScope: string | null ): string[] { if (!routeFamily) { return []; } - return matchedObjects.filter((objectName) => { + const filtered = matchedObjects.filter((objectName) => { const entitySet = inferMetadataEntitySetFromObjectName(objectName); return entitySet ? metadataRouteFamilyForEntitySetRelaxed(entitySet) === routeFamily : false; }); + return sortMetadataObjectsByRelevance(filtered, metadataScope); } -function selectMetadataRouteFamilyFromSurfaceScores( +function selectDominantMetadataRouteFamilyFromScores( scores: AssistantMcpDiscoveryDerivedMetadataSurface["surface_family_scores"] ): "document_evidence" | "movement_evidence" | "catalog_drilldown" | null { const ranked = (Object.entries(scores) as Array< @@ -1345,10 +1417,54 @@ function selectMetadataRouteFamilyFromSurfaceScores( return clearlyDominant ? top[0] : null; } +function selectMetadataRouteFamilyFromSurfaceScores(input: { + matchedObjects: string[]; + metadataScope: string | null; + countScores: AssistantMcpDiscoveryDerivedMetadataSurface["surface_family_scores"]; + allowScopeRanking: boolean; +}): { + routeFamily: "document_evidence" | "movement_evidence" | "catalog_drilldown" | null; + rankingApplied: boolean; +} { + const countDominant = selectDominantMetadataRouteFamilyFromScores(input.countScores); + if (countDominant) { + return { + routeFamily: countDominant, + rankingApplied: false + }; + } + if (!input.allowScopeRanking) { + return { + routeFamily: null, + rankingApplied: false + }; + } + const rankedCounts = (Object.entries(input.countScores) as Array< + ["document_evidence" | "movement_evidence" | "catalog_drilldown", number] + >) + .filter(([, score]) => score > 0) + .sort((left, right) => right[1] - left[1]); + const topCount = rankedCounts[0]?.[1] ?? 0; + const secondCount = rankedCounts[1]?.[1] ?? 0; + if (topCount <= 0 || topCount !== secondCount) { + return { + routeFamily: null, + rankingApplied: false + }; + } + const weightedScores = metadataWeightedSurfaceFamilyScores(input.matchedObjects, input.metadataScope); + const weightedDominant = selectDominantMetadataRouteFamilyFromScores(weightedScores); + return { + routeFamily: weightedDominant, + rankingApplied: Boolean(weightedDominant) + }; +} + function deriveMetadataSurface( result: AddressMcpMetadataRowsResult | null, metadataScope: string | null, - requestedMetaTypes: string[] + requestedMetaTypes: string[], + allowScopeRanking: boolean ): AssistantMcpDiscoveryDerivedMetadataSurface | null { if (!result || result.error || result.rows.length <= 0) { return null; @@ -1370,8 +1486,16 @@ function deriveMetadataSurface( const selectedEntitySetRouteFamily = grounding.selectedEntitySet ? metadataRouteFamilyForEntitySetRelaxed(grounding.selectedEntitySet) : null; - const scoredRouteFamily = - selectedEntitySetRouteFamily === null ? selectMetadataRouteFamilyFromSurfaceScores(surfaceFamilyScores) : null; + const scoredRouteSelection = + selectedEntitySetRouteFamily === null + ? selectMetadataRouteFamilyFromSurfaceScores({ + matchedObjects, + metadataScope, + countScores: surfaceFamilyScores, + allowScopeRanking + }) + : { routeFamily: null, rankingApplied: false }; + const scoredRouteFamily = scoredRouteSelection.routeFamily; const downstreamRouteFamily = selectedEntitySetRouteFamily ?? scoredRouteFamily; const routeFamilySelectionBasis = selectedEntitySetRouteFamily ? "selected_entity_set" @@ -1380,8 +1504,8 @@ function deriveMetadataSurface( : null; const selectedSurfaceObjects = grounding.selectedEntitySet !== null - ? metadataObjectsForEntitySet(grounding.selectedEntitySet, matchedObjects) - : metadataObjectsForRouteFamily(downstreamRouteFamily, matchedObjects); + ? sortMetadataObjectsByRelevance(metadataObjectsForEntitySet(grounding.selectedEntitySet, matchedObjects), metadataScope) + : metadataObjectsForRouteFamily(downstreamRouteFamily, matchedObjects, metadataScope); const knownLimitations: string[] = []; const ambiguityRemainsUnresolved = grounding.ambiguityDetected && !downstreamRouteFamily; if (ambiguityRemainsUnresolved && grounding.ambiguityEntitySets.length > 0) { @@ -1408,6 +1532,7 @@ function deriveMetadataSurface( recommended_next_primitive: metadataNextPrimitiveForRouteFamily(downstreamRouteFamily), ambiguity_detected: ambiguityRemainsUnresolved, ambiguity_entity_sets: ambiguityRemainsUnresolved ? grounding.ambiguityEntitySets : [], + surface_object_ranking_applied: scoredRouteSelection.rankingApplied, available_fields: metadataAvailableFields(result.rows), known_limitations: knownLimitations, inference_basis: "confirmed_1c_metadata_surface_rows" @@ -2422,12 +2547,20 @@ export async function executeAssistantMcpDiscoveryPilot( } const sourceRowsSummary = metadataResult ? summarizeMetadataRows(metadataResult) : null; - const derivedMetadataSurface = deriveMetadataSurface(metadataResult, metadataScope, requestedMetaTypes); + const derivedMetadataSurface = deriveMetadataSurface( + metadataResult, + metadataScope, + requestedMetaTypes, + metadataScopeRankingAllowedForPlanner(planner) + ); if (derivedMetadataSurface) { pushReason(reasonCodes, "pilot_derived_metadata_surface_from_confirmed_rows"); if (derivedMetadataSurface.route_family_selection_basis === "dominant_surface_objects") { pushReason(reasonCodes, "pilot_selected_metadata_route_family_from_dominant_surface_objects"); } + if (derivedMetadataSurface.surface_object_ranking_applied) { + pushReason(reasonCodes, "pilot_selected_metadata_route_family_from_surface_object_ranking"); + } } const evidence = resolveAssistantMcpDiscoveryEvidence({ plan: planner.discovery_plan, diff --git a/llm_normalizer/backend/tests/assistantMcpDiscoveryPilotExecutor.test.ts b/llm_normalizer/backend/tests/assistantMcpDiscoveryPilotExecutor.test.ts index 13d3dcd..0020731 100644 --- a/llm_normalizer/backend/tests/assistantMcpDiscoveryPilotExecutor.test.ts +++ b/llm_normalizer/backend/tests/assistantMcpDiscoveryPilotExecutor.test.ts @@ -435,6 +435,50 @@ describe("assistant MCP discovery pilot executor", () => { ); }); + it("can break a weak metadata family tie by ranking surface objects against the requested scope", async () => { + const planner = planAssistantMcpDiscovery({ + turnMeaning: { + asked_domain_family: "metadata", + asked_action_family: "inspect_surface", + explicit_entity_candidates: ["НДС"] + } + }); + const deps = buildMetadataDeps([ + { + FullName: "Document.НДССчетФактура", + MetaType: "Document", + attributes: [{ Name: "Дата" }] + }, + { + FullName: "AccumulationRegister.BankOperations", + MetaType: "AccumulationRegister", + resources: [{ Name: "Amount" }] + } + ]); + + const result = await executeAssistantMcpDiscoveryPilot(planner, deps); + + expect(result.pilot_status).toBe("executed"); + expect(result.derived_metadata_surface).toMatchObject({ + metadata_scope: "НДС", + available_entity_sets: ["Document", "AccumulationRegister"], + selected_entity_set: null, + selected_surface_objects: ["Document.НДССчетФактура"], + surface_family_scores: { + document_evidence: 1, + movement_evidence: 1, + catalog_drilldown: 0 + }, + downstream_route_family: "document_evidence", + route_family_selection_basis: "dominant_surface_objects", + recommended_next_primitive: "query_documents", + ambiguity_detected: false, + ambiguity_entity_sets: [], + surface_object_ranking_applied: true + }); + expect(result.reason_codes).toContain("pilot_selected_metadata_route_family_from_surface_object_ranking"); + }); + it("keeps metadata ambiguity unresolved when surface-family scores are nearly tied", async () => { const planner = planAssistantMcpDiscovery({ turnMeaning: {