ARCH: ранжировать metadata surface objects только для честного tie-break

This commit is contained in:
dctouch 2026-04-23 10:34:56 +03:00
parent 3a89cca6bb
commit c2392f6420
3 changed files with 299 additions and 17 deletions

View File

@ -409,6 +409,10 @@ function metadataTypesForPlanner(planner) {
}
return ["Документ", "РегистрНакопления", "РегистрСведений", "Справочник"];
}
/**
 * Tells whether the planner's turn asks for the "inspect_surface" action.
 *
 * The asked action family is read from
 * `planner.discovery_plan.turn_meaning_ref`, coerced to a string,
 * lower-cased and trimmed before the comparison; a missing reference or
 * a missing/nullish action family is treated as an empty string.
 *
 * @param {object} planner - Planner contract; must carry `discovery_plan`.
 * @returns {boolean} `true` only when the normalized action family is
 *   exactly "inspect_surface".
 */
function metadataScopeRankingAllowedForPlanner(planner) {
    const turnMeaning = planner.discovery_plan.turn_meaning_ref;
    const rawActionFamily = turnMeaning?.asked_action_family ?? "";
    const normalizedActionFamily = String(rawActionFamily).toLowerCase().trim();
    return normalizedActionFamily === "inspect_surface";
}
function valueFlowPilotProfile(planner) {
const meaning = planner.discovery_plan.turn_meaning_ref;
const action = String(meaning?.asked_action_family ?? "").toLowerCase();
@ -912,16 +916,73 @@ function metadataSurfaceFamilyScores(matchedObjects) {
}
return scores;
}
function metadataObjectsForRouteFamily(routeFamily, matchedObjects) {
/**
 * Reduces a value to a comparison token: the value is coerced to a string
 * (nullish becomes ""), lower-cased, and every run of characters that is
 * neither a Unicode letter nor a Unicode number is removed.
 *
 * @param {*} value - Raw text, e.g. a scope phrase or an object name.
 * @returns {string} Lower-cased letters and digits only; may be empty.
 */
function normalizeMetadataObjectRankingToken(value) {
    const text = String(value ?? "");
    const lowered = text.toLowerCase();
    // Separators such as ".", "_", "-" and whitespace are dropped entirely.
    return lowered.replace(/[^\p{L}\p{N}]+/gu, "");
}
/**
 * Builds the list of tokens a metadata scope phrase is matched with.
 *
 * The first candidate is the whole trimmed scope condensed by
 * `normalizeMetadataObjectRankingToken`; the remaining candidates are the
 * letter/digit runs of the lower-cased scope, each normalized the same way.
 * Candidates shorter than two characters are discarded. Tokens are added
 * through `pushUnique` (presumably skipping values already present —
 * confirm against its definition).
 *
 * @param {*} metadataScope - Requested scope; nullish or blank yields [].
 * @returns {string[]} Tokens in the order they were accepted.
 */
function metadataScopeRankingTokens(metadataScope) {
    const trimmedScope = String(metadataScope ?? "").trim();
    if (trimmedScope === "") {
        return [];
    }
    const tokens = [];
    const acceptCandidate = (candidate) => {
        if (candidate.length >= 2) {
            pushUnique(tokens, candidate);
        }
    };
    // Whole phrase first, so "счет фактура" also yields "счетфактура".
    acceptCandidate(normalizeMetadataObjectRankingToken(trimmedScope));
    const pieces = trimmedScope.toLowerCase().split(/[^\p{L}\p{N}]+/gu);
    pieces.forEach((piece) => {
        acceptCandidate(normalizeMetadataObjectRankingToken(piece));
    });
    return tokens;
}
/**
 * Scores how well a metadata object name matches the requested scope.
 *
 * Every object starts at 1. For each scope token (from
 * `metadataScopeRankingTokens`) that occurs as a substring of the
 * normalized object name, the score grows by 4 when the token has six or
 * more characters and by 3 otherwise. An object name that normalizes to
 * an empty string keeps the base score.
 *
 * @param {*} metadataScope - Requested scope phrase.
 * @param {*} objectName - Metadata object name to score.
 * @returns {number} Relevance score, never below 1.
 */
function metadataObjectRelevanceScore(metadataScope, objectName) {
    const baseScore = 1;
    const normalizedName = normalizeMetadataObjectRankingToken(objectName);
    if (normalizedName === "") {
        return baseScore;
    }
    return metadataScopeRankingTokens(metadataScope).reduce((total, scopeToken) => {
        if (!normalizedName.includes(scopeToken)) {
            return total;
        }
        const bonus = scopeToken.length >= 6 ? 4 : 3;
        return total + bonus;
    }, baseScore);
}
/**
 * Accumulates relevance-weighted scores per route family.
 *
 * Each matched object is mapped to an entity set and then to a route
 * family; objects for which either lookup yields a falsy value are
 * ignored. The object's `metadataObjectRelevanceScore` is added to its
 * family's bucket.
 *
 * @param {Iterable<string>} matchedObjects - Metadata object names.
 * @param {*} metadataScope - Requested scope used for scoring.
 * @returns {object} Score table created by
 *   `emptyMetadataSurfaceFamilyScores()` with the weighted sums applied.
 */
function metadataWeightedSurfaceFamilyScores(matchedObjects, metadataScope) {
    const weighted = emptyMetadataSurfaceFamilyScores();
    for (const name of matchedObjects) {
        const entitySet = inferMetadataEntitySetFromObjectName(name);
        if (!entitySet) {
            continue;
        }
        const family = metadataRouteFamilyForEntitySetRelaxed(entitySet);
        if (!family) {
            continue;
        }
        weighted[family] += metadataObjectRelevanceScore(metadataScope, name);
    }
    return weighted;
}
/**
 * Returns a copy of the object names ordered by descending relevance to
 * the requested scope; names with equal scores are ordered by
 * `localeCompare` with the "ru" locale. The input array is not mutated.
 *
 * Scores are memoized per object name for the duration of the sort, so
 * the scope is not re-tokenized and each name is not re-scored on every
 * comparator call. The resulting order is unchanged.
 *
 * @param {string[]} matchedObjects - Metadata object names to order.
 * @param {*} metadataScope - Requested scope used for scoring.
 * @returns {string[]} New, sorted array.
 */
function sortMetadataObjectsByRelevance(matchedObjects, metadataScope) {
    const scoreByName = new Map();
    // Lazy memo: a name is scored the first time the comparator sees it.
    const scoreOf = (objectName) => {
        let score = scoreByName.get(objectName);
        if (score === undefined) {
            score = metadataObjectRelevanceScore(metadataScope, objectName);
            scoreByName.set(objectName, score);
        }
        return score;
    };
    return [...matchedObjects].sort((left, right) => {
        const scoreDelta = scoreOf(right) - scoreOf(left);
        if (scoreDelta !== 0) {
            return scoreDelta;
        }
        return left.localeCompare(right, "ru");
    });
}
function metadataObjectsForRouteFamily(routeFamily, matchedObjects, metadataScope) {
if (!routeFamily) {
return [];
}
return matchedObjects.filter((objectName) => {
const filtered = matchedObjects.filter((objectName) => {
const entitySet = inferMetadataEntitySetFromObjectName(objectName);
return entitySet ? metadataRouteFamilyForEntitySetRelaxed(entitySet) === routeFamily : false;
});
return sortMetadataObjectsByRelevance(filtered, metadataScope);
}
function selectMetadataRouteFamilyFromSurfaceScores(scores) {
function selectDominantMetadataRouteFamilyFromScores(scores) {
const ranked = Object.entries(scores)
.filter(([, score]) => score > 0)
.sort((left, right) => right[1] - left[1]);
@ -938,7 +999,39 @@ function selectMetadataRouteFamilyFromSurfaceScores(scores) {
const clearlyDominant = absoluteMargin >= 2 || relativeRatio >= 1.5;
return clearlyDominant ? top[0] : null;
}
function deriveMetadataSurface(result, metadataScope, requestedMetaTypes) {
/**
 * Picks a route family from surface scores, falling back to scope-based
 * ranking only to break an exact tie between the two leading families.
 *
 * Order of decisions:
 *  1. If the plain count scores already have a dominant family, use it.
 *  2. Otherwise, if scope ranking is not allowed, nothing is selected.
 *  3. Otherwise, ranking is attempted only when the two highest positive
 *     counts are equal; the relevance-weighted scores are then checked
 *     for a dominant family.
 *
 * @param {object} input
 * @param {string[]} input.matchedObjects - Metadata object names.
 * @param {*} input.metadataScope - Requested scope used for weighting.
 * @param {object} input.countScores - Per-family count scores.
 * @param {boolean} input.allowScopeRanking - Enables step 3.
 * @returns {{routeFamily: (string|null), rankingApplied: boolean}}
 *   `rankingApplied` is true only when step 3 produced the family.
 */
function selectMetadataRouteFamilyFromSurfaceScores(input) {
    const withoutRanking = (routeFamily) => ({ routeFamily, rankingApplied: false });

    const dominantByCount = selectDominantMetadataRouteFamilyFromScores(input.countScores);
    if (dominantByCount) {
        return withoutRanking(dominantByCount);
    }
    if (!input.allowScopeRanking) {
        return withoutRanking(null);
    }

    // Only the magnitudes matter here, so the family keys are dropped.
    const positiveCountsDesc = Object.values(input.countScores)
        .filter((count) => count > 0)
        .sort((a, b) => b - a);
    const [leaderCount = 0, runnerUpCount = 0] = positiveCountsDesc;
    const leadersAreTied = leaderCount > 0 && leaderCount === runnerUpCount;
    if (!leadersAreTied) {
        return withoutRanking(null);
    }

    const weightedScores = metadataWeightedSurfaceFamilyScores(input.matchedObjects, input.metadataScope);
    const dominantByWeight = selectDominantMetadataRouteFamilyFromScores(weightedScores);
    return {
        routeFamily: dominantByWeight,
        rankingApplied: Boolean(dominantByWeight)
    };
}
function deriveMetadataSurface(result, metadataScope, requestedMetaTypes, allowScopeRanking) {
if (!result || result.error || result.rows.length <= 0) {
return null;
}
@ -959,7 +1052,15 @@ function deriveMetadataSurface(result, metadataScope, requestedMetaTypes) {
const selectedEntitySetRouteFamily = grounding.selectedEntitySet
? metadataRouteFamilyForEntitySetRelaxed(grounding.selectedEntitySet)
: null;
const scoredRouteFamily = selectedEntitySetRouteFamily === null ? selectMetadataRouteFamilyFromSurfaceScores(surfaceFamilyScores) : null;
const scoredRouteSelection = selectedEntitySetRouteFamily === null
? selectMetadataRouteFamilyFromSurfaceScores({
matchedObjects,
metadataScope,
countScores: surfaceFamilyScores,
allowScopeRanking
})
: { routeFamily: null, rankingApplied: false };
const scoredRouteFamily = scoredRouteSelection.routeFamily;
const downstreamRouteFamily = selectedEntitySetRouteFamily ?? scoredRouteFamily;
const routeFamilySelectionBasis = selectedEntitySetRouteFamily
? "selected_entity_set"
@ -967,8 +1068,8 @@ function deriveMetadataSurface(result, metadataScope, requestedMetaTypes) {
? "dominant_surface_objects"
: null;
const selectedSurfaceObjects = grounding.selectedEntitySet !== null
? metadataObjectsForEntitySet(grounding.selectedEntitySet, matchedObjects)
: metadataObjectsForRouteFamily(downstreamRouteFamily, matchedObjects);
? sortMetadataObjectsByRelevance(metadataObjectsForEntitySet(grounding.selectedEntitySet, matchedObjects), metadataScope)
: metadataObjectsForRouteFamily(downstreamRouteFamily, matchedObjects, metadataScope);
const knownLimitations = [];
const ambiguityRemainsUnresolved = grounding.ambiguityDetected && !downstreamRouteFamily;
if (ambiguityRemainsUnresolved && grounding.ambiguityEntitySets.length > 0) {
@ -991,6 +1092,7 @@ function deriveMetadataSurface(result, metadataScope, requestedMetaTypes) {
recommended_next_primitive: metadataNextPrimitiveForRouteFamily(downstreamRouteFamily),
ambiguity_detected: ambiguityRemainsUnresolved,
ambiguity_entity_sets: ambiguityRemainsUnresolved ? grounding.ambiguityEntitySets : [],
surface_object_ranking_applied: scoredRouteSelection.rankingApplied,
available_fields: metadataAvailableFields(result.rows),
known_limitations: knownLimitations,
inference_basis: "confirmed_1c_metadata_surface_rows"
@ -1823,12 +1925,15 @@ async function executeAssistantMcpDiscoveryPilot(planner, deps = DEFAULT_DEPS) {
}
}
const sourceRowsSummary = metadataResult ? summarizeMetadataRows(metadataResult) : null;
const derivedMetadataSurface = deriveMetadataSurface(metadataResult, metadataScope, requestedMetaTypes);
const derivedMetadataSurface = deriveMetadataSurface(metadataResult, metadataScope, requestedMetaTypes, metadataScopeRankingAllowedForPlanner(planner));
if (derivedMetadataSurface) {
pushReason(reasonCodes, "pilot_derived_metadata_surface_from_confirmed_rows");
if (derivedMetadataSurface.route_family_selection_basis === "dominant_surface_objects") {
pushReason(reasonCodes, "pilot_selected_metadata_route_family_from_dominant_surface_objects");
}
if (derivedMetadataSurface.surface_object_ranking_applied) {
pushReason(reasonCodes, "pilot_selected_metadata_route_family_from_surface_object_ranking");
}
}
const evidence = (0, assistantMcpDiscoveryPolicy_1.resolveAssistantMcpDiscoveryEvidence)({
plan: planner.discovery_plan,

View File

@ -159,6 +159,7 @@ export interface AssistantMcpDiscoveryDerivedMetadataSurface {
recommended_next_primitive: "query_documents" | "query_movements" | "drilldown_related_objects" | null;
ambiguity_detected: boolean;
ambiguity_entity_sets: string[];
surface_object_ranking_applied?: boolean;
available_fields: string[];
known_limitations: string[];
inference_basis: "confirmed_1c_metadata_surface_rows";
@ -688,6 +689,11 @@ function metadataTypesForPlanner(planner: AssistantMcpDiscoveryPlannerContract):
return ["Документ", "РегистрНакопления", "РегистрСведений", "Справочник"];
}
/**
 * Tells whether the planner's turn asks for the "inspect_surface" action.
 *
 * The asked action family is read from
 * `planner.discovery_plan.turn_meaning_ref`, coerced to a string,
 * lower-cased and trimmed before the comparison; a missing reference or
 * a nullish action family is treated as an empty string.
 *
 * @param planner - Planner contract carrying the discovery plan.
 * @returns `true` only when the normalized action family is exactly
 *   "inspect_surface".
 */
function metadataScopeRankingAllowedForPlanner(planner: AssistantMcpDiscoveryPlannerContract): boolean {
  const turnMeaning = planner.discovery_plan.turn_meaning_ref;
  const rawActionFamily = turnMeaning?.asked_action_family ?? "";
  const normalizedActionFamily = String(rawActionFamily).toLowerCase().trim();
  return normalizedActionFamily === "inspect_surface";
}
interface ValueFlowPilotProfile {
scope: Extract<
AssistantMcpDiscoveryPilotScope,
@ -1310,20 +1316,86 @@ function metadataSurfaceFamilyScores(
return scores;
}
/**
 * Reduces a value to a comparison token: the value is coerced to a string
 * (nullish becomes ""), lower-cased, and every run of characters that is
 * neither a Unicode letter nor a Unicode number is removed.
 *
 * @param value - Raw text, e.g. a scope phrase or an object name.
 * @returns Lower-cased letters and digits only; may be empty.
 */
function normalizeMetadataObjectRankingToken(value: string): string {
  const text = String(value ?? "");
  const lowered = text.toLowerCase();
  // Separators such as ".", "_", "-" and whitespace are dropped entirely.
  return lowered.replace(/[^\p{L}\p{N}]+/gu, "");
}
/**
 * Builds the list of tokens a metadata scope phrase is matched with.
 *
 * The first candidate is the whole trimmed scope condensed by
 * `normalizeMetadataObjectRankingToken`; the remaining candidates are the
 * letter/digit runs of the lower-cased scope, each normalized the same way.
 * Candidates shorter than two characters are discarded. Tokens are added
 * through `pushUnique` (presumably skipping values already present —
 * confirm against its definition).
 *
 * @param metadataScope - Requested scope; null or blank yields [].
 * @returns Tokens in the order they were accepted.
 */
function metadataScopeRankingTokens(metadataScope: string | null): string[] {
  const trimmedScope = String(metadataScope ?? "").trim();
  if (trimmedScope === "") {
    return [];
  }
  const tokens: string[] = [];
  const acceptCandidate = (candidate: string): void => {
    if (candidate.length >= 2) {
      pushUnique(tokens, candidate);
    }
  };
  // Whole phrase first, so "счет фактура" also yields "счетфактура".
  acceptCandidate(normalizeMetadataObjectRankingToken(trimmedScope));
  const pieces = trimmedScope.toLowerCase().split(/[^\p{L}\p{N}]+/gu);
  pieces.forEach((piece) => {
    acceptCandidate(normalizeMetadataObjectRankingToken(piece));
  });
  return tokens;
}
/**
 * Scores how well a metadata object name matches the requested scope.
 *
 * Every object starts at 1. For each scope token (from
 * `metadataScopeRankingTokens`) that occurs as a substring of the
 * normalized object name, the score grows by 4 when the token has six or
 * more characters and by 3 otherwise. An object name that normalizes to
 * an empty string keeps the base score.
 *
 * @param metadataScope - Requested scope phrase.
 * @param objectName - Metadata object name to score.
 * @returns Relevance score, never below 1.
 */
function metadataObjectRelevanceScore(metadataScope: string | null, objectName: string): number {
  const baseScore = 1;
  const normalizedName = normalizeMetadataObjectRankingToken(objectName);
  if (normalizedName === "") {
    return baseScore;
  }
  return metadataScopeRankingTokens(metadataScope).reduce((total, scopeToken) => {
    if (!normalizedName.includes(scopeToken)) {
      return total;
    }
    const bonus = scopeToken.length >= 6 ? 4 : 3;
    return total + bonus;
  }, baseScore);
}
/**
 * Accumulates relevance-weighted scores per route family.
 *
 * Each matched object is mapped to an entity set and then to a route
 * family; objects for which either lookup yields a falsy value are
 * ignored. The object's `metadataObjectRelevanceScore` is added to its
 * family's bucket.
 *
 * @param matchedObjects - Metadata object names.
 * @param metadataScope - Requested scope used for scoring.
 * @returns Score table created by `emptyMetadataSurfaceFamilyScores()`
 *   with the weighted sums applied.
 */
function metadataWeightedSurfaceFamilyScores(
  matchedObjects: string[],
  metadataScope: string | null
): AssistantMcpDiscoveryDerivedMetadataSurface["surface_family_scores"] {
  const weighted = emptyMetadataSurfaceFamilyScores();
  for (const name of matchedObjects) {
    const entitySet = inferMetadataEntitySetFromObjectName(name);
    if (!entitySet) {
      continue;
    }
    const family = metadataRouteFamilyForEntitySetRelaxed(entitySet);
    if (!family) {
      continue;
    }
    weighted[family] += metadataObjectRelevanceScore(metadataScope, name);
  }
  return weighted;
}
/**
 * Returns a copy of the object names ordered by descending relevance to
 * the requested scope; names with equal scores are ordered by
 * `localeCompare` with the "ru" locale. The input array is not mutated.
 *
 * Scores are memoized per object name for the duration of the sort, so
 * the scope is not re-tokenized and each name is not re-scored on every
 * comparator call. The resulting order is unchanged.
 *
 * @param matchedObjects - Metadata object names to order.
 * @param metadataScope - Requested scope used for scoring.
 * @returns New, sorted array.
 */
function sortMetadataObjectsByRelevance(matchedObjects: string[], metadataScope: string | null): string[] {
  const scoreByName = new Map<string, number>();
  // Lazy memo: a name is scored the first time the comparator sees it.
  const scoreOf = (objectName: string): number => {
    let score = scoreByName.get(objectName);
    if (score === undefined) {
      score = metadataObjectRelevanceScore(metadataScope, objectName);
      scoreByName.set(objectName, score);
    }
    return score;
  };
  return [...matchedObjects].sort((left, right) => {
    const scoreDelta = scoreOf(right) - scoreOf(left);
    if (scoreDelta !== 0) {
      return scoreDelta;
    }
    return left.localeCompare(right, "ru");
  });
}
function metadataObjectsForRouteFamily(
routeFamily: "document_evidence" | "movement_evidence" | "catalog_drilldown" | null,
matchedObjects: string[]
matchedObjects: string[],
metadataScope: string | null
): string[] {
if (!routeFamily) {
return [];
}
return matchedObjects.filter((objectName) => {
const filtered = matchedObjects.filter((objectName) => {
const entitySet = inferMetadataEntitySetFromObjectName(objectName);
return entitySet ? metadataRouteFamilyForEntitySetRelaxed(entitySet) === routeFamily : false;
});
return sortMetadataObjectsByRelevance(filtered, metadataScope);
}
function selectMetadataRouteFamilyFromSurfaceScores(
function selectDominantMetadataRouteFamilyFromScores(
scores: AssistantMcpDiscoveryDerivedMetadataSurface["surface_family_scores"]
): "document_evidence" | "movement_evidence" | "catalog_drilldown" | null {
const ranked = (Object.entries(scores) as Array<
@ -1345,10 +1417,54 @@ function selectMetadataRouteFamilyFromSurfaceScores(
return clearlyDominant ? top[0] : null;
}
/**
 * Picks a route family from surface scores, falling back to scope-based
 * ranking only to break an exact tie between the two leading families.
 *
 * Order of decisions:
 *  1. If the plain count scores already have a dominant family, use it.
 *  2. Otherwise, if scope ranking is not allowed, nothing is selected.
 *  3. Otherwise, ranking is attempted only when the two highest positive
 *     counts are equal; the relevance-weighted scores are then checked
 *     for a dominant family.
 *
 * @param input.matchedObjects - Metadata object names.
 * @param input.metadataScope - Requested scope used for weighting.
 * @param input.countScores - Per-family count scores.
 * @param input.allowScopeRanking - Enables step 3.
 * @returns The chosen family (or null) and whether step 3 produced it.
 */
function selectMetadataRouteFamilyFromSurfaceScores(input: {
  matchedObjects: string[];
  metadataScope: string | null;
  countScores: AssistantMcpDiscoveryDerivedMetadataSurface["surface_family_scores"];
  allowScopeRanking: boolean;
}): {
  routeFamily: "document_evidence" | "movement_evidence" | "catalog_drilldown" | null;
  rankingApplied: boolean;
} {
  const withoutRanking = (
    routeFamily: "document_evidence" | "movement_evidence" | "catalog_drilldown" | null
  ) => ({ routeFamily, rankingApplied: false });

  const dominantByCount = selectDominantMetadataRouteFamilyFromScores(input.countScores);
  if (dominantByCount) {
    return withoutRanking(dominantByCount);
  }
  if (!input.allowScopeRanking) {
    return withoutRanking(null);
  }

  // Only the magnitudes matter here, so the family keys are dropped.
  const positiveCountsDesc = (Object.values(input.countScores) as number[])
    .filter((count) => count > 0)
    .sort((a, b) => b - a);
  const [leaderCount = 0, runnerUpCount = 0] = positiveCountsDesc;
  const leadersAreTied = leaderCount > 0 && leaderCount === runnerUpCount;
  if (!leadersAreTied) {
    return withoutRanking(null);
  }

  const weightedScores = metadataWeightedSurfaceFamilyScores(input.matchedObjects, input.metadataScope);
  const dominantByWeight = selectDominantMetadataRouteFamilyFromScores(weightedScores);
  return {
    routeFamily: dominantByWeight,
    rankingApplied: Boolean(dominantByWeight)
  };
}
function deriveMetadataSurface(
result: AddressMcpMetadataRowsResult | null,
metadataScope: string | null,
requestedMetaTypes: string[]
requestedMetaTypes: string[],
allowScopeRanking: boolean
): AssistantMcpDiscoveryDerivedMetadataSurface | null {
if (!result || result.error || result.rows.length <= 0) {
return null;
@ -1370,8 +1486,16 @@ function deriveMetadataSurface(
const selectedEntitySetRouteFamily = grounding.selectedEntitySet
? metadataRouteFamilyForEntitySetRelaxed(grounding.selectedEntitySet)
: null;
const scoredRouteFamily =
selectedEntitySetRouteFamily === null ? selectMetadataRouteFamilyFromSurfaceScores(surfaceFamilyScores) : null;
const scoredRouteSelection =
selectedEntitySetRouteFamily === null
? selectMetadataRouteFamilyFromSurfaceScores({
matchedObjects,
metadataScope,
countScores: surfaceFamilyScores,
allowScopeRanking
})
: { routeFamily: null, rankingApplied: false };
const scoredRouteFamily = scoredRouteSelection.routeFamily;
const downstreamRouteFamily = selectedEntitySetRouteFamily ?? scoredRouteFamily;
const routeFamilySelectionBasis = selectedEntitySetRouteFamily
? "selected_entity_set"
@ -1380,8 +1504,8 @@ function deriveMetadataSurface(
: null;
const selectedSurfaceObjects =
grounding.selectedEntitySet !== null
? metadataObjectsForEntitySet(grounding.selectedEntitySet, matchedObjects)
: metadataObjectsForRouteFamily(downstreamRouteFamily, matchedObjects);
? sortMetadataObjectsByRelevance(metadataObjectsForEntitySet(grounding.selectedEntitySet, matchedObjects), metadataScope)
: metadataObjectsForRouteFamily(downstreamRouteFamily, matchedObjects, metadataScope);
const knownLimitations: string[] = [];
const ambiguityRemainsUnresolved = grounding.ambiguityDetected && !downstreamRouteFamily;
if (ambiguityRemainsUnresolved && grounding.ambiguityEntitySets.length > 0) {
@ -1408,6 +1532,7 @@ function deriveMetadataSurface(
recommended_next_primitive: metadataNextPrimitiveForRouteFamily(downstreamRouteFamily),
ambiguity_detected: ambiguityRemainsUnresolved,
ambiguity_entity_sets: ambiguityRemainsUnresolved ? grounding.ambiguityEntitySets : [],
surface_object_ranking_applied: scoredRouteSelection.rankingApplied,
available_fields: metadataAvailableFields(result.rows),
known_limitations: knownLimitations,
inference_basis: "confirmed_1c_metadata_surface_rows"
@ -2422,12 +2547,20 @@ export async function executeAssistantMcpDiscoveryPilot(
}
const sourceRowsSummary = metadataResult ? summarizeMetadataRows(metadataResult) : null;
const derivedMetadataSurface = deriveMetadataSurface(metadataResult, metadataScope, requestedMetaTypes);
const derivedMetadataSurface = deriveMetadataSurface(
metadataResult,
metadataScope,
requestedMetaTypes,
metadataScopeRankingAllowedForPlanner(planner)
);
if (derivedMetadataSurface) {
pushReason(reasonCodes, "pilot_derived_metadata_surface_from_confirmed_rows");
if (derivedMetadataSurface.route_family_selection_basis === "dominant_surface_objects") {
pushReason(reasonCodes, "pilot_selected_metadata_route_family_from_dominant_surface_objects");
}
if (derivedMetadataSurface.surface_object_ranking_applied) {
pushReason(reasonCodes, "pilot_selected_metadata_route_family_from_surface_object_ranking");
}
}
const evidence = resolveAssistantMcpDiscoveryEvidence({
plan: planner.discovery_plan,

View File

@ -435,6 +435,50 @@ describe("assistant MCP discovery pilot executor", () => {
);
});
// Scenario: one Document and one AccumulationRegister are returned, so the
// plain per-family counts are tied 1:1. Only the document's name contains
// the requested scope "НДС", and the turn asks for "inspect_surface", so
// the test expects the tie to be resolved in favour of document_evidence
// and the ranking reason code to be reported.
it("can break a weak metadata family tie by ranking surface objects against the requested scope", async () => {
    const turnMeaning = {
        asked_domain_family: "metadata",
        asked_action_family: "inspect_surface",
        explicit_entity_candidates: ["НДС"]
    };
    const metadataRows = [
        {
            FullName: "Document.НДССчетФактура",
            MetaType: "Document",
            attributes: [{ Name: "Дата" }]
        },
        {
            FullName: "AccumulationRegister.BankOperations",
            MetaType: "AccumulationRegister",
            resources: [{ Name: "Amount" }]
        }
    ];
    const expectedSurface = {
        metadata_scope: "НДС",
        available_entity_sets: ["Document", "AccumulationRegister"],
        selected_entity_set: null,
        selected_surface_objects: ["Document.НДССчетФактура"],
        surface_family_scores: {
            document_evidence: 1,
            movement_evidence: 1,
            catalog_drilldown: 0
        },
        downstream_route_family: "document_evidence",
        route_family_selection_basis: "dominant_surface_objects",
        recommended_next_primitive: "query_documents",
        ambiguity_detected: false,
        ambiguity_entity_sets: [],
        surface_object_ranking_applied: true
    };

    const planner = planAssistantMcpDiscovery({ turnMeaning });
    const metadataDeps = buildMetadataDeps(metadataRows);
    const pilotResult = await executeAssistantMcpDiscoveryPilot(planner, metadataDeps);

    expect(pilotResult.pilot_status).toBe("executed");
    expect(pilotResult.derived_metadata_surface).toMatchObject(expectedSurface);
    expect(pilotResult.reason_codes).toContain("pilot_selected_metadata_route_family_from_surface_object_ranking");
});
it("keeps metadata ambiguity unresolved when surface-family scores are nearly tied", async () => {
const planner = planAssistantMcpDiscovery({
turnMeaning: {