From 98988fa6357ff84f2d1cf2413cce5a724541c7dc Mon Sep 17 00:00:00 2001 From: dctouch Date: Mon, 20 Apr 2026 09:10:11 +0300 Subject: [PATCH] =?UTF-8?q?ARCH:=20=D0=B4=D0=BE=D0=B1=D0=B0=D0=B2=D0=B8?= =?UTF-8?q?=D1=82=D1=8C=20planner=20MCP=20semantic=20discovery?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ...alog_authority_recovery_plan_2026-04-19.md | 22 ++ .../services/assistantMcpDiscoveryPlanner.js | 161 +++++++++++++ .../services/assistantMcpDiscoveryPlanner.ts | 215 ++++++++++++++++++ .../assistantMcpDiscoveryPlanner.test.ts | 98 ++++++++ 4 files changed, 496 insertions(+) create mode 100644 llm_normalizer/backend/dist/services/assistantMcpDiscoveryPlanner.js create mode 100644 llm_normalizer/backend/src/services/assistantMcpDiscoveryPlanner.ts create mode 100644 llm_normalizer/backend/tests/assistantMcpDiscoveryPlanner.test.ts diff --git a/docs/ARCH/11 - architecture_turnaround/14 - semantic_dialog_authority_recovery_plan_2026-04-19.md b/docs/ARCH/11 - architecture_turnaround/14 - semantic_dialog_authority_recovery_plan_2026-04-19.md index 6717607..f79b6c0 100644 --- a/docs/ARCH/11 - architecture_turnaround/14 - semantic_dialog_authority_recovery_plan_2026-04-19.md +++ b/docs/ARCH/11 - architecture_turnaround/14 - semantic_dialog_authority_recovery_plan_2026-04-19.md @@ -711,6 +711,28 @@ Validation: - `npm test -- assistantMcpDiscoveryPolicy.test.ts assistantMcpCatalogIndex.test.ts` passed 11/11; - `npm run build` passed. +## Progress Update - 2026-04-20 MCP Discovery Planner Seed + +The third implementation slice of Big Block 5 added a standalone planner over the discovery policy and catalog index: + +- `assistantMcpDiscoveryPlanner.ts` +- `assistantMcpDiscoveryPlanner.test.ts` + +The planner still does not execute live MCP calls. + +It converts current-turn meaning into a guarded discovery plan and immediately reviews it against catalog constraints: + +- value-flow questions choose entity resolution, movement query, aggregation, and coverage probe; +- document questions choose entity resolution, document query, and coverage probe; +- lifecycle/activity-duration questions choose document evidence plus explicit evidence-basis explanation; +- metadata questions stay in metadata inspection; +- unclassified turns remain in clarification state rather than executing a blind query. + +Validation: + +- `npm test -- assistantMcpDiscoveryPolicy.test.ts assistantMcpCatalogIndex.test.ts assistantMcpDiscoveryPlanner.test.ts` passed 17/17; +- `npm run build` passed. + ## Execution Rule Do not implement this plan as: diff --git a/llm_normalizer/backend/dist/services/assistantMcpDiscoveryPlanner.js b/llm_normalizer/backend/dist/services/assistantMcpDiscoveryPlanner.js new file mode 100644 index 0000000..c1c42db --- /dev/null +++ b/llm_normalizer/backend/dist/services/assistantMcpDiscoveryPlanner.js @@ -0,0 +1,161 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.ASSISTANT_MCP_DISCOVERY_PLANNER_SCHEMA_VERSION = void 0; +exports.planAssistantMcpDiscovery = planAssistantMcpDiscovery; +const assistantMcpDiscoveryPolicy_1 = require("./assistantMcpDiscoveryPolicy"); +const assistantMcpCatalogIndex_1 = require("./assistantMcpCatalogIndex"); +exports.ASSISTANT_MCP_DISCOVERY_PLANNER_SCHEMA_VERSION = "assistant_mcp_discovery_planner_v1"; +function toNonEmptyString(value) { + if (value === null || value === undefined) { + return null; + } + const text = String(value).trim(); + return text.length > 0 ? text : null; +} +function lower(value) { + return String(value ?? "").trim().toLowerCase(); +} +function normalizeReasonCode(value) { + const normalized = value + .trim() + .replace(/[^\p{L}\p{N}_.:-]+/gu, "_") + .replace(/^_+|_+$/g, "") + .toLowerCase(); + return normalized.length > 0 ? normalized.slice(0, 120) : null; +} +function pushReason(target, value) { + const normalized = normalizeReasonCode(value); + if (normalized && !target.includes(normalized)) { + target.push(normalized); + } +} +function pushUnique(target, value) { + const text = value.trim(); + if (text && !target.includes(text)) { + target.push(text); + } +} +function hasEntity(meaning) { + return (meaning?.explicit_entity_candidates?.length ?? 0) > 0; +} +function addScopeAxes(axes, meaning) { + if (hasEntity(meaning)) { + pushUnique(axes, "counterparty"); + } + if (toNonEmptyString(meaning?.explicit_organization_scope)) { + pushUnique(axes, "organization"); + } + if (toNonEmptyString(meaning?.explicit_date_scope)) { + pushUnique(axes, "period"); + } +} +function includesAny(text, tokens) { + return tokens.some((token) => text.includes(token)); +} +function recipeFor(input) { + const meaning = input.turnMeaning ?? null; + const domain = lower(meaning?.asked_domain_family); + const action = lower(meaning?.asked_action_family); + const unsupported = lower(meaning?.unsupported_but_understood_family); + const combined = `${domain} ${action} ${unsupported}`.trim(); + const axes = []; + addScopeAxes(axes, meaning); + if (includesAny(combined, ["turnover", "revenue", "payment", "payout", "value"])) { + pushUnique(axes, "aggregate_axis"); + pushUnique(axes, "amount"); + pushUnique(axes, "coverage_target"); + return { + semanticDataNeed: "counterparty value-flow evidence", + primitives: ["resolve_entity_reference", "query_movements", "aggregate_by_axis", "probe_coverage"], + axes, + reason: "planner_selected_value_flow_recipe" + }; + } + if (includesAny(combined, ["document", "documents"])) { + pushUnique(axes, "coverage_target"); + return { + semanticDataNeed: "document evidence", + primitives: ["resolve_entity_reference", "query_documents", "probe_coverage"], + axes, + reason: "planner_selected_document_recipe" + }; + } + if (includesAny(combined, ["lifecycle", "activity", "duration", "age"])) { + pushUnique(axes, "document_date"); + pushUnique(axes, "coverage_target"); + pushUnique(axes, "evidence_basis"); + return { + semanticDataNeed: "counterparty lifecycle evidence", + primitives: ["resolve_entity_reference", "query_documents", "probe_coverage", "explain_evidence_basis"], + axes, + reason: "planner_selected_lifecycle_recipe" + }; + } + if (includesAny(combined, ["metadata", "schema", "catalog"])) { + pushUnique(axes, "metadata_scope"); + return { + semanticDataNeed: "1C metadata evidence", + primitives: ["inspect_1c_metadata"], + axes, + reason: "planner_selected_metadata_recipe" + }; + } + if (hasEntity(meaning)) { + pushUnique(axes, "business_entity"); + return { + semanticDataNeed: "entity discovery evidence", + primitives: ["search_business_entity", "resolve_entity_reference", "probe_coverage"], + axes, + reason: "planner_selected_entity_resolution_recipe" + }; + } + return { + semanticDataNeed: "unclassified 1C discovery need", + primitives: ["inspect_1c_metadata"], + axes, + reason: "planner_selected_clarification_recipe" + }; +} +function statusFrom(plan, review) { + if (plan.plan_status === "blocked" || review.review_status === "catalog_blocked") { + return "blocked"; + } + if (plan.plan_status !== "allowed" || review.review_status !== "catalog_compatible") { + return "needs_clarification"; + } + return "ready_for_execution"; +} +function planAssistantMcpDiscovery(input) { + const recipe = recipeFor(input); + const semanticDataNeed = toNonEmptyString(input.semanticDataNeed) ?? recipe.semanticDataNeed; + const reasonCodes = []; + pushReason(reasonCodes, recipe.reason); + const plan = (0, assistantMcpDiscoveryPolicy_1.buildAssistantMcpDiscoveryPlan)({ + semanticDataNeed, + turnMeaning: input.turnMeaning, + proposedPrimitives: recipe.primitives, + requiredAxes: recipe.axes + }); + const review = (0, assistantMcpCatalogIndex_1.reviewAssistantMcpDiscoveryPlanAgainstCatalog)(plan); + const plannerStatus = statusFrom(plan, review); + if (plannerStatus === "ready_for_execution") { + pushReason(reasonCodes, "planner_ready_for_guarded_mcp_execution"); + } + else if (plannerStatus === "blocked") { + pushReason(reasonCodes, "planner_blocked_by_policy_or_catalog"); + } + else { + pushReason(reasonCodes, "planner_needs_more_user_or_scope_context"); + } + return { + schema_version: exports.ASSISTANT_MCP_DISCOVERY_PLANNER_SCHEMA_VERSION, + policy_owner: "assistantMcpDiscoveryPlanner", + planner_status: plannerStatus, + semantic_data_need: semanticDataNeed, + proposed_primitives: recipe.primitives, + required_axes: recipe.axes, + discovery_plan: plan, + catalog_review: review, + reason_codes: reasonCodes + }; +} diff --git a/llm_normalizer/backend/src/services/assistantMcpDiscoveryPlanner.ts b/llm_normalizer/backend/src/services/assistantMcpDiscoveryPlanner.ts new file mode 100644 index 0000000..8f65d4a --- /dev/null +++ b/llm_normalizer/backend/src/services/assistantMcpDiscoveryPlanner.ts @@ -0,0 +1,215 @@ +import { + buildAssistantMcpDiscoveryPlan, + type AssistantMcpDiscoveryPlanContract, + type AssistantMcpDiscoveryPrimitive, + type AssistantMcpDiscoveryTurnMeaningRef +} from "./assistantMcpDiscoveryPolicy"; +import { + reviewAssistantMcpDiscoveryPlanAgainstCatalog, + type AssistantMcpCatalogPlanReview +} from "./assistantMcpCatalogIndex"; + +export const ASSISTANT_MCP_DISCOVERY_PLANNER_SCHEMA_VERSION = "assistant_mcp_discovery_planner_v1" as const; + +export type AssistantMcpDiscoveryPlannerStatus = "ready_for_execution" | "needs_clarification" | "blocked"; + +export interface AssistantMcpDiscoveryPlannerInput { + semanticDataNeed?: string | null; + turnMeaning?: AssistantMcpDiscoveryTurnMeaningRef | null; +} + +export interface AssistantMcpDiscoveryPlannerContract { + schema_version: typeof ASSISTANT_MCP_DISCOVERY_PLANNER_SCHEMA_VERSION; + policy_owner: "assistantMcpDiscoveryPlanner"; + planner_status: AssistantMcpDiscoveryPlannerStatus; + semantic_data_need: string | null; + proposed_primitives: AssistantMcpDiscoveryPrimitive[]; + required_axes: string[]; + discovery_plan: AssistantMcpDiscoveryPlanContract; + catalog_review: AssistantMcpCatalogPlanReview; + reason_codes: string[]; +} + +interface PlannerRecipe { + semanticDataNeed: string; + primitives: AssistantMcpDiscoveryPrimitive[]; + axes: string[]; + reason: string; +} + +function toNonEmptyString(value: unknown): string | null { + if (value === null || value === undefined) { + return null; + } + const text = String(value).trim(); + return text.length > 0 ? text : null; +} + +function lower(value: unknown): string { + return String(value ?? "").trim().toLowerCase(); +} + +function normalizeReasonCode(value: string): string | null { + const normalized = value + .trim() + .replace(/[^\p{L}\p{N}_.:-]+/gu, "_") + .replace(/^_+|_+$/g, "") + .toLowerCase(); + return normalized.length > 0 ? normalized.slice(0, 120) : null; +} + +function pushReason(target: string[], value: string): void { + const normalized = normalizeReasonCode(value); + if (normalized && !target.includes(normalized)) { + target.push(normalized); + } +} + +function pushUnique(target: string[], value: string): void { + const text = value.trim(); + if (text && !target.includes(text)) { + target.push(text); + } +} + +function hasEntity(meaning: AssistantMcpDiscoveryTurnMeaningRef | null | undefined): boolean { + return (meaning?.explicit_entity_candidates?.length ?? 0) > 0; +} + +function addScopeAxes(axes: string[], meaning: AssistantMcpDiscoveryTurnMeaningRef | null | undefined): void { + if (hasEntity(meaning)) { + pushUnique(axes, "counterparty"); + } + if (toNonEmptyString(meaning?.explicit_organization_scope)) { + pushUnique(axes, "organization"); + } + if (toNonEmptyString(meaning?.explicit_date_scope)) { + pushUnique(axes, "period"); + } +} + +function includesAny(text: string, tokens: string[]): boolean { + return tokens.some((token) => text.includes(token)); +} + +function recipeFor(input: AssistantMcpDiscoveryPlannerInput): PlannerRecipe { + const meaning = input.turnMeaning ?? null; + const domain = lower(meaning?.asked_domain_family); + const action = lower(meaning?.asked_action_family); + const unsupported = lower(meaning?.unsupported_but_understood_family); + const combined = `${domain} ${action} ${unsupported}`.trim(); + const axes: string[] = []; + addScopeAxes(axes, meaning); + + if (includesAny(combined, ["turnover", "revenue", "payment", "payout", "value"])) { + pushUnique(axes, "aggregate_axis"); + pushUnique(axes, "amount"); + pushUnique(axes, "coverage_target"); + return { + semanticDataNeed: "counterparty value-flow evidence", + primitives: ["resolve_entity_reference", "query_movements", "aggregate_by_axis", "probe_coverage"], + axes, + reason: "planner_selected_value_flow_recipe" + }; + } + + if (includesAny(combined, ["document", "documents"])) { + pushUnique(axes, "coverage_target"); + return { + semanticDataNeed: "document evidence", + primitives: ["resolve_entity_reference", "query_documents", "probe_coverage"], + axes, + reason: "planner_selected_document_recipe" + }; + } + + if (includesAny(combined, ["lifecycle", "activity", "duration", "age"])) { + pushUnique(axes, "document_date"); + pushUnique(axes, "coverage_target"); + pushUnique(axes, "evidence_basis"); + return { + semanticDataNeed: "counterparty lifecycle evidence", + primitives: ["resolve_entity_reference", "query_documents", "probe_coverage", "explain_evidence_basis"], + axes, + reason: "planner_selected_lifecycle_recipe" + }; + } + + if (includesAny(combined, ["metadata", "schema", "catalog"])) { + pushUnique(axes, "metadata_scope"); + return { + semanticDataNeed: "1C metadata evidence", + primitives: ["inspect_1c_metadata"], + axes, + reason: "planner_selected_metadata_recipe" + }; + } + + if (hasEntity(meaning)) { + pushUnique(axes, "business_entity"); + return { + semanticDataNeed: "entity discovery evidence", + primitives: ["search_business_entity", "resolve_entity_reference", "probe_coverage"], + axes, + reason: "planner_selected_entity_resolution_recipe" + }; + } + + return { + semanticDataNeed: "unclassified 1C discovery need", + primitives: ["inspect_1c_metadata"], + axes, + reason: "planner_selected_clarification_recipe" + }; +} + +function statusFrom( + plan: AssistantMcpDiscoveryPlanContract, + review: AssistantMcpCatalogPlanReview +): AssistantMcpDiscoveryPlannerStatus { + if (plan.plan_status === "blocked" || review.review_status === "catalog_blocked") { + return "blocked"; + } + if (plan.plan_status !== "allowed" || review.review_status !== "catalog_compatible") { + return "needs_clarification"; + } + return "ready_for_execution"; +} + +export function planAssistantMcpDiscovery( + input: AssistantMcpDiscoveryPlannerInput +): AssistantMcpDiscoveryPlannerContract { + const recipe = recipeFor(input); + const semanticDataNeed = toNonEmptyString(input.semanticDataNeed) ?? recipe.semanticDataNeed; + const reasonCodes: string[] = []; + pushReason(reasonCodes, recipe.reason); + + const plan = buildAssistantMcpDiscoveryPlan({ + semanticDataNeed, + turnMeaning: input.turnMeaning, + proposedPrimitives: recipe.primitives, + requiredAxes: recipe.axes + }); + const review = reviewAssistantMcpDiscoveryPlanAgainstCatalog(plan); + const plannerStatus = statusFrom(plan, review); + + if (plannerStatus === "ready_for_execution") { + pushReason(reasonCodes, "planner_ready_for_guarded_mcp_execution"); + } else if (plannerStatus === "blocked") { + pushReason(reasonCodes, "planner_blocked_by_policy_or_catalog"); + } else { + pushReason(reasonCodes, "planner_needs_more_user_or_scope_context"); + } + + return { + schema_version: ASSISTANT_MCP_DISCOVERY_PLANNER_SCHEMA_VERSION, + policy_owner: "assistantMcpDiscoveryPlanner", + planner_status: plannerStatus, + semantic_data_need: semanticDataNeed, + proposed_primitives: recipe.primitives, + required_axes: recipe.axes, + discovery_plan: plan, + catalog_review: review, + reason_codes: reasonCodes + }; +} diff --git a/llm_normalizer/backend/tests/assistantMcpDiscoveryPlanner.test.ts b/llm_normalizer/backend/tests/assistantMcpDiscoveryPlanner.test.ts new file mode 100644 index 0000000..36661ba --- /dev/null +++ b/llm_normalizer/backend/tests/assistantMcpDiscoveryPlanner.test.ts @@ -0,0 +1,98 @@ +import { describe, expect, it } from "vitest"; +import { planAssistantMcpDiscovery } from "../src/services/assistantMcpDiscoveryPlanner"; + +describe("assistant MCP discovery planner", () => { + it("builds a catalog-compatible value-flow discovery plan from current turn meaning", () => { + const result = planAssistantMcpDiscovery({ + turnMeaning: { + asked_domain_family: "counterparty_value", + asked_action_family: "turnover", + explicit_entity_candidates: ["SVK"], + explicit_date_scope: "2020" + } + }); + + expect(result.planner_status).toBe("ready_for_execution"); + expect(result.semantic_data_need).toBe("counterparty value-flow evidence"); + expect(result.proposed_primitives).toEqual([ + "resolve_entity_reference", + "query_movements", + "aggregate_by_axis", + "probe_coverage" + ]); + expect(result.required_axes).toEqual(["counterparty", "period", "aggregate_axis", "amount", "coverage_target"]); + expect(result.catalog_review.review_status).toBe("catalog_compatible"); + expect(result.discovery_plan.answer_may_use_raw_model_claims).toBe(false); + }); + + it("keeps a value-flow plan in clarification state when period axis is missing", () => { + const result = planAssistantMcpDiscovery({ + turnMeaning: { + asked_domain_family: "counterparty_value", + asked_action_family: "turnover", + explicit_entity_candidates: ["SVK"] + } + }); + + expect(result.planner_status).toBe("needs_clarification"); + expect(result.catalog_review.review_status).toBe("needs_more_axes"); + expect(result.catalog_review.missing_axes_by_primitive.query_movements).toContainEqual(["period", "counterparty"]); + expect(result.reason_codes).toContain("planner_needs_more_user_or_scope_context"); + }); + + it("builds a document discovery plan without falling back to movement primitives", () => { + const result = planAssistantMcpDiscovery({ + turnMeaning: { + asked_domain_family: "counterparty_documents", + asked_action_family: "list_documents", + explicit_entity_candidates: ["SVK"] + } + }); + + expect(result.planner_status).toBe("ready_for_execution"); + expect(result.proposed_primitives).toEqual(["resolve_entity_reference", "query_documents", "probe_coverage"]); + expect(result.proposed_primitives).not.toContain("query_movements"); + expect(result.required_axes).toEqual(["counterparty", "coverage_target"]); + }); + + it("builds an inference-safe lifecycle plan with evidence explanation", () => { + const result = planAssistantMcpDiscovery({ + turnMeaning: { + asked_domain_family: "counterparty_lifecycle", + asked_action_family: "activity_duration", + explicit_entity_candidates: ["SVK"] + } + }); + + expect(result.planner_status).toBe("ready_for_execution"); + expect(result.proposed_primitives).toEqual([ + "resolve_entity_reference", + "query_documents", + "probe_coverage", + "explain_evidence_basis" + ]); + expect(result.required_axes).toEqual(["counterparty", "document_date", "coverage_target", "evidence_basis"]); + }); + + it("uses metadata-only planning when the user asks about available schema surface", () => { + const result = planAssistantMcpDiscovery({ + turnMeaning: { + asked_domain_family: "metadata", + asked_action_family: "inspect_catalog" + } + }); + + expect(result.planner_status).toBe("ready_for_execution"); + expect(result.proposed_primitives).toEqual(["inspect_1c_metadata"]); + expect(result.required_axes).toEqual(["metadata_scope"]); + expect(result.catalog_review.evidence_floors.inspect_1c_metadata).toBe("source_summary"); + }); + + it("does not mark an unclassified turn as executable without turn meaning context", () => { + const result = planAssistantMcpDiscovery({}); + + expect(result.planner_status).toBe("needs_clarification"); + expect(result.discovery_plan.plan_status).toBe("needs_clarification"); + expect(result.reason_codes).toContain("planner_needs_more_user_or_scope_context"); + }); +});