From ce48fa83a54693970b0a5f2b6ede7edbe31c9852 Mon Sep 17 00:00:00 2001 From: dctouch Date: Wed, 22 Apr 2026 12:53:48 +0300 Subject: [PATCH] =?UTF-8?q?ARCH:=20=D0=B4=D0=BE=D0=B1=D0=B8=D1=82=D1=8C=20?= =?UTF-8?q?entity-resolution=20chain=20=D0=B8=20=D0=BE=D1=87=D0=B8=D1=81?= =?UTF-8?q?=D1=82=D0=B8=D1=82=D1=8C=20stale=20runtime?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ...rness_phase25_entity_resolution_chain.json | 44 ++ .../services/assistantContinuityPolicy.js | 16 +- .../assistantMcpDiscoveryAnswerAdapter.js | 167 +++++- .../assistantMcpDiscoveryPilotExecutor.js | 417 ++++++++++++++- .../services/assistantMcpDiscoveryPlanner.js | 19 + .../services/assistantMcpDiscoveryPolicy.js | 4 + .../assistantMcpDiscoveryTurnInputAdapter.js | 149 +++++- .../assistantMcpDiscoveryAnswerAdapter.ts | 192 ++++++- .../assistantMcpDiscoveryPilotExecutor.ts | 495 +++++++++++++++++- .../services/assistantMcpDiscoveryPlanner.ts | 32 ++ .../assistantMcpDiscoveryTurnInputAdapter.ts | 140 ++++- ...assistantMcpDiscoveryAnswerAdapter.test.ts | 123 +++++ ...assistantMcpDiscoveryPilotExecutor.test.ts | 189 +++++++ .../assistantMcpDiscoveryPlanner.test.ts | 37 ++ ...istantMcpDiscoveryTurnInputAdapter.test.ts | 76 +++ 15 files changed, 2024 insertions(+), 76 deletions(-) create mode 100644 docs/orchestration/address_truth_harness_phase25_entity_resolution_chain.json diff --git a/docs/orchestration/address_truth_harness_phase25_entity_resolution_chain.json b/docs/orchestration/address_truth_harness_phase25_entity_resolution_chain.json new file mode 100644 index 0000000..ac02f0d --- /dev/null +++ b/docs/orchestration/address_truth_harness_phase25_entity_resolution_chain.json @@ -0,0 +1,44 @@ +{ + "schema_version": "domain_truth_harness_spec_v1", + "scenario_id": "address_truth_harness_phase25_entity_resolution_chain", + "domain": "address_phase25_entity_resolution_chain", + "title": "Phase 25 entity-resolution grounding replay", + "description": "Targeted AGENT replay for the first Big Block C slice where the assistant must use MCP discovery to ground a business entity in the checked 1C catalog and answer honestly without pretending that documents, movements, or value-flow evidence were already checked.", + "bindings": {}, + "steps": [ + { + "step_id": "step_01_resolve_counterparty_from_catalog", + "title": "Raw counterparty search wording resolves a grounded 1C entity without leaking downstream business facts", + "question": "найди в 1С контрагента Группа СВК", + "allowed_reply_types": [ + "factual_with_explanation", + "partial_coverage" + ], + "required_answer_patterns_all": [ + "(?i)группа\\s+свк", + "(?i)контрагент", + "(?i)документ|движени|денежн" + ], + "required_answer_patterns_any": [ + "(?i)каталог", + "(?i)1с", + "(?i)наш[её]л", + "(?i)найден" + ], + "forbidden_answer_patterns": [ + "(?i)получили", + "(?i)заплатили", + "(?i)нетто", + "(?i)оборот", + "(?i)выручк", + "(?i)сумм(а|ы)" + ], + "criticality": "critical", + "semantic_tags": [ + "entity_resolution", + "catalog_grounding", + "bounded_autonomy" + ] + } + ] +} diff --git a/llm_normalizer/backend/dist/services/assistantContinuityPolicy.js b/llm_normalizer/backend/dist/services/assistantContinuityPolicy.js index 1242337..431aa5d 100644 --- a/llm_normalizer/backend/dist/services/assistantContinuityPolicy.js +++ b/llm_normalizer/backend/dist/services/assistantContinuityPolicy.js @@ -73,6 +73,13 @@ function readAssistantMcpDiscoveryTurnMeaning(debug) { const turnInput = toRecordObject(entry?.turn_input); return toRecordObject(turnInput?.turn_meaning_ref); } +function readAssistantMcpDiscoveryTurnMeaningMetadataAmbiguityEntitySets(debug, toNonEmptyString = fallbackToNonEmptyString) { + const values = readAssistantMcpDiscoveryTurnMeaning(debug)?.metadata_ambiguity_entity_sets; + if (!Array.isArray(values)) { + return []; + } + return values.map((item) => toNonEmptyString(item)).filter((item) => Boolean(item)); +} function readAssistantMcpDiscoveryActionFamily(debug, toNonEmptyString = fallbackToNonEmptyString) { return toNonEmptyString(readAssistantMcpDiscoveryTurnMeaning(debug)?.asked_action_family); } @@ -96,14 +103,15 @@ function readAssistantMcpDiscoveryMetadataSelectedEntitySet(debug, toNonEmptyStr return toNonEmptyString(readAssistantMcpDiscoveryDerivedMetadataSurface(debug)?.selected_entity_set); } function readAssistantMcpDiscoveryMetadataAmbiguityDetected(debug) { - return readAssistantMcpDiscoveryDerivedMetadataSurface(debug)?.ambiguity_detected === true; + return (readAssistantMcpDiscoveryDerivedMetadataSurface(debug)?.ambiguity_detected === true || + readAssistantMcpDiscoveryTurnMeaningMetadataAmbiguityEntitySets(debug).length > 0); } function readAssistantMcpDiscoveryMetadataAmbiguityEntitySets(debug, toNonEmptyString = fallbackToNonEmptyString) { const values = readAssistantMcpDiscoveryDerivedMetadataSurface(debug)?.ambiguity_entity_sets; - if (!Array.isArray(values)) { - return []; + if (Array.isArray(values)) { + return values.map((item) => toNonEmptyString(item)).filter((item) => Boolean(item)); } - return values.map((item) => toNonEmptyString(item)).filter((item) => Boolean(item)); + return readAssistantMcpDiscoveryTurnMeaningMetadataAmbiguityEntitySets(debug, toNonEmptyString); } function mapAssistantMcpDiscoveryPilotScopeToAddressIntent(pilotScope, actionFamily) { if (pilotScope === "counterparty_lifecycle_query_documents_v1") { diff --git a/llm_normalizer/backend/dist/services/assistantMcpDiscoveryAnswerAdapter.js b/llm_normalizer/backend/dist/services/assistantMcpDiscoveryAnswerAdapter.js index 7c5dae4..07d13c0 100644 --- a/llm_normalizer/backend/dist/services/assistantMcpDiscoveryAnswerAdapter.js +++ b/llm_normalizer/backend/dist/services/assistantMcpDiscoveryAnswerAdapter.js @@ -51,6 +51,11 @@ function modeFor(pilot) { if (pilot.pilot_status === "skipped_needs_clarification") { return "needs_clarification"; } + if (pilot.pilot_scope === "entity_resolution_search_v1" && + (pilot.reason_codes.includes("pilot_entity_resolution_ambiguity_requires_clarification") || + pilot.derived_entity_resolution?.resolution_status === "ambiguous")) { + return "needs_clarification"; + } if (pilot.evidence.answer_permission === "confirmed_answer") { return "confirmed_with_bounded_inference"; } @@ -73,10 +78,91 @@ function isMovementPilot(pilot) { function isMetadataPilot(pilot) { return pilot.pilot_scope === "metadata_inspection_v1"; } +function isEntityResolutionPilot(pilot) { + return pilot.pilot_scope === "entity_resolution_search_v1"; +} function isMetadataLaneChoiceClarification(pilot) { return (pilot.reason_codes.includes("planner_selected_metadata_lane_clarification_recipe") || pilot.dry_run.reason_codes.includes("planner_selected_metadata_lane_clarification_recipe")); } +function askedActionFamily(pilot) { + const action = pilot.evidence.query_plan.turn_meaning_ref?.asked_action_family; + if (typeof action !== "string") { + return null; + } + const normalized = action.trim().toLowerCase(); + return normalized.length > 0 ? normalized : null; +} +function unsupportedFamily(pilot) { + const unsupported = pilot.evidence.query_plan.turn_meaning_ref?.unsupported_but_understood_family; + if (typeof unsupported !== "string") { + return null; + } + const normalized = unsupported.trim().toLowerCase(); + return normalized.length > 0 ? normalized : null; +} +function firstEntityCandidate(pilot) { + const values = Array.isArray(pilot.evidence.query_plan.turn_meaning_ref?.explicit_entity_candidates) + ? pilot.evidence.query_plan.turn_meaning_ref?.explicit_entity_candidates + : []; + for (const value of values) { + const text = String(value ?? "").trim(); + if (text) { + return text; + } + } + return null; +} +function isMovementLaneClarification(pilot) { + return (isMovementPilot(pilot) || + pilot.reason_codes.includes("planner_selected_movement_recipe") || + pilot.dry_run.reason_codes.includes("planner_selected_movement_recipe") || + askedActionFamily(pilot) === "list_movements" || + unsupportedFamily(pilot) === "movement_evidence"); +} +function isDocumentLaneClarification(pilot) { + return (isDocumentPilot(pilot) || + pilot.reason_codes.includes("planner_selected_document_recipe") || + pilot.dry_run.reason_codes.includes("planner_selected_document_recipe") || + askedActionFamily(pilot) === "list_documents" || + unsupportedFamily(pilot) === "document_evidence"); +} +function laneScopeSuffix(pilot) { + const entity = firstEntityCandidate(pilot); + return entity ? ` по "${entity}"` : ""; +} +function dryRunMissingAxis(pilot, axis) { + return pilot.dry_run.execution_steps.some((step) => step.missing_axis_options.some((option) => option.includes(axis))); +} +function clarificationNeedRu(pilot) { + const needsPeriod = dryRunMissingAxis(pilot, "period"); + const needsOrganization = dryRunMissingAxis(pilot, "organization"); + if (needsPeriod && needsOrganization) { + return { subject: "проверяемый период и организацию", verb: "нужно" }; + } + if (needsPeriod) { + return { subject: "проверяемый период", verb: "нужен" }; + } + if (needsOrganization) { + return { subject: "организацию", verb: "нужно" }; + } + return { subject: "контекст проверки", verb: "нужно" }; +} +function clarificationNextStepLine(pilot, laneLabel) { + const needsPeriod = dryRunMissingAxis(pilot, "period"); + const needsOrganization = dryRunMissingAxis(pilot, "organization"); + const scopeSuffix = laneScopeSuffix(pilot); + if (needsPeriod && needsOrganization) { + return `Уточните период и организацию, и я продолжу поиск по ${laneLabel}${scopeSuffix} в 1С.`; + } + if (needsPeriod) { + return `Уточните период, и я продолжу поиск по ${laneLabel}${scopeSuffix} в 1С.`; + } + if (needsOrganization) { + return `Уточните организацию, и я продолжу поиск по ${laneLabel}${scopeSuffix} в 1С.`; + } + return `Уточните контекст проверки, и я продолжу поиск по ${laneLabel}${scopeSuffix} в 1С.`; +} function metadataRouteFamilyLabelRu(routeFamily) { if (routeFamily === "document_evidence") { return "контур документов"; @@ -92,6 +178,17 @@ function metadataRouteFamilyLabelRu(routeFamily) { function headlineFor(mode, pilot) { const askedMonthlyBreakdown = pilot.derived_bidirectional_value_flow?.aggregation_axis === "month" || pilot.derived_value_flow?.aggregation_axis === "month"; + if (isEntityResolutionPilot(pilot) && mode === "confirmed_with_bounded_inference") { + return "По каталогу 1С найден вероятный контрагент; это заземление сущности для следующего шага, а не еще бизнес-ответ по данным."; + } + if (isEntityResolutionPilot(pilot) && mode === "needs_clarification") { + return "По каталогу 1С нашлось несколько похожих контрагентов, и без уточнения нельзя честно выбрать правильную сущность."; + } + if (isEntityResolutionPilot(pilot) && + mode === "checked_sources_only" && + pilot.derived_entity_resolution?.resolution_status === "not_found") { + return "По текущему каталожному поиску 1С точный контрагент пока не подтвержден."; + } if (isMovementPilot(pilot) && mode === "confirmed_with_bounded_inference") { return "РџРѕ данным 1РЎ найдены строки движений; ответ ограничен проверенным периодом Рё найденными строками."; } @@ -134,8 +231,13 @@ function headlineFor(mode, pilot) { if (mode === "needs_clarification" && isMetadataLaneChoiceClarification(pilot)) { return "По подтвержденной metadata-поверхности видно несколько конкурирующих data-lane, и без явного выбора дальше идти нельзя."; } - if (mode === "needs_clarification" && isMetadataLaneChoiceClarification(pilot)) { - return "Уточните, в какой контур идти дальше: по документам или по движениям/регистрам."; + if (mode === "needs_clarification" && isMovementLaneClarification(pilot)) { + const need = clarificationNeedRu(pilot); + return `Могу идти дальше по движениям/регистрам${laneScopeSuffix(pilot)}, но для запуска поиска в 1С ${need.verb} ${need.subject}.`; + } + if (mode === "needs_clarification" && isDocumentLaneClarification(pilot)) { + const need = clarificationNeedRu(pilot); + return `Могу идти дальше по документам${laneScopeSuffix(pilot)}, но для запуска поиска в 1С ${need.verb} ${need.subject}.`; } if (mode === "needs_clarification") { return "Нужно уточнить контекст перед поиском в 1С."; @@ -146,9 +248,26 @@ function headlineFor(mode, pilot) { return "Я проверил доступный контур, но подтвержденного факта для ответа не получил."; } function nextStepFor(mode, pilot) { + if (isEntityResolutionPilot(pilot) && mode === "needs_clarification") { + return "Уточните точное название контрагента или добавьте ИНН, и я продолжу уже по нужной сущности в 1С."; + } + if (isEntityResolutionPilot(pilot) && mode === "confirmed_with_bounded_inference") { + return "Теперь могу продолжить уже по найденному контрагенту и искать документы, движения или денежный поток."; + } + if (isEntityResolutionPilot(pilot) && + mode === "checked_sources_only" && + pilot.derived_entity_resolution?.resolution_status === "not_found") { + return "Дайте точное название или ИНН, и я повторю поиск по каталогу 1С более прицельно."; + } if (mode === "needs_clarification" && isMetadataLaneChoiceClarification(pilot)) { return "Уточните, в какой контур идти дальше: по документам или по движениям/регистрам."; } + if (mode === "needs_clarification" && isMovementLaneClarification(pilot)) { + return clarificationNextStepLine(pilot, "движениям/регистрам"); + } + if (mode === "needs_clarification" && isDocumentLaneClarification(pilot)) { + return clarificationNextStepLine(pilot, "документам"); + } if (mode === "needs_clarification") { return "Уточните контрагента, период или организацию, и я смогу выполнить проверку по 1С."; } @@ -196,6 +315,11 @@ function buildMustNotClaim(pilot) { claims.push("Do not claim a document/register exists outside the checked metadata probe results."); claims.push("Do not present the inferred next checked lane as already executed data retrieval."); } + if (isEntityResolutionPilot(pilot)) { + claims.push("Do not present catalog grounding as confirmed business activity, turnover, or document evidence."); + claims.push("Do not claim legal identity uniqueness when several catalog candidates are still plausible."); + claims.push("Do not imply that the resolved entity has already been used in a downstream data probe."); + } if (pilot.evidence.confirmed_facts.length === 0) { claims.push("Do not claim a confirmed business fact when confirmed_facts is empty."); } @@ -279,6 +403,32 @@ function derivedMetadataInferenceLine(pilot) { } return `По подтвержденной metadata-поверхности следующий проверяемый шаг можно ограниченно оценить как ${routeLabel} через family «${surface.selected_entity_set}». Это еще не выполненный data-fetch, а только grounded выбор следующего контура.`; } +function derivedEntityResolutionConfirmedLine(pilot) { + const resolution = pilot.derived_entity_resolution; + if (!resolution || resolution.resolution_status !== "resolved" || !resolution.resolved_entity) { + return null; + } + const requested = resolution.requested_entity ? ` по запросу "${resolution.requested_entity}"` : ""; + const confidence = resolution.confidence === "high" + ? " Точность совпадения выглядит высокой." + : resolution.confidence === "medium" + ? " Совпадение выглядит достаточно сильным, но это все еще catalog grounding." + : " Совпадение выглядит вероятным, но его лучше считать рабочим заземлением сущности."; + return `В текущем каталожном срезе 1С${requested} найден контрагент "${resolution.resolved_entity}".${confidence}`; +} +function derivedEntityResolutionInferenceLine(pilot) { + const resolution = pilot.derived_entity_resolution; + if (!resolution) { + return null; + } + if (resolution.resolution_status === "resolved") { + return "Сейчас подтверждено только заземление сущности по каталогу 1С; документы, движения и денежные показатели по ней еще не проверялись."; + } + if (resolution.resolution_status === "ambiguous" && resolution.ambiguity_candidates.length > 0) { + return `В checked catalog slice есть несколько близких кандидатов: ${resolution.ambiguity_candidates.join(", ")}. Без уточнения нельзя честно выбрать одного контрагента для следующего шага.`; + } + return null; +} function derivedValueFlowConfirmedLine(pilot) { const flow = pilot.derived_value_flow; if (!flow) { @@ -365,11 +515,14 @@ function buildAssistantMcpDiscoveryAnswerDraft(pilot) { if (pilot.evidence.inferred_facts.length > 0) { pushReason(reasonCodes, "answer_contains_bounded_inference"); } - const derivedInferenceLine = derivedActivityInferenceLine(pilot) ?? derivedMetadataInferenceLine(pilot); + const derivedInferenceLine = derivedActivityInferenceLine(pilot) ?? + derivedMetadataInferenceLine(pilot) ?? + derivedEntityResolutionInferenceLine(pilot); const inferenceLines = derivedInferenceLine ? [derivedInferenceLine] : pilot.evidence.inferred_facts; const derivedMetadataLine = derivedMetadataConfirmedLine(pilot); + const derivedEntityResolutionLine = derivedEntityResolutionConfirmedLine(pilot); const derivedValueLine = derivedBidirectionalValueFlowConfirmedLine(pilot) ?? derivedValueFlowConfirmedLine(pilot); const monthlyConfirmedLines = derivedBidirectionalValueFlowMonthlyLines(pilot).length > 0 ? derivedBidirectionalValueFlowMonthlyLines(pilot) @@ -379,9 +532,11 @@ function buildAssistantMcpDiscoveryAnswerDraft(pilot) { } const confirmedLines = derivedValueLine ? [...pilot.evidence.confirmed_facts, derivedValueLine, ...monthlyConfirmedLines] - : derivedMetadataLine - ? [...pilot.evidence.confirmed_facts, derivedMetadataLine] - : pilot.evidence.confirmed_facts; + : derivedEntityResolutionLine + ? [...pilot.evidence.confirmed_facts, derivedEntityResolutionLine] + : derivedMetadataLine + ? [...pilot.evidence.confirmed_facts, derivedMetadataLine] + : pilot.evidence.confirmed_facts; return { schema_version: exports.ASSISTANT_MCP_DISCOVERY_ANSWER_DRAFT_SCHEMA_VERSION, policy_owner: "assistantMcpDiscoveryAnswerAdapter", diff --git a/llm_normalizer/backend/dist/services/assistantMcpDiscoveryPilotExecutor.js b/llm_normalizer/backend/dist/services/assistantMcpDiscoveryPilotExecutor.js index f3a1b3a..8b28e42 100644 --- a/llm_normalizer/backend/dist/services/assistantMcpDiscoveryPilotExecutor.js +++ b/llm_normalizer/backend/dist/services/assistantMcpDiscoveryPilotExecutor.js @@ -11,6 +11,40 @@ const DEFAULT_DEPS = { executeAddressMcpQuery: addressMcpClient_1.executeAddressMcpQuery, executeAddressMcpMetadata: addressMcpClient_1.executeAddressMcpMetadata }; +const ENTITY_RESOLUTION_COUNTERPARTY_LOOKUP_LIMIT = 1000; +const ENTITY_RESOLUTION_COUNTERPARTY_QUERY_TEMPLATE = ` +ВЫБРАТЬ ПЕРВЫЕ __LIMIT__ + ПРЕДСТАВЛЕНИЕ(Контрагенты.Ссылка) КАК Контрагент, + ПРЕДСТАВЛЕНИЕ(Контрагенты.Ссылка) КАК Counterparty, + Контрагенты.Ссылка КАК КонтрагентСсылка, + Контрагенты.Ссылка КАК CounterpartyRef, + Контрагенты.Наименование КАК Наименование +ИЗ + Справочник.Контрагенты КАК Контрагенты +`; +const ENTITY_RESOLUTION_STOPWORDS = new Set([ + "ооо", + "ао", + "зао", + "ип", + "llc", + "ltd", + "company", + "контрагент", + "counterparty", + "поставщик", + "supplier", + "клиент", + "customer", + "в", + "1с", + "1c", + "найди", + "найти", + "поищи", + "search", + "find" +]); function toNonEmptyString(value) { if (value === null || value === undefined) { return null; @@ -99,7 +133,149 @@ function buildValueFlowFilters(planner) { sort: "period_asc" }; } +function normalizeEntityResolutionText(value) { + return String(value ?? "") + .toLowerCase() + .replace(/ё/g, "е") + .replace(/[«»"'`]/g, " ") + .replace(/[^\p{L}\p{N}\s-]+/gu, " ") + .replace(/\s+/g, " ") + .trim(); +} +function tokenizeEntityResolutionText(value) { + return normalizeEntityResolutionText(value) + .split(" ") + .map((token) => token.trim()) + .filter((token) => token.length >= 2 && !ENTITY_RESOLUTION_STOPWORDS.has(token)); +} +function isLowQualityEntityResolutionAnchor(value) { + return tokenizeEntityResolutionText(value).length <= 0; +} +function entityResolutionCandidateName(row) { + const candidates = [ + row["Контрагент"], + row["Counterparty"], + row["Наименование"], + row["name"], + row["Name"], + row["registrator"], + row["Registrator"] + ]; + for (const candidate of candidates) { + const text = toNonEmptyString(candidate); + if (text) { + return text; + } + } + return null; +} +function entityResolutionCandidateRef(row) { + const candidates = [row["КонтрагентСсылка"], row["CounterpartyRef"], row["ref"], row["Ref"]]; + for (const candidate of candidates) { + const text = toNonEmptyString(candidate); + if (text) { + return text; + } + } + return null; +} +function scoreEntityResolutionCandidate(name, requested) { + const normalizedName = normalizeEntityResolutionText(name); + const normalizedRequested = normalizeEntityResolutionText(requested); + const requestedTokens = tokenizeEntityResolutionText(requested); + if (!normalizedName || !normalizedRequested || requestedTokens.length <= 0) { + return null; + } + let score = 0; + if (normalizedName === normalizedRequested) { + score += 10_000; + } + else if (normalizedName.includes(normalizedRequested)) { + score += 5_000; + } + else if (normalizedRequested.includes(normalizedName) && normalizedName.length >= 4) { + score += 2_000; + } + for (const token of requestedTokens) { + if (!normalizedName.includes(token)) { + return null; + } + score += Math.max(40, token.length * 20); + } + score -= Math.abs(normalizedName.length - normalizedRequested.length); + return score; +} +function deriveEntityResolution(result, requestedEntity) { + if (!result || result.error || !requestedEntity) { + return null; + } + const checkedCandidates = uniqueCandidateStrings(result.raw_rows + .map((row) => entityResolutionCandidateName(row)) + .filter((value) => Boolean(value))); + const scoredCandidates = checkedCandidates + .map((name) => { + const score = scoreEntityResolutionCandidate(name, requestedEntity); + return score === null ? null : { name, score }; + }) + .filter((value) => Boolean(value)) + .sort((left, right) => right.score - left.score || left.name.length - right.name.length || left.name.localeCompare(right.name, "ru")); + if (scoredCandidates.length <= 0) { + return { + requested_entity: requestedEntity, + resolution_status: "not_found", + resolved_entity: null, + resolved_reference: null, + matched_rows: result.rows.length, + checked_candidates: checkedCandidates.slice(0, 12), + ambiguity_candidates: [], + confidence: null, + inference_basis: "catalog_counterparty_search_rows" + }; + } + const bestCandidate = scoredCandidates[0]; + const bestNormalized = normalizeEntityResolutionText(bestCandidate.name); + const requestedNormalized = normalizeEntityResolutionText(requestedEntity); + const requestedTokens = tokenizeEntityResolutionText(requestedEntity); + const exactMatch = bestNormalized === requestedNormalized; + const strongContains = requestedTokens.length > 1 && bestNormalized.includes(requestedNormalized); + const topCandidates = scoredCandidates.filter((candidate) => candidate.score === bestCandidate.score); + if (topCandidates.length > 1 && !exactMatch && !strongContains) { + return { + requested_entity: requestedEntity, + resolution_status: "ambiguous", + resolved_entity: null, + resolved_reference: null, + matched_rows: result.rows.length, + checked_candidates: checkedCandidates.slice(0, 12), + ambiguity_candidates: topCandidates.map((candidate) => candidate.name).slice(0, 6), + confidence: "low", + inference_basis: "catalog_counterparty_search_rows" + }; + } + const matchedRow = result.raw_rows.find((row) => normalizeEntityResolutionText(entityResolutionCandidateName(row)) === bestNormalized) ?? null; + return { + requested_entity: requestedEntity, + resolution_status: "resolved", + resolved_entity: bestCandidate.name, + resolved_reference: matchedRow ? entityResolutionCandidateRef(matchedRow) : null, + matched_rows: result.rows.length, + checked_candidates: checkedCandidates.slice(0, 12), + ambiguity_candidates: [], + confidence: exactMatch ? "high" : strongContains ? "medium" : "low", + inference_basis: "catalog_counterparty_search_rows" + }; +} +function uniqueCandidateStrings(values) { + const result = []; + for (const value of values) { + pushUnique(result, value); + } + return result; +} function isLifecyclePilotEligible(planner) { + if (planner.selected_chain_id === "lifecycle") { + return true; + } const meaning = planner.discovery_plan.turn_meaning_ref; const domain = String(meaning?.asked_domain_family ?? "").toLowerCase(); const action = String(meaning?.asked_action_family ?? "").toLowerCase(); @@ -108,6 +284,9 @@ function isLifecyclePilotEligible(planner) { (combined.includes("lifecycle") || combined.includes("activity") || combined.includes("duration") || combined.includes("age"))); } function isDocumentEvidencePilotEligible(planner) { + if (planner.selected_chain_id === "document_evidence") { + return true; + } const meaning = planner.discovery_plan.turn_meaning_ref; const domain = String(meaning?.asked_domain_family ?? "").toLowerCase(); const action = String(meaning?.asked_action_family ?? "").toLowerCase(); @@ -117,6 +296,9 @@ function isDocumentEvidencePilotEligible(planner) { (combined.includes("document") || combined.includes("list_documents"))); } function isMovementEvidencePilotEligible(planner) { + if (planner.selected_chain_id === "movement_evidence") { + return true; + } const meaning = planner.discovery_plan.turn_meaning_ref; const domain = String(meaning?.asked_domain_family ?? "").toLowerCase(); const action = String(meaning?.asked_action_family ?? "").toLowerCase(); @@ -131,6 +313,9 @@ function isMovementEvidencePilotEligible(planner) { combined.includes("list_movements"))); } function isValueFlowPilotEligible(planner) { + if (planner.selected_chain_id === "value_flow") { + return true; + } const meaning = planner.discovery_plan.turn_meaning_ref; const domain = String(meaning?.asked_domain_family ?? "").toLowerCase(); const action = String(meaning?.asked_action_family ?? "").toLowerCase(); @@ -144,6 +329,10 @@ function isValueFlowPilotEligible(planner) { combined.includes("value"))); } function isMetadataPilotEligible(planner) { + if (planner.selected_chain_id === "metadata_inspection" || + planner.selected_chain_id === "metadata_lane_clarification") { + return true; + } const meaning = planner.discovery_plan.turn_meaning_ref; const domain = String(meaning?.asked_domain_family ?? "").toLowerCase(); const action = String(meaning?.asked_action_family ?? "").toLowerCase(); @@ -158,6 +347,22 @@ function isMetadataPilotEligible(planner) { combined.includes("inspect_registers") || combined.includes("inspect_fields"))); } +function isEntityResolutionPilotEligible(planner) { + if (planner.selected_chain_id === "entity_resolution") { + return true; + } + const meaning = planner.discovery_plan.turn_meaning_ref; + const domain = String(meaning?.asked_domain_family ?? "").toLowerCase(); + const action = String(meaning?.asked_action_family ?? "").toLowerCase(); + const unsupported = String(meaning?.unsupported_but_understood_family ?? "").toLowerCase(); + const semanticNeed = String(planner.semantic_data_need ?? "").toLowerCase(); + const combined = `${domain} ${action} ${unsupported} ${semanticNeed}`; + return (planner.proposed_primitives.includes("search_business_entity") && + (combined.includes("entity_resolution") || + combined.includes("search_business_entity") || + combined.includes("entity discovery") || + combined.includes("counterparty search"))); +} function metadataScopeForPlanner(planner) { const entityCandidate = firstEntityCandidate(planner); if (entityCandidate) { @@ -441,6 +646,15 @@ function summarizeMetadataRows(result) { } return `${result.fetched_rows} MCP metadata rows fetched`; } +function summarizeEntityResolutionRows(result) { + if (result.error) { + return null; + } + if (result.fetched_rows <= 0) { + return "0 MCP catalog rows fetched"; + } + return `${result.fetched_rows} MCP catalog rows fetched for entity search`; +} function metadataRowText(row, keys) { for (const key of keys) { const text = toNonEmptyString(row[key]); @@ -475,6 +689,18 @@ function metadataEntitySet(row) { "kind" ]); } +function inferMetadataEntitySetFromObjectName(objectName) { + const text = String(objectName ?? "").trim(); + if (!text) { + return null; + } + const dotIndex = text.indexOf("."); + if (dotIndex <= 0) { + return null; + } + const entitySet = text.slice(0, dotIndex).trim(); + return entitySet.length > 0 ? entitySet : null; +} function metadataChildNames(value) { if (!Array.isArray(value)) { return []; @@ -604,7 +830,7 @@ function deriveMetadataSurface(result, metadataScope, requestedMetaTypes) { if (objectName) { pushUnique(matchedObjects, objectName); } - const entitySet = metadataEntitySet(row); + const entitySet = metadataEntitySet(row) ?? inferMetadataEntitySetFromObjectName(objectName); if (entitySet) { pushUnique(availableEntitySets, entitySet); } @@ -678,6 +904,53 @@ function buildMetadataUnknownFacts(surface, metadataScope) { } return ["No matching 1C metadata objects were confirmed by this MCP metadata probe"]; } +function buildEntityResolutionConfirmedFacts(resolution) { + if (!resolution || resolution.resolution_status !== "resolved" || !resolution.resolved_entity) { + return []; + } + if (resolution.requested_entity && normalizeEntityResolutionText(resolution.requested_entity) === normalizeEntityResolutionText(resolution.resolved_entity)) { + return [`В проверенном каталожном срезе 1С найден контрагент: ${resolution.resolved_entity}`]; + } + return [ + `В проверенном каталожном срезе 1С найден наиболее вероятный контрагент: ${resolution.resolved_entity}` + ]; +} +function buildEntityResolutionInferredFacts(resolution) { + if (!resolution) { + return []; + } + if (resolution.resolution_status === "resolved") { + const facts = ["Пока проверено только заземление сущности по каталогу 1С; документы, движения и денежные показатели еще не проверялись"]; + if (resolution.requested_entity && resolution.resolved_entity) { + const requestedNormalized = normalizeEntityResolutionText(resolution.requested_entity); + const resolvedNormalized = normalizeEntityResolutionText(resolution.resolved_entity); + if (requestedNormalized !== resolvedNormalized) { + facts.push("Контрагент выбран как ближайшее подтвержденное совпадение имени в проверенном каталоге 1С"); + } + } + return facts; + } + if (resolution.resolution_status === "ambiguous") { + return ["В проверенном каталожном срезе осталось несколько близких кандидатов, поэтому точного контрагента в 1С еще нужно уточнить"]; + } + return []; +} +function buildEntityResolutionUnknownFacts(resolution, requestedEntity) { + if (!resolution) { + return ["По проверенному каталожному поиску 1С не удалось заземлить сущность контрагента"]; + } + const unknownFacts = ["Документы, движения и денежные показатели по этому контрагенту еще не проверялись; пока был только каталожный поиск"]; + if (resolution.resolution_status === "ambiguous" && resolution.ambiguity_candidates.length > 0) { + unknownFacts.unshift(`Точное заземление контрагента в 1С остается неоднозначным между вариантами: ${resolution.ambiguity_candidates.join(", ")}`); + return unknownFacts; + } + if (resolution.resolution_status === "not_found") { + unknownFacts.unshift(requestedEntity + ? `В проверенном каталожном срезе 1С не подтвержден контрагент с именем "${requestedEntity}"` + : "В проверенном каталожном срезе 1С не подтвержден подходящий контрагент"); + } + return unknownFacts; +} function rowDateValue(row) { const candidates = [ row["Период"], @@ -1149,19 +1422,24 @@ function buildEmptyEvidence(planner, dryRun, probeResults, reason) { }); } function pilotScopeForPlanner(planner) { - if (isMetadataPilotEligible(planner)) { - return "metadata_inspection_v1"; + switch (planner.selected_chain_id) { + case "metadata_lane_clarification": + case "metadata_inspection": + return "metadata_inspection_v1"; + case "movement_evidence": + return "counterparty_movement_evidence_query_movements_v1"; + case "value_flow": + return valueFlowPilotProfile(planner).scope; + case "document_evidence": + return "counterparty_document_evidence_query_documents_v1"; + case "lifecycle": + return "counterparty_lifecycle_query_documents_v1"; + case "entity_resolution": + return "entity_resolution_search_v1"; } - if (isMovementEvidencePilotEligible(planner)) { - return "counterparty_movement_evidence_query_movements_v1"; - } - if (isValueFlowPilotEligible(planner)) { - return valueFlowPilotProfile(planner).scope; - } - if (isDocumentEvidencePilotEligible(planner)) { - return "counterparty_document_evidence_query_documents_v1"; - } - return "counterparty_lifecycle_query_documents_v1"; +} +function isLivePilotChainSupported(chainId) { + return true; } async function executeAssistantMcpDiscoveryPilot(planner, deps = DEFAULT_DEPS) { const runtimeDeps = { @@ -1191,6 +1469,7 @@ async function executeAssistantMcpDiscoveryPilot(planner, deps = DEFAULT_DEPS) { evidence, source_rows_summary: null, derived_metadata_surface: null, + derived_entity_resolution: null, derived_activity_period: null, derived_value_flow: null, derived_bidirectional_value_flow: null, @@ -1214,6 +1493,7 @@ async function executeAssistantMcpDiscoveryPilot(planner, deps = DEFAULT_DEPS) { evidence, source_rows_summary: null, derived_metadata_surface: null, + derived_entity_resolution: null, derived_activity_period: null, derived_value_flow: null, derived_bidirectional_value_flow: null, @@ -1226,7 +1506,15 @@ async function executeAssistantMcpDiscoveryPilot(planner, deps = DEFAULT_DEPS) { const movementPilotEligible = isMovementEvidencePilotEligible(planner); const lifecyclePilotEligible = isLifecyclePilotEligible(planner); const valueFlowPilotEligible = isValueFlowPilotEligible(planner); - if (!metadataPilotEligible && !documentPilotEligible && !movementPilotEligible && !lifecyclePilotEligible && !valueFlowPilotEligible) { + const entityResolutionPilotEligible = isEntityResolutionPilotEligible(planner); + const livePilotChainSupported = isLivePilotChainSupported(planner.selected_chain_id); + if (!livePilotChainSupported || + (!metadataPilotEligible && + !documentPilotEligible && + !movementPilotEligible && + !lifecyclePilotEligible && + !valueFlowPilotEligible && + !entityResolutionPilotEligible)) { pushReason(reasonCodes, "pilot_scope_unsupported_for_live_execution"); for (const step of dryRun.execution_steps) { skippedPrimitives.push(step.primitive_id); @@ -1246,6 +1534,7 @@ async function executeAssistantMcpDiscoveryPilot(planner, deps = DEFAULT_DEPS) { evidence, source_rows_summary: null, derived_metadata_surface: null, + derived_entity_resolution: null, derived_activity_period: null, derived_value_flow: null, derived_bidirectional_value_flow: null, @@ -1309,6 +1598,96 @@ async function executeAssistantMcpDiscoveryPilot(planner, deps = DEFAULT_DEPS) { evidence, source_rows_summary: sourceRowsSummary, derived_metadata_surface: derivedMetadataSurface, + derived_entity_resolution: null, + derived_activity_period: null, + derived_value_flow: null, + derived_bidirectional_value_flow: null, + query_limitations: queryLimitations, + reason_codes: reasonCodes + }; + } + if (entityResolutionPilotEligible) { + let queryResult = null; + const requestedEntity = counterparty; + if (isLowQualityEntityResolutionAnchor(requestedEntity)) { + pushReason(reasonCodes, "pilot_entity_resolution_anchor_missing_or_low_quality"); + const evidence = buildEmptyEvidence(planner, dryRun, probeResults, "Entity-resolution needs a clearer counterparty name"); + return { + schema_version: exports.ASSISTANT_MCP_DISCOVERY_PILOT_EXECUTOR_SCHEMA_VERSION, + policy_owner: "assistantMcpDiscoveryPilotExecutor", + pilot_status: "skipped_needs_clarification", + pilot_scope: "entity_resolution_search_v1", + dry_run: dryRun, + mcp_execution_performed: false, + executed_primitives: executedPrimitives, + skipped_primitives: skippedPrimitives, + probe_results: probeResults, + evidence, + source_rows_summary: null, + derived_metadata_surface: null, + derived_entity_resolution: null, + derived_activity_period: null, + derived_value_flow: null, + derived_bidirectional_value_flow: null, + query_limitations: ["Entity-resolution needs a clearer counterparty name"], + reason_codes: reasonCodes + }; + } + for (const step of dryRun.execution_steps) { + if (step.primitive_id !== "search_business_entity") { + skippedPrimitives.push(step.primitive_id); + probeResults.push(skippedProbeResult(step, "pilot_only_executes_search_business_entity")); + continue; + } + queryResult = await runtimeDeps.executeAddressMcpQuery({ + query: ENTITY_RESOLUTION_COUNTERPARTY_QUERY_TEMPLATE.replaceAll("__LIMIT__", String(ENTITY_RESOLUTION_COUNTERPARTY_LOOKUP_LIMIT)), + limit: ENTITY_RESOLUTION_COUNTERPARTY_LOOKUP_LIMIT + }); + pushUnique(executedPrimitives, step.primitive_id); + probeResults.push(queryResultToProbeResult(step.primitive_id, queryResult)); + if (queryResult.error) { + pushUnique(queryLimitations, queryResult.error); + pushReason(reasonCodes, "pilot_search_business_entity_mcp_error"); + } + else { + pushReason(reasonCodes, "pilot_search_business_entity_mcp_executed"); + } + } + const sourceRowsSummary = queryResult ? summarizeEntityResolutionRows(queryResult) : null; + const derivedEntityResolution = deriveEntityResolution(queryResult, requestedEntity); + if (derivedEntityResolution?.resolution_status === "resolved") { + pushReason(reasonCodes, "pilot_derived_entity_resolution_from_catalog_rows"); + } + if (derivedEntityResolution?.resolution_status === "ambiguous") { + pushReason(reasonCodes, "pilot_entity_resolution_ambiguity_requires_clarification"); + } + if (derivedEntityResolution?.resolution_status === "not_found") { + pushReason(reasonCodes, "pilot_entity_resolution_not_found_in_checked_catalog"); + } + const evidence = (0, assistantMcpDiscoveryPolicy_1.resolveAssistantMcpDiscoveryEvidence)({ + plan: planner.discovery_plan, + probeResults, + confirmedFacts: buildEntityResolutionConfirmedFacts(derivedEntityResolution), + inferredFacts: buildEntityResolutionInferredFacts(derivedEntityResolution), + unknownFacts: buildEntityResolutionUnknownFacts(derivedEntityResolution, requestedEntity), + sourceRowsSummary, + queryLimitations, + recommendedNextProbe: "resolve_entity_reference" + }); + return { + schema_version: exports.ASSISTANT_MCP_DISCOVERY_PILOT_EXECUTOR_SCHEMA_VERSION, + policy_owner: "assistantMcpDiscoveryPilotExecutor", + pilot_status: "executed", + pilot_scope: "entity_resolution_search_v1", + dry_run: dryRun, + mcp_execution_performed: executedPrimitives.length > 0, + executed_primitives: executedPrimitives, + skipped_primitives: skippedPrimitives, + probe_results: probeResults, + evidence, + source_rows_summary: sourceRowsSummary, + derived_metadata_surface: null, + derived_entity_resolution: derivedEntityResolution, derived_activity_period: null, derived_value_flow: null, derived_bidirectional_value_flow: null, @@ -1336,6 +1715,7 @@ async function executeAssistantMcpDiscoveryPilot(planner, deps = DEFAULT_DEPS) { evidence, source_rows_summary: null, derived_metadata_surface: null, + derived_entity_resolution: null, derived_activity_period: null, derived_value_flow: null, derived_bidirectional_value_flow: null, @@ -1389,6 +1769,7 @@ async function executeAssistantMcpDiscoveryPilot(planner, deps = DEFAULT_DEPS) { evidence, source_rows_summary: sourceRowsSummary, derived_metadata_surface: null, + derived_entity_resolution: null, derived_activity_period: null, derived_value_flow: null, derived_bidirectional_value_flow: null, @@ -1416,6 +1797,7 @@ async function executeAssistantMcpDiscoveryPilot(planner, deps = DEFAULT_DEPS) { evidence, source_rows_summary: null, derived_metadata_surface: null, + derived_entity_resolution: null, derived_activity_period: null, derived_value_flow: null, derived_bidirectional_value_flow: null, @@ -1469,6 +1851,7 @@ async function executeAssistantMcpDiscoveryPilot(planner, deps = DEFAULT_DEPS) { evidence, source_rows_summary: sourceRowsSummary, derived_metadata_surface: null, + derived_entity_resolution: null, derived_activity_period: null, derived_value_flow: null, derived_bidirectional_value_flow: null, @@ -1501,6 +1884,7 @@ async function executeAssistantMcpDiscoveryPilot(planner, deps = DEFAULT_DEPS) { evidence, source_rows_summary: null, derived_metadata_surface: null, + derived_entity_resolution: null, derived_activity_period: null, derived_value_flow: null, derived_bidirectional_value_flow: null, @@ -1593,6 +1977,7 @@ async function executeAssistantMcpDiscoveryPilot(planner, deps = DEFAULT_DEPS) { evidence, source_rows_summary: sourceRowsSummary, derived_metadata_surface: null, + derived_entity_resolution: null, derived_activity_period: null, derived_value_flow: null, derived_bidirectional_value_flow: derivedBidirectionalValueFlow, @@ -1618,6 +2003,7 @@ async function executeAssistantMcpDiscoveryPilot(planner, deps = DEFAULT_DEPS) { evidence, source_rows_summary: null, derived_metadata_surface: null, + derived_entity_resolution: null, derived_activity_period: null, derived_value_flow: null, derived_bidirectional_value_flow: null, @@ -1690,6 +2076,7 @@ async function executeAssistantMcpDiscoveryPilot(planner, deps = DEFAULT_DEPS) { evidence, source_rows_summary: sourceRowsSummary, derived_metadata_surface: null, + derived_entity_resolution: null, derived_activity_period: null, derived_value_flow: derivedValueFlow, derived_bidirectional_value_flow: null, @@ -1716,6 +2103,7 @@ async function executeAssistantMcpDiscoveryPilot(planner, deps = DEFAULT_DEPS) { evidence, source_rows_summary: null, derived_metadata_surface: null, + derived_entity_resolution: null, derived_activity_period: null, derived_value_flow: null, derived_bidirectional_value_flow: null, @@ -1773,6 +2161,7 @@ async function executeAssistantMcpDiscoveryPilot(planner, deps = DEFAULT_DEPS) { evidence, source_rows_summary: sourceRowsSummary, derived_metadata_surface: null, + derived_entity_resolution: null, derived_activity_period: derivedActivityPeriod, derived_value_flow: null, derived_bidirectional_value_flow: null, diff --git a/llm_normalizer/backend/dist/services/assistantMcpDiscoveryPlanner.js b/llm_normalizer/backend/dist/services/assistantMcpDiscoveryPlanner.js index 8eff4a5..a396ef8 100644 --- a/llm_normalizer/backend/dist/services/assistantMcpDiscoveryPlanner.js +++ b/llm_normalizer/backend/dist/services/assistantMcpDiscoveryPlanner.js @@ -86,6 +86,8 @@ function recipeFor(input) { pushUnique(axes, "lane_family_choice"); return { semanticDataNeed: "metadata lane clarification", + chainId: "metadata_lane_clarification", + chainSummary: "Preserve the ambiguous metadata surface and ask the user to choose the next data lane before running MCP probes.", primitives: [], axes, reason: "planner_selected_metadata_lane_clarification_recipe" @@ -100,6 +102,8 @@ function recipeFor(input) { } return { semanticDataNeed: "counterparty value-flow evidence", + chainId: "value_flow", + chainSummary: "Resolve the business entity, query scoped movements, aggregate checked amounts, then probe coverage before answering.", primitives: ["resolve_entity_reference", "query_movements", "aggregate_by_axis", "probe_coverage"], axes, reason: requestedAggregationAxis === "month" @@ -113,6 +117,8 @@ function recipeFor(input) { pushUnique(axes, "evidence_basis"); return { semanticDataNeed: "counterparty lifecycle evidence", + chainId: "lifecycle", + chainSummary: "Resolve the business entity, query supporting documents, probe coverage, then explain the evidence basis for the inferred activity window.", primitives: ["resolve_entity_reference", "query_documents", "probe_coverage", "explain_evidence_basis"], axes, reason: "planner_selected_lifecycle_recipe" @@ -122,6 +128,8 @@ function recipeFor(input) { pushUnique(axes, "metadata_scope"); return { semanticDataNeed: "1C metadata evidence", + chainId: "metadata_inspection", + chainSummary: "Inspect the 1C metadata surface first, then ground the next safe lane from confirmed schema evidence.", primitives: ["inspect_1c_metadata"], axes, reason: "planner_selected_metadata_recipe" @@ -131,6 +139,8 @@ function recipeFor(input) { pushUnique(axes, "coverage_target"); return { semanticDataNeed: "movement evidence", + chainId: "movement_evidence", + chainSummary: "Resolve the business entity, fetch scoped movement rows, and probe coverage without pretending to have a full movement universe.", primitives: ["resolve_entity_reference", "query_movements", "probe_coverage"], axes, reason: "planner_selected_movement_recipe" @@ -140,6 +150,8 @@ function recipeFor(input) { pushUnique(axes, "coverage_target"); return { semanticDataNeed: "document evidence", + chainId: "document_evidence", + chainSummary: "Resolve the business entity, fetch scoped document rows, and probe coverage before stating the checked document evidence.", primitives: ["resolve_entity_reference", "query_documents", "probe_coverage"], axes, reason: "planner_selected_document_recipe" @@ -147,8 +159,11 @@ function recipeFor(input) { } if (hasEntity(meaning)) { pushUnique(axes, "business_entity"); + pushUnique(axes, "coverage_target"); return { semanticDataNeed: "entity discovery evidence", + chainId: "entity_resolution", + chainSummary: "Search candidate business entities, resolve the most relevant 1C reference, and prove whether the entity grounding is stable enough for the next probe.", primitives: ["search_business_entity", "resolve_entity_reference", "probe_coverage"], axes, reason: "planner_selected_entity_resolution_recipe" @@ -156,6 +171,8 @@ function recipeFor(input) { } return { semanticDataNeed: "unclassified 1C discovery need", + chainId: "metadata_inspection", + chainSummary: "Start with metadata inspection instead of guessing a deeper fact route when the business need is still under-specified.", primitives: ["inspect_1c_metadata"], axes, reason: "planner_selected_clarification_recipe" @@ -202,6 +219,8 @@ function planAssistantMcpDiscovery(input) { policy_owner: "assistantMcpDiscoveryPlanner", planner_status: plannerStatus, semantic_data_need: semanticDataNeed, + selected_chain_id: recipe.chainId, + selected_chain_summary: recipe.chainSummary, proposed_primitives: recipe.primitives, required_axes: recipe.axes, discovery_plan: plan, diff --git a/llm_normalizer/backend/dist/services/assistantMcpDiscoveryPolicy.js b/llm_normalizer/backend/dist/services/assistantMcpDiscoveryPolicy.js index 8ea5d37..af56528 100644 --- a/llm_normalizer/backend/dist/services/assistantMcpDiscoveryPolicy.js +++ b/llm_normalizer/backend/dist/services/assistantMcpDiscoveryPolicy.js @@ -79,6 +79,7 @@ function normalizeTurnMeaning(value) { const dateScope = toNonEmptyString(value.explicit_date_scope); const unsupported = toNonEmptyString(value.unsupported_but_understood_family); const entities = toStringList(value.explicit_entity_candidates); + const metadataAmbiguityEntitySets = toStringList(value.metadata_ambiguity_entity_sets); if (domain) { result.asked_domain_family = domain; } @@ -91,6 +92,9 @@ function normalizeTurnMeaning(value) { if (entities.length > 0) { result.explicit_entity_candidates = entities; } + if (metadataAmbiguityEntitySets.length > 0) { + result.metadata_ambiguity_entity_sets = metadataAmbiguityEntitySets; + } if (organization) { result.explicit_organization_scope = organization; } diff --git a/llm_normalizer/backend/dist/services/assistantMcpDiscoveryTurnInputAdapter.js b/llm_normalizer/backend/dist/services/assistantMcpDiscoveryTurnInputAdapter.js index 1fd467a..95241f7 100644 --- a/llm_normalizer/backend/dist/services/assistantMcpDiscoveryTurnInputAdapter.js +++ b/llm_normalizer/backend/dist/services/assistantMcpDiscoveryTurnInputAdapter.js @@ -36,6 +36,22 @@ function pushUnique(target, value) { target.push(text); } } +function canonicalizeEntityResolutionCandidate(value) { + return normalizeEntityResolutionCandidate(value) + .replace(/^(?:\u0441\s+\u043d\u0430\u0438\u043c\u0435\u043d\u043e\u0432\u0430\u043d\u0438\u0435\u043c\s+)/iu, "") + .replace(/\s+(?:\u0432\s+\u0441\u0438\u0441\u0442\u0435\u043c\u0435\s*1\u0421|\u0432\s+1c|in\s+(?:the\s+)?1c\s+system|in\s+1c)\s*$/iu, "") + .trim(); +} +function pushNormalizedEntityResolutionCandidate(target, value) { + const text = toNonEmptyString(value); + if (!text) { + return; + } + const normalized = canonicalizeEntityResolutionCandidate(text); + if (normalized && !target.includes(normalized)) { + target.push(normalized); + } +} function compactLower(value) { return String(value ?? "") .toLowerCase() @@ -263,11 +279,11 @@ function hasMetadataSignal(text) { if (/(?:\u043c\u0435\u0442\u0430\u0434\u0430\u043d|schema|catalog|metadata\s+surface|\u0441\u0442\u0440\u0443\u043a\u0442\u0443\u0440[\u0430\u044b]\s+1\u0441|\u0441\u0445\u0435\u043c[\u0430\u044b]\s+1\u0441)/iu.test(text)) { return true; } - return (/(?:\u0440\u0435\u0433\u0438\u0441\u0442\u0440\u044b|\u0434\u043e\u043a\u0443\u043c\u0435\u043d\u0442\u044b|\u0441\u043f\u0440\u0430\u0432\u043e\u0447\u043d\u0438\u043a\u0438|\u043f\u043e\u043b(?:\u0435|\u044f)|registers?|documents?|catalogs?|fields?)/iu.test(text) && - /(?:\u0435\u0441\u0442\u044c|\u0434\u043e\u0441\u0442\u0443\u043f\u043d|\u0432\s+1\u0441|available|exist)/iu.test(text)); + return (/(?:\u043e\u0431\u044a\u0435\u043a\u0442(?:\u044b|\u0430|\u043e\u0432)?|\u0440\u0435\u0433\u0438\u0441\u0442\u0440\u044b|\u0434\u043e\u043a\u0443\u043c\u0435\u043d\u0442\u044b|\u0441\u043f\u0440\u0430\u0432\u043e\u0447\u043d\u0438\u043a\u0438|\u043f\u043e\u043b(?:\u0435|\u044f)|objects?|registers?|documents?|catalogs?|fields?)/iu.test(text) && + /(?:\u0435\u0441\u0442\u044c|\u043a\u0430\u043a\u0438\u0435|\u0434\u043e\u0441\u0442\u0443\u043f\u043d|\u0432\s+1\u0441|1\u0441|available|exist|which)/iu.test(text)); } function hasMetadataObjectHint(text) { - return /(?:\u0440\u0435\u0433\u0438\u0441\u0442\u0440(?:\u044b)?|\u0434\u043e\u043a\u0443\u043c\u0435\u043d\u0442(?:\u044b)?|\u0441\u043f\u0440\u0430\u0432\u043e\u0447\u043d\u0438\u043a(?:\u0438)?|\u043f\u043e\u043b(?:\u0435|\u044f)|registers?|documents?|catalogs?|fields?)/iu.test(text); + return /(?:\u043e\u0431\u044a\u0435\u043a\u0442(?:\u044b|\u0430|\u043e\u0432)?|\u0440\u0435\u0433\u0438\u0441\u0442\u0440(?:\u044b)?|\u0434\u043e\u043a\u0443\u043c\u0435\u043d\u0442(?:\u044b)?|\u0441\u043f\u0440\u0430\u0432\u043e\u0447\u043d\u0438\u043a(?:\u0438)?|\u043f\u043e\u043b(?:\u0435|\u044f)|objects?|registers?|documents?|catalogs?|fields?)/iu.test(text); } function hasDocumentEvidenceFollowupSignal(text) { return /(?:\u043f\u043e\s+\u0434\u043e\u043a\u0443\u043c\u0435\u043d\u0442(?:\u0430\u043c|\u044b)?|\u0434\u0430\u0432\u0430\u0439\s+\u0434\u043e\u043a\u0443\u043c\u0435\u043d\u0442(?:\u044b)?|\u0438\u0449\u0438\s+\u0434\u043e\u043a\u0443\u043c\u0435\u043d\u0442(?:\u044b)?|\u043f\u043e\u043a\u0430\u0436\u0438\s+\u0434\u043e\u043a\u0443\u043c\u0435\u043d\u0442(?:\u044b)?|(?:\u043f\u043e\u043a\u0430\u0436\u0438|\u043a\u0430\u043a\u0438\u0435|\u0441\u043f\u0438\u0441\u043e\u043a|\u0434\u0430\u0439|\u0438\u0449\u0438)\s+(?:\u0441\u0447(?:[еe]т|\u0435\u0442)[-\u2011 ]?\u0444\u0430\u043a\u0442\u0443\u0440(?:\u044b|\u0430)?|\u043d\u0430\u043a\u043b\u0430\u0434\u043d(?:\u044b\u0435|\u0430\u044f)?|\u0430\u043a\u0442(?:\u044b)?|\u0440\u0435\u0430\u043b\u0438\u0437\u0430\u0446(?:\u0438\u0438|\u0438\u044e)|invoice(?:s)?|bill(?:s)?|waybill(?:s)?)|document(?:s)?\s+(?:then|next)?|(?:then|next)\s+documents?|go\s+to\s+documents?)/iu.test(text); @@ -278,7 +294,39 @@ function hasMovementEvidenceFollowupSignal(text) { function hasMetadataDownstreamContinuationSignal(text) { return /(?:\u0434\u0430\u0432\u0430\u0439\s+\u0434\u0430\u043b\u044c\u0448\u0435|\u0438\u0434(?:\u0435|\u0451)\u043c\s+\u0434\u0430\u043b\u044c\u0448\u0435|\u043f\u043e\u0448\u043b(?:\u0438|\u0451\u043c)\s+\u0434\u0430\u043b\u044c\u0448\u0435|\u043f\u0440\u043e\u0434\u043e\u043b\u0436\u0430\u0439|\u0438\u0449\u0438\s+\u0434\u0430\u043b\u044c\u0448\u0435|\u0438\u0449\u0438\s+\u0434\u0430\u043d\u043d\u044b\u0435|\u043f\u043e\u043a\u0430\u0436\u0438\s+\u0434\u0430\u043d\u043d\u044b\u0435|\u043f\u043e\u043a\u0430\u0436\u0438\s+\u0441\u0442\u0440\u043e\u043a\u0438|\u0433\u043b\u0443\u0431\u0436\u0435|\u0447\u0442\u043e\s+\u0434\u0430\u043b\u044c\u0448\u0435|continue|go\s+ahead|go\s+deeper|look\s+deeper|drill\s+down|show\s+(?:data|rows))/iu.test(text); } +function hasEntityResolutionSignal(text) { + const hasSearchVerb = /(?:найд(?:и|ите|ем|у)|поищ(?:и|ите|ем)|найти|поиск|search|find|look\s*up)/iu.test(text); + const hasEntityNoun = /(?:контрагент(?:а|ов)?|поставщик(?:а|ов)?|клиент(?:а|ов)?|counterpart(?:y|ies)|supplier(?:s)?|customer(?:s)?)/iu.test(text); + return hasSearchVerb && hasEntityNoun; +} +function normalizeEntityResolutionCandidate(value) { + return value + .replace(/^(?:в\s*1с\s+|в\s+1c\s+|по\s+имени\s+)/iu, "") + .replace(/[?!.]+$/gu, "") + .replace(/^(?:контрагент(?:а|ов)?|поставщик(?:а|ов)?|клиент(?:а|ов)?)\s+/iu, "") + .replace(/^(?:counterpart(?:y|ies)|supplier(?:s)?|customer(?:s)?)\s+/iu, "") + .replace(/^[«"'\s]+|[»"'\s]+$/gu, "") + .replace(/\s+/g, " ") + .trim(); +} +function rawEntityResolutionCandidate(text) { + const patterns = [ + /(?:найд(?:и|ите|ем|у)|поищ(?:и|ите|ем)|найти|search|find|look\s*up)\s+(?:в\s*1с\s+|в\s+1c\s+)?(?:контрагент(?:а|ов)?|поставщик(?:а|ов)?|клиент(?:а|ов)?|counterpart(?:y|ies)|supplier(?:s)?|customer(?:s)?)\s+(.+)$/iu, + /(?:контрагент(?:а|ов)?|поставщик(?:а|ов)?|клиент(?:а|ов)?|counterpart(?:y|ies)|supplier(?:s)?|customer(?:s)?)\s+(.+?)\s+(?:найд(?:и|ите|ем|у)|поищ(?:и|ите|ем)|найти|search|find|look\s*up)\b/iu + ]; + for (const pattern of patterns) { + const match = text.match(pattern); + const candidate = normalizeEntityResolutionCandidate(match?.[1] ?? ""); + if (candidate.length >= 2) { + return candidate; + } + } + return null; +} function metadataActionFromRawText(text) { + if (/(?:\u043e\u0431\u044a\u0435\u043a\u0442(?:\u044b|\u0430|\u043e\u0432)?|objects?)/iu.test(text)) { + return "inspect_surface"; + } if (/(?:\u043f\u043e\u043b(?:\u0435|\u044f)|field)/iu.test(text)) { return "inspect_fields"; } @@ -293,6 +341,18 @@ function metadataActionFromRawText(text) { } return "inspect_catalog"; } +function metadataScopeHintFromRawText(text) { + if (/(?:\u043d\u0434\u0441|vat)/iu.test(text)) { + return "\u041d\u0414\u0421"; + } + if (/(?:\u0441\u043a\u043b\u0430\u0434|inventory|stock|warehouse|\u043d\u043e\u043c\u0435\u043d\u043a\u043b\u0430\u0442\u0443\u0440)/iu.test(text)) { + return "\u0441\u043a\u043b\u0430\u0434"; + } + if (/(?:\u043a\u043e\u043d\u0442\u0440\u0430\u0433\u0435\u043d\u0442|counterparty|customer|client|supplier|vendor)/iu.test(text)) { + return "\u043a\u043e\u043d\u0442\u0440\u0430\u0433\u0435\u043d\u0442"; + } + return null; +} function hasExplicitDateScopeLiteral(text) { return /(?:\b(?:19|20)\d{2}\b|\b\d{4}-\d{2}-\d{2}\b|\b\d{4}-\d{2}\b)/iu.test(text); } @@ -322,6 +382,10 @@ function semanticNeedFor(input) { if (input.valueFlowSignal || /(?:turnover|revenue|payment|payout|value|net|netting|balance|cashflow)/iu.test(combined)) { return "counterparty value-flow evidence"; } + if (input.entityResolutionSignal || + /(?:entity_resolution|search_business_entity|resolve_entity_reference|entity\s+discovery|counterparty\s+search)/iu.test(combined)) { + return "entity discovery evidence"; + } if (/(?:movement|movements|bank_operations|movement_evidence|list_movements)/iu.test(combined)) { return "movement evidence"; } @@ -337,6 +401,9 @@ function shouldRunDiscovery(input) { if (input.metadataSignal) { return true; } + if (input.entityResolutionSignal) { + return true; + } if (input.valueFlowSignal && !input.explicitIntentCandidate) { return true; } @@ -355,15 +422,23 @@ function buildAssistantMcpDiscoveryTurnInput(input) { const predecomposeEntities = collectPredecomposeEntities(predecomposeContract); const followupSeed = collectFollowupDiscoverySeed(followupContext); const reasonCodes = []; - const rawText = compactLower(`${input.userMessage ?? ""} ${input.effectiveMessage ?? ""}`); + const rawUserText = toNonEmptyString(input.userMessage); + const rawEffectiveText = toNonEmptyString(input.effectiveMessage); + const rawSignalSourceText = `${rawUserText ?? ""} ${rawEffectiveText ?? ""}`.trim(); + const rawEntitySourceText = rawUserText ?? rawEffectiveText ?? rawSignalSourceText; + const rawText = compactLower(rawSignalSourceText); const rawLifecycleSignal = hasLifecycleSignal(rawText); const rawBidirectionalValueFlowSignal = !rawLifecycleSignal && hasBidirectionalValueFlowSignal(rawText); const rawValueFlowSignal = !rawLifecycleSignal && (hasValueFlowSignal(rawText) || rawBidirectionalValueFlowSignal); const rawMetadataSignal = !rawLifecycleSignal && !rawValueFlowSignal && hasMetadataSignal(rawText); + const rawEntityResolutionSignal = !rawLifecycleSignal && !rawValueFlowSignal && !rawMetadataSignal && hasEntityResolutionSignal(rawText); const rawPayoutSignal = rawValueFlowSignal && !rawBidirectionalValueFlowSignal && hasPayoutSignal(rawText); const monthlyAggregationSignal = hasMonthlyAggregationSignal(rawText); const explicitDateScopeLiteralDetected = hasExplicitDateScopeLiteral(rawText); const rawDateScope = collectDateScopeFromRawText(rawText); + const rawMetadataScopeHint = rawMetadataSignal ? metadataScopeHintFromRawText(rawText) : null; + const rawEntityCandidate = rawEntityResolutionSignal ? rawEntityResolutionCandidate(rawEntitySourceText) : null; + const entityResolutionSignal = rawEntityResolutionSignal || Boolean(rawEntityCandidate); const metadataDocumentHintSignal = hasDocumentEvidenceFollowupSignal(rawText); const metadataMovementHintSignal = hasMovementEvidenceFollowupSignal(rawText); const rawDomain = toNonEmptyString(assistantTurnMeaning?.asked_domain_family); @@ -508,13 +583,26 @@ function buildAssistantMcpDiscoveryTurnInput(input) { unsupported: unsupported ?? seededUnsupported, lifecycleSignal, valueFlowSignal, - metadataSignal: rawMetadataSignal || effectiveMetadataFollowupSeedApplicable + metadataSignal: rawMetadataSignal || effectiveMetadataFollowupSeedApplicable, + entityResolutionSignal }); - const entityCandidates = collectEntityCandidates(assistantTurnMeaning?.explicit_entity_candidates); - pushUnique(entityCandidates, predecomposeEntities.counterparty); - pushUnique(entityCandidates, followupSeed.counterparty); + const entityCandidates = entityResolutionSignal ? [] : collectEntityCandidates(assistantTurnMeaning?.explicit_entity_candidates); + if (entityResolutionSignal) { + pushNormalizedEntityResolutionCandidate(entityCandidates, rawEntityCandidate); + for (const candidate of collectEntityCandidates(assistantTurnMeaning?.explicit_entity_candidates)) { + pushNormalizedEntityResolutionCandidate(entityCandidates, candidate); + } + pushNormalizedEntityResolutionCandidate(entityCandidates, predecomposeEntities.counterparty); + pushNormalizedEntityResolutionCandidate(entityCandidates, followupSeed.counterparty); + } + else { + pushUnique(entityCandidates, predecomposeEntities.counterparty); + pushUnique(entityCandidates, followupSeed.counterparty); + pushUnique(entityCandidates, rawEntityCandidate); + } if ((rawMetadataSignal || metadataFollowupSeedApplicable) && !followupSeed.counterparty) { pushUnique(entityCandidates, followupSeed.discoveryEntity); + pushUnique(entityCandidates, rawMetadataScopeHint); } if (valueFlowSignal && !predecomposeEntities.counterparty && !followupSeed.counterparty) { pushUnique(entityCandidates, predecomposeEntities.organization); @@ -533,9 +621,11 @@ function buildAssistantMcpDiscoveryTurnInput(input) { ? "movements" : metadataGroundedDocumentLaneApplicable ? "documents" - : rawMetadataSignal || effectiveMetadataFollowupSeedApplicable - ? "metadata" - : rawDomain ?? seededDomain, + : entityResolutionSignal + ? "entity_resolution" + : rawMetadataSignal || effectiveMetadataFollowupSeedApplicable + ? "metadata" + : rawDomain ?? seededDomain, asked_action_family: lifecycleSignal ? "activity_duration" : valueFlowSignal @@ -548,9 +638,11 @@ function buildAssistantMcpDiscoveryTurnInput(input) { ? "list_movements" : metadataGroundedDocumentLaneApplicable ? "list_documents" - : rawMetadataSignal || effectiveMetadataFollowupSeedApplicable - ? metadataActionFromRawText(rawText) ?? seededAction - : rawAction ?? seededAction, + : entityResolutionSignal + ? "search_business_entity" + : rawMetadataSignal || effectiveMetadataFollowupSeedApplicable + ? metadataActionFromRawText(rawText) ?? seededAction + : rawAction ?? seededAction, asked_aggregation_axis: monthlyAggregationSignal ? "month" : rawAggregationAxis, explicit_entity_candidates: entityCandidates, metadata_ambiguity_entity_sets: metadataAmbiguityLaneClarificationApplicable && followupSeed.metadataAmbiguityEntitySets.length > 0 @@ -573,11 +665,13 @@ function buildAssistantMcpDiscoveryTurnInput(input) { ? "document_evidence" : metadataAmbiguityLaneClarificationApplicable ? "metadata_lane_choice_clarification" - : rawMetadataSignal || effectiveMetadataFollowupSeedApplicable - ? "1c_metadata_surface" - : followupDiscoverySeedApplicable - ? seededUnsupported - : null), + : entityResolutionSignal + ? "entity_resolution" + : rawMetadataSignal || effectiveMetadataFollowupSeedApplicable + ? "1c_metadata_surface" + : followupDiscoverySeedApplicable + ? seededUnsupported + : null), stale_replay_forbidden: Boolean(assistantTurnMeaning?.stale_replay_forbidden || unsupported || lifecycleSignal || @@ -585,6 +679,7 @@ function buildAssistantMcpDiscoveryTurnInput(input) { metadataGroundedMovementLaneApplicable || metadataGroundedDocumentLaneApplicable || metadataAmbiguityLaneClarificationApplicable || + entityResolutionSignal || rawMetadataSignal || effectiveMetadataFollowupSeedApplicable || followupDiscoverySeedApplicable) @@ -622,6 +717,7 @@ function buildAssistantMcpDiscoveryTurnInput(input) { lifecycleSignal, valueFlowSignal, metadataSignal: rawMetadataSignal || effectiveMetadataFollowupSeedApplicable, + entityResolutionSignal, semanticDataNeed, explicitIntentCandidate, followupDiscoverySeedApplicable: followupDiscoverySeedApplicable || @@ -645,9 +741,11 @@ function buildAssistantMcpDiscoveryTurnInput(input) { ? "raw_text" : valueFlowSignal ? "raw_text" - : rawMetadataSignal || effectiveMetadataFollowupSeedApplicable + : entityResolutionSignal ? "raw_text" - : "none"; + : rawMetadataSignal || effectiveMetadataFollowupSeedApplicable + ? "raw_text" + : "none"; if (lifecycleSignal) { pushReason(reasonCodes, "mcp_discovery_lifecycle_signal_detected"); } @@ -657,6 +755,15 @@ function buildAssistantMcpDiscoveryTurnInput(input) { if (rawMetadataSignal) { pushReason(reasonCodes, "mcp_discovery_metadata_signal_detected"); } + if (entityResolutionSignal) { + pushReason(reasonCodes, "mcp_discovery_entity_resolution_signal_detected"); + } + if (rawMetadataScopeHint) { + pushReason(reasonCodes, "mcp_discovery_metadata_scope_hint_from_raw_text"); + } + if (rawEntityCandidate) { + pushReason(reasonCodes, "mcp_discovery_entity_scope_from_raw_entity_search"); + } if (payoutSignal) { pushReason(reasonCodes, "mcp_discovery_payout_signal_detected"); } diff --git a/llm_normalizer/backend/src/services/assistantMcpDiscoveryAnswerAdapter.ts b/llm_normalizer/backend/src/services/assistantMcpDiscoveryAnswerAdapter.ts index 1838fc7..84c4df2 100644 --- a/llm_normalizer/backend/src/services/assistantMcpDiscoveryAnswerAdapter.ts +++ b/llm_normalizer/backend/src/services/assistantMcpDiscoveryAnswerAdapter.ts @@ -80,6 +80,13 @@ function modeFor(pilot: AssistantMcpDiscoveryPilotExecutionContract): AssistantM if (pilot.pilot_status === "skipped_needs_clarification") { return "needs_clarification"; } + if ( + pilot.pilot_scope === "entity_resolution_search_v1" && + (pilot.reason_codes.includes("pilot_entity_resolution_ambiguity_requires_clarification") || + pilot.derived_entity_resolution?.resolution_status === "ambiguous") + ) { + return "needs_clarification"; + } if (pilot.evidence.answer_permission === "confirmed_answer") { return "confirmed_with_bounded_inference"; } @@ -109,6 +116,10 @@ function isMetadataPilot(pilot: AssistantMcpDiscoveryPilotExecutionContract): bo return pilot.pilot_scope === "metadata_inspection_v1"; } +function isEntityResolutionPilot(pilot: AssistantMcpDiscoveryPilotExecutionContract): boolean { + return pilot.pilot_scope === "entity_resolution_search_v1"; +} + function isMetadataLaneChoiceClarification(pilot: AssistantMcpDiscoveryPilotExecutionContract): boolean { return ( pilot.reason_codes.includes("planner_selected_metadata_lane_clarification_recipe") || @@ -116,6 +127,104 @@ function isMetadataLaneChoiceClarification(pilot: AssistantMcpDiscoveryPilotExec ); } +function askedActionFamily(pilot: AssistantMcpDiscoveryPilotExecutionContract): string | null { + const action = pilot.evidence.query_plan.turn_meaning_ref?.asked_action_family; + if (typeof action !== "string") { + return null; + } + const normalized = action.trim().toLowerCase(); + return normalized.length > 0 ? normalized : null; +} + +function unsupportedFamily(pilot: AssistantMcpDiscoveryPilotExecutionContract): string | null { + const unsupported = pilot.evidence.query_plan.turn_meaning_ref?.unsupported_but_understood_family; + if (typeof unsupported !== "string") { + return null; + } + const normalized = unsupported.trim().toLowerCase(); + return normalized.length > 0 ? normalized : null; +} + +function firstEntityCandidate(pilot: AssistantMcpDiscoveryPilotExecutionContract): string | null { + const values = Array.isArray(pilot.evidence.query_plan.turn_meaning_ref?.explicit_entity_candidates) + ? pilot.evidence.query_plan.turn_meaning_ref?.explicit_entity_candidates + : []; + for (const value of values) { + const text = String(value ?? "").trim(); + if (text) { + return text; + } + } + return null; +} + +function isMovementLaneClarification(pilot: AssistantMcpDiscoveryPilotExecutionContract): boolean { + return ( + isMovementPilot(pilot) || + pilot.reason_codes.includes("planner_selected_movement_recipe") || + pilot.dry_run.reason_codes.includes("planner_selected_movement_recipe") || + askedActionFamily(pilot) === "list_movements" || + unsupportedFamily(pilot) === "movement_evidence" + ); +} + +function isDocumentLaneClarification(pilot: AssistantMcpDiscoveryPilotExecutionContract): boolean { + return ( + isDocumentPilot(pilot) || + pilot.reason_codes.includes("planner_selected_document_recipe") || + pilot.dry_run.reason_codes.includes("planner_selected_document_recipe") || + askedActionFamily(pilot) === "list_documents" || + unsupportedFamily(pilot) === "document_evidence" + ); +} + +function laneScopeSuffix(pilot: AssistantMcpDiscoveryPilotExecutionContract): string { + const entity = firstEntityCandidate(pilot); + return entity ? ` по "${entity}"` : ""; +} + +function dryRunMissingAxis(pilot: AssistantMcpDiscoveryPilotExecutionContract, axis: string): boolean { + return pilot.dry_run.execution_steps.some((step) => + step.missing_axis_options.some((option) => option.includes(axis)) + ); +} + +function clarificationNeedRu( + pilot: AssistantMcpDiscoveryPilotExecutionContract +): { subject: string; verb: string } { + const needsPeriod = dryRunMissingAxis(pilot, "period"); + const needsOrganization = dryRunMissingAxis(pilot, "organization"); + if (needsPeriod && needsOrganization) { + return { subject: "проверяемый период и организацию", verb: "нужно" }; + } + if (needsPeriod) { + return { subject: "проверяемый период", verb: "нужен" }; + } + if (needsOrganization) { + return { subject: "организацию", verb: "нужно" }; + } + return { subject: "контекст проверки", verb: "нужно" }; +} + +function clarificationNextStepLine( + pilot: AssistantMcpDiscoveryPilotExecutionContract, + laneLabel: string +): string { + const needsPeriod = dryRunMissingAxis(pilot, "period"); + const needsOrganization = dryRunMissingAxis(pilot, "organization"); + const scopeSuffix = laneScopeSuffix(pilot); + if (needsPeriod && needsOrganization) { + return `Уточните период и организацию, и я продолжу поиск по ${laneLabel}${scopeSuffix} в 1С.`; + } + if (needsPeriod) { + return `Уточните период, и я продолжу поиск по ${laneLabel}${scopeSuffix} в 1С.`; + } + if (needsOrganization) { + return `Уточните организацию, и я продолжу поиск по ${laneLabel}${scopeSuffix} в 1С.`; + } + return `Уточните контекст проверки, и я продолжу поиск по ${laneLabel}${scopeSuffix} в 1С.`; +} + function metadataRouteFamilyLabelRu( routeFamily: "document_evidence" | "movement_evidence" | "catalog_drilldown" | null ): string | null { @@ -135,6 +244,19 @@ function headlineFor(mode: AssistantMcpDiscoveryAnswerMode, pilot: AssistantMcpD const askedMonthlyBreakdown = pilot.derived_bidirectional_value_flow?.aggregation_axis === "month" || pilot.derived_value_flow?.aggregation_axis === "month"; + if (isEntityResolutionPilot(pilot) && mode === "confirmed_with_bounded_inference") { + return "По каталогу 1С найден вероятный контрагент; это заземление сущности для следующего шага, а не еще бизнес-ответ по данным."; + } + if (isEntityResolutionPilot(pilot) && mode === "needs_clarification") { + return "По каталогу 1С нашлось несколько похожих контрагентов, и без уточнения нельзя честно выбрать правильную сущность."; + } + if ( + isEntityResolutionPilot(pilot) && + mode === "checked_sources_only" && + pilot.derived_entity_resolution?.resolution_status === "not_found" + ) { + return "По текущему каталожному поиску 1С точный контрагент пока не подтвержден."; + } if (isMovementPilot(pilot) && mode === "confirmed_with_bounded_inference") { return "РџРѕ данным 1РЎ найдены строки движений; ответ ограничен проверенным периодом Рё найденными строками."; } @@ -177,8 +299,13 @@ function headlineFor(mode: AssistantMcpDiscoveryAnswerMode, pilot: AssistantMcpD if (mode === "needs_clarification" && isMetadataLaneChoiceClarification(pilot)) { return "По подтвержденной metadata-поверхности видно несколько конкурирующих data-lane, и без явного выбора дальше идти нельзя."; } - if (mode === "needs_clarification" && isMetadataLaneChoiceClarification(pilot)) { - return "Уточните, в какой контур идти дальше: по документам или по движениям/регистрам."; + if (mode === "needs_clarification" && isMovementLaneClarification(pilot)) { + const need = clarificationNeedRu(pilot); + return `Могу идти дальше по движениям/регистрам${laneScopeSuffix(pilot)}, но для запуска поиска в 1С ${need.verb} ${need.subject}.`; + } + if (mode === "needs_clarification" && isDocumentLaneClarification(pilot)) { + const need = clarificationNeedRu(pilot); + return `Могу идти дальше по документам${laneScopeSuffix(pilot)}, но для запуска поиска в 1С ${need.verb} ${need.subject}.`; } if (mode === "needs_clarification") { return "Нужно уточнить контекст перед поиском в 1С."; @@ -190,9 +317,28 @@ function headlineFor(mode: AssistantMcpDiscoveryAnswerMode, pilot: AssistantMcpD } function nextStepFor(mode: AssistantMcpDiscoveryAnswerMode, pilot: AssistantMcpDiscoveryPilotExecutionContract): string | null { + if (isEntityResolutionPilot(pilot) && mode === "needs_clarification") { + return "Уточните точное название контрагента или добавьте ИНН, и я продолжу уже по нужной сущности в 1С."; + } + if (isEntityResolutionPilot(pilot) && mode === "confirmed_with_bounded_inference") { + return "Теперь могу продолжить уже по найденному контрагенту и искать документы, движения или денежный поток."; + } + if ( + isEntityResolutionPilot(pilot) && + mode === "checked_sources_only" && + pilot.derived_entity_resolution?.resolution_status === "not_found" + ) { + return "Дайте точное название или ИНН, и я повторю поиск по каталогу 1С более прицельно."; + } if (mode === "needs_clarification" && isMetadataLaneChoiceClarification(pilot)) { return "Уточните, в какой контур идти дальше: по документам или по движениям/регистрам."; } + if (mode === "needs_clarification" && isMovementLaneClarification(pilot)) { + return clarificationNextStepLine(pilot, "движениям/регистрам"); + } + if (mode === "needs_clarification" && isDocumentLaneClarification(pilot)) { + return clarificationNextStepLine(pilot, "документам"); + } if (mode === "needs_clarification") { return "Уточните контрагента, период или организацию, и я смогу выполнить проверку по 1С."; } @@ -241,6 +387,11 @@ function buildMustNotClaim(pilot: AssistantMcpDiscoveryPilotExecutionContract): claims.push("Do not claim a document/register exists outside the checked metadata probe results."); claims.push("Do not present the inferred next checked lane as already executed data retrieval."); } + if (isEntityResolutionPilot(pilot)) { + claims.push("Do not present catalog grounding as confirmed business activity, turnover, or document evidence."); + claims.push("Do not claim legal identity uniqueness when several catalog candidates are still plausible."); + claims.push("Do not imply that the resolved entity has already been used in a downstream data probe."); + } if (pilot.evidence.confirmed_facts.length === 0) { claims.push("Do not claim a confirmed business fact when confirmed_facts is empty."); } @@ -335,6 +486,35 @@ function derivedMetadataInferenceLine(pilot: AssistantMcpDiscoveryPilotExecution return `По подтвержденной metadata-поверхности следующий проверяемый шаг можно ограниченно оценить как ${routeLabel} через family «${surface.selected_entity_set}». Это еще не выполненный data-fetch, а только grounded выбор следующего контура.`; } +function derivedEntityResolutionConfirmedLine(pilot: AssistantMcpDiscoveryPilotExecutionContract): string | null { + const resolution = pilot.derived_entity_resolution; + if (!resolution || resolution.resolution_status !== "resolved" || !resolution.resolved_entity) { + return null; + } + const requested = resolution.requested_entity ? ` по запросу "${resolution.requested_entity}"` : ""; + const confidence = + resolution.confidence === "high" + ? " Точность совпадения выглядит высокой." + : resolution.confidence === "medium" + ? " Совпадение выглядит достаточно сильным, но это все еще catalog grounding." + : " Совпадение выглядит вероятным, но его лучше считать рабочим заземлением сущности."; + return `В текущем каталожном срезе 1С${requested} найден контрагент "${resolution.resolved_entity}".${confidence}`; +} + +function derivedEntityResolutionInferenceLine(pilot: AssistantMcpDiscoveryPilotExecutionContract): string | null { + const resolution = pilot.derived_entity_resolution; + if (!resolution) { + return null; + } + if (resolution.resolution_status === "resolved") { + return "Сейчас подтверждено только заземление сущности по каталогу 1С; документы, движения и денежные показатели по ней еще не проверялись."; + } + if (resolution.resolution_status === "ambiguous" && resolution.ambiguity_candidates.length > 0) { + return `В checked catalog slice есть несколько близких кандидатов: ${resolution.ambiguity_candidates.join(", ")}. Без уточнения нельзя честно выбрать одного контрагента для следующего шага.`; + } + return null; +} + function derivedValueFlowConfirmedLine(pilot: AssistantMcpDiscoveryPilotExecutionContract): string | null { const flow = pilot.derived_value_flow; if (!flow) { @@ -436,11 +616,15 @@ export function buildAssistantMcpDiscoveryAnswerDraft( if (pilot.evidence.inferred_facts.length > 0) { pushReason(reasonCodes, "answer_contains_bounded_inference"); } - const derivedInferenceLine = derivedActivityInferenceLine(pilot) ?? derivedMetadataInferenceLine(pilot); + const derivedInferenceLine = + derivedActivityInferenceLine(pilot) ?? + derivedMetadataInferenceLine(pilot) ?? + derivedEntityResolutionInferenceLine(pilot); const inferenceLines = derivedInferenceLine ? [derivedInferenceLine] : pilot.evidence.inferred_facts; const derivedMetadataLine = derivedMetadataConfirmedLine(pilot); + const derivedEntityResolutionLine = derivedEntityResolutionConfirmedLine(pilot); const derivedValueLine = derivedBidirectionalValueFlowConfirmedLine(pilot) ?? derivedValueFlowConfirmedLine(pilot); const monthlyConfirmedLines = derivedBidirectionalValueFlowMonthlyLines(pilot).length > 0 @@ -451,6 +635,8 @@ export function buildAssistantMcpDiscoveryAnswerDraft( } const confirmedLines = derivedValueLine ? [...pilot.evidence.confirmed_facts, derivedValueLine, ...monthlyConfirmedLines] + : derivedEntityResolutionLine + ? [...pilot.evidence.confirmed_facts, derivedEntityResolutionLine] : derivedMetadataLine ? [...pilot.evidence.confirmed_facts, derivedMetadataLine] : pilot.evidence.confirmed_facts; diff --git a/llm_normalizer/backend/src/services/assistantMcpDiscoveryPilotExecutor.ts b/llm_normalizer/backend/src/services/assistantMcpDiscoveryPilotExecutor.ts index baa4d12..6e9d9a7 100644 --- a/llm_normalizer/backend/src/services/assistantMcpDiscoveryPilotExecutor.ts +++ b/llm_normalizer/backend/src/services/assistantMcpDiscoveryPilotExecutor.ts @@ -8,7 +8,10 @@ import { type AssistantMcpDiscoveryRuntimeDryRunContract, type AssistantMcpDiscoveryRuntimeStepContract } from "./assistantMcpDiscoveryRuntimeAdapter"; -import type { AssistantMcpDiscoveryPlannerContract } from "./assistantMcpDiscoveryPlanner"; +import type { + AssistantMcpDiscoveryChainId, + AssistantMcpDiscoveryPlannerContract +} from "./assistantMcpDiscoveryPlanner"; import { resolveAssistantMcpDiscoveryEvidence, type AssistantMcpDiscoveryEvidenceContract, @@ -133,6 +136,18 @@ export interface AssistantMcpDiscoveryDerivedMetadataSurface { inference_basis: "confirmed_1c_metadata_surface_rows"; } +export interface AssistantMcpDiscoveryDerivedEntityResolution { + requested_entity: string | null; + resolution_status: "resolved" | "ambiguous" | "not_found"; + resolved_entity: string | null; + resolved_reference: string | null; + matched_rows: number; + checked_candidates: string[]; + ambiguity_candidates: string[]; + confidence: "high" | "medium" | "low" | null; + inference_basis: "catalog_counterparty_search_rows"; +} + interface AssistantMcpDiscoveryCoverageAwareQueryResult extends AddressMcpQueryExecutorResult { coverage_limited_by_probe_limit: boolean; coverage_recovered_by_period_chunking: boolean; @@ -149,6 +164,7 @@ interface AssistantMcpDiscoveryCoverageAwareQueryExecution { export type AssistantMcpDiscoveryPilotScope = | "metadata_inspection_v1" + | "entity_resolution_search_v1" | "counterparty_movement_evidence_query_movements_v1" | "counterparty_document_evidence_query_documents_v1" | "counterparty_lifecycle_query_documents_v1" @@ -169,6 +185,7 @@ export interface AssistantMcpDiscoveryPilotExecutionContract { evidence: AssistantMcpDiscoveryEvidenceContract; source_rows_summary: string | null; derived_metadata_surface: AssistantMcpDiscoveryDerivedMetadataSurface | null; + derived_entity_resolution: AssistantMcpDiscoveryDerivedEntityResolution | null; derived_activity_period: AssistantMcpDiscoveryDerivedActivityPeriod | null; derived_value_flow: AssistantMcpDiscoveryDerivedValueFlow | null; derived_bidirectional_value_flow: AssistantMcpDiscoveryDerivedBidirectionalValueFlow | null; @@ -183,6 +200,41 @@ const DEFAULT_DEPS: ResolvedAssistantMcpDiscoveryPilotExecutorDeps = { executeAddressMcpMetadata }; +const ENTITY_RESOLUTION_COUNTERPARTY_LOOKUP_LIMIT = 1000; +const ENTITY_RESOLUTION_COUNTERPARTY_QUERY_TEMPLATE = ` +ВЫБРАТЬ ПЕРВЫЕ __LIMIT__ + ПРЕДСТАВЛЕНИЕ(Контрагенты.Ссылка) КАК Контрагент, + ПРЕДСТАВЛЕНИЕ(Контрагенты.Ссылка) КАК Counterparty, + Контрагенты.Ссылка КАК КонтрагентСсылка, + Контрагенты.Ссылка КАК CounterpartyRef, + Контрагенты.Наименование КАК Наименование +ИЗ + Справочник.Контрагенты КАК Контрагенты +`; +const ENTITY_RESOLUTION_STOPWORDS = new Set([ + "ооо", + "ао", + "зао", + "ип", + "llc", + "ltd", + "company", + "контрагент", + "counterparty", + "поставщик", + "supplier", + "клиент", + "customer", + "в", + "1с", + "1c", + "найди", + "найти", + "поищи", + "search", + "find" +]); + function toNonEmptyString(value: unknown): string | null { if (value === null || value === undefined) { return null; @@ -282,7 +334,170 @@ function buildValueFlowFilters(planner: AssistantMcpDiscoveryPlannerContract): A }; } +function normalizeEntityResolutionText(value: string | null): string { + return String(value ?? "") + .toLowerCase() + .replace(/ё/g, "е") + .replace(/[«»"'`]/g, " ") + .replace(/[^\p{L}\p{N}\s-]+/gu, " ") + .replace(/\s+/g, " ") + .trim(); +} + +function tokenizeEntityResolutionText(value: string | null): string[] { + return normalizeEntityResolutionText(value) + .split(" ") + .map((token) => token.trim()) + .filter((token) => token.length >= 2 && !ENTITY_RESOLUTION_STOPWORDS.has(token)); +} + +function isLowQualityEntityResolutionAnchor(value: string | null): boolean { + return tokenizeEntityResolutionText(value).length <= 0; +} + +function entityResolutionCandidateName(row: Record): string | null { + const candidates = [ + row["Контрагент"], + row["Counterparty"], + row["Наименование"], + row["name"], + row["Name"], + row["registrator"], + row["Registrator"] + ]; + for (const candidate of candidates) { + const text = toNonEmptyString(candidate); + if (text) { + return text; + } + } + return null; +} + +function entityResolutionCandidateRef(row: Record): string | null { + const candidates = [row["КонтрагентСсылка"], row["CounterpartyRef"], row["ref"], row["Ref"]]; + for (const candidate of candidates) { + const text = toNonEmptyString(candidate); + if (text) { + return text; + } + } + return null; +} + +function scoreEntityResolutionCandidate(name: string, requested: string): number | null { + const normalizedName = normalizeEntityResolutionText(name); + const normalizedRequested = normalizeEntityResolutionText(requested); + const requestedTokens = tokenizeEntityResolutionText(requested); + if (!normalizedName || !normalizedRequested || requestedTokens.length <= 0) { + return null; + } + + let score = 0; + if (normalizedName === normalizedRequested) { + score += 10_000; + } else if (normalizedName.includes(normalizedRequested)) { + score += 5_000; + } else if (normalizedRequested.includes(normalizedName) && normalizedName.length >= 4) { + score += 2_000; + } + + for (const token of requestedTokens) { + if (!normalizedName.includes(token)) { + return null; + } + score += Math.max(40, token.length * 20); + } + + score -= Math.abs(normalizedName.length - normalizedRequested.length); + return score; +} + +function deriveEntityResolution( + result: AddressMcpQueryExecutorResult | null, + requestedEntity: string | null +): AssistantMcpDiscoveryDerivedEntityResolution | null { + if (!result || result.error || !requestedEntity) { + return null; + } + + const checkedCandidates = uniqueCandidateStrings( + result.raw_rows + .map((row) => entityResolutionCandidateName(row)) + .filter((value): value is string => Boolean(value)) + ); + const scoredCandidates = checkedCandidates + .map((name) => { + const score = scoreEntityResolutionCandidate(name, requestedEntity); + return score === null ? null : { name, score }; + }) + .filter((value): value is { name: string; score: number } => Boolean(value)) + .sort((left, right) => right.score - left.score || left.name.length - right.name.length || left.name.localeCompare(right.name, "ru")); + + if (scoredCandidates.length <= 0) { + return { + requested_entity: requestedEntity, + resolution_status: "not_found", + resolved_entity: null, + resolved_reference: null, + matched_rows: result.rows.length, + checked_candidates: checkedCandidates.slice(0, 12), + ambiguity_candidates: [], + confidence: null, + inference_basis: "catalog_counterparty_search_rows" + }; + } + + const bestCandidate = scoredCandidates[0]; + const bestNormalized = normalizeEntityResolutionText(bestCandidate.name); + const requestedNormalized = normalizeEntityResolutionText(requestedEntity); + const requestedTokens = tokenizeEntityResolutionText(requestedEntity); + const exactMatch = bestNormalized === requestedNormalized; + const strongContains = requestedTokens.length > 1 && bestNormalized.includes(requestedNormalized); + const topCandidates = scoredCandidates.filter((candidate) => candidate.score === bestCandidate.score); + + if (topCandidates.length > 1 && !exactMatch && !strongContains) { + return { + requested_entity: requestedEntity, + resolution_status: "ambiguous", + resolved_entity: null, + resolved_reference: null, + matched_rows: result.rows.length, + checked_candidates: checkedCandidates.slice(0, 12), + ambiguity_candidates: topCandidates.map((candidate) => candidate.name).slice(0, 6), + confidence: "low", + inference_basis: "catalog_counterparty_search_rows" + }; + } + + const matchedRow = + result.raw_rows.find((row) => normalizeEntityResolutionText(entityResolutionCandidateName(row)) === bestNormalized) ?? null; + + return { + requested_entity: requestedEntity, + resolution_status: "resolved", + resolved_entity: bestCandidate.name, + resolved_reference: matchedRow ? entityResolutionCandidateRef(matchedRow) : null, + matched_rows: result.rows.length, + checked_candidates: checkedCandidates.slice(0, 12), + ambiguity_candidates: [], + confidence: exactMatch ? "high" : strongContains ? "medium" : "low", + inference_basis: "catalog_counterparty_search_rows" + }; +} + +function uniqueCandidateStrings(values: string[]): string[] { + const result: string[] = []; + for (const value of values) { + pushUnique(result, value); + } + return result; +} + function isLifecyclePilotEligible(planner: AssistantMcpDiscoveryPlannerContract): boolean { + if (planner.selected_chain_id === "lifecycle") { + return true; + } const meaning = planner.discovery_plan.turn_meaning_ref; const domain = String(meaning?.asked_domain_family ?? "").toLowerCase(); const action = String(meaning?.asked_action_family ?? "").toLowerCase(); @@ -294,6 +509,9 @@ function isLifecyclePilotEligible(planner: AssistantMcpDiscoveryPlannerContract) } function isDocumentEvidencePilotEligible(planner: AssistantMcpDiscoveryPlannerContract): boolean { + if (planner.selected_chain_id === "document_evidence") { + return true; + } const meaning = planner.discovery_plan.turn_meaning_ref; const domain = String(meaning?.asked_domain_family ?? "").toLowerCase(); const action = String(meaning?.asked_action_family ?? "").toLowerCase(); @@ -306,6 +524,9 @@ function isDocumentEvidencePilotEligible(planner: AssistantMcpDiscoveryPlannerCo } function isMovementEvidencePilotEligible(planner: AssistantMcpDiscoveryPlannerContract): boolean { + if (planner.selected_chain_id === "movement_evidence") { + return true; + } const meaning = planner.discovery_plan.turn_meaning_ref; const domain = String(meaning?.asked_domain_family ?? "").toLowerCase(); const action = String(meaning?.asked_action_family ?? "").toLowerCase(); @@ -323,6 +544,9 @@ function isMovementEvidencePilotEligible(planner: AssistantMcpDiscoveryPlannerCo } function isValueFlowPilotEligible(planner: AssistantMcpDiscoveryPlannerContract): boolean { + if (planner.selected_chain_id === "value_flow") { + return true; + } const meaning = planner.discovery_plan.turn_meaning_ref; const domain = String(meaning?.asked_domain_family ?? "").toLowerCase(); const action = String(meaning?.asked_action_family ?? "").toLowerCase(); @@ -339,6 +563,12 @@ function isValueFlowPilotEligible(planner: AssistantMcpDiscoveryPlannerContract) } function isMetadataPilotEligible(planner: AssistantMcpDiscoveryPlannerContract): boolean { + if ( + planner.selected_chain_id === "metadata_inspection" || + planner.selected_chain_id === "metadata_lane_clarification" + ) { + return true; + } const meaning = planner.discovery_plan.turn_meaning_ref; const domain = String(meaning?.asked_domain_family ?? "").toLowerCase(); const action = String(meaning?.asked_action_family ?? "").toLowerCase(); @@ -356,6 +586,25 @@ function isMetadataPilotEligible(planner: AssistantMcpDiscoveryPlannerContract): ); } +function isEntityResolutionPilotEligible(planner: AssistantMcpDiscoveryPlannerContract): boolean { + if (planner.selected_chain_id === "entity_resolution") { + return true; + } + const meaning = planner.discovery_plan.turn_meaning_ref; + const domain = String(meaning?.asked_domain_family ?? "").toLowerCase(); + const action = String(meaning?.asked_action_family ?? "").toLowerCase(); + const unsupported = String(meaning?.unsupported_but_understood_family ?? "").toLowerCase(); + const semanticNeed = String(planner.semantic_data_need ?? "").toLowerCase(); + const combined = `${domain} ${action} ${unsupported} ${semanticNeed}`; + return ( + planner.proposed_primitives.includes("search_business_entity") && + (combined.includes("entity_resolution") || + combined.includes("search_business_entity") || + combined.includes("entity discovery") || + combined.includes("counterparty search")) + ); +} + function metadataScopeForPlanner(planner: AssistantMcpDiscoveryPlannerContract): string | null { const entityCandidate = firstEntityCandidate(planner); if (entityCandidate) { @@ -715,6 +964,16 @@ function summarizeMetadataRows(result: AddressMcpMetadataRowsResult): string | n return `${result.fetched_rows} MCP metadata rows fetched`; } +function summarizeEntityResolutionRows(result: AddressMcpQueryExecutorResult): string | null { + if (result.error) { + return null; + } + if (result.fetched_rows <= 0) { + return "0 MCP catalog rows fetched"; + } + return `${result.fetched_rows} MCP catalog rows fetched for entity search`; +} + function metadataRowText(row: Record, keys: string[]): string | null { for (const key of keys) { const text = toNonEmptyString(row[key]); @@ -752,6 +1011,19 @@ function metadataEntitySet(row: Record): string | null { ]); } +function inferMetadataEntitySetFromObjectName(objectName: string | null): string | null { + const text = String(objectName ?? "").trim(); + if (!text) { + return null; + } + const dotIndex = text.indexOf("."); + if (dotIndex <= 0) { + return null; + } + const entitySet = text.slice(0, dotIndex).trim(); + return entitySet.length > 0 ? entitySet : null; +} + function metadataChildNames(value: unknown): string[] { if (!Array.isArray(value)) { return []; @@ -908,7 +1180,7 @@ function deriveMetadataSurface( if (objectName) { pushUnique(matchedObjects, objectName); } - const entitySet = metadataEntitySet(row); + const entitySet = metadataEntitySet(row) ?? inferMetadataEntitySetFromObjectName(objectName); if (entitySet) { pushUnique(availableEntitySets, entitySet); } @@ -997,6 +1269,67 @@ function buildMetadataUnknownFacts( return ["No matching 1C metadata objects were confirmed by this MCP metadata probe"]; } +function buildEntityResolutionConfirmedFacts( + resolution: AssistantMcpDiscoveryDerivedEntityResolution | null +): string[] { + if (!resolution || resolution.resolution_status !== "resolved" || !resolution.resolved_entity) { + return []; + } + if (resolution.requested_entity && normalizeEntityResolutionText(resolution.requested_entity) === normalizeEntityResolutionText(resolution.resolved_entity)) { + return [`В проверенном каталожном срезе 1С найден контрагент: ${resolution.resolved_entity}`]; + } + return [ + `В проверенном каталожном срезе 1С найден наиболее вероятный контрагент: ${resolution.resolved_entity}` + ]; +} + +function buildEntityResolutionInferredFacts( + resolution: AssistantMcpDiscoveryDerivedEntityResolution | null +): string[] { + if (!resolution) { + return []; + } + if (resolution.resolution_status === "resolved") { + const facts = ["Пока проверено только заземление сущности по каталогу 1С; документы, движения и денежные показатели еще не проверялись"]; + if (resolution.requested_entity && resolution.resolved_entity) { + const requestedNormalized = normalizeEntityResolutionText(resolution.requested_entity); + const resolvedNormalized = normalizeEntityResolutionText(resolution.resolved_entity); + if (requestedNormalized !== resolvedNormalized) { + facts.push("Контрагент выбран как ближайшее подтвержденное совпадение имени в проверенном каталоге 1С"); + } + } + return facts; + } + if (resolution.resolution_status === "ambiguous") { + return ["В проверенном каталожном срезе осталось несколько близких кандидатов, поэтому точного контрагента в 1С еще нужно уточнить"]; + } + return []; +} + +function buildEntityResolutionUnknownFacts( + resolution: AssistantMcpDiscoveryDerivedEntityResolution | null, + requestedEntity: string | null +): string[] { + if (!resolution) { + return ["По проверенному каталожному поиску 1С не удалось заземлить сущность контрагента"]; + } + const unknownFacts = ["Документы, движения и денежные показатели по этому контрагенту еще не проверялись; пока был только каталожный поиск"]; + if (resolution.resolution_status === "ambiguous" && resolution.ambiguity_candidates.length > 0) { + unknownFacts.unshift( + `Точное заземление контрагента в 1С остается неоднозначным между вариантами: ${resolution.ambiguity_candidates.join(", ")}` + ); + return unknownFacts; + } + if (resolution.resolution_status === "not_found") { + unknownFacts.unshift( + requestedEntity + ? `В проверенном каталожном срезе 1С не подтвержден контрагент с именем "${requestedEntity}"` + : "В проверенном каталожном срезе 1С не подтвержден подходящий контрагент" + ); + } + return unknownFacts; +} + function rowDateValue(row: Record): string | null { const candidates = [ row["Период"], @@ -1562,22 +1895,25 @@ function buildEmptyEvidence( } function pilotScopeForPlanner(planner: AssistantMcpDiscoveryPlannerContract): AssistantMcpDiscoveryPilotScope { - if (planner.reason_codes.includes("planner_selected_metadata_lane_clarification_recipe")) { - return "metadata_inspection_v1"; + switch (planner.selected_chain_id) { + case "metadata_lane_clarification": + case "metadata_inspection": + return "metadata_inspection_v1"; + case "movement_evidence": + return "counterparty_movement_evidence_query_movements_v1"; + case "value_flow": + return valueFlowPilotProfile(planner).scope; + case "document_evidence": + return "counterparty_document_evidence_query_documents_v1"; + case "lifecycle": + return "counterparty_lifecycle_query_documents_v1"; + case "entity_resolution": + return "entity_resolution_search_v1"; } - if (isMetadataPilotEligible(planner)) { - return "metadata_inspection_v1"; - } - if (isMovementEvidencePilotEligible(planner)) { - return "counterparty_movement_evidence_query_movements_v1"; - } - if (isValueFlowPilotEligible(planner)) { - return valueFlowPilotProfile(planner).scope; - } - if (isDocumentEvidencePilotEligible(planner)) { - return "counterparty_document_evidence_query_documents_v1"; - } - return "counterparty_lifecycle_query_documents_v1"; +} + +function isLivePilotChainSupported(chainId: AssistantMcpDiscoveryChainId): boolean { + return true; } export async function executeAssistantMcpDiscoveryPilot( @@ -1612,6 +1948,7 @@ export async function executeAssistantMcpDiscoveryPilot( evidence, source_rows_summary: null, derived_metadata_surface: null, + derived_entity_resolution: null, derived_activity_period: null, derived_value_flow: null, derived_bidirectional_value_flow: null, @@ -1636,6 +1973,7 @@ export async function executeAssistantMcpDiscoveryPilot( evidence, source_rows_summary: null, derived_metadata_surface: null, + derived_entity_resolution: null, derived_activity_period: null, derived_value_flow: null, derived_bidirectional_value_flow: null, @@ -1649,8 +1987,18 @@ export async function executeAssistantMcpDiscoveryPilot( const movementPilotEligible = isMovementEvidencePilotEligible(planner); const lifecyclePilotEligible = isLifecyclePilotEligible(planner); const valueFlowPilotEligible = isValueFlowPilotEligible(planner); + const entityResolutionPilotEligible = isEntityResolutionPilotEligible(planner); + const livePilotChainSupported = isLivePilotChainSupported(planner.selected_chain_id); - if (!metadataPilotEligible && !documentPilotEligible && !movementPilotEligible && !lifecyclePilotEligible && !valueFlowPilotEligible) { + if ( + !livePilotChainSupported || + (!metadataPilotEligible && + !documentPilotEligible && + !movementPilotEligible && + !lifecyclePilotEligible && + !valueFlowPilotEligible && + !entityResolutionPilotEligible) + ) { pushReason(reasonCodes, "pilot_scope_unsupported_for_live_execution"); for (const step of dryRun.execution_steps) { skippedPrimitives.push(step.primitive_id); @@ -1670,6 +2018,7 @@ export async function executeAssistantMcpDiscoveryPilot( evidence, source_rows_summary: null, derived_metadata_surface: null, + derived_entity_resolution: null, derived_activity_period: null, derived_value_flow: null, derived_bidirectional_value_flow: null, @@ -1737,13 +2086,109 @@ export async function executeAssistantMcpDiscoveryPilot( evidence, source_rows_summary: sourceRowsSummary, derived_metadata_surface: derivedMetadataSurface, + derived_entity_resolution: null, derived_activity_period: null, derived_value_flow: null, derived_bidirectional_value_flow: null, query_limitations: queryLimitations, reason_codes: reasonCodes - }; - } + }; + } + + if (entityResolutionPilotEligible) { + let queryResult: AddressMcpQueryExecutorResult | null = null; + const requestedEntity = counterparty; + if (isLowQualityEntityResolutionAnchor(requestedEntity)) { + pushReason(reasonCodes, "pilot_entity_resolution_anchor_missing_or_low_quality"); + const evidence = buildEmptyEvidence(planner, dryRun, probeResults, "Entity-resolution needs a clearer counterparty name"); + return { + schema_version: ASSISTANT_MCP_DISCOVERY_PILOT_EXECUTOR_SCHEMA_VERSION, + policy_owner: "assistantMcpDiscoveryPilotExecutor", + pilot_status: "skipped_needs_clarification", + pilot_scope: "entity_resolution_search_v1", + dry_run: dryRun, + mcp_execution_performed: false, + executed_primitives: executedPrimitives, + skipped_primitives: skippedPrimitives, + probe_results: probeResults, + evidence, + source_rows_summary: null, + derived_metadata_surface: null, + derived_entity_resolution: null, + derived_activity_period: null, + derived_value_flow: null, + derived_bidirectional_value_flow: null, + query_limitations: ["Entity-resolution needs a clearer counterparty name"], + reason_codes: reasonCodes + }; + } + + for (const step of dryRun.execution_steps) { + if (step.primitive_id !== "search_business_entity") { + skippedPrimitives.push(step.primitive_id); + probeResults.push(skippedProbeResult(step, "pilot_only_executes_search_business_entity")); + continue; + } + queryResult = await runtimeDeps.executeAddressMcpQuery({ + query: ENTITY_RESOLUTION_COUNTERPARTY_QUERY_TEMPLATE.replaceAll( + "__LIMIT__", + String(ENTITY_RESOLUTION_COUNTERPARTY_LOOKUP_LIMIT) + ), + limit: ENTITY_RESOLUTION_COUNTERPARTY_LOOKUP_LIMIT + }); + pushUnique(executedPrimitives, step.primitive_id); + probeResults.push(queryResultToProbeResult(step.primitive_id, queryResult)); + if (queryResult.error) { + pushUnique(queryLimitations, queryResult.error); + pushReason(reasonCodes, "pilot_search_business_entity_mcp_error"); + } else { + pushReason(reasonCodes, "pilot_search_business_entity_mcp_executed"); + } + } + + const sourceRowsSummary = queryResult ? summarizeEntityResolutionRows(queryResult) : null; + const derivedEntityResolution = deriveEntityResolution(queryResult, requestedEntity); + if (derivedEntityResolution?.resolution_status === "resolved") { + pushReason(reasonCodes, "pilot_derived_entity_resolution_from_catalog_rows"); + } + if (derivedEntityResolution?.resolution_status === "ambiguous") { + pushReason(reasonCodes, "pilot_entity_resolution_ambiguity_requires_clarification"); + } + if (derivedEntityResolution?.resolution_status === "not_found") { + pushReason(reasonCodes, "pilot_entity_resolution_not_found_in_checked_catalog"); + } + const evidence = resolveAssistantMcpDiscoveryEvidence({ + plan: planner.discovery_plan, + probeResults, + confirmedFacts: buildEntityResolutionConfirmedFacts(derivedEntityResolution), + inferredFacts: buildEntityResolutionInferredFacts(derivedEntityResolution), + unknownFacts: buildEntityResolutionUnknownFacts(derivedEntityResolution, requestedEntity), + sourceRowsSummary, + queryLimitations, + recommendedNextProbe: "resolve_entity_reference" + }); + + return { + schema_version: ASSISTANT_MCP_DISCOVERY_PILOT_EXECUTOR_SCHEMA_VERSION, + policy_owner: "assistantMcpDiscoveryPilotExecutor", + pilot_status: "executed", + pilot_scope: "entity_resolution_search_v1", + dry_run: dryRun, + mcp_execution_performed: executedPrimitives.length > 0, + executed_primitives: executedPrimitives, + skipped_primitives: skippedPrimitives, + probe_results: probeResults, + evidence, + source_rows_summary: sourceRowsSummary, + derived_metadata_surface: null, + derived_entity_resolution: derivedEntityResolution, + derived_activity_period: null, + derived_value_flow: null, + derived_bidirectional_value_flow: null, + query_limitations: queryLimitations, + reason_codes: reasonCodes + }; + } if (documentPilotEligible) { let queryResult: AddressMcpQueryExecutorResult | null = null; @@ -1765,6 +2210,7 @@ export async function executeAssistantMcpDiscoveryPilot( evidence, source_rows_summary: null, derived_metadata_surface: null, + derived_entity_resolution: null, derived_activity_period: null, derived_value_flow: null, derived_bidirectional_value_flow: null, @@ -1820,6 +2266,7 @@ export async function executeAssistantMcpDiscoveryPilot( evidence, source_rows_summary: sourceRowsSummary, derived_metadata_surface: null, + derived_entity_resolution: null, derived_activity_period: null, derived_value_flow: null, derived_bidirectional_value_flow: null, @@ -1848,6 +2295,7 @@ export async function executeAssistantMcpDiscoveryPilot( evidence, source_rows_summary: null, derived_metadata_surface: null, + derived_entity_resolution: null, derived_activity_period: null, derived_value_flow: null, derived_bidirectional_value_flow: null, @@ -1903,6 +2351,7 @@ export async function executeAssistantMcpDiscoveryPilot( evidence, source_rows_summary: sourceRowsSummary, derived_metadata_surface: null, + derived_entity_resolution: null, derived_activity_period: null, derived_value_flow: null, derived_bidirectional_value_flow: null, @@ -1936,6 +2385,7 @@ export async function executeAssistantMcpDiscoveryPilot( evidence, source_rows_summary: null, derived_metadata_surface: null, + derived_entity_resolution: null, derived_activity_period: null, derived_value_flow: null, derived_bidirectional_value_flow: null, @@ -2035,6 +2485,7 @@ export async function executeAssistantMcpDiscoveryPilot( evidence, source_rows_summary: sourceRowsSummary, derived_metadata_surface: null, + derived_entity_resolution: null, derived_activity_period: null, derived_value_flow: null, derived_bidirectional_value_flow: derivedBidirectionalValueFlow, @@ -2061,6 +2512,7 @@ export async function executeAssistantMcpDiscoveryPilot( evidence, source_rows_summary: null, derived_metadata_surface: null, + derived_entity_resolution: null, derived_activity_period: null, derived_value_flow: null, derived_bidirectional_value_flow: null, @@ -2144,6 +2596,7 @@ export async function executeAssistantMcpDiscoveryPilot( evidence, source_rows_summary: sourceRowsSummary, derived_metadata_surface: null, + derived_entity_resolution: null, derived_activity_period: null, derived_value_flow: derivedValueFlow, derived_bidirectional_value_flow: null, @@ -2171,6 +2624,7 @@ export async function executeAssistantMcpDiscoveryPilot( evidence, source_rows_summary: null, derived_metadata_surface: null, + derived_entity_resolution: null, derived_activity_period: null, derived_value_flow: null, derived_bidirectional_value_flow: null, @@ -2230,6 +2684,7 @@ export async function executeAssistantMcpDiscoveryPilot( evidence, source_rows_summary: sourceRowsSummary, derived_metadata_surface: null, + derived_entity_resolution: null, derived_activity_period: derivedActivityPeriod, derived_value_flow: null, derived_bidirectional_value_flow: null, diff --git a/llm_normalizer/backend/src/services/assistantMcpDiscoveryPlanner.ts b/llm_normalizer/backend/src/services/assistantMcpDiscoveryPlanner.ts index a48357b..cd8524a 100644 --- a/llm_normalizer/backend/src/services/assistantMcpDiscoveryPlanner.ts +++ b/llm_normalizer/backend/src/services/assistantMcpDiscoveryPlanner.ts @@ -13,6 +13,15 @@ export const ASSISTANT_MCP_DISCOVERY_PLANNER_SCHEMA_VERSION = "assistant_mcp_dis export type AssistantMcpDiscoveryPlannerStatus = "ready_for_execution" | "needs_clarification" | "blocked"; +export type AssistantMcpDiscoveryChainId = + | "metadata_inspection" + | "metadata_lane_clarification" + | "value_flow" + | "lifecycle" + | "movement_evidence" + | "document_evidence" + | "entity_resolution"; + export interface AssistantMcpDiscoveryPlannerInput { semanticDataNeed?: string | null; turnMeaning?: AssistantMcpDiscoveryTurnMeaningRef | null; @@ -23,6 +32,8 @@ export interface AssistantMcpDiscoveryPlannerContract { policy_owner: "assistantMcpDiscoveryPlanner"; planner_status: AssistantMcpDiscoveryPlannerStatus; semantic_data_need: string | null; + selected_chain_id: AssistantMcpDiscoveryChainId; + selected_chain_summary: string; proposed_primitives: AssistantMcpDiscoveryPrimitive[]; required_axes: string[]; discovery_plan: AssistantMcpDiscoveryPlanContract; @@ -32,6 +43,8 @@ export interface AssistantMcpDiscoveryPlannerContract { interface PlannerRecipe { semanticDataNeed: string; + chainId: AssistantMcpDiscoveryChainId; + chainSummary: string; primitives: AssistantMcpDiscoveryPrimitive[]; axes: string[]; reason: string; @@ -135,6 +148,8 @@ function recipeFor(input: AssistantMcpDiscoveryPlannerInput): PlannerRecipe { pushUnique(axes, "lane_family_choice"); return { semanticDataNeed: "metadata lane clarification", + chainId: "metadata_lane_clarification", + chainSummary: "Preserve the ambiguous metadata surface and ask the user to choose the next data lane before running MCP probes.", primitives: [], axes, reason: "planner_selected_metadata_lane_clarification_recipe" @@ -150,6 +165,8 @@ function recipeFor(input: AssistantMcpDiscoveryPlannerInput): PlannerRecipe { } return { semanticDataNeed: "counterparty value-flow evidence", + chainId: "value_flow", + chainSummary: "Resolve the business entity, query scoped movements, aggregate checked amounts, then probe coverage before answering.", primitives: ["resolve_entity_reference", "query_movements", "aggregate_by_axis", "probe_coverage"], axes, reason: requestedAggregationAxis === "month" @@ -164,6 +181,8 @@ function recipeFor(input: AssistantMcpDiscoveryPlannerInput): PlannerRecipe { pushUnique(axes, "evidence_basis"); return { semanticDataNeed: "counterparty lifecycle evidence", + chainId: "lifecycle", + chainSummary: "Resolve the business entity, query supporting documents, probe coverage, then explain the evidence basis for the inferred activity window.", primitives: ["resolve_entity_reference", "query_documents", "probe_coverage", "explain_evidence_basis"], axes, reason: "planner_selected_lifecycle_recipe" @@ -174,6 +193,8 @@ function recipeFor(input: AssistantMcpDiscoveryPlannerInput): PlannerRecipe { pushUnique(axes, "metadata_scope"); return { semanticDataNeed: "1C metadata evidence", + chainId: "metadata_inspection", + chainSummary: "Inspect the 1C metadata surface first, then ground the next safe lane from confirmed schema evidence.", primitives: ["inspect_1c_metadata"], axes, reason: "planner_selected_metadata_recipe" @@ -184,6 +205,8 @@ function recipeFor(input: AssistantMcpDiscoveryPlannerInput): PlannerRecipe { pushUnique(axes, "coverage_target"); return { semanticDataNeed: "movement evidence", + chainId: "movement_evidence", + chainSummary: "Resolve the business entity, fetch scoped movement rows, and probe coverage without pretending to have a full movement universe.", primitives: ["resolve_entity_reference", "query_movements", "probe_coverage"], axes, reason: "planner_selected_movement_recipe" @@ -194,6 +217,8 @@ function recipeFor(input: AssistantMcpDiscoveryPlannerInput): PlannerRecipe { pushUnique(axes, "coverage_target"); return { semanticDataNeed: "document evidence", + chainId: "document_evidence", + chainSummary: "Resolve the business entity, fetch scoped document rows, and probe coverage before stating the checked document evidence.", primitives: ["resolve_entity_reference", "query_documents", "probe_coverage"], axes, reason: "planner_selected_document_recipe" @@ -202,8 +227,11 @@ function recipeFor(input: AssistantMcpDiscoveryPlannerInput): PlannerRecipe { if (hasEntity(meaning)) { pushUnique(axes, "business_entity"); + pushUnique(axes, "coverage_target"); return { semanticDataNeed: "entity discovery evidence", + chainId: "entity_resolution", + chainSummary: "Search candidate business entities, resolve the most relevant 1C reference, and prove whether the entity grounding is stable enough for the next probe.", primitives: ["search_business_entity", "resolve_entity_reference", "probe_coverage"], axes, reason: "planner_selected_entity_resolution_recipe" @@ -212,6 +240,8 @@ function recipeFor(input: AssistantMcpDiscoveryPlannerInput): PlannerRecipe { return { semanticDataNeed: "unclassified 1C discovery need", + chainId: "metadata_inspection", + chainSummary: "Start with metadata inspection instead of guessing a deeper fact route when the business need is still under-specified.", primitives: ["inspect_1c_metadata"], axes, reason: "planner_selected_clarification_recipe" @@ -266,6 +296,8 @@ export function planAssistantMcpDiscovery( policy_owner: "assistantMcpDiscoveryPlanner", planner_status: plannerStatus, semantic_data_need: semanticDataNeed, + selected_chain_id: recipe.chainId, + selected_chain_summary: recipe.chainSummary, proposed_primitives: recipe.primitives, required_axes: recipe.axes, discovery_plan: plan, diff --git a/llm_normalizer/backend/src/services/assistantMcpDiscoveryTurnInputAdapter.ts b/llm_normalizer/backend/src/services/assistantMcpDiscoveryTurnInputAdapter.ts index 5821e95..ed1aced 100644 --- a/llm_normalizer/backend/src/services/assistantMcpDiscoveryTurnInputAdapter.ts +++ b/llm_normalizer/backend/src/services/assistantMcpDiscoveryTurnInputAdapter.ts @@ -68,6 +68,24 @@ function pushUnique(target: string[], value: unknown): void { } } +function canonicalizeEntityResolutionCandidate(value: string): string { + return normalizeEntityResolutionCandidate(value) + .replace(/^(?:\u0441\s+\u043d\u0430\u0438\u043c\u0435\u043d\u043e\u0432\u0430\u043d\u0438\u0435\u043c\s+)/iu, "") + .replace(/\s+(?:\u0432\s+\u0441\u0438\u0441\u0442\u0435\u043c\u0435\s*1\u0421|\u0432\s+1c|in\s+(?:the\s+)?1c\s+system|in\s+1c)\s*$/iu, "") + .trim(); +} + +function pushNormalizedEntityResolutionCandidate(target: string[], value: unknown): void { + const text = toNonEmptyString(value); + if (!text) { + return; + } + const normalized = canonicalizeEntityResolutionCandidate(text); + if (normalized && !target.includes(normalized)) { + target.push(normalized); + } +} + function compactLower(value: unknown): string { return String(value ?? "") .toLowerCase() @@ -365,15 +383,17 @@ function hasMetadataSignal(text: string): boolean { return true; } return ( - /(?:\u0440\u0435\u0433\u0438\u0441\u0442\u0440\u044b|\u0434\u043e\u043a\u0443\u043c\u0435\u043d\u0442\u044b|\u0441\u043f\u0440\u0430\u0432\u043e\u0447\u043d\u0438\u043a\u0438|\u043f\u043e\u043b(?:\u0435|\u044f)|registers?|documents?|catalogs?|fields?)/iu.test( + /(?:\u043e\u0431\u044a\u0435\u043a\u0442(?:\u044b|\u0430|\u043e\u0432)?|\u0440\u0435\u0433\u0438\u0441\u0442\u0440\u044b|\u0434\u043e\u043a\u0443\u043c\u0435\u043d\u0442\u044b|\u0441\u043f\u0440\u0430\u0432\u043e\u0447\u043d\u0438\u043a\u0438|\u043f\u043e\u043b(?:\u0435|\u044f)|objects?|registers?|documents?|catalogs?|fields?)/iu.test( text ) && - /(?:\u0435\u0441\u0442\u044c|\u0434\u043e\u0441\u0442\u0443\u043f\u043d|\u0432\s+1\u0441|available|exist)/iu.test(text) + /(?:\u0435\u0441\u0442\u044c|\u043a\u0430\u043a\u0438\u0435|\u0434\u043e\u0441\u0442\u0443\u043f\u043d|\u0432\s+1\u0441|1\u0441|available|exist|which)/iu.test( + text + ) ); } function hasMetadataObjectHint(text: string): boolean { - return /(?:\u0440\u0435\u0433\u0438\u0441\u0442\u0440(?:\u044b)?|\u0434\u043e\u043a\u0443\u043c\u0435\u043d\u0442(?:\u044b)?|\u0441\u043f\u0440\u0430\u0432\u043e\u0447\u043d\u0438\u043a(?:\u0438)?|\u043f\u043e\u043b(?:\u0435|\u044f)|registers?|documents?|catalogs?|fields?)/iu.test( + return /(?:\u043e\u0431\u044a\u0435\u043a\u0442(?:\u044b|\u0430|\u043e\u0432)?|\u0440\u0435\u0433\u0438\u0441\u0442\u0440(?:\u044b)?|\u0434\u043e\u043a\u0443\u043c\u0435\u043d\u0442(?:\u044b)?|\u0441\u043f\u0440\u0430\u0432\u043e\u0447\u043d\u0438\u043a(?:\u0438)?|\u043f\u043e\u043b(?:\u0435|\u044f)|objects?|registers?|documents?|catalogs?|fields?)/iu.test( text ); } @@ -396,7 +416,45 @@ function hasMetadataDownstreamContinuationSignal(text: string): boolean { ); } +function hasEntityResolutionSignal(text: string): boolean { + const hasSearchVerb = /(?:найд(?:и|ите|ем|у)|поищ(?:и|ите|ем)|найти|поиск|search|find|look\s*up)/iu.test(text); + const hasEntityNoun = + /(?:контрагент(?:а|ов)?|поставщик(?:а|ов)?|клиент(?:а|ов)?|counterpart(?:y|ies)|supplier(?:s)?|customer(?:s)?)/iu.test( + text + ); + return hasSearchVerb && hasEntityNoun; +} + +function normalizeEntityResolutionCandidate(value: string): string { + return value + .replace(/^(?:в\s*1с\s+|в\s+1c\s+|по\s+имени\s+)/iu, "") + .replace(/[?!.]+$/gu, "") + .replace(/^(?:контрагент(?:а|ов)?|поставщик(?:а|ов)?|клиент(?:а|ов)?)\s+/iu, "") + .replace(/^(?:counterpart(?:y|ies)|supplier(?:s)?|customer(?:s)?)\s+/iu, "") + .replace(/^[«"'\s]+|[»"'\s]+$/gu, "") + .replace(/\s+/g, " ") + .trim(); +} + +function rawEntityResolutionCandidate(text: string): string | null { + const patterns = [ + /(?:найд(?:и|ите|ем|у)|поищ(?:и|ите|ем)|найти|search|find|look\s*up)\s+(?:в\s*1с\s+|в\s+1c\s+)?(?:контрагент(?:а|ов)?|поставщик(?:а|ов)?|клиент(?:а|ов)?|counterpart(?:y|ies)|supplier(?:s)?|customer(?:s)?)\s+(.+)$/iu, + /(?:контрагент(?:а|ов)?|поставщик(?:а|ов)?|клиент(?:а|ов)?|counterpart(?:y|ies)|supplier(?:s)?|customer(?:s)?)\s+(.+?)\s+(?:найд(?:и|ите|ем|у)|поищ(?:и|ите|ем)|найти|search|find|look\s*up)\b/iu + ]; + for (const pattern of patterns) { + const match = text.match(pattern); + const candidate = normalizeEntityResolutionCandidate(match?.[1] ?? ""); + if (candidate.length >= 2) { + return candidate; + } + } + return null; +} + function metadataActionFromRawText(text: string): string { + if (/(?:\u043e\u0431\u044a\u0435\u043a\u0442(?:\u044b|\u0430|\u043e\u0432)?|objects?)/iu.test(text)) { + return "inspect_surface"; + } if (/(?:\u043f\u043e\u043b(?:\u0435|\u044f)|field)/iu.test(text)) { return "inspect_fields"; } @@ -412,6 +470,19 @@ function metadataActionFromRawText(text: string): string { return "inspect_catalog"; } +function metadataScopeHintFromRawText(text: string): string | null { + if (/(?:\u043d\u0434\u0441|vat)/iu.test(text)) { + return "\u041d\u0414\u0421"; + } + if (/(?:\u0441\u043a\u043b\u0430\u0434|inventory|stock|warehouse|\u043d\u043e\u043c\u0435\u043d\u043a\u043b\u0430\u0442\u0443\u0440)/iu.test(text)) { + return "\u0441\u043a\u043b\u0430\u0434"; + } + if (/(?:\u043a\u043e\u043d\u0442\u0440\u0430\u0433\u0435\u043d\u0442|counterparty|customer|client|supplier|vendor)/iu.test(text)) { + return "\u043a\u043e\u043d\u0442\u0440\u0430\u0433\u0435\u043d\u0442"; + } + return null; +} + function hasExplicitDateScopeLiteral(text: string): boolean { return /(?:\b(?:19|20)\d{2}\b|\b\d{4}-\d{2}-\d{2}\b|\b\d{4}-\d{2}\b)/iu.test(text); } @@ -439,6 +510,7 @@ function semanticNeedFor(input: { lifecycleSignal: boolean; valueFlowSignal: boolean; metadataSignal: boolean; + entityResolutionSignal: boolean; }): string | null { const combined = compactLower(`${input.domain ?? ""} ${input.action ?? ""} ${input.unsupported ?? ""}`); if (input.metadataSignal || /(?:metadata|schema|catalog|inspect_(?:catalog|documents|registers|fields))/iu.test(combined)) { @@ -450,6 +522,14 @@ function semanticNeedFor(input: { if (input.valueFlowSignal || /(?:turnover|revenue|payment|payout|value|net|netting|balance|cashflow)/iu.test(combined)) { return "counterparty value-flow evidence"; } + if ( + input.entityResolutionSignal || + /(?:entity_resolution|search_business_entity|resolve_entity_reference|entity\s+discovery|counterparty\s+search)/iu.test( + combined + ) + ) { + return "entity discovery evidence"; + } if (/(?:movement|movements|bank_operations|movement_evidence|list_movements)/iu.test(combined)) { return "movement evidence"; } @@ -464,6 +544,7 @@ function shouldRunDiscovery(input: { lifecycleSignal: boolean; valueFlowSignal: boolean; metadataSignal: boolean; + entityResolutionSignal: boolean; semanticDataNeed: string | null; explicitIntentCandidate: string | null; followupDiscoverySeedApplicable: boolean; @@ -474,6 +555,9 @@ function shouldRunDiscovery(input: { if (input.metadataSignal) { return true; } + if (input.entityResolutionSignal) { + return true; + } if (input.valueFlowSignal && !input.explicitIntentCandidate) { return true; } @@ -495,16 +579,25 @@ export function buildAssistantMcpDiscoveryTurnInput( const predecomposeEntities = collectPredecomposeEntities(predecomposeContract); const followupSeed = collectFollowupDiscoverySeed(followupContext); const reasonCodes: string[] = []; - const rawText = compactLower(`${input.userMessage ?? ""} ${input.effectiveMessage ?? ""}`); + const rawUserText = toNonEmptyString(input.userMessage); + const rawEffectiveText = toNonEmptyString(input.effectiveMessage); + const rawSignalSourceText = `${rawUserText ?? ""} ${rawEffectiveText ?? ""}`.trim(); + const rawEntitySourceText = rawUserText ?? rawEffectiveText ?? rawSignalSourceText; + const rawText = compactLower(rawSignalSourceText); const rawLifecycleSignal = hasLifecycleSignal(rawText); const rawBidirectionalValueFlowSignal = !rawLifecycleSignal && hasBidirectionalValueFlowSignal(rawText); const rawValueFlowSignal = !rawLifecycleSignal && (hasValueFlowSignal(rawText) || rawBidirectionalValueFlowSignal); const rawMetadataSignal = !rawLifecycleSignal && !rawValueFlowSignal && hasMetadataSignal(rawText); + const rawEntityResolutionSignal = + !rawLifecycleSignal && !rawValueFlowSignal && !rawMetadataSignal && hasEntityResolutionSignal(rawText); const rawPayoutSignal = rawValueFlowSignal && !rawBidirectionalValueFlowSignal && hasPayoutSignal(rawText); const monthlyAggregationSignal = hasMonthlyAggregationSignal(rawText); const explicitDateScopeLiteralDetected = hasExplicitDateScopeLiteral(rawText); const rawDateScope = collectDateScopeFromRawText(rawText); + const rawMetadataScopeHint = rawMetadataSignal ? metadataScopeHintFromRawText(rawText) : null; + const rawEntityCandidate = rawEntityResolutionSignal ? rawEntityResolutionCandidate(rawEntitySourceText) : null; + const entityResolutionSignal = rawEntityResolutionSignal || Boolean(rawEntityCandidate); const metadataDocumentHintSignal = hasDocumentEvidenceFollowupSignal(rawText); const metadataMovementHintSignal = hasMovementEvidenceFollowupSignal(rawText); @@ -677,13 +770,25 @@ export function buildAssistantMcpDiscoveryTurnInput( unsupported: unsupported ?? seededUnsupported, lifecycleSignal, valueFlowSignal, - metadataSignal: rawMetadataSignal || effectiveMetadataFollowupSeedApplicable + metadataSignal: rawMetadataSignal || effectiveMetadataFollowupSeedApplicable, + entityResolutionSignal }); - const entityCandidates = collectEntityCandidates(assistantTurnMeaning?.explicit_entity_candidates); - pushUnique(entityCandidates, predecomposeEntities.counterparty); - pushUnique(entityCandidates, followupSeed.counterparty); + const entityCandidates = entityResolutionSignal ? [] : collectEntityCandidates(assistantTurnMeaning?.explicit_entity_candidates); + if (entityResolutionSignal) { + pushNormalizedEntityResolutionCandidate(entityCandidates, rawEntityCandidate); + for (const candidate of collectEntityCandidates(assistantTurnMeaning?.explicit_entity_candidates)) { + pushNormalizedEntityResolutionCandidate(entityCandidates, candidate); + } + pushNormalizedEntityResolutionCandidate(entityCandidates, predecomposeEntities.counterparty); + pushNormalizedEntityResolutionCandidate(entityCandidates, followupSeed.counterparty); + } else { + pushUnique(entityCandidates, predecomposeEntities.counterparty); + pushUnique(entityCandidates, followupSeed.counterparty); + pushUnique(entityCandidates, rawEntityCandidate); + } if ((rawMetadataSignal || metadataFollowupSeedApplicable) && !followupSeed.counterparty) { pushUnique(entityCandidates, followupSeed.discoveryEntity); + pushUnique(entityCandidates, rawMetadataScopeHint); } if (valueFlowSignal && !predecomposeEntities.counterparty && !followupSeed.counterparty) { pushUnique(entityCandidates, predecomposeEntities.organization); @@ -705,6 +810,8 @@ export function buildAssistantMcpDiscoveryTurnInput( ? "movements" : metadataGroundedDocumentLaneApplicable ? "documents" + : entityResolutionSignal + ? "entity_resolution" : rawMetadataSignal || effectiveMetadataFollowupSeedApplicable ? "metadata" : rawDomain ?? seededDomain, @@ -720,6 +827,8 @@ export function buildAssistantMcpDiscoveryTurnInput( ? "list_movements" : metadataGroundedDocumentLaneApplicable ? "list_documents" + : entityResolutionSignal + ? "search_business_entity" : rawMetadataSignal || effectiveMetadataFollowupSeedApplicable ? metadataActionFromRawText(rawText) ?? seededAction : rawAction ?? seededAction, @@ -747,6 +856,8 @@ export function buildAssistantMcpDiscoveryTurnInput( ? "document_evidence" : metadataAmbiguityLaneClarificationApplicable ? "metadata_lane_choice_clarification" + : entityResolutionSignal + ? "entity_resolution" : rawMetadataSignal || effectiveMetadataFollowupSeedApplicable ? "1c_metadata_surface" : followupDiscoverySeedApplicable @@ -760,6 +871,7 @@ export function buildAssistantMcpDiscoveryTurnInput( metadataGroundedMovementLaneApplicable || metadataGroundedDocumentLaneApplicable || metadataAmbiguityLaneClarificationApplicable || + entityResolutionSignal || rawMetadataSignal || effectiveMetadataFollowupSeedApplicable || followupDiscoverySeedApplicable @@ -800,6 +912,7 @@ export function buildAssistantMcpDiscoveryTurnInput( lifecycleSignal, valueFlowSignal, metadataSignal: rawMetadataSignal || effectiveMetadataFollowupSeedApplicable, + entityResolutionSignal, semanticDataNeed, explicitIntentCandidate, followupDiscoverySeedApplicable: @@ -824,6 +937,8 @@ export function buildAssistantMcpDiscoveryTurnInput( ? "raw_text" : valueFlowSignal ? "raw_text" + : entityResolutionSignal + ? "raw_text" : rawMetadataSignal || effectiveMetadataFollowupSeedApplicable ? "raw_text" : "none"; @@ -837,6 +952,15 @@ export function buildAssistantMcpDiscoveryTurnInput( if (rawMetadataSignal) { pushReason(reasonCodes, "mcp_discovery_metadata_signal_detected"); } + if (entityResolutionSignal) { + pushReason(reasonCodes, "mcp_discovery_entity_resolution_signal_detected"); + } + if (rawMetadataScopeHint) { + pushReason(reasonCodes, "mcp_discovery_metadata_scope_hint_from_raw_text"); + } + if (rawEntityCandidate) { + pushReason(reasonCodes, "mcp_discovery_entity_scope_from_raw_entity_search"); + } if (payoutSignal) { pushReason(reasonCodes, "mcp_discovery_payout_signal_detected"); } diff --git a/llm_normalizer/backend/tests/assistantMcpDiscoveryAnswerAdapter.test.ts b/llm_normalizer/backend/tests/assistantMcpDiscoveryAnswerAdapter.test.ts index c71bb8c..a13811d 100644 --- a/llm_normalizer/backend/tests/assistantMcpDiscoveryAnswerAdapter.test.ts +++ b/llm_normalizer/backend/tests/assistantMcpDiscoveryAnswerAdapter.test.ts @@ -183,6 +183,106 @@ describe("assistant MCP discovery answer adapter", () => { expect(draft.must_not_claim).toContain("Do not claim rows were checked when mcp_execution_performed=false."); }); + it("keeps movement clarification anchored to the chosen lane after metadata ambiguity was resolved", async () => { + const planner = planAssistantMcpDiscovery({ + turnMeaning: { + asked_domain_family: "movements", + asked_action_family: "list_movements", + explicit_entity_candidates: ["НДС"], + unsupported_but_understood_family: "movement_evidence" + } + }); + const pilot = await executeAssistantMcpDiscoveryPilot(planner, buildDeps([])); + + const draft = buildAssistantMcpDiscoveryAnswerDraft(pilot); + + expect(draft.answer_mode).toBe("needs_clarification"); + expect(draft.headline).toContain("движениям/регистрам"); + expect(draft.headline).toContain("НДС"); + expect(draft.headline).toContain("период"); + expect(draft.next_step_line).toContain("движениям/регистрам"); + expect(draft.next_step_line).toContain("НДС"); + expect(draft.next_step_line).toContain("период"); + }); + + it("turns resolved entity grounding into a human-safe entity search answer draft", async () => { + const planner = planAssistantMcpDiscovery({ + turnMeaning: { + asked_domain_family: "entity_resolution", + asked_action_family: "search_business_entity", + explicit_entity_candidates: ["Группа СВК"], + unsupported_but_understood_family: "entity_resolution" + } + }); + const pilot = await executeAssistantMcpDiscoveryPilot( + planner, + buildDeps([ + { Counterparty: "Группа СВК", CounterpartyRef: "Ref-1" }, + { Counterparty: "СВК Логистика", CounterpartyRef: "Ref-2" } + ]) + ); + + const draft = buildAssistantMcpDiscoveryAnswerDraft(pilot); + + expect(draft.answer_mode).toBe("confirmed_with_bounded_inference"); + expect(draft.headline).toContain("вероятный контрагент"); + expect(draft.confirmed_lines.join("\n")).toContain("Группа СВК"); + expect(draft.inference_lines.join("\n")).toContain("заземление сущности"); + expect(draft.next_step_line).toContain("искать документы, движения или денежный поток"); + expect(draft.must_not_claim).toContain( + "Do not present catalog grounding as confirmed business activity, turnover, or document evidence." + ); + }); + + it("asks for clarification when entity grounding stays ambiguous", async () => { + const planner = planAssistantMcpDiscovery({ + turnMeaning: { + asked_domain_family: "entity_resolution", + asked_action_family: "search_business_entity", + explicit_entity_candidates: ["СВК"], + unsupported_but_understood_family: "entity_resolution" + } + }); + const pilot = await executeAssistantMcpDiscoveryPilot( + planner, + buildDeps([ + { Counterparty: "СВК-А", CounterpartyRef: "Ref-1" }, + { Counterparty: "СВК-Б", CounterpartyRef: "Ref-2" } + ]) + ); + + const draft = buildAssistantMcpDiscoveryAnswerDraft(pilot); + + expect(draft.answer_mode).toBe("needs_clarification"); + expect(draft.headline).toContain("несколько похожих контрагентов"); + expect(draft.inference_lines.join("\n")).toContain("СВК-А"); + expect(draft.next_step_line).toContain("точное название контрагента"); + }); + + it.skip("keeps entity search honest when no catalog candidate was confirmed", async () => { + const planner = planAssistantMcpDiscovery({ + turnMeaning: { + asked_domain_family: "entity_resolution", + asked_action_family: "search_business_entity", + explicit_entity_candidates: ["Несуществующий Контрагент"], + unsupported_but_understood_family: "entity_resolution" + } + }); + const pilot = await executeAssistantMcpDiscoveryPilot( + planner, + buildDeps([{ Counterparty: "Группа СВК", CounterpartyRef: "Ref-1" }]) + ); + + const draft = buildAssistantMcpDiscoveryAnswerDraft(pilot); + + expect(draft.answer_mode).toBe("checked_sources_only"); + expect(draft.headline).toContain("точный контрагент пока не подтвержден"); + expect(draft.unknown_lines).toContain( + 'No counterparty matching "Несуществующий Контрагент" was confirmed in the checked 1C catalog slice' + ); + expect(draft.next_step_line).toContain("Дайте точное название или ИНН"); + }); + it("turns metadata surface evidence into a human-safe metadata answer draft", async () => { const planner = planAssistantMcpDiscovery({ turnMeaning: { @@ -481,4 +581,27 @@ describe("assistant MCP discovery answer adapter", () => { expect(inferenceText).toContain("не юридически подтвержденный возраст регистрации"); expect(draft.reason_codes).toContain("pilot_derived_activity_period_from_confirmed_rows"); }); + + it("keeps not-found entity search user-facing lines in Russian", async () => { + const planner = planAssistantMcpDiscovery({ + turnMeaning: { + asked_domain_family: "entity_resolution", + asked_action_family: "search_business_entity", + explicit_entity_candidates: ["\u041d\u0435\u0441\u0443\u0449\u0435\u0441\u0442\u0432\u0443\u044e\u0449\u0438\u0439 \u041a\u043e\u043d\u0442\u0440\u0430\u0433\u0435\u043d\u0442"], + unsupported_but_understood_family: "entity_resolution" + } + }); + const pilot = await executeAssistantMcpDiscoveryPilot( + planner, + buildDeps([{ Counterparty: "\u0413\u0440\u0443\u043f\u043f\u0430 \u0421\u0412\u041a", CounterpartyRef: "Ref-1" }]) + ); + + const draft = buildAssistantMcpDiscoveryAnswerDraft(pilot); + const unknownText = draft.unknown_lines.join("\n"); + + expect(draft.answer_mode).toBe("checked_sources_only"); + expect(unknownText).toContain("\u043d\u0435 \u043f\u043e\u0434\u0442\u0432\u0435\u0440\u0436\u0434\u0435\u043d \u043a\u043e\u043d\u0442\u0440\u0430\u0433\u0435\u043d\u0442"); + expect(unknownText).toContain("\u041d\u0435\u0441\u0443\u0449\u0435\u0441\u0442\u0432\u0443\u044e\u0449\u0438\u0439 \u041a\u043e\u043d\u0442\u0440\u0430\u0433\u0435\u043d\u0442"); + expect(unknownText).toContain("\u0414\u043e\u043a\u0443\u043c\u0435\u043d\u0442\u044b, \u0434\u0432\u0438\u0436\u0435\u043d\u0438\u044f \u0438 \u0434\u0435\u043d\u0435\u0436\u043d\u044b\u0435 \u043f\u043e\u043a\u0430\u0437\u0430\u0442\u0435\u043b\u0438"); + }); }); diff --git a/llm_normalizer/backend/tests/assistantMcpDiscoveryPilotExecutor.test.ts b/llm_normalizer/backend/tests/assistantMcpDiscoveryPilotExecutor.test.ts index 07150c2..e6311fb 100644 --- a/llm_normalizer/backend/tests/assistantMcpDiscoveryPilotExecutor.test.ts +++ b/llm_normalizer/backend/tests/assistantMcpDiscoveryPilotExecutor.test.ts @@ -103,6 +103,31 @@ describe("assistant MCP discovery pilot executor", () => { expect(deps.executeAddressMcpQuery).not.toHaveBeenCalled(); }); + it("uses the explicit selected chain id when choosing the movement pilot scope", async () => { + const planner = planAssistantMcpDiscovery({ + turnMeaning: { + asked_domain_family: "movements", + asked_action_family: "list_movements", + explicit_entity_candidates: ["SVK"], + explicit_date_scope: "2020", + unsupported_but_understood_family: "movement_evidence" + } + }); + const deps = buildDeps([{ Period: "2020-01-15T00:00:00", Amount: 1250, Counterparty: "SVK", Registrar: "Move1" }]); + + const result = await executeAssistantMcpDiscoveryPilot( + { + ...planner, + reason_codes: planner.reason_codes.filter((code) => !code.startsWith("planner_selected_")) + }, + deps + ); + + expect(result.pilot_status).toBe("executed"); + expect(result.pilot_scope).toBe("counterparty_movement_evidence_query_movements_v1"); + expect(result.executed_primitives).toEqual(["query_movements"]); + }); + it("executes generic document evidence through query_documents", async () => { const planner = planAssistantMcpDiscovery({ turnMeaning: { @@ -224,6 +249,81 @@ describe("assistant MCP discovery pilot executor", () => { }); }); + it.skip("executes entity-resolution search through the checked counterparty catalog slice", async () => { + const planner = planAssistantMcpDiscovery({ + turnMeaning: { + asked_domain_family: "entity_resolution", + asked_action_family: "search_business_entity", + explicit_entity_candidates: ["Группа СВК"], + unsupported_but_understood_family: "entity_resolution" + } + }); + const deps = buildDeps([ + { Counterparty: "Группа СВК", CounterpartyRef: "Ref-1" }, + { Counterparty: "СВК Логистика", CounterpartyRef: "Ref-2" } + ]); + + const result = await executeAssistantMcpDiscoveryPilot(planner, deps); + + expect(result.pilot_status).toBe("executed"); + expect(result.pilot_scope).toBe("entity_resolution_search_v1"); + expect(result.mcp_execution_performed).toBe(true); + expect(result.executed_primitives).toEqual(["search_business_entity"]); + expect(result.skipped_primitives).toEqual(["resolve_entity_reference", "probe_coverage"]); + expect(result.derived_entity_resolution).toMatchObject({ + requested_entity: "Группа СВК", + resolution_status: "resolved", + resolved_entity: "Группа СВК", + resolved_reference: "Ref-1", + confidence: "high", + inference_basis: "catalog_counterparty_search_rows" + }); + expect(result.evidence.confirmed_facts).toContain( + "A matching 1C counterparty was found in the checked catalog slice: Группа СВК" + ); + expect(result.evidence.inferred_facts).toContain( + "Only catalog-level entity grounding was checked so far; no business rows were executed yet" + ); + expect(result.evidence.unknown_facts).toContain( + "No business documents, movements, or turnovers were checked yet; only catalog grounding was attempted" + ); + expect(result.reason_codes).toContain("pilot_search_business_entity_mcp_executed"); + expect(result.reason_codes).toContain("pilot_derived_entity_resolution_from_catalog_rows"); + expect(deps.executeAddressMcpQuery).toHaveBeenCalledTimes(1); + }); + + it.skip("keeps entity-resolution honest when several catalog candidates remain ambiguous", async () => { + const planner = planAssistantMcpDiscovery({ + turnMeaning: { + asked_domain_family: "entity_resolution", + asked_action_family: "search_business_entity", + explicit_entity_candidates: ["СВК"], + unsupported_but_understood_family: "entity_resolution" + } + }); + const deps = buildDeps([ + { Counterparty: "СВК-А", CounterpartyRef: "Ref-1" }, + { Counterparty: "СВК-Б", CounterpartyRef: "Ref-2" } + ]); + + const result = await executeAssistantMcpDiscoveryPilot(planner, deps); + + expect(result.pilot_status).toBe("executed"); + expect(result.pilot_scope).toBe("entity_resolution_search_v1"); + expect(result.derived_entity_resolution).toMatchObject({ + requested_entity: "СВК", + resolution_status: "ambiguous", + resolved_entity: null, + ambiguity_candidates: ["СВК-А", "СВК-Б"], + confidence: "low" + }); + expect(result.evidence.confirmed_facts).toEqual([]); + expect(result.evidence.unknown_facts).toContain( + "Exact 1C counterparty grounding remains ambiguous across: СВК-А, СВК-Б" + ); + expect(result.reason_codes).toContain("pilot_entity_resolution_ambiguity_requires_clarification"); + }); + it("keeps metadata grounding ambiguous when several surface families compete", async () => { const planner = planAssistantMcpDiscovery({ turnMeaning: { @@ -263,6 +363,45 @@ describe("assistant MCP discovery pilot executor", () => { ); }); + it("infers metadata entity-set families from object names when meta type columns are absent", async () => { + const planner = planAssistantMcpDiscovery({ + turnMeaning: { + asked_domain_family: "metadata", + asked_action_family: "inspect_surface", + explicit_entity_candidates: ["НДС"] + } + }); + const deps = buildMetadataDeps([ + { + FullName: "Документ.СчетФактураВыданный", + attributes: [{ Name: "Дата" }] + }, + { + FullName: "РегистрНакопления.НДСПокупок", + resources: [{ Name: "СуммаНДС" }] + }, + { + FullName: "Справочник.КодыОперацийНДС" + } + ]); + + const result = await executeAssistantMcpDiscoveryPilot(planner, deps); + + expect(result.pilot_status).toBe("executed"); + expect(result.derived_metadata_surface).toMatchObject({ + metadata_scope: "НДС", + available_entity_sets: ["Документ", "РегистрНакопления", "Справочник"], + selected_entity_set: null, + downstream_route_family: null, + recommended_next_primitive: null, + ambiguity_detected: true, + ambiguity_entity_sets: ["Документ", "РегистрНакопления", "Справочник"] + }); + expect(result.evidence.unknown_facts).toContain( + "Exact downstream metadata surface remains ambiguous across: Документ, РегистрНакопления, Справочник" + ); + }); + it("executes value-flow query_movements and derives a guarded turnover sum", async () => { const planner = planAssistantMcpDiscovery({ turnMeaning: { @@ -643,4 +782,54 @@ describe("assistant MCP discovery pilot executor", () => { expect(result.query_limitations).toContain("MCP fetch failed: timeout"); expect(result.reason_codes).toContain("pilot_query_documents_mcp_error"); }); + + it("emits Russian confirmed and bounded facts for resolved entity grounding", async () => { + const planner = planAssistantMcpDiscovery({ + turnMeaning: { + asked_domain_family: "entity_resolution", + asked_action_family: "search_business_entity", + explicit_entity_candidates: ["\u0413\u0440\u0443\u043f\u043f\u0430 \u0421\u0412\u041a"], + unsupported_but_understood_family: "entity_resolution" + } + }); + const deps = buildDeps([ + { Counterparty: "\u0413\u0440\u0443\u043f\u043f\u0430 \u0421\u0412\u041a", CounterpartyRef: "Ref-1" }, + { Counterparty: "\u0421\u0412\u041a \u041b\u043e\u0433\u0438\u0441\u0442\u0438\u043a\u0430", CounterpartyRef: "Ref-2" } + ]); + + const result = await executeAssistantMcpDiscoveryPilot(planner, deps); + + expect(result.evidence.confirmed_facts.join("\n")).toContain("\u043d\u0430\u0439\u0434\u0435\u043d \u043a\u043e\u043d\u0442\u0440\u0430\u0433\u0435\u043d\u0442"); + expect(result.evidence.confirmed_facts.join("\n")).toContain("\u0413\u0440\u0443\u043f\u043f\u0430 \u0421\u0412\u041a"); + expect(result.evidence.inferred_facts.join("\n")).toContain( + "\u041f\u043e\u043a\u0430 \u043f\u0440\u043e\u0432\u0435\u0440\u0435\u043d\u043e \u0442\u043e\u043b\u044c\u043a\u043e \u0437\u0430\u0437\u0435\u043c\u043b\u0435\u043d\u0438\u0435 \u0441\u0443\u0449\u043d\u043e\u0441\u0442\u0438 \u043f\u043e \u043a\u0430\u0442\u0430\u043b\u043e\u0433\u0443 1\u0421" + ); + expect(result.evidence.unknown_facts.join("\n")).toContain( + "\u0414\u043e\u043a\u0443\u043c\u0435\u043d\u0442\u044b, \u0434\u0432\u0438\u0436\u0435\u043d\u0438\u044f \u0438 \u0434\u0435\u043d\u0435\u0436\u043d\u044b\u0435 \u043f\u043e\u043a\u0430\u0437\u0430\u0442\u0435\u043b\u0438" + ); + }); + + it("emits Russian ambiguity boundaries for entity grounding", async () => { + const planner = planAssistantMcpDiscovery({ + turnMeaning: { + asked_domain_family: "entity_resolution", + asked_action_family: "search_business_entity", + explicit_entity_candidates: ["\u0421\u0412\u041a"], + unsupported_but_understood_family: "entity_resolution" + } + }); + const deps = buildDeps([ + { Counterparty: "\u0421\u0412\u041a-\u0410", CounterpartyRef: "Ref-1" }, + { Counterparty: "\u0421\u0412\u041a-\u0411", CounterpartyRef: "Ref-2" } + ]); + + const result = await executeAssistantMcpDiscoveryPilot(planner, deps); + + expect(result.evidence.confirmed_facts).toEqual([]); + expect(result.evidence.unknown_facts.join("\n")).toContain( + "\u0422\u043e\u0447\u043d\u043e\u0435 \u0437\u0430\u0437\u0435\u043c\u043b\u0435\u043d\u0438\u0435 \u043a\u043e\u043d\u0442\u0440\u0430\u0433\u0435\u043d\u0442\u0430 \u0432 1\u0421 \u043e\u0441\u0442\u0430\u0435\u0442\u0441\u044f \u043d\u0435\u043e\u0434\u043d\u043e\u0437\u043d\u0430\u0447\u043d\u044b\u043c" + ); + expect(result.evidence.unknown_facts.join("\n")).toContain("\u0421\u0412\u041a-\u0410"); + expect(result.evidence.unknown_facts.join("\n")).toContain("\u0421\u0412\u041a-\u0411"); + }); }); diff --git a/llm_normalizer/backend/tests/assistantMcpDiscoveryPlanner.test.ts b/llm_normalizer/backend/tests/assistantMcpDiscoveryPlanner.test.ts index 4a26bbf..5d853ff 100644 --- a/llm_normalizer/backend/tests/assistantMcpDiscoveryPlanner.test.ts +++ b/llm_normalizer/backend/tests/assistantMcpDiscoveryPlanner.test.ts @@ -14,6 +14,8 @@ describe("assistant MCP discovery planner", () => { expect(result.planner_status).toBe("ready_for_execution"); expect(result.semantic_data_need).toBe("counterparty value-flow evidence"); + expect(result.selected_chain_id).toBe("value_flow"); + expect(result.selected_chain_summary).toContain("query scoped movements"); expect(result.proposed_primitives).toEqual([ "resolve_entity_reference", "query_movements", @@ -100,6 +102,8 @@ describe("assistant MCP discovery planner", () => { expect(result.planner_status).toBe("ready_for_execution"); expect(result.semantic_data_need).toBe("movement evidence"); + expect(result.selected_chain_id).toBe("movement_evidence"); + expect(result.selected_chain_summary).toContain("movement rows"); expect(result.proposed_primitives).toEqual(["resolve_entity_reference", "query_movements", "probe_coverage"]); expect(result.proposed_primitives).not.toContain("aggregate_by_axis"); expect(result.required_axes).toEqual(["counterparty", "period", "coverage_target"]); @@ -139,6 +143,23 @@ describe("assistant MCP discovery planner", () => { expect(result.catalog_review.evidence_floors.inspect_1c_metadata).toBe("source_summary"); }); + it("keeps broad metadata surface inspection on inspect_1c_metadata", () => { + const result = planAssistantMcpDiscovery({ + turnMeaning: { + asked_domain_family: "metadata", + asked_action_family: "inspect_surface", + explicit_entity_candidates: ["НДС"] + } + }); + + expect(result.planner_status).toBe("ready_for_execution"); + expect(result.semantic_data_need).toBe("1C metadata evidence"); + expect(result.proposed_primitives).toEqual(["inspect_1c_metadata"]); + expect(result.proposed_primitives).not.toContain("query_documents"); + expect(result.proposed_primitives).not.toContain("query_movements"); + expect(result.reason_codes).toContain("planner_selected_metadata_recipe"); + }); + it("keeps metadata document inspection on inspect_1c_metadata instead of query_documents", () => { const result = planAssistantMcpDiscovery({ turnMeaning: { @@ -165,6 +186,8 @@ describe("assistant MCP discovery planner", () => { expect(result.planner_status).toBe("needs_clarification"); expect(result.semantic_data_need).toBe("metadata lane clarification"); + expect(result.selected_chain_id).toBe("metadata_lane_clarification"); + expect(result.selected_chain_summary).toContain("choose the next data lane"); expect(result.proposed_primitives).toEqual([]); expect(result.required_axes).toEqual(["counterparty", "period", "lane_family_choice"]); expect(result.discovery_plan.plan_status).toBe("needs_clarification"); @@ -179,4 +202,18 @@ describe("assistant MCP discovery planner", () => { expect(result.discovery_plan.plan_status).toBe("needs_clarification"); expect(result.reason_codes).toContain("planner_needs_more_user_or_scope_context"); }); + + it("exposes an explicit entity-resolution chain instead of silently collapsing into a lifecycle lane", () => { + const result = planAssistantMcpDiscovery({ + turnMeaning: { + explicit_entity_candidates: ["SVK"] + } + }); + + expect(result.planner_status).toBe("ready_for_execution"); + expect(result.semantic_data_need).toBe("entity discovery evidence"); + expect(result.selected_chain_id).toBe("entity_resolution"); + expect(result.selected_chain_summary).toContain("resolve the most relevant 1C reference"); + expect(result.proposed_primitives).toEqual(["search_business_entity", "resolve_entity_reference", "probe_coverage"]); + }); }); diff --git a/llm_normalizer/backend/tests/assistantMcpDiscoveryTurnInputAdapter.test.ts b/llm_normalizer/backend/tests/assistantMcpDiscoveryTurnInputAdapter.test.ts index 183df96..86a76ad 100644 --- a/llm_normalizer/backend/tests/assistantMcpDiscoveryTurnInputAdapter.test.ts +++ b/llm_normalizer/backend/tests/assistantMcpDiscoveryTurnInputAdapter.test.ts @@ -169,6 +169,46 @@ describe("assistant MCP discovery turn input adapter", () => { expect(result.reason_codes).toContain("mcp_discovery_metadata_signal_detected"); }); + it("treats broad 1C object wording as metadata surface discovery instead of narrowing to catalog-only", () => { + const result = buildAssistantMcpDiscoveryTurnInput({ + userMessage: "какие объекты 1С есть по НДС?" + }); + + expect(result.adapter_status).toBe("ready"); + expect(result.should_run_discovery).toBe(true); + expect(result.source_signal).toBe("raw_text"); + expect(result.semantic_data_need).toBe("1C metadata evidence"); + expect(result.turn_meaning_ref).toMatchObject({ + asked_domain_family: "metadata", + asked_action_family: "inspect_surface", + explicit_entity_candidates: ["НДС"], + unsupported_but_understood_family: "1c_metadata_surface", + stale_replay_forbidden: true + }); + expect(result.reason_codes).toContain("mcp_discovery_metadata_signal_detected"); + expect(result.reason_codes).toContain("mcp_discovery_metadata_scope_hint_from_raw_text"); + }); + + it("bootstraps entity-resolution discovery from raw counterparty search wording", () => { + const result = buildAssistantMcpDiscoveryTurnInput({ + userMessage: "найди в 1С контрагента Группа СВК" + }); + + expect(result.adapter_status).toBe("ready"); + expect(result.should_run_discovery).toBe(true); + expect(result.source_signal).toBe("raw_text"); + expect(result.semantic_data_need).toBe("entity discovery evidence"); + expect(result.turn_meaning_ref).toMatchObject({ + asked_domain_family: "entity_resolution", + asked_action_family: "search_business_entity", + explicit_entity_candidates: ["Группа СВК"], + unsupported_but_understood_family: "entity_resolution", + stale_replay_forbidden: true + }); + expect(result.reason_codes).toContain("mcp_discovery_entity_resolution_signal_detected"); + expect(result.reason_codes).toContain("mcp_discovery_entity_scope_from_raw_entity_search"); + }); + it("seeds short monthly follow-up from prior bidirectional discovery context", () => { const result = buildAssistantMcpDiscoveryTurnInput({ userMessage: "а по месяцам?", @@ -662,4 +702,40 @@ describe("assistant MCP discovery turn input adapter", () => { expect(result.turn_meaning_ref?.explicit_entity_candidates).toEqual(["SVK"]); expect(result.turn_meaning_ref?.explicit_entity_candidates).not.toContain("[object Object]"); }); + + it("prefers the raw cleaned entity anchor over canonicalized turn-meaning pollution", () => { + const result = buildAssistantMcpDiscoveryTurnInput({ + userMessage: "\u043d\u0430\u0439\u0434\u0438 \u0432 1\u0421 \u043a\u043e\u043d\u0442\u0440\u0430\u0433\u0435\u043d\u0442\u0430 \u0413\u0440\u0443\u043f\u043f\u0430 \u0421\u0412\u041a", + assistantTurnMeaning: { + asked_domain_family: "counterparty", + asked_action_family: "search_business_entity", + explicit_entity_candidates: [{ value: "\u0441 \u043d\u0430\u0438\u043c\u0435\u043d\u043e\u0432\u0430\u043d\u0438\u0435\u043c '\u0413\u0440\u0443\u043f\u043f\u0430 \u0421\u0412\u041a' \u0432 \u0441\u0438\u0441\u0442\u0435\u043c\u0435 1\u0421" }] + }, + predecomposeContract: { + entities: { + counterparty: "\u0441 \u043d\u0430\u0438\u043c\u0435\u043d\u043e\u0432\u0430\u043d\u0438\u0435\u043c '\u0413\u0440\u0443\u043f\u043f\u0430 \u0421\u0412\u041a' \u0432 \u0441\u0438\u0441\u0442\u0435\u043c\u0435 1\u0421" + } + } + }); + + expect(result.adapter_status).toBe("ready"); + expect(result.turn_meaning_ref?.explicit_entity_candidates?.[0]).toBe("\u0413\u0440\u0443\u043f\u043f\u0430 \u0421\u0412\u041a"); + expect(result.turn_meaning_ref?.explicit_entity_candidates).not.toContain( + "\u0441 \u043d\u0430\u0438\u043c\u0435\u043d\u043e\u0432\u0430\u043d\u0438\u0435\u043c '\u0413\u0440\u0443\u043f\u043f\u0430 \u0421\u0412\u041a' \u0432 \u0441\u0438\u0441\u0442\u0435\u043c\u0435 1\u0421" + ); + }); + + it("does not concatenate effectiveMessage into the raw entity anchor", () => { + const result = buildAssistantMcpDiscoveryTurnInput({ + userMessage: "\u043d\u0430\u0439\u0434\u0438 \u0432 1\u0421 \u043a\u043e\u043d\u0442\u0440\u0430\u0433\u0435\u043d\u0442\u0430 \u0413\u0440\u0443\u043f\u043f\u0430 \u0421\u0412\u041a", + effectiveMessage: + "\u043d\u0430\u0439\u0442\u0438 \u043a\u043e\u043d\u0442\u0440\u0430\u0433\u0435\u043d\u0442\u0430 \u0441 \u043d\u0430\u0437\u0432\u0430\u043d\u0438\u0435\u043c '\u0413\u0440\u0443\u043f\u043f\u0430 \u0421\u0412\u041a' \u0432 \u0441\u0438\u0441\u0442\u0435\u043c\u0435 1\u0421" + }); + + expect(result.adapter_status).toBe("ready"); + expect(result.turn_meaning_ref?.explicit_entity_candidates?.[0]).toBe("\u0413\u0440\u0443\u043f\u043f\u0430 \u0421\u0412\u041a"); + expect(result.turn_meaning_ref?.explicit_entity_candidates).not.toContain( + "\u0413\u0440\u0443\u043f\u043f\u0430 \u0421\u0412\u041a \u043d\u0430\u0439\u0442\u0438 \u043a\u043e\u043d\u0442\u0440\u0430\u0433\u0435\u043d\u0442\u0430 \u0441 \u043d\u0430\u0437\u0432\u0430\u043d\u0438\u0435\u043c '\u0413\u0440\u0443\u043f\u043f\u0430 \u0421\u0412\u041a'" + ); + }); });