From d797b3fa994f61879c9c90aa1bfd8a8d787b3299 Mon Sep 17 00:00:00 2001 From: dctouch Date: Tue, 5 May 2026 15:43:47 +0300 Subject: [PATCH] =?UTF-8?q?Open-World:=20=D1=83=D0=B4=D0=B5=D1=80=D0=B6?= =?UTF-8?q?=D0=B0=D1=82=D1=8C=20=D0=B1=D0=B8=D0=B7=D0=BD=D0=B5=D1=81-?= =?UTF-8?q?=D0=BE=D0=B1=D0=B7=D0=BE=D1=80=20=D0=BE=D1=82=20=D0=BC=D1=83?= =?UTF-8?q?=D1=81=D0=BE=D1=80=D0=BD=D1=8B=D1=85=20anchors?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../06 - phase_acceptance_matrix.md | 2 +- .../21 - current_status_canon_2026-05-01.md | 6 +- ...rld_bounded_autonomy_breadth_2026-05-01.md | 24 +++- ...ne_and_semantic_control_gate_2026-05-05.md | 20 ++- .../11 - architecture_turnaround/README.md | 2 +- .../addressCounterpartyIntentSignals.js | 12 +- .../dist/services/addressIntentResolver.js | 36 ++++++ .../assistantMcpDiscoveryTurnInputAdapter.js | 69 +++++++++- .../addressCounterpartyIntentSignals.ts | 12 +- .../src/services/addressIntentResolver.ts | 36 ++++++ .../assistantMcpDiscoveryTurnInputAdapter.ts | 90 ++++++++++++- .../addressIntentResolverRegression.test.ts | 15 ++- ...istantMcpDiscoveryTurnInputAdapter.test.ts | 119 ++++++++++++++++++ 13 files changed, 424 insertions(+), 19 deletions(-) diff --git a/docs/ARCH/11 - architecture_turnaround/06 - phase_acceptance_matrix.md b/docs/ARCH/11 - architecture_turnaround/06 - phase_acceptance_matrix.md index e34e118..3009236 100644 --- a/docs/ARCH/11 - architecture_turnaround/06 - phase_acceptance_matrix.md +++ b/docs/ARCH/11 - architecture_turnaround/06 - phase_acceptance_matrix.md @@ -27,7 +27,7 @@ Current reporting baseline: - Post-F Semantic Integrity Hardening: `99%`, operationally closed/regression gate. - Planner Autonomy Consolidation: `100%` for the declared phase83 slice. - Open-World Business Overview implementation breadth: `~99%` through Slice 25. -- Active next pressure: `Open-World Semantic Control Gate`, accepted module progress `~93%` after manual replay `assistant-stage1-EHMOy3lNFt`. +- Active next pressure: `Open-World Semantic Control Gate`, accepted module progress `~94%` after the first local control-gate cut. ## Archived Execution Snapshot (2026-04-17) diff --git a/docs/ARCH/11 - architecture_turnaround/21 - current_status_canon_2026-05-01.md b/docs/ARCH/11 - architecture_turnaround/21 - current_status_canon_2026-05-01.md index 34cc867..9398738 100644 --- a/docs/ARCH/11 - architecture_turnaround/21 - current_status_canon_2026-05-01.md +++ b/docs/ARCH/11 - architecture_turnaround/21 - current_status_canon_2026-05-01.md @@ -17,7 +17,7 @@ It did not reopen Post-F and it did not prove that the Open-World implementation From this point forward: - `~99%` for Open-World means implementation breadth through `Business Overview Missing Proof Ledger`; -- accepted module progress is `~93%` until the new `Open-World Semantic Control Gate` is fixed and rerun; +- accepted module progress is `~94%` after the first local Semantic Control Gate cut, and remains below closure until the EHMO-derived subset is rerun; - the active work is control-gate hardening, not immediate expansion into more proof families. For the current execution spine, read `23 - current_execution_spine_and_semantic_control_gate_2026-05-05.md`. @@ -55,7 +55,7 @@ For the current execution spine, read `23 - current_execution_spine_and_semantic - Completed active slice: `Business Overview Missing Proof Ledger`: business overview now records machine-readable hard proof gaps for accounting profit/margin, due-date debt aging, inventory reserve/liquidation quality, and vendor/procurement quality, distinguishing proxy-only evidence from reviewed routes that are not wired yet. - Implementation breadth: `~99% (Open-World Bounded Autonomy Breadth through Slice 25)`. - Next active slice: `Open-World Semantic Control Gate`, covering garbage-anchor protection, business-overview continuation, intent dominance, frame hygiene, counterparty/organization arbitration, and final-summary answer shape. -- Active module progress: `~93% (Open-World Bounded Autonomy Breadth, active slice: Semantic Control Gate)`. +- Active module progress: `~94% (Open-World Bounded Autonomy Breadth, active slice: Semantic Control Gate)`. ## Reporting Rule @@ -63,7 +63,7 @@ Use these labels when reporting progress: - `Прогресс модуля: 99% (Post-F Semantic Integrity Hardening, operationally closed/regression gate)` when discussing the Post-F slice itself. - `Прогресс модуля: 100% (Planner Autonomy Consolidation, declared phase83 slice closed)` when discussing the planner-autonomy slice that was just completed. -- `Прогресс модуля: 93% (Open-World Bounded Autonomy Breadth, active slice: Semantic Control Gate)` while discussing current module closure after the EHMO manual replay. +- `Прогресс модуля: 94% (Open-World Bounded Autonomy Breadth, active slice: Semantic Control Gate)` while discussing current module closure after the first local Semantic Control Gate cut. - `Open-World Business Overview implementation breadth: ~99%, semantic acceptance gate still open` when discussing only the already wired Slice 25 breadth. - `Прогресс модуля: X% (Open-World Bounded Autonomy Breadth, active slice: )` for later breadth work after the Semantic Control Gate is accepted. diff --git a/docs/ARCH/11 - architecture_turnaround/22 - open_world_bounded_autonomy_breadth_2026-05-01.md b/docs/ARCH/11 - architecture_turnaround/22 - open_world_bounded_autonomy_breadth_2026-05-01.md index f8dc277..819105a 100644 --- a/docs/ARCH/11 - architecture_turnaround/22 - open_world_bounded_autonomy_breadth_2026-05-01.md +++ b/docs/ARCH/11 - architecture_turnaround/22 - open_world_bounded_autonomy_breadth_2026-05-01.md @@ -779,5 +779,27 @@ Suggested first subset: Current status: - implementation breadth through Slice 25: `~99%`; -- accepted Open-World module progress after EHMO: `~93%`; +- accepted Open-World module progress after the first local Semantic Control Gate cut: `~94%`; - exact P&L, real due-date debt aging, reserve/write-off/liquidation evidence, and vendor-risk engines stay queued behind this semantic gate. + +### Slice 26 local cut 1 - anchor hygiene and overview continuation + +Implemented now: + +- pseudo-entity candidates such as "by these data", "main supplier in", "what is confirmed", and generic proxy/audit tails are rejected before they can become scoped counterparty subjects; +- business-overview follow-ups after `business_overview_route_template_v1` keep the organization/date frame and stay in the bounded analyst overview lane; +- VAT-position wording inside a business-overview continuation no longer lets the generic word "position" drag the turn into inventory selected-object logic; +- resolver-level "by these data" wording is no longer accepted as a loose counterparty anchor for document requests; +- the older top-year company revenue resolver regression expectation was realigned with the current architecture: organization-level best-year wording belongs to business overview discovery, not the exact customer-value route. + +Validation: + +- `npm.cmd test -- assistantMcpDiscoveryTurnInputAdapter.test.ts addressIntentResolverRegression.test.ts`: passed `94` with `6` skipped. +- `npm.cmd test -- addressCounterpartyIntentSignals.test.ts addressQueryRuntimeM23.test.ts assistantMcpDiscoveryTurnInputAdapter.test.ts addressIntentResolverRegression.test.ts`: passed `515` with `6` skipped. +- `npm.cmd run build`: passed. + +Remaining before acceptance: + +- rerun the EHMO-derived semantic subset; +- continue W3/W4/W5/W6 for intent dominance, frame reset, counterparty/organization arbitration, and final-summary answer lane; +- only then rerun the fat manual GUI pack for acceptance. diff --git a/docs/ARCH/11 - architecture_turnaround/23 - current_execution_spine_and_semantic_control_gate_2026-05-05.md b/docs/ARCH/11 - architecture_turnaround/23 - current_execution_spine_and_semantic_control_gate_2026-05-05.md index 9acb548..7425859 100644 --- a/docs/ARCH/11 - architecture_turnaround/23 - current_execution_spine_and_semantic_control_gate_2026-05-05.md +++ b/docs/ARCH/11 - architecture_turnaround/23 - current_execution_spine_and_semantic_control_gate_2026-05-05.md @@ -66,12 +66,22 @@ This gate is not a request to tune the assistant for every weird question in tha Current status should be reported as: - implementation breadth: `~99%` for Open-World Business Overview through Slice 25; -- accepted module progress: `~93% (Open-World Bounded Autonomy Breadth, active slice: Semantic Control Gate)`. +- accepted module progress: `~94% (Open-World Bounded Autonomy Breadth, active slice: Semantic Control Gate)`. -This is not a regression from `99%` to `93%`. It is a metric split: +This is not a regression from `99%` to `94%`. It is a metric split: - `99%` describes wired breadth; -- `93%` describes closure confidence after a broad human semantic replay found control-gate defects. +- `94%` describes closure confidence after the first local control-gate cut; the gate is still not accepted without EHMO-subset replay. + +## Current Local Cut + +Local cut 1 is implemented: + +- W1 starter: garbage anchors and pseudo-entity candidates are filtered before they can become counterparty subjects. +- W2 starter: business-overview continuation wording keeps the bounded overview lane after a previous overview run. +- W3 edge covered locally: VAT-position wording inside a business-overview continuation does not fall into inventory selected-object routing. + +This is local evidence only. It improves the gate but does not close it until the EHMO-derived subset and the fat manual pack are semantically reviewed again. ## Failure Classes To Fix @@ -110,7 +120,7 @@ Each work unit should add focused local tests and then be validated against the ## Acceptance Gate -The current module can move from `~93%` toward closure only after: +The current module can move from `~94%` toward closure only after: - the EHMO-derived critical subset is rerun and semantically reviewed; - old canaries remain green: Post-F, phase83, inventory selected-object, VAT continuity, SVK document/movement chains; @@ -148,7 +158,7 @@ Graphify snapshot at this status cut: Until the semantic control gate is accepted, use: -`Прогресс модуля: 93% (Open-World Bounded Autonomy Breadth, active slice: Semantic Control Gate)` +`Прогресс модуля: 94% (Open-World Bounded Autonomy Breadth, active slice: Semantic Control Gate)` If discussing only the already wired business-overview breadth, say: diff --git a/docs/ARCH/11 - architecture_turnaround/README.md b/docs/ARCH/11 - architecture_turnaround/README.md index 323c166..f36ee5f 100644 --- a/docs/ARCH/11 - architecture_turnaround/README.md +++ b/docs/ARCH/11 - architecture_turnaround/README.md @@ -144,7 +144,7 @@ Current honest status: - bounded-autonomy foundation readiness: `~89%` - open-world bounded-autonomy readiness: `~87%` - active Open-World Bounded Autonomy Breadth implementation breadth: `~99%`, with business-overview evidence fusion, the reviewed `business_overview` catalog/data-need/planner route-fabric slice, the fresh multi-probe runtime bridge, the explicit-period VAT/tax fact-family bridge, the explicit-period debt-position bridge, the explicit-date inventory-position bridge, the open-settlement quality bridge accepted by live semantic replay, selected-item profitability bridged by local semantic/runtime regression tests, contract-date debt age bridged locally, debt staleness-risk proxy bridged locally, debt due-date boundary arbitration bridged locally, inventory reserve/liquidation boundary arbitration bridged locally, supplier/procurement-quality boundary arbitration bridged locally, supplier concentration proxy bridged locally, document/account-section activity profile bridged locally, counterparty population/roles and contract usage profiles bridged locally, yearly operating-flow proxy bridged locally, earnings/best-year wording arbitration bridged locally, profit/margin wording boundary arbitration bridged locally, analyst synthesis added to business-overview answer drafting, company-period trading margin proxy bridged locally, inventory sales-to-stock proxy bridged locally, inventory staleness-risk proxy bridged locally, gap-specific answer shaping bridged locally, and missing proof families recorded as runtime evidence ledger; exact accounting profit/margin, true due-date debt aging/overdue, confirmed vendor-risk/procurement-quality analysis, and confirmed reserve/write-off/liquidation inventory evidence are still pending -- active Open-World Bounded Autonomy Breadth accepted-module progress: `~93%`, because the EHMO manual GUI replay opened the `Open-World Semantic Control Gate` before closure +- active Open-World Bounded Autonomy Breadth accepted-module progress: `~94%`, because the first local `Open-World Semantic Control Gate` cut is implemented, but EHMO-subset replay is still pending - Post-F semantic integrity module progress: `~99%` operationally closed, with remaining risk now treated as next-slice discovery rather than an open blocker inside the closed slice - active inventory-stock breadth slice progress: `100%` for the declared scenario pack, not for arbitrary inventory questions - Planner Autonomy Consolidation progress: `100%` for the declared module, with catalog-fabric, value-flow arbitration, lifecycle bounded inference, broad-evaluation bridge, inventory catalog templates, inventory runtime-boundary honesty, exact inventory recipe bridging, unambiguous metadata-surface lane inference, catalog chain-template scoring, structured chain-match contract exposure, runtime/debug propagation, subject-aware bidirectional comparison arbitration, structured catalog-alignment verdicts, representative alignment regression guard, catalog-alignment reason-code telemetry, explicit `alignment_status` propagation, truth-harness/acceptance-matrix surfacing, soft divergence warning, `catalog_alignment_ok` acceptance invariant, step-level expected catalog-alignment assertions, phase66 and phase32 spec alignment expectations, AGENT source-catalog surfacing, generated phase83 mixed planner-brain replay spec, checked-source user-facing error sanitation, surface-grounded catalog promotion, and guarded live phase83 acceptance validated. Broader unfamiliar 1C asks are now next-module breadth work rather than an open blocker inside this declared slice diff --git a/llm_normalizer/backend/dist/services/addressCounterpartyIntentSignals.js b/llm_normalizer/backend/dist/services/addressCounterpartyIntentSignals.js index aa2d790..ac93d9c 100644 --- a/llm_normalizer/backend/dist/services/addressCounterpartyIntentSignals.js +++ b/llm_normalizer/backend/dist/services/addressCounterpartyIntentSignals.js @@ -56,7 +56,17 @@ function hasUnicodeLikelyCounterpartyAfterBy(text) { "\u0431\u0430\u043d\u043a\u0435", "\u0434\u043e\u043a\u0443\u043c\u0435\u043d\u0442\u0430\u043c", "\u0434\u043e\u043a\u0443\u043c\u0435\u043d\u0442\u044b", - "\u0434\u043e\u043a\u0443\u043c\u0435\u043d\u0442\u0443" + "\u0434\u043e\u043a\u0443\u043c\u0435\u043d\u0442\u0443", + "\u0434\u0430\u043d\u043d\u044b\u043c", + "\u044d\u0442\u0438\u043c", + "\u044d\u0442\u0438\u043c\u0438", + "\u0438\u0442\u043e\u0433\u0443", + "\u0438\u0442\u043e\u0433\u0430\u043c", + "\u0432\u0441\u0435\u043c\u0443", + "\u0432\u0441\u0435\u0439", + "\u0432\u0441\u0435\u043c", + "\u0432\u044b\u0432\u043e\u0434\u0443", + "\u0432\u044b\u0432\u043e\u0434\u0430\u043c" ]); return !stopWords.has(token); } diff --git a/llm_normalizer/backend/dist/services/addressIntentResolver.js b/llm_normalizer/backend/dist/services/addressIntentResolver.js index 49e9bd7..faf1db1 100644 --- a/llm_normalizer/backend/dist/services/addressIntentResolver.js +++ b/llm_normalizer/backend/dist/services/addressIntentResolver.js @@ -1152,6 +1152,30 @@ function isLikelyCounterpartyToken(rawToken) { "этомуже", "томуже" ]); + for (const semanticStopWord of [ + "данным", + "этим", + "этими", + "итогу", + "итогам", + "всему", + "всей", + "всем", + "выводу", + "выводам", + "аудиту", + "прокси", + "покажи", + "показать", + "выведи", + "вывести", + "дай", + "дать", + "какие", + "список" + ]) { + stopWords.add(semanticStopWord); + } return !stopWords.has(token); } function hasPartyAnchorMention(text) { @@ -1658,6 +1682,18 @@ function resolveUnicodeAddressIntentBridge(text) { "сальдо", "дате", "периоду", + "данным", + "этим", + "этими", + "итогу", + "итогам", + "всему", + "всей", + "всем", + "выводу", + "выводам", + "аудиту", + "прокси", "складу", "товару", "этому", diff --git a/llm_normalizer/backend/dist/services/assistantMcpDiscoveryTurnInputAdapter.js b/llm_normalizer/backend/dist/services/assistantMcpDiscoveryTurnInputAdapter.js index 49124b0..bbb9873 100644 --- a/llm_normalizer/backend/dist/services/assistantMcpDiscoveryTurnInputAdapter.js +++ b/llm_normalizer/backend/dist/services/assistantMcpDiscoveryTurnInputAdapter.js @@ -95,11 +95,48 @@ function isActionVerbEntityCandidate(value) { } return new Set(["провести", "показать", "посмотреть", "смотреть", "найти", "искать", "анализ"]).has((0, addressTextRepair_1.normalizeRussianComparableText)(value)); } +function isGarbageSemanticAnchorCandidate(value) { + if (!value) { + return false; + } + const text = compactLower((0, addressTextRepair_1.repairAddressMojibakeText)(value)); + if (!text || text.length < 2) { + return true; + } + const compact = (0, addressTextRepair_1.normalizeRussianComparableText)(text); + if (new Set([ + "данным", + "этим", + "этими", + "итогу", + "итогам", + "всему", + "всей", + "всем", + "выводу", + "выводам", + "аудиту", + "прокси", + "proxy", + "summary", + "overall" + ]).has(compact)) { + return true; + } + if (/^(?:по\s+)?(?:этим|этими)\s+данн\p{L}*$/iu.test(text) || + /^(?:и\s+)?кто\s+(?:главн\p{L}*|основн\p{L}*|крупн\p{L}*)\s+(?:клиент|покупател|поставщик|контрагент)(?:\s+в)?$/iu.test(text) || + /^(?:что|чего)\s+(?:подтвержден\p{L}*|не\s+хватает)/iu.test(text) || + /^(?:можно\s+ли|если\s+нет|дай\s+proxy|дай\s+прокси)/iu.test(text)) { + return true; + } + return false; +} function isInvalidEntityCandidate(value) { return Boolean(value && (isReferentialEntityPlaceholder(value) || isValueFlowPredicateEntityCandidate(value) || - isActionVerbEntityCandidate(value))); + isActionVerbEntityCandidate(value) || + isGarbageSemanticAnchorCandidate(value))); } function normalizeFollowupCounterpartyCandidate(value) { const text = candidateValue(value); @@ -288,6 +325,13 @@ function mapPilotScopeToFollowupMeaning(pilotScope) { unsupported: "1c_metadata_surface" }; } + if (pilotScope === "business_overview_route_template_v1") { + return { + domain: "business_overview", + action: "broad_evaluation", + unsupported: "broad_business_evaluation" + }; + } return { domain: null, action: null, @@ -568,6 +612,23 @@ function hasBusinessOverviewSignal(text) { } return /(?:\u0431\u0438\u0437\u043d\u0435\u0441[-\s]?\u043e\u0431\u0437\u043e\u0440|\u0431\u0438\u0437\u043d\u0435\u0441[-\s]?\u0430\u0443\u0434\u0438\u0442|\u043f\u043e\u043b\u043d\w*\s+\u0430\u043d\u0430\u043b\u0438\u0437\s+(?:\u043a\u043e\u043c\u043f\u0430\u043d|\u0431\u0438\u0437\u043d\u0435\u0441|\u0434\u0435\u044f\u0442\u0435\u043b)|\u0441\u0432\u043e\u0434\u043d\w*\s+\u0430\u043d\u0430\u043b\u0438\u0437\s+(?:\u043a\u043e\u043c\u043f\u0430\u043d|\u0431\u0438\u0437\u043d\u0435\u0441|\u0434\u0435\u044f\u0442\u0435\u043b)|\u043a\u0430\u043a\s+\u0442\u044b\s+\u043e\u0446\u0435\u043d(?:\u0438\u0448\u044c|\u0438)\s+\u0434\u0435\u044f\u0442\u0435\u043b\u044c\u043d\u043e\u0441\u0442|\u043a\u043e\u043c\u043f\u0430\u043d(?:\u0438\u0438|\u0438\u044e|\u0438\u044f)\s+\u0432\s+\u0446\u0435\u043b\u043e\u043c|company\s+(?:analysis|overview)|business\s+(?:overview|audit)|llm[-\s]?audit|бизнес[-\s]?РѕР±Р·РѕСЂ|бизнес[-\s]?аудит)/iu.test(text); } +function hasBusinessOverviewContinuationSignal(text) { + if (!text) { + return false; + } + const normalized = compactLower(text); + const hasEvidenceContinuationCue = /(?:по\s+этим\s+данн|на\s+базе\s+этих\s+данн|из\s+этого|по\s+итогам|что\s+подтвержден|чего\s+не\s+хватает|что\s+пока\s+нельзя|что\s+можно\s+сказать|какой\s+вывод|итогов|резюм|вывод)/iu.test(normalized); + const hasAnalystContinuationCue = /(?:можно\s+ли|если\s+нет|proxy|прокси|аудит|оцен|что\s+думаешь|нормальн\p{L}*\s+прибыл|прибыл|марж|рентаб|ликвидн|просроч|качество\s+долг|риск|налогов\p{L}*\s+вывод)/iu.test(normalized); + const hasTaxContinuationCue = /(?:ндс|vat)[\s\S]{0,120}(?:позици|период|основан|не\s+хватает|налогов\p{L}*\s+вывод)|(?:позици|налогов\p{L}*\s+вывод)[\s\S]{0,80}(?:ндс|vat)/iu.test(normalized); + return hasEvidenceContinuationCue || hasAnalystContinuationCue || hasTaxContinuationCue; +} +function hasBusinessOverviewFollowupSeed(followupSeed) { + return (followupSeed.pilotScope === "business_overview_route_template_v1" || + followupSeed.domain === "business_overview" || + followupSeed.action === "broad_evaluation" || + followupSeed.unsupported === "broad_business_evaluation" || + followupSeed.loopSelectedChainId === "business_overview"); +} function hasValueFlowSignal(text) { return /(?:оборот|выручк|оплат|плат[её]ж|заплат|перечисл|списан|расход|исходящ|входящ|получ(?:ил|ено|ен)|поступил|поступлен|денежн[а-яёa-z0-9_-]*\s+поток|(?): boolean { + return ( + followupSeed.pilotScope === "business_overview_route_template_v1" || + followupSeed.domain === "business_overview" || + followupSeed.action === "broad_evaluation" || + followupSeed.unsupported === "broad_business_evaluation" || + followupSeed.loopSelectedChainId === "business_overview" + ); +} + function hasValueFlowSignal(text: string): boolean { return /(?:оборот|выручк|оплат|плат[её]ж|заплат|перечисл|списан|расход|исходящ|входящ|получ(?:ил|ено|ен)|поступил|поступлен|денежн[а-яёa-z0-9_-]*\s+поток|(? { expect(result.intent).toBe("customer_revenue_and_payments"); }); - it("detects top-year company revenue wording", () => { + it("defers top-year company revenue wording to business overview discovery", () => { const result = resolveAddressIntent("какой у нас самый доходный год"); - expect(result.intent).toBe("customer_revenue_and_payments"); + expect(result.intent).toBe("unknown"); + expect(result.reasons).toContain("unicode_business_overview_earnings_deferred_to_discovery"); }); it("detects specific counterparty turnover wording as revenue profile", () => { @@ -52,6 +53,16 @@ describe("addressIntentResolver regression bridges", () => { expect(result.intent).toBe("list_documents_by_counterparty"); }); + it("does not treat by-these-data wording as a loose counterparty anchor", () => { + const result = resolveAddressIntent( + "\u043f\u043e\u043a\u0430\u0436\u0438 \u0434\u043e\u043a\u0443\u043c\u0435\u043d\u0442\u044b \u043f\u043e \u044d\u0442\u0438\u043c \u0434\u0430\u043d\u043d\u044b\u043c" + ); + + expect(result.intent).not.toBe("list_documents_by_counterparty"); + expect(result.reasons).not.toContain("generic_lookup_with_loose_anchor_fallback"); + expect(result.reasons).not.toContain("unicode_documents_by_counterparty_bridge_signal_detected"); + }); + it("does not collapse very old stock request into generic inventory snapshot", () => { const result = resolveAddressIntent("Есть ли остатки товара, которые закупались очень давно"); diff --git a/llm_normalizer/backend/tests/assistantMcpDiscoveryTurnInputAdapter.test.ts b/llm_normalizer/backend/tests/assistantMcpDiscoveryTurnInputAdapter.test.ts index 42bde19..2dba6fc 100644 --- a/llm_normalizer/backend/tests/assistantMcpDiscoveryTurnInputAdapter.test.ts +++ b/llm_normalizer/backend/tests/assistantMcpDiscoveryTurnInputAdapter.test.ts @@ -2647,4 +2647,123 @@ describe("assistant MCP discovery turn input adapter", () => { expect(result.reason_codes).toContain("mcp_discovery_metadata_scoped_lane_without_subject"); expect(result.reason_codes).not.toContain("mcp_discovery_counterparty_from_followup_context"); }); + + it("continues business overview on by-these-data profit wording without grounding pseudo anchors", () => { + const orgName = + "\u041e\u041e\u041e \u0410\u043b\u044c\u0442\u0435\u0440\u043d\u0430\u0442\u0438\u0432\u0430 \u041f\u043b\u044e\u0441"; + const result = buildAssistantMcpDiscoveryTurnInput({ + userMessage: + "\u041c\u043e\u0436\u043d\u043e \u043b\u0438 \u043f\u043e \u044d\u0442\u0438\u043c \u0434\u0430\u043d\u043d\u044b\u043c \u043f\u043e\u0441\u0447\u0438\u0442\u0430\u0442\u044c \u043d\u043e\u0440\u043c\u0430\u043b\u044c\u043d\u0443\u044e \u043f\u0440\u0438\u0431\u044b\u043b\u044c \u0438 \u043c\u0430\u0440\u0436\u0443? \u0415\u0441\u043b\u0438 \u043d\u0435\u0442, \u0434\u0430\u0439 proxy-\u0430\u043d\u0430\u043b\u0438\u0437.", + assistantTurnMeaning: { + asked_domain_family: "counterparty", + asked_action_family: "turnover", + explicit_intent_candidate: "customer_revenue_and_payments", + explicit_entity_candidates: [ + "\u043f\u043e \u044d\u0442\u0438\u043c \u0434\u0430\u043d\u043d\u044b\u043c", + "\u0438 \u043a\u0442\u043e \u0433\u043b\u0430\u0432\u043d\u044b\u0439 \u043f\u043e\u0441\u0442\u0430\u0432\u0449\u0438\u043a \u0432" + ] + }, + followupContext: { + previous_discovery_pilot_scope: "business_overview_route_template_v1", + previous_filters: { + organization: orgName, + period_from: "2020-01-01", + period_to: "2020-12-31" + } + } + }); + + expect(result.adapter_status).toBe("ready"); + expect(result.should_run_discovery).toBe(true); + expect(result.semantic_data_need).toBe("business overview evidence with bounded analyst interpretation"); + expect(result.data_need_graph?.business_fact_family).toBe("business_overview"); + expect(result.data_need_graph?.subject_candidates).toEqual([]); + expect(result.turn_meaning_ref).toMatchObject({ + asked_domain_family: "business_overview", + asked_action_family: "broad_evaluation", + explicit_organization_scope: orgName, + explicit_date_scope: "2020", + unsupported_but_understood_family: "broad_business_evaluation", + stale_replay_forbidden: true + }); + expect(result.turn_meaning_ref?.explicit_entity_candidates).toBeUndefined(); + expect(result.reason_codes).toContain("mcp_discovery_business_overview_continuation_from_followup_context"); + expect(result.reason_codes).not.toContain("mcp_discovery_counterparty_from_followup_context"); + }); + + it("keeps VAT-position follow-up inside business overview instead of stale inventory position", () => { + const orgName = + "\u041e\u041e\u041e \u0410\u043b\u044c\u0442\u0435\u0440\u043d\u0430\u0442\u0438\u0432\u0430 \u041f\u043b\u044e\u0441"; + const result = buildAssistantMcpDiscoveryTurnInput({ + userMessage: + "\u0427\u0442\u043e \u0441 \u041d\u0414\u0421 \u0437\u0430 2020 \u0433\u043e\u0434: \u043a\u0430\u043a\u0430\u044f \u043f\u043e\u0437\u0438\u0446\u0438\u044f \u0432\u0438\u0434\u043d\u0430 \u0438 \u0447\u0435\u0433\u043e \u043d\u0435 \u0445\u0432\u0430\u0442\u0430\u0435\u0442 \u0434\u043b\u044f \u043d\u0430\u043b\u043e\u0433\u043e\u0432\u043e\u0433\u043e \u0432\u044b\u0432\u043e\u0434\u0430?", + assistantTurnMeaning: { + asked_domain_family: "inventory", + asked_action_family: "stock_snapshot", + explicit_intent_candidate: "inventory_on_hand_as_of_date", + explicit_entity_candidates: ["\u043f\u043e\u0437\u0438\u0446\u0438\u044f"] + }, + followupContext: { + previous_discovery_pilot_scope: "business_overview_route_template_v1", + previous_filters: { + organization: orgName + } + } + }); + + expect(result.adapter_status).toBe("ready"); + expect(result.should_run_discovery).toBe(true); + expect(result.semantic_data_need).toBe("business overview evidence with bounded analyst interpretation"); + expect(result.data_need_graph?.business_fact_family).toBe("business_overview"); + expect(result.data_need_graph?.subject_candidates).toEqual([]); + expect(result.turn_meaning_ref).toMatchObject({ + asked_domain_family: "business_overview", + asked_action_family: "broad_evaluation", + explicit_organization_scope: orgName, + explicit_date_scope: "2020", + unsupported_but_understood_family: "broad_business_evaluation", + stale_replay_forbidden: true + }); + expect(result.turn_meaning_ref?.explicit_entity_candidates).toBeUndefined(); + expect(result.reason_codes).toContain("mcp_discovery_business_overview_continuation_from_followup_context"); + }); + + it("routes business overview final-summary wording to the overview lane without document pseudo subject", () => { + const orgName = + "\u041e\u041e\u041e \u0410\u043b\u044c\u0442\u0435\u0440\u043d\u0430\u0442\u0438\u0432\u0430 \u041f\u043b\u044e\u0441"; + const result = buildAssistantMcpDiscoveryTurnInput({ + userMessage: + "\u0427\u0442\u043e \u043f\u043e\u0434\u0442\u0432\u0435\u0440\u0436\u0434\u0435\u043d\u043e \u043f\u043e \u044d\u0442\u0438\u043c \u0434\u0430\u043d\u043d\u044b\u043c \u0438 \u0447\u0435\u0433\u043e \u043d\u0435 \u0445\u0432\u0430\u0442\u0430\u0435\u0442 \u0434\u043b\u044f \u0432\u044b\u0432\u043e\u0434\u0430?", + assistantTurnMeaning: { + asked_domain_family: "counterparty", + asked_action_family: "list_documents", + explicit_intent_candidate: "list_documents_by_counterparty", + explicit_entity_candidates: ["\u0434\u043e\u043a\u0443\u043c\u0435\u043d\u0442\u0430\u043c"] + }, + followupContext: { + previous_discovery_pilot_scope: "business_overview_route_template_v1", + previous_filters: { + organization: orgName, + period_from: "2020-01-01", + period_to: "2020-12-31" + } + } + }); + + expect(result.adapter_status).toBe("ready"); + expect(result.should_run_discovery).toBe(true); + expect(result.semantic_data_need).toBe("business overview evidence with bounded analyst interpretation"); + expect(result.data_need_graph?.business_fact_family).toBe("business_overview"); + expect(result.data_need_graph?.subject_candidates).toEqual([]); + expect(result.turn_meaning_ref).toMatchObject({ + asked_domain_family: "business_overview", + asked_action_family: "broad_evaluation", + explicit_organization_scope: orgName, + explicit_date_scope: "2020", + unsupported_but_understood_family: "broad_business_evaluation", + stale_replay_forbidden: true + }); + expect(result.turn_meaning_ref?.explicit_entity_candidates).toBeUndefined(); + expect(result.reason_codes).toContain("mcp_discovery_business_overview_continuation_from_followup_context"); + }); });