From 0c527e55824307e7dab16cce1dd75e6d3ad74169 Mon Sep 17 00:00:00 2001 From: dctouch Date: Tue, 5 May 2026 11:19:28 +0300 Subject: [PATCH] =?UTF-8?q?Open-World:=20=D0=BD=D0=B0=D0=BF=D1=80=D0=B0?= =?UTF-8?q?=D0=B2=D0=B8=D1=82=D1=8C=20=D0=B2=D0=BE=D0=BF=D1=80=D0=BE=D1=81?= =?UTF-8?q?=D1=8B=20=D0=BE=20=D0=B4=D0=BE=D1=85=D0=BE=D0=B4=D0=BD=D0=BE?= =?UTF-8?q?=D1=81=D1=82=D0=B8=20=D0=BA=D0=BE=D0=BC=D0=BF=D0=B0=D0=BD=D0=B8?= =?UTF-8?q?=D0=B8=20=D0=B2=20=D0=B1=D0=B8=D0=B7=D0=BD=D0=B5=D1=81-=D0=BE?= =?UTF-8?q?=D0=B1=D0=B7=D0=BE=D1=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../21 - current_status_canon_2026-05-01.md | 3 +- ...rld_bounded_autonomy_breadth_2026-05-01.md | 25 ++++++++++ .../11 - architecture_turnaround/README.md | 6 ++- .../dist/services/addressIntentResolver.js | 15 ++++++ .../assistantMcpDiscoveryTurnInputAdapter.js | 14 ++++++ .../services/assistantTurnMeaningPolicy.js | 17 +++++++ .../src/services/addressIntentResolver.ts | 27 +++++++++++ .../assistantMcpDiscoveryTurnInputAdapter.ts | 24 ++++++++++ .../services/assistantTurnMeaningPolicy.ts | 27 +++++++++++ .../tests/addressQueryRuntimeM23.test.ts | 46 ++++++++----------- ...istantMcpDiscoveryTurnInputAdapter.test.ts | 9 ++-- .../tests/assistantTurnMeaningPolicy.test.ts | 23 ++++++++++ 12 files changed, 202 insertions(+), 34 deletions(-) diff --git a/docs/ARCH/11 - architecture_turnaround/21 - current_status_canon_2026-05-01.md b/docs/ARCH/11 - architecture_turnaround/21 - current_status_canon_2026-05-01.md index fd9bb32..083e3ef 100644 --- a/docs/ARCH/11 - architecture_turnaround/21 - current_status_canon_2026-05-01.md +++ b/docs/ARCH/11 - architecture_turnaround/21 - current_status_canon_2026-05-01.md @@ -31,8 +31,9 @@ If another document says `78%`, `87%`, `92%`, or `85%` for a module that is now - Completed active slice: `Business Overview Debt Staleness Risk Proxy Bridge`: when current-turn open-settlement concentration and contract-date age are both present, business overview can include a bounded debt staleness-risk proxy while contractual delinquency, credit risk, and due-date aging remain unclaimed. - Completed active slice: `Business Overview Supplier Concentration Proxy Bridge`: business overview now derives top suppliers/recipients from confirmed outgoing payment rows and surfaces procurement concentration without claiming vendor risk, procurement quality, or full expense structure. - Completed active slice: `Business Overview Yearly Operating-Flow Proxy Bridge`: business overview now derives annual incoming/outgoing/net buckets from confirmed money-flow rows and can name the strongest incoming year and best operating-net year without claiming profit or P&L. +- Completed active slice: `Business Overview Earnings Wording Arbitration Bridge`: organization-level earnings/best-year/overall-turnover wording now routes to `business_overview` instead of the exact customer-value lane, while explicit customer/counterparty wording remains in `customer_revenue_and_payments`. - Next active slice: continue breadth into exact company-wide accounting profit/margin, real due-date debt aging, confirmed inventory reserve/write-off/liquidation evidence, and broader unfamiliar 1C route families only where reviewed evidence routes exist. -- Active module progress: `~90% (Open-World Bounded Autonomy Breadth)`. +- Active module progress: `~92% (Open-World Bounded Autonomy Breadth)`. ## Reporting Rule diff --git a/docs/ARCH/11 - architecture_turnaround/22 - open_world_bounded_autonomy_breadth_2026-05-01.md b/docs/ARCH/11 - architecture_turnaround/22 - open_world_bounded_autonomy_breadth_2026-05-01.md index 725588f..2afa299 100644 --- a/docs/ARCH/11 - architecture_turnaround/22 - open_world_bounded_autonomy_breadth_2026-05-01.md +++ b/docs/ARCH/11 - architecture_turnaround/22 - open_world_bounded_autonomy_breadth_2026-05-01.md @@ -528,3 +528,28 @@ Local validation is accepted for this slice: - `npm.cmd run build`: passed. Graphify rebuild after Slice 17 code/doc sync: `6047 nodes`, `13177 edges`, `139 communities`. + +## Slice 18 - Business Overview Earnings Wording Arbitration Bridge + +This slice closes the routing gap exposed by broad manual questions about company earnings. + +The runtime already had a bounded yearly operating-flow proxy, but organization-level wording such as "сколько вообще денег мы заработали", "какой у нас самый доходный год", or "какие общие обороты за все время" could still be captured by the old exact `customer_revenue_and_payments` address lane. That was semantically wrong: those questions are about the company as a whole, not about a customer ranking, and the safe answer surface is `business_overview` with explicit profit/P&L boundaries. + +Implemented now: + +- turn meaning policy treats organization-level earnings/best-year wording as `broad_business_evaluation`; +- the discovery turn-input adapter promotes the same raw wording into `business overview evidence with bounded analyst interpretation` before value-flow detection can steal it; +- the address intent resolver now defers organization-level earnings/turnover wording with `unicode_business_overview_earnings_deferred_to_discovery`; +- explicit customer/counterparty/supplier/contract/item wording still stays in the existing exact routes, so "кто самый доходный клиент" remains `customer_revenue_and_payments`; +- the stale follow-up all-time wording case now keeps the previous organization scope but drops stale date scope while routing to `business_overview`. + +This is an arbitration hardening slice, not a new profit engine. It makes the already implemented annual operating-flow proxy reachable from natural business-analysis wording while preserving the boundary that accounting profit, financial result, true margin, and full P&L still need reviewed evidence routes. + +Local validation is accepted for this slice: + +- `npm.cmd test -- assistantTurnMeaningPolicy.test.ts assistantMcpDiscoveryTurnInputAdapter.test.ts`: passed `85` with `6` skipped. +- `npm.cmd test -- addressQueryRuntimeM23.test.ts`: passed `412`. +- `npm.cmd test -- assistantMcpDiscoveryRuntimeEntryPoint.test.ts assistantMcpDiscoveryPilotExecutor.test.ts assistantMcpDiscoveryAnswerAdapter.test.ts`: passed `85` with `3` skipped. +- `npm.cmd run build`: passed. + +Graphify rebuild after Slice 18 code/doc sync: `6052 nodes`, `13187 edges`, `138 communities`. diff --git a/docs/ARCH/11 - architecture_turnaround/README.md b/docs/ARCH/11 - architecture_turnaround/README.md index f35e424..62189f8 100644 --- a/docs/ARCH/11 - architecture_turnaround/README.md +++ b/docs/ARCH/11 - architecture_turnaround/README.md @@ -66,6 +66,7 @@ Status canon for planning: - The current completed breadth slice is `Business Overview Debt Staleness Risk Proxy Bridge`: when current-turn open-settlement concentration and contract-date age are both present, company analysis can include a bounded debt staleness-risk proxy while confirmed overdue debt, contractual delinquency, credit risk, and due-date aging remain unclaimed. - The current completed breadth slice is `Business Overview Supplier Concentration Proxy Bridge`: company analysis now ranks confirmed outgoing payment counterparties and surfaces supplier/procurement concentration as a bounded proxy, not as vendor risk or full expense structure. - The current completed breadth slice is `Business Overview Yearly Operating-Flow Proxy Bridge`: company analysis now builds annual incoming/outgoing/net buckets from confirmed money-flow rows and names strongest years as operating-flow proxy, not profit or full P&L. +- The current completed breadth slice is `Business Overview Earnings Wording Arbitration Bridge`: organization-level earnings, best-year, and overall-turnover wording now reaches `business_overview` instead of the exact customer-value route, while explicit customer/counterparty ranking remains unchanged. - The next active breadth slice continues breadth into exact company-wide accounting profit/margin, real due-date debt aging, confirmed reserve/write-off/liquidation inventory evidence, and broader unfamiliar 1C route families without relaxing truth boundaries. - The short source of truth for status wording is [21 - current_status_canon_2026-05-01.md](./21%20-%20current_status_canon_2026-05-01.md). @@ -131,11 +132,11 @@ Current honest status: - pre-multidomain readiness: `~90%` - bounded-autonomy foundation readiness: `~89%` - open-world bounded-autonomy readiness: `~87%` -- active Open-World Bounded Autonomy Breadth progress: `~90%`, with business-overview evidence fusion, the reviewed `business_overview` catalog/data-need/planner route-fabric slice, the fresh multi-probe runtime bridge, the explicit-period VAT/tax fact-family bridge, the explicit-period debt-position bridge, the explicit-date inventory-position bridge, the open-settlement quality bridge accepted by live semantic replay, selected-item profitability bridged by local semantic/runtime regression tests, contract-date debt age bridged locally, debt staleness-risk proxy bridged locally, supplier concentration proxy bridged locally, yearly operating-flow proxy bridged locally, analyst synthesis added to business-overview answer drafting, company-period trading margin proxy bridged locally, inventory sales-to-stock proxy bridged locally, inventory staleness-risk proxy bridged locally, and gap-specific answer shaping bridged locally; exact accounting profit/margin, true due-date debt aging/overdue, confirmed vendor-risk/procurement-quality analysis, and confirmed reserve/write-off/liquidation inventory evidence are still pending +- active Open-World Bounded Autonomy Breadth progress: `~92%`, with business-overview evidence fusion, the reviewed `business_overview` catalog/data-need/planner route-fabric slice, the fresh multi-probe runtime bridge, the explicit-period VAT/tax fact-family bridge, the explicit-period debt-position bridge, the explicit-date inventory-position bridge, the open-settlement quality bridge accepted by live semantic replay, selected-item profitability bridged by local semantic/runtime regression tests, contract-date debt age bridged locally, debt staleness-risk proxy bridged locally, supplier concentration proxy bridged locally, yearly operating-flow proxy bridged locally, earnings/best-year wording arbitration bridged locally, analyst synthesis added to business-overview answer drafting, company-period trading margin proxy bridged locally, inventory sales-to-stock proxy bridged locally, inventory staleness-risk proxy bridged locally, and gap-specific answer shaping bridged locally; exact accounting profit/margin, true due-date debt aging/overdue, confirmed vendor-risk/procurement-quality analysis, and confirmed reserve/write-off/liquidation inventory evidence are still pending - Post-F semantic integrity module progress: `~99%` operationally closed, with remaining risk now treated as next-slice discovery rather than an open blocker inside the closed slice - active inventory-stock breadth slice progress: `100%` for the declared scenario pack, not for arbitrary inventory questions - Planner Autonomy Consolidation progress: `100%` for the declared module, with catalog-fabric, value-flow arbitration, lifecycle bounded inference, broad-evaluation bridge, inventory catalog templates, inventory runtime-boundary honesty, exact inventory recipe bridging, unambiguous metadata-surface lane inference, catalog chain-template scoring, structured chain-match contract exposure, runtime/debug propagation, subject-aware bidirectional comparison arbitration, structured catalog-alignment verdicts, representative alignment regression guard, catalog-alignment reason-code telemetry, explicit `alignment_status` propagation, truth-harness/acceptance-matrix surfacing, soft divergence warning, `catalog_alignment_ok` acceptance invariant, step-level expected catalog-alignment assertions, phase66 and phase32 spec alignment expectations, AGENT source-catalog surfacing, generated phase83 mixed planner-brain replay spec, checked-source user-facing error sanitation, surface-grounded catalog promotion, and guarded live phase83 acceptance validated. Broader unfamiliar 1C asks are now next-module breadth work rather than an open blocker inside this declared slice -- graph snapshot after latest rebuild: `6047 nodes`, `13177 edges`, `139 communities` +- graph snapshot after latest rebuild: `6052 nodes`, `13187 edges`, `138 communities` - current regression-gate breakpoint: - the validated hot paths are no longer structurally broken; - flagship continuity collapse is no longer the primary risk; @@ -189,6 +190,7 @@ Latest live proof now includes: - business-overview debt staleness-risk proxy accepted locally: targeted executor/answer-adapter slice passed `66/66` with `1` skipped; M23 route/runtime regression passed `412/412`; build passed; graphify rebuilt to `6040 nodes`, `13158 edges`, `135 communities`; the proxy combines contract-date age and open-balance concentration while confirmed overdue debt, contractual delinquency, credit risk, and due-date aging remain unclaimed - business-overview supplier concentration proxy accepted locally: targeted executor/answer-adapter slice passed `66/66` with `1` skipped; M23 route/runtime regression passed `412/412`; build passed; graphify rebuilt to `6041 nodes`, `13162 edges`, `136 communities`; the proxy ranks confirmed outgoing payment counterparties while vendor risk, procurement quality, and full expense structure remain unclaimed - business-overview yearly operating-flow proxy accepted locally: targeted executor/answer-adapter slice passed `66/66` with `1` skipped; M23 route/runtime regression passed `412/412`; build passed; graphify rebuilt to `6047 nodes`, `13177 edges`, `139 communities`; the proxy builds annual incoming/outgoing/net buckets from confirmed money-flow rows while profit, финрезультат, and full P&L remain unclaimed +- business-overview earnings wording arbitration accepted locally: turn-meaning/turn-input slice passed `85/85` with `6` skipped; M23 route/runtime regression passed `412/412`; runtime-entry/pilot/answer slice passed `85/85` with `3` skipped; build passed; graphify rebuilt to `6052 nodes`, `13187 edges`, `138 communities`; organization-level earnings/best-year wording now reaches `business_overview` while explicit customer/counterparty ranking remains in exact customer value routes - inventory template lift accepted locally: catalog/data-need/planner/turn-input slice passed `139/139` with `6` skipped; full MCP-discovery slice passed `276/276` with `9` skipped; build passed; graphify stayed at `5912 nodes`, `12833 edges`, `138 communities` - inventory runtime-boundary hardening accepted locally: runtime-bridge/answer-adapter/pilot-executor slice passed `68/68` with `1` skipped; full MCP-discovery slice passed `277/277` with `9` skipped; build passed; graphify rebuilt to `5913 nodes`, `12837 edges`, `138 communities` - inventory exact-runtime bridge accepted locally: runtime-bridge/answer-adapter/pilot-executor slice passed `70/70` with `1` skipped; full MCP-discovery slice passed `279/279` with `9` skipped; build passed; graphify rebuilt to `5930 nodes`, `12884 edges`, `135 communities` diff --git a/llm_normalizer/backend/dist/services/addressIntentResolver.js b/llm_normalizer/backend/dist/services/addressIntentResolver.js index b01f226..61be114 100644 --- a/llm_normalizer/backend/dist/services/addressIntentResolver.js +++ b/llm_normalizer/backend/dist/services/addressIntentResolver.js @@ -1479,6 +1479,18 @@ function hasCustomerRevenueRankingBridgeSignal(text) { const hasRevenueAggregateCue = /(?:(?:\u043a\u0430\u043a\u043e\u0439|\u043a\u0430\u043a\u0430\u044f)\s+(?:\u0443\s+\u043d\u0430\u0441\s+)?\u0441\u0430\u043c(?:\u044b\u0439|\u0430\u044f|\u043e\u0435)?\s+\u0434\u043e\u0445\u043e\u0434\u043d\w*\s+\u0433\u043e\u0434|(?:\u0441\u043a\u043e\u043b\u044c\u043a\u043e|скок)\s+(?:\u0432\u043e\u043e\u0431\u0449\u0435\s+)?(?:\u0434\u0435\u043d\u0435\u0433\s+)?\u043c\u044b\s+\u0437\u0430\u0440\u0430\u0431\u043e\u0442\u0430\u043b\u0438|(?:\u0437\u0430|for)\s+\d{4}\s+\u043c\u044b\s+(?:\u0441\u043a\u043e\u043b\u044c\u043a\u043e|скок)\s+\u0437\u0430\u0440\u0430\u0431\u043e\u0442\u0430\u043b\u0438|\u0432\u044b\u0440\u0443\u0447\u043a\w*\s+\u0437\u0430\s+\d{4})/iu.test(normalized); return hasCustomerRankingCue || hasRevenueAggregateCue; } +function hasOrganizationLevelEarningsOverviewBridgeSignal(text) { + const normalized = String(text ?? "").trim().toLowerCase(); + if (!normalized) { + return false; + } + if (/(?:\u043a\u043b\u0438\u0435\u043d\u0442|\u043f\u043e\u043a\u0443\u043f\u0430\u0442\u0435\u043b|\u0437\u0430\u043a\u0430\u0437\u0447\u0438\u043a|\u043a\u043e\u043d\u0442\u0440\u0430\u0433\u0435\u043d\u0442|\u043f\u043e\u0441\u0442\u0430\u0432\u0449\u0438\u043a|\u0434\u043e\u0433\u043e\u0432\u043e\u0440|\u043a\u043e\u043d\u0442\u0440\u0430\u043a\u0442|\u0442\u043e\u0432\u0430\u0440|\u043d\u043e\u043c\u0435\u043d\u043a\u043b\u0430\u0442\u0443\u0440|\u0441\u0434\u0435\u043b\u043a|customer|client|counterparty|supplier|vendor|contract|item|product|deal)/iu.test(normalized)) { + return false; + } + const hasYearRankingCue = /(?:(?:\u043a\u0430\u043a\u043e\u0439|\u043a\u0430\u043a\u0438\u0435|\u043a\u0430\u043a\u0430\u044f|what|which)\s+(?:\u0443\s+\u043d\u0430\u0441\s+)?(?:[\p{L}0-9._-]+\s+){0,4}(?:\u0441\u0430\u043c|\u0441\u0430\u043c\u044b\u0435|best|top|most)[\s\S]{0,80}(?:\u0434\u043e\u0445\u043e\u0434\u043d|\u043f\u0440\u0438\u0431\u044b\u043b|\u0432\u044b\u0440\u0443\u0447\u043a|\u043e\u0431\u043e\u0440\u043e\u0442|revenue|profit|turnover)[\s\S]{0,40}(?:\u0433\u043e\u0434|year)|(?:\u0434\u043e\u0445\u043e\u0434\u043d|\u043f\u0440\u0438\u0431\u044b\u043b|\u0432\u044b\u0440\u0443\u0447\u043a|\u043e\u0431\u043e\u0440\u043e\u0442|revenue|profit|turnover)[\s\S]{0,60}(?:\u0441\u0430\u043c|\u0441\u0430\u043c\u044b\u0435|best|top|most)[\s\S]{0,40}(?:\u0433\u043e\u0434|year))/iu.test(normalized); + const hasCompanyEarningsCue = /(?:(?:\u0441\u043a\u043e\u043b\u044c\u043a\u043e|\u0441\u043a\u043e\u043a\w*|how\s+much)[\s\S]{0,120}(?:\u0437\u0430\u0440\u0430\u0431\u043e\u0442|\u0432\u044b\u0440\u0443\u0447)|(?:\u0437\u0430\u0440\u0430\u0431\u043e\u0442|\u0432\u044b\u0440\u0443\u0447)[\s\S]{0,80}(?:\u0437\u0430\s+(?:\u0432\u0441\u0435\s+\u0432\u0440\u0435\u043c\u044f|\d{2,4}\s*\u0433\u043e\u0434|(?:19|20)\d{2})|all\s+time|\b(?:19|20)\d{2}\b)|(?:\u043e\u0431\u0449\w*\s+(?:\u043e\u0431\u043e\u0440\u043e\u0442|\u0432\u044b\u0440\u0443\u0447\u043a)|(?:\u043e\u0431\u043e\u0440\u043e\u0442|\u0432\u044b\u0440\u0443\u0447\u043a)[\s\S]{0,40}\u0437\u0430\s+\u0432\u0441\u0435\s+\u0432\u0440\u0435\u043c\u044f))/iu.test(normalized); + return hasYearRankingCue || hasCompanyEarningsCue; +} function hasSpecificCounterpartyRevenueBridgeSignal(text) { const normalized = String(text ?? "").trim().toLowerCase(); if (!normalized) { @@ -1636,6 +1648,9 @@ function resolveUnicodeAddressIntentBridge(text) { if (!hasContractCue && hasHighestValueCustomerCue) { return unicodeBridgeResolution("customer_revenue_and_payments", "high", "unicode_customer_revenue_bridge_signal_detected"); } + if (hasOrganizationLevelEarningsOverviewBridgeSignal(normalized)) { + return unicodeBridgeResolution("unknown", "high", "unicode_business_overview_earnings_deferred_to_discovery"); + } if (hasBidirectionalValueFlowComparisonSignal(normalized)) { return unicodeBridgeResolution("unknown", "high", "unicode_bidirectional_value_flow_deferred_to_discovery"); } diff --git a/llm_normalizer/backend/dist/services/assistantMcpDiscoveryTurnInputAdapter.js b/llm_normalizer/backend/dist/services/assistantMcpDiscoveryTurnInputAdapter.js index e3832c7..6da133a 100644 --- a/llm_normalizer/backend/dist/services/assistantMcpDiscoveryTurnInputAdapter.js +++ b/llm_normalizer/backend/dist/services/assistantMcpDiscoveryTurnInputAdapter.js @@ -511,7 +511,21 @@ function metadataAmbiguityCollapsesToMovementLane(values) { function hasLifecycleSignal(text) { return /(?:сколько\s+лет|как\s+давно|давно\s+ли|возраст|перв(?:ая|ый)\s+актив|когда\s+начал|когда\s+появ|lifecycle|activity\s+duration|business\s+age|how\s+long)/iu.test(text); } +function hasExplicitCounterpartyValueObject(text) { + return /(?:\u043a\u043b\u0438\u0435\u043d\u0442|\u043f\u043e\u043a\u0443\u043f\u0430\u0442\u0435\u043b|\u0437\u0430\u043a\u0430\u0437\u0447\u0438\u043a|\u043a\u043e\u043d\u0442\u0440\u0430\u0433\u0435\u043d\u0442|\u043f\u043e\u0441\u0442\u0430\u0432\u0449\u0438\u043a|\u0434\u043e\u0433\u043e\u0432\u043e\u0440|\u043a\u043e\u043d\u0442\u0440\u0430\u043a\u0442|\u0442\u043e\u0432\u0430\u0440|\u043d\u043e\u043c\u0435\u043d\u043a\u043b\u0430\u0442\u0443\u0440|\u0441\u0434\u0435\u043b\u043a|customer|client|counterparty|supplier|vendor|contract|item|product|deal)/iu.test(text); +} +function hasOrganizationLevelEarningsOverviewSignal(text) { + if (!text || hasExplicitCounterpartyValueObject(text)) { + return false; + } + const hasYearRankingCue = /(?:(?:\u043a\u0430\u043a\u043e\u0439|\u043a\u0430\u043a\u0438\u0435|\u043a\u0430\u043a\u0430\u044f|what|which)\s+(?:\u0443\s+\u043d\u0430\u0441\s+)?(?:[\p{L}0-9._-]+\s+){0,4}(?:\u0441\u0430\u043c|\u0441\u0430\u043c\u044b\u0435|best|top|most)[\s\S]{0,80}(?:\u0434\u043e\u0445\u043e\u0434\u043d|\u043f\u0440\u0438\u0431\u044b\u043b|\u0432\u044b\u0440\u0443\u0447\u043a|\u043e\u0431\u043e\u0440\u043e\u0442|revenue|profit|turnover)[\s\S]{0,40}(?:\u0433\u043e\u0434|year)|(?:\u0434\u043e\u0445\u043e\u0434\u043d|\u043f\u0440\u0438\u0431\u044b\u043b|\u0432\u044b\u0440\u0443\u0447\u043a|\u043e\u0431\u043e\u0440\u043e\u0442|revenue|profit|turnover)[\s\S]{0,60}(?:\u0441\u0430\u043c|\u0441\u0430\u043c\u044b\u0435|best|top|most)[\s\S]{0,40}(?:\u0433\u043e\u0434|year))/iu.test(text); + const hasCompanyEarningsCue = /(?:(?:\u0441\u043a\u043e\u043b\u044c\u043a\u043e|\u0441\u043a\u043e\u043a\w*|how\s+much)[\s\S]{0,120}(?:\u0437\u0430\u0440\u0430\u0431\u043e\u0442|\u0432\u044b\u0440\u0443\u0447)|(?:\u0437\u0430\u0440\u0430\u0431\u043e\u0442|\u0432\u044b\u0440\u0443\u0447)[\s\S]{0,80}(?:\u0437\u0430\s+(?:\u0432\u0441\u0435\s+\u0432\u0440\u0435\u043c\u044f|\d{2,4}\s*\u0433\u043e\u0434|(?:19|20)\d{2})|all\s+time|\b(?:19|20)\d{2}\b)|(?:\u043e\u0431\u0449\w*\s+(?:\u043e\u0431\u043e\u0440\u043e\u0442|\u0432\u044b\u0440\u0443\u0447\u043a)|(?:\u043e\u0431\u043e\u0440\u043e\u0442|\u0432\u044b\u0440\u0443\u0447\u043a)[\s\S]{0,40}\u0437\u0430\s+\u0432\u0441\u0435\s+\u0432\u0440\u0435\u043c\u044f))/iu.test(text); + return hasYearRankingCue || hasCompanyEarningsCue; +} function hasBusinessOverviewSignal(text) { + if (hasOrganizationLevelEarningsOverviewSignal(text)) { + return true; + } return /(?:\u0431\u0438\u0437\u043d\u0435\u0441[-\s]?\u043e\u0431\u0437\u043e\u0440|\u0431\u0438\u0437\u043d\u0435\u0441[-\s]?\u0430\u0443\u0434\u0438\u0442|\u043f\u043e\u043b\u043d\w*\s+\u0430\u043d\u0430\u043b\u0438\u0437\s+(?:\u043a\u043e\u043c\u043f\u0430\u043d|\u0431\u0438\u0437\u043d\u0435\u0441|\u0434\u0435\u044f\u0442\u0435\u043b)|\u0441\u0432\u043e\u0434\u043d\w*\s+\u0430\u043d\u0430\u043b\u0438\u0437\s+(?:\u043a\u043e\u043c\u043f\u0430\u043d|\u0431\u0438\u0437\u043d\u0435\u0441|\u0434\u0435\u044f\u0442\u0435\u043b)|\u043a\u0430\u043a\s+\u0442\u044b\s+\u043e\u0446\u0435\u043d(?:\u0438\u0448\u044c|\u0438)\s+\u0434\u0435\u044f\u0442\u0435\u043b\u044c\u043d\u043e\u0441\u0442|\u043a\u043e\u043c\u043f\u0430\u043d(?:\u0438\u0438|\u0438\u044e|\u0438\u044f)\s+\u0432\s+\u0446\u0435\u043b\u043e\u043c|company\s+(?:analysis|overview)|business\s+(?:overview|audit)|llm[-\s]?audit|бизнес[-\s]?РѕР±Р·РѕСЂ|бизнес[-\s]?аудит)/iu.test(text); } function hasValueFlowSignal(text) { diff --git a/llm_normalizer/backend/dist/services/assistantTurnMeaningPolicy.js b/llm_normalizer/backend/dist/services/assistantTurnMeaningPolicy.js index 6ebb670..bc48975 100644 --- a/llm_normalizer/backend/dist/services/assistantTurnMeaningPolicy.js +++ b/llm_normalizer/backend/dist/services/assistantTurnMeaningPolicy.js @@ -112,6 +112,18 @@ function detectCounterpartyTurnoverFamily(text) { entity }; } +function hasExplicitCounterpartyValueObject(text) { + return /(?:\u043a\u043b\u0438\u0435\u043d\u0442|\u043f\u043e\u043a\u0443\u043f\u0430\u0442\u0435\u043b|\u0437\u0430\u043a\u0430\u0437\u0447\u0438\u043a|\u043a\u043e\u043d\u0442\u0440\u0430\u0433\u0435\u043d\u0442|\u043f\u043e\u0441\u0442\u0430\u0432\u0449\u0438\u043a|\u0434\u043e\u0433\u043e\u0432\u043e\u0440|\u043a\u043e\u043d\u0442\u0440\u0430\u043a\u0442|\u0442\u043e\u0432\u0430\u0440|\u043d\u043e\u043c\u0435\u043d\u043a\u043b\u0430\u0442\u0443\u0440|\u0441\u0434\u0435\u043b\u043a|customer|client|counterparty|supplier|vendor|contract|item|product|deal)/iu.test(text); +} +function hasOrganizationLevelEarningsOverviewSignal(text) { + const normalized = String(text ?? ""); + if (!normalized || hasExplicitCounterpartyValueObject(normalized)) { + return false; + } + const hasYearRankingCue = /(?:(?:\u043a\u0430\u043a\u043e\u0439|\u043a\u0430\u043a\u0438\u0435|\u043a\u0430\u043a\u0430\u044f|what|which)\s+(?:\u0443\s+\u043d\u0430\u0441\s+)?(?:[\p{L}0-9._-]+\s+){0,4}(?:\u0441\u0430\u043c|\u0441\u0430\u043c\u044b\u0435|best|top|most)[\s\S]{0,80}(?:\u0434\u043e\u0445\u043e\u0434\u043d|\u043f\u0440\u0438\u0431\u044b\u043b|\u0432\u044b\u0440\u0443\u0447\u043a|\u043e\u0431\u043e\u0440\u043e\u0442|revenue|profit|turnover)[\s\S]{0,40}(?:\u0433\u043e\u0434|year)|(?:\u0434\u043e\u0445\u043e\u0434\u043d|\u043f\u0440\u0438\u0431\u044b\u043b|\u0432\u044b\u0440\u0443\u0447\u043a|\u043e\u0431\u043e\u0440\u043e\u0442|revenue|profit|turnover)[\s\S]{0,60}(?:\u0441\u0430\u043c|\u0441\u0430\u043c\u044b\u0435|best|top|most)[\s\S]{0,40}(?:\u0433\u043e\u0434|year))/iu.test(normalized); + const hasCompanyEarningsCue = /(?:(?:\u0441\u043a\u043e\u043b\u044c\u043a\u043e|\u0441\u043a\u043e\u043a\w*|how\s+much)[\s\S]{0,120}(?:\u0437\u0430\u0440\u0430\u0431\u043e\u0442|\u0432\u044b\u0440\u0443\u0447)|(?:\u0437\u0430\u0440\u0430\u0431\u043e\u0442|\u0432\u044b\u0440\u0443\u0447)[\s\S]{0,80}(?:\u0437\u0430\s+(?:\u0432\u0441\u0435\s+\u0432\u0440\u0435\u043c\u044f|\d{2,4}\s*\u0433\u043e\u0434|(?:19|20)\d{2})|all\s+time|\b(?:19|20)\d{2}\b)|(?:\u043e\u0431\u0449\w*\s+(?:\u043e\u0431\u043e\u0440\u043e\u0442|\u0432\u044b\u0440\u0443\u0447\u043a)|(?:\u043e\u0431\u043e\u0440\u043e\u0442|\u0432\u044b\u0440\u0443\u0447\u043a)[\s\S]{0,40}\u0437\u0430\s+\u0432\u0441\u0435\s+\u0432\u0440\u0435\u043c\u044f))/iu.test(normalized); + return hasYearRankingCue || hasCompanyEarningsCue; +} function detectBroadBusinessEvaluation(text) { const normalized = String(text ?? ""); if (!normalized) { @@ -127,6 +139,11 @@ function detectBroadBusinessEvaluation(text) { family: "broad_business_evaluation" }; } + if (hasOrganizationLevelEarningsOverviewSignal(normalized)) { + return { + family: "broad_business_evaluation" + }; + } return null; } function buildEntityCandidates(counterpartyTurnover) { diff --git a/llm_normalizer/backend/src/services/addressIntentResolver.ts b/llm_normalizer/backend/src/services/addressIntentResolver.ts index f86dcdc..9882730 100644 --- a/llm_normalizer/backend/src/services/addressIntentResolver.ts +++ b/llm_normalizer/backend/src/services/addressIntentResolver.ts @@ -1843,6 +1843,29 @@ function hasCustomerRevenueRankingBridgeSignal(text: string): boolean { return hasCustomerRankingCue || hasRevenueAggregateCue; } +function hasOrganizationLevelEarningsOverviewBridgeSignal(text: string): boolean { + const normalized = String(text ?? "").trim().toLowerCase(); + if (!normalized) { + return false; + } + if ( + /(?:\u043a\u043b\u0438\u0435\u043d\u0442|\u043f\u043e\u043a\u0443\u043f\u0430\u0442\u0435\u043b|\u0437\u0430\u043a\u0430\u0437\u0447\u0438\u043a|\u043a\u043e\u043d\u0442\u0440\u0430\u0433\u0435\u043d\u0442|\u043f\u043e\u0441\u0442\u0430\u0432\u0449\u0438\u043a|\u0434\u043e\u0433\u043e\u0432\u043e\u0440|\u043a\u043e\u043d\u0442\u0440\u0430\u043a\u0442|\u0442\u043e\u0432\u0430\u0440|\u043d\u043e\u043c\u0435\u043d\u043a\u043b\u0430\u0442\u0443\u0440|\u0441\u0434\u0435\u043b\u043a|customer|client|counterparty|supplier|vendor|contract|item|product|deal)/iu.test( + normalized + ) + ) { + return false; + } + const hasYearRankingCue = + /(?:(?:\u043a\u0430\u043a\u043e\u0439|\u043a\u0430\u043a\u0438\u0435|\u043a\u0430\u043a\u0430\u044f|what|which)\s+(?:\u0443\s+\u043d\u0430\u0441\s+)?(?:[\p{L}0-9._-]+\s+){0,4}(?:\u0441\u0430\u043c|\u0441\u0430\u043c\u044b\u0435|best|top|most)[\s\S]{0,80}(?:\u0434\u043e\u0445\u043e\u0434\u043d|\u043f\u0440\u0438\u0431\u044b\u043b|\u0432\u044b\u0440\u0443\u0447\u043a|\u043e\u0431\u043e\u0440\u043e\u0442|revenue|profit|turnover)[\s\S]{0,40}(?:\u0433\u043e\u0434|year)|(?:\u0434\u043e\u0445\u043e\u0434\u043d|\u043f\u0440\u0438\u0431\u044b\u043b|\u0432\u044b\u0440\u0443\u0447\u043a|\u043e\u0431\u043e\u0440\u043e\u0442|revenue|profit|turnover)[\s\S]{0,60}(?:\u0441\u0430\u043c|\u0441\u0430\u043c\u044b\u0435|best|top|most)[\s\S]{0,40}(?:\u0433\u043e\u0434|year))/iu.test( + normalized + ); + const hasCompanyEarningsCue = + /(?:(?:\u0441\u043a\u043e\u043b\u044c\u043a\u043e|\u0441\u043a\u043e\u043a\w*|how\s+much)[\s\S]{0,120}(?:\u0437\u0430\u0440\u0430\u0431\u043e\u0442|\u0432\u044b\u0440\u0443\u0447)|(?:\u0437\u0430\u0440\u0430\u0431\u043e\u0442|\u0432\u044b\u0440\u0443\u0447)[\s\S]{0,80}(?:\u0437\u0430\s+(?:\u0432\u0441\u0435\s+\u0432\u0440\u0435\u043c\u044f|\d{2,4}\s*\u0433\u043e\u0434|(?:19|20)\d{2})|all\s+time|\b(?:19|20)\d{2}\b)|(?:\u043e\u0431\u0449\w*\s+(?:\u043e\u0431\u043e\u0440\u043e\u0442|\u0432\u044b\u0440\u0443\u0447\u043a)|(?:\u043e\u0431\u043e\u0440\u043e\u0442|\u0432\u044b\u0440\u0443\u0447\u043a)[\s\S]{0,40}\u0437\u0430\s+\u0432\u0441\u0435\s+\u0432\u0440\u0435\u043c\u044f))/iu.test( + normalized + ); + return hasYearRankingCue || hasCompanyEarningsCue; +} + function hasSpecificCounterpartyRevenueBridgeSignal(text: string): boolean { const normalized = String(text ?? "").trim().toLowerCase(); if (!normalized) { @@ -2100,6 +2123,10 @@ function resolveUnicodeAddressIntentBridge(text: string): AddressIntentResolutio ); } + if (hasOrganizationLevelEarningsOverviewBridgeSignal(normalized)) { + return unicodeBridgeResolution("unknown", "high", "unicode_business_overview_earnings_deferred_to_discovery"); + } + if (hasBidirectionalValueFlowComparisonSignal(normalized)) { return unicodeBridgeResolution( "unknown", diff --git a/llm_normalizer/backend/src/services/assistantMcpDiscoveryTurnInputAdapter.ts b/llm_normalizer/backend/src/services/assistantMcpDiscoveryTurnInputAdapter.ts index 23dcea2..83d1321 100644 --- a/llm_normalizer/backend/src/services/assistantMcpDiscoveryTurnInputAdapter.ts +++ b/llm_normalizer/backend/src/services/assistantMcpDiscoveryTurnInputAdapter.ts @@ -682,7 +682,31 @@ function hasLifecycleSignal(text: string): boolean { ); } +function hasExplicitCounterpartyValueObject(text: string): boolean { + return /(?:\u043a\u043b\u0438\u0435\u043d\u0442|\u043f\u043e\u043a\u0443\u043f\u0430\u0442\u0435\u043b|\u0437\u0430\u043a\u0430\u0437\u0447\u0438\u043a|\u043a\u043e\u043d\u0442\u0440\u0430\u0433\u0435\u043d\u0442|\u043f\u043e\u0441\u0442\u0430\u0432\u0449\u0438\u043a|\u0434\u043e\u0433\u043e\u0432\u043e\u0440|\u043a\u043e\u043d\u0442\u0440\u0430\u043a\u0442|\u0442\u043e\u0432\u0430\u0440|\u043d\u043e\u043c\u0435\u043d\u043a\u043b\u0430\u0442\u0443\u0440|\u0441\u0434\u0435\u043b\u043a|customer|client|counterparty|supplier|vendor|contract|item|product|deal)/iu.test( + text + ); +} + +function hasOrganizationLevelEarningsOverviewSignal(text: string): boolean { + if (!text || hasExplicitCounterpartyValueObject(text)) { + return false; + } + const hasYearRankingCue = + /(?:(?:\u043a\u0430\u043a\u043e\u0439|\u043a\u0430\u043a\u0438\u0435|\u043a\u0430\u043a\u0430\u044f|what|which)\s+(?:\u0443\s+\u043d\u0430\u0441\s+)?(?:[\p{L}0-9._-]+\s+){0,4}(?:\u0441\u0430\u043c|\u0441\u0430\u043c\u044b\u0435|best|top|most)[\s\S]{0,80}(?:\u0434\u043e\u0445\u043e\u0434\u043d|\u043f\u0440\u0438\u0431\u044b\u043b|\u0432\u044b\u0440\u0443\u0447\u043a|\u043e\u0431\u043e\u0440\u043e\u0442|revenue|profit|turnover)[\s\S]{0,40}(?:\u0433\u043e\u0434|year)|(?:\u0434\u043e\u0445\u043e\u0434\u043d|\u043f\u0440\u0438\u0431\u044b\u043b|\u0432\u044b\u0440\u0443\u0447\u043a|\u043e\u0431\u043e\u0440\u043e\u0442|revenue|profit|turnover)[\s\S]{0,60}(?:\u0441\u0430\u043c|\u0441\u0430\u043c\u044b\u0435|best|top|most)[\s\S]{0,40}(?:\u0433\u043e\u0434|year))/iu.test( + text + ); + const hasCompanyEarningsCue = + /(?:(?:\u0441\u043a\u043e\u043b\u044c\u043a\u043e|\u0441\u043a\u043e\u043a\w*|how\s+much)[\s\S]{0,120}(?:\u0437\u0430\u0440\u0430\u0431\u043e\u0442|\u0432\u044b\u0440\u0443\u0447)|(?:\u0437\u0430\u0440\u0430\u0431\u043e\u0442|\u0432\u044b\u0440\u0443\u0447)[\s\S]{0,80}(?:\u0437\u0430\s+(?:\u0432\u0441\u0435\s+\u0432\u0440\u0435\u043c\u044f|\d{2,4}\s*\u0433\u043e\u0434|(?:19|20)\d{2})|all\s+time|\b(?:19|20)\d{2}\b)|(?:\u043e\u0431\u0449\w*\s+(?:\u043e\u0431\u043e\u0440\u043e\u0442|\u0432\u044b\u0440\u0443\u0447\u043a)|(?:\u043e\u0431\u043e\u0440\u043e\u0442|\u0432\u044b\u0440\u0443\u0447\u043a)[\s\S]{0,40}\u0437\u0430\s+\u0432\u0441\u0435\s+\u0432\u0440\u0435\u043c\u044f))/iu.test( + text + ); + return hasYearRankingCue || hasCompanyEarningsCue; +} + function hasBusinessOverviewSignal(text: string): boolean { + if (hasOrganizationLevelEarningsOverviewSignal(text)) { + return true; + } return /(?:\u0431\u0438\u0437\u043d\u0435\u0441[-\s]?\u043e\u0431\u0437\u043e\u0440|\u0431\u0438\u0437\u043d\u0435\u0441[-\s]?\u0430\u0443\u0434\u0438\u0442|\u043f\u043e\u043b\u043d\w*\s+\u0430\u043d\u0430\u043b\u0438\u0437\s+(?:\u043a\u043e\u043c\u043f\u0430\u043d|\u0431\u0438\u0437\u043d\u0435\u0441|\u0434\u0435\u044f\u0442\u0435\u043b)|\u0441\u0432\u043e\u0434\u043d\w*\s+\u0430\u043d\u0430\u043b\u0438\u0437\s+(?:\u043a\u043e\u043c\u043f\u0430\u043d|\u0431\u0438\u0437\u043d\u0435\u0441|\u0434\u0435\u044f\u0442\u0435\u043b)|\u043a\u0430\u043a\s+\u0442\u044b\s+\u043e\u0446\u0435\u043d(?:\u0438\u0448\u044c|\u0438)\s+\u0434\u0435\u044f\u0442\u0435\u043b\u044c\u043d\u043e\u0441\u0442|\u043a\u043e\u043c\u043f\u0430\u043d(?:\u0438\u0438|\u0438\u044e|\u0438\u044f)\s+\u0432\s+\u0446\u0435\u043b\u043e\u043c|company\s+(?:analysis|overview)|business\s+(?:overview|audit)|llm[-\s]?audit|бизнес[-\s]?РѕР±Р·РѕСЂ|бизнес[-\s]?аудит)/iu.test( text ); diff --git a/llm_normalizer/backend/src/services/assistantTurnMeaningPolicy.ts b/llm_normalizer/backend/src/services/assistantTurnMeaningPolicy.ts index 95c1b29..1472b12 100644 --- a/llm_normalizer/backend/src/services/assistantTurnMeaningPolicy.ts +++ b/llm_normalizer/backend/src/services/assistantTurnMeaningPolicy.ts @@ -117,6 +117,28 @@ function detectCounterpartyTurnoverFamily(text) { }; } +function hasExplicitCounterpartyValueObject(text) { + return /(?:\u043a\u043b\u0438\u0435\u043d\u0442|\u043f\u043e\u043a\u0443\u043f\u0430\u0442\u0435\u043b|\u0437\u0430\u043a\u0430\u0437\u0447\u0438\u043a|\u043a\u043e\u043d\u0442\u0440\u0430\u0433\u0435\u043d\u0442|\u043f\u043e\u0441\u0442\u0430\u0432\u0449\u0438\u043a|\u0434\u043e\u0433\u043e\u0432\u043e\u0440|\u043a\u043e\u043d\u0442\u0440\u0430\u043a\u0442|\u0442\u043e\u0432\u0430\u0440|\u043d\u043e\u043c\u0435\u043d\u043a\u043b\u0430\u0442\u0443\u0440|\u0441\u0434\u0435\u043b\u043a|customer|client|counterparty|supplier|vendor|contract|item|product|deal)/iu.test( + text + ); +} + +function hasOrganizationLevelEarningsOverviewSignal(text) { + const normalized = String(text ?? ""); + if (!normalized || hasExplicitCounterpartyValueObject(normalized)) { + return false; + } + const hasYearRankingCue = + /(?:(?:\u043a\u0430\u043a\u043e\u0439|\u043a\u0430\u043a\u0438\u0435|\u043a\u0430\u043a\u0430\u044f|what|which)\s+(?:\u0443\s+\u043d\u0430\u0441\s+)?(?:[\p{L}0-9._-]+\s+){0,4}(?:\u0441\u0430\u043c|\u0441\u0430\u043c\u044b\u0435|best|top|most)[\s\S]{0,80}(?:\u0434\u043e\u0445\u043e\u0434\u043d|\u043f\u0440\u0438\u0431\u044b\u043b|\u0432\u044b\u0440\u0443\u0447\u043a|\u043e\u0431\u043e\u0440\u043e\u0442|revenue|profit|turnover)[\s\S]{0,40}(?:\u0433\u043e\u0434|year)|(?:\u0434\u043e\u0445\u043e\u0434\u043d|\u043f\u0440\u0438\u0431\u044b\u043b|\u0432\u044b\u0440\u0443\u0447\u043a|\u043e\u0431\u043e\u0440\u043e\u0442|revenue|profit|turnover)[\s\S]{0,60}(?:\u0441\u0430\u043c|\u0441\u0430\u043c\u044b\u0435|best|top|most)[\s\S]{0,40}(?:\u0433\u043e\u0434|year))/iu.test( + normalized + ); + const hasCompanyEarningsCue = + /(?:(?:\u0441\u043a\u043e\u043b\u044c\u043a\u043e|\u0441\u043a\u043e\u043a\w*|how\s+much)[\s\S]{0,120}(?:\u0437\u0430\u0440\u0430\u0431\u043e\u0442|\u0432\u044b\u0440\u0443\u0447)|(?:\u0437\u0430\u0440\u0430\u0431\u043e\u0442|\u0432\u044b\u0440\u0443\u0447)[\s\S]{0,80}(?:\u0437\u0430\s+(?:\u0432\u0441\u0435\s+\u0432\u0440\u0435\u043c\u044f|\d{2,4}\s*\u0433\u043e\u0434|(?:19|20)\d{2})|all\s+time|\b(?:19|20)\d{2}\b)|(?:\u043e\u0431\u0449\w*\s+(?:\u043e\u0431\u043e\u0440\u043e\u0442|\u0432\u044b\u0440\u0443\u0447\u043a)|(?:\u043e\u0431\u043e\u0440\u043e\u0442|\u0432\u044b\u0440\u0443\u0447\u043a)[\s\S]{0,40}\u0437\u0430\s+\u0432\u0441\u0435\s+\u0432\u0440\u0435\u043c\u044f))/iu.test( + normalized + ); + return hasYearRankingCue || hasCompanyEarningsCue; +} + function detectBroadBusinessEvaluation(text) { const normalized = String(text ?? ""); if (!normalized) { @@ -138,6 +160,11 @@ function detectBroadBusinessEvaluation(text) { family: "broad_business_evaluation" }; } + if (hasOrganizationLevelEarningsOverviewSignal(normalized)) { + return { + family: "broad_business_evaluation" + }; + } return null; } diff --git a/llm_normalizer/backend/tests/addressQueryRuntimeM23.test.ts b/llm_normalizer/backend/tests/addressQueryRuntimeM23.test.ts index 893e623..b8fdbe0 100644 --- a/llm_normalizer/backend/tests/addressQueryRuntimeM23.test.ts +++ b/llm_normalizer/backend/tests/addressQueryRuntimeM23.test.ts @@ -2605,11 +2605,12 @@ describe("address intent resolver expansion (M2.3a)", () => { expect(result.intent).toBe("customer_revenue_and_payments"); }); - it("resolves yearly profitability wording into customer revenue intent", () => { + it("defers organization-level yearly profitability wording to business overview discovery", () => { const result = resolveAddressIntent( "\u043a\u0430\u043a\u0438\u0435 \u0441\u0430\u043c\u044b\u0435 \u0434\u043e\u0445\u043e\u0434\u043d\u044b\u0435 \u0433\u043e\u0434\u0430 \u0430\u043b\u044c\u0442\u0435\u0440\u043d\u0430\u0442\u0438\u0432\u044b" ); - expect(result.intent).toBe("customer_revenue_and_payments"); + expect(result.intent).toBe("unknown"); + expect(result.reasons).toContain("unicode_business_overview_earnings_deferred_to_discovery"); }); it("resolves major-share revenue wording into customer revenue intent", () => { @@ -2652,14 +2653,16 @@ describe("address intent resolver expansion (M2.3a)", () => { expect(result.intent).toBe("contract_usage_and_value"); }); - it("resolves revenue-total slang wording into customer revenue intent", () => { + it("defers organization-level revenue-total slang wording to business overview discovery", () => { const result = resolveAddressIntent("скока денег альтернатива заработала за 22 год"); - expect(result.intent).toBe("customer_revenue_and_payments"); + expect(result.intent).toBe("unknown"); + expect(result.reasons).toContain("unicode_business_overview_earnings_deferred_to_discovery"); }); - it("resolves overall-turnover wording into customer revenue intent", () => { + it("defers organization-level overall-turnover wording to business overview discovery", () => { const result = resolveAddressIntent("какие общие обороты за все время"); - expect(result.intent).toBe("customer_revenue_and_payments"); + expect(result.intent).toBe("unknown"); + expect(result.reasons).toContain("unicode_business_overview_earnings_deferred_to_discovery"); }); it("resolves VAT payment forecast wording into dedicated VAT forecast intent", () => { @@ -3753,17 +3756,17 @@ describe("address query limited taxonomy and stage diagnostics", { timeout: 1500 expect(["FACTUAL_LIST", "LIMITED_WITH_REASON", "FACTUAL_SUMMARY"]).toContain(result?.response_type); }); - it("routes yearly profitability wording into customer value aggregate recipe", async () => { + it("defers yearly profitability wording out of the exact customer value aggregate recipe", async () => { const service = new AddressQueryService(); const result = await service.tryHandle( "\u043a\u0430\u043a\u0438\u0435 \u0441\u0430\u043c\u044b\u0435 \u0434\u043e\u0445\u043e\u0434\u043d\u044b\u0435 \u0433\u043e\u0434\u0430 \u0430\u043b\u044c\u0442\u0435\u0440\u043d\u0430\u0442\u0438\u0432\u044b" ); expect(result?.handled).toBe(true); - expect(result?.debug.detected_intent).toBe("customer_revenue_and_payments"); - expect(result?.debug.selected_recipe).toBe("address_customer_revenue_and_payments_v1"); - expect(result?.debug.limited_reason_category).not.toBe("unsupported"); - expect(result?.debug.mcp_call_status).not.toBe("skipped"); - expect(["FACTUAL_LIST", "LIMITED_WITH_REASON", "FACTUAL_SUMMARY"]).toContain(result?.response_type); + expect(result?.debug.detected_intent).toBe("unknown"); + expect(result?.debug.selected_recipe).toBeNull(); + expect(result?.debug.limited_reason_category).toBe("unsupported"); + expect(result?.debug.mcp_call_status).toBe("skipped"); + expect(result?.response_type).toBe("LIMITED_WITH_REASON"); }); it("routes typo highest-check wording into customer value aggregate recipe", async () => { @@ -3819,27 +3822,16 @@ describe("address query limited taxonomy and stage diagnostics", { timeout: 1500 expect(["FACTUAL_LIST", "LIMITED_WITH_REASON", "FACTUAL_SUMMARY"]).toContain(result?.response_type); }); - it("routes revenue-total slang wording into customer value aggregate recipe (no account-missing fallback)", async () => { + it("defers revenue-total slang wording out of exact address recipes", async () => { const service = new AddressQueryService(); const result = await service.tryHandle("скока денег альтернатива заработала за 22 год"); - expect(result?.handled).toBe(true); - expect(result?.debug.detected_intent).toBe("customer_revenue_and_payments"); - expect(result?.debug.selected_recipe).toBe("address_customer_revenue_and_payments_v1"); - expect(result?.debug.limited_reason_category).not.toBe("missing_anchor"); - expect(result?.debug.missing_required_filters).not.toContain("account"); - expect(result?.debug.mcp_call_status).not.toBe("skipped"); - expect(["FACTUAL_LIST", "LIMITED_WITH_REASON", "FACTUAL_SUMMARY"]).toContain(result?.response_type); + expect(result).toBeNull(); }); - it("routes overall-turnover wording into customer value aggregate recipe", async () => { + it("defers overall-turnover wording out of exact address recipes", async () => { const service = new AddressQueryService(); const result = await service.tryHandle("какие общие обороты за все время"); - expect(result?.handled).toBe(true); - expect(result?.debug.detected_intent).toBe("customer_revenue_and_payments"); - expect(result?.debug.selected_recipe).toBe("address_customer_revenue_and_payments_v1"); - expect(result?.debug.limited_reason_category).not.toBe("unsupported"); - expect(result?.debug.mcp_call_status).not.toBe("skipped"); - expect(["FACTUAL_LIST", "LIMITED_WITH_REASON", "FACTUAL_SUMMARY"]).toContain(result?.response_type); + expect(result).toBeNull(); }); it("routes VAT payment forecast wording into dedicated VAT forecast recipe", async () => { diff --git a/llm_normalizer/backend/tests/assistantMcpDiscoveryTurnInputAdapter.test.ts b/llm_normalizer/backend/tests/assistantMcpDiscoveryTurnInputAdapter.test.ts index 9373d75..756772d 100644 --- a/llm_normalizer/backend/tests/assistantMcpDiscoveryTurnInputAdapter.test.ts +++ b/llm_normalizer/backend/tests/assistantMcpDiscoveryTurnInputAdapter.test.ts @@ -1944,12 +1944,13 @@ describe("assistant MCP discovery turn input adapter", () => { expect(result.adapter_status).toBe("ready"); expect(result.should_run_discovery).toBe(true); - expect(result.semantic_data_need).toBe("counterparty value-flow evidence"); + expect(result.semantic_data_need).toBe("business overview evidence with bounded analyst interpretation"); + expect(result.data_need_graph?.business_fact_family).toBe("business_overview"); expect(result.turn_meaning_ref).toMatchObject({ - asked_domain_family: "counterparty_value", - asked_action_family: "turnover", + asked_domain_family: "business_overview", + asked_action_family: "broad_evaluation", explicit_organization_scope: orgName, - unsupported_but_understood_family: "counterparty_value_or_turnover", + unsupported_but_understood_family: "broad_business_evaluation", stale_replay_forbidden: true }); expect(result.turn_meaning_ref?.explicit_date_scope).toBeUndefined(); diff --git a/llm_normalizer/backend/tests/assistantTurnMeaningPolicy.test.ts b/llm_normalizer/backend/tests/assistantTurnMeaningPolicy.test.ts index e95865b..a459cc3 100644 --- a/llm_normalizer/backend/tests/assistantTurnMeaningPolicy.test.ts +++ b/llm_normalizer/backend/tests/assistantTurnMeaningPolicy.test.ts @@ -145,4 +145,27 @@ describe("assistantTurnMeaningPolicy", () => { expect(meaning.stale_replay_forbidden).toBe(true); expect(meaning.reason_codes).toContain("broad_business_evaluation_current_turn_signal"); }); + + it("treats organization-level earnings and best-year wording as business overview", () => { + const policy = buildPolicy({ + resolveAddressIntent: () => ({ intent: "customer_revenue_and_payments", confidence: "high" }) + }); + + const earnings = policy.resolveAssistantTurnMeaning({ + rawUserMessage: "сколько вообще денег мы заработали за все время?" + }); + expect(earnings.explicit_intent_candidate).toBeNull(); + expect(earnings.asked_domain_family).toBe("business_summary"); + expect(earnings.asked_action_family).toBe("broad_evaluation"); + expect(earnings.unsupported_but_understood_family).toBe("broad_business_evaluation"); + expect(earnings.stale_replay_forbidden).toBe(true); + + const bestYear = policy.resolveAssistantTurnMeaning({ + rawUserMessage: "какой у нас самый доходный год" + }); + expect(bestYear.explicit_intent_candidate).toBeNull(); + expect(bestYear.asked_domain_family).toBe("business_summary"); + expect(bestYear.asked_action_family).toBe("broad_evaluation"); + expect(bestYear.reason_codes).toContain("broad_business_evaluation_current_turn_signal"); + }); });