From f69393a887bdf92f709835482b12188a4099bf53 Mon Sep 17 00:00:00 2001 From: dctouch Date: Sun, 24 May 2026 17:52:57 +0300 Subject: [PATCH] =?UTF-8?q?=D0=94=D0=BE=D0=BA=D0=B0=D0=B7=D0=B0=D1=82?= =?UTF-8?q?=D1=8C=20=D0=BF=D0=BE=D0=BB=D0=B5=D0=B7=D0=BD=D0=BE=D1=81=D1=82?= =?UTF-8?q?=D1=8C=20margin-agent=20=D1=87=D0=B5=D1=80=D0=B5=D0=B7=20=D1=87?= =?UTF-8?q?=D0=B5=D1=81=D1=82=D0=BD=D1=8B=D0=B9=20boundary=20replay?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/orchestration/detector_registry.json | 6 +- docs/orchestration/issue_catalog.json | 3 + .../dist/services/addressIntentResolver.js | 3 +- .../services/addressInventoryIntentSignals.js | 3 +- .../dist/services/addressQueryClassifier.js | 17 +++ .../address_runtime/inventoryReplyBuilders.js | 26 +++++ .../src/services/addressIntentResolver.ts | 6 +- .../services/addressInventoryIntentSignals.ts | 6 +- .../src/services/addressQueryClassifier.ts | 18 +++ .../address_runtime/inventoryReplyBuilders.ts | 29 +++++ .../addressInventoryIntentSignals.test.ts | 17 +++ .../tests/addressQueryRuntimeM23.test.ts | 9 ++ .../addressReplyBuildersRegression.test.ts | 62 +++++++++++ scripts/domain_case_loop.py | 105 +++++++++++++++++- 14 files changed, 302 insertions(+), 8 deletions(-) diff --git a/docs/orchestration/detector_registry.json b/docs/orchestration/detector_registry.json index 313435d..96d2f61 100644 --- a/docs/orchestration/detector_registry.json +++ b/docs/orchestration/detector_registry.json @@ -52,7 +52,7 @@ "inputs": ["steps//output.md", "steps//turn.json"], "check": { "forbidden_patterns": [ - "(?i)(амортизац|объект\\s+ОС|основн(ые|ых)?\\s+средств|payment_document|settlement|банк|оплат[аы])" + "(?i)(амортизац|объект\\s+ОС|основн(ые|ых)?\\s+средств|payment_document|settlement)" ] } }, @@ -115,7 +115,9 @@ "issue_codes": ["margin_domain_leak_accounting_route"], "inputs": ["steps//output.md"], "check": { - "forbidden_patterns": ["(?i)(payment_document|банковск|плат[её]ж|оплат[аы]).{0,80}(марж|себестоим|валов)"] + "forbidden_patterns": [ + "(?i)(payment_document|банковск|плат[её]ж|оплат[аы]).{0,80}(достаточ|посчитал|рассчитал|является\\s+источник|как\\s+источник|на\\s+основании.{0,40}(марж|себестоим|валов))" + ] } }, "margin_os_amortization_leak": { diff --git a/docs/orchestration/issue_catalog.json b/docs/orchestration/issue_catalog.json index 222e740..dad5ba8 100644 --- a/docs/orchestration/issue_catalog.json +++ b/docs/orchestration/issue_catalog.json @@ -75,9 +75,12 @@ "detectors": [ "forbidden_margin_terms", "missing_revenue_cogs_margin_fields", + "margin_payment_document_false_source", "wrong_capability_family" ], "allowed_patch_targets": [ + "llm_normalizer/backend/src/services/addressQueryClassifier.ts", + "llm_normalizer/backend/src/services/addressInventoryIntentSignals.ts", "llm_normalizer/backend/src/services/addressIntentResolver.ts", "llm_normalizer/backend/src/services/addressCapabilityPolicy.ts", "llm_normalizer/backend/src/services/addressFilterExtractor.ts", diff --git a/llm_normalizer/backend/dist/services/addressIntentResolver.js b/llm_normalizer/backend/dist/services/addressIntentResolver.js index d5f5625..bfa795a 100644 --- a/llm_normalizer/backend/dist/services/addressIntentResolver.js +++ b/llm_normalizer/backend/dist/services/addressIntentResolver.js @@ -1672,7 +1672,8 @@ function hasNomenclatureMarginRankingSignal(text) { const hasNomenclatureCue = /(?:номенклатур|товар|позици|ассортимент|sku|item|product|goods)/iu.test(normalized); const hasMarginCue = /(?:прибыл|марж|рентаб|наценк|себестоим|выручк|profit|margin|profitability|gross\s+spread|cogs)/iu.test(normalized); const hasRankingCue = /(?:высок|низк|топ|сам(?:ая|ый|ое|ые|ой|ого|ому|ым|ых|ую)|больш|меньш|ранж|рейтинг|max|min|high|low|top|rank|best|worst)/iu.test(normalized); - return hasNomenclatureCue && hasMarginCue && hasRankingCue; + const hasCalculationCue = /(?:посчита\p{L}*|рассчита\p{L}*|расч[её]т\p{L}*|расчита\p{L}*|понять|calculate|compute)/iu.test(normalized); + return hasNomenclatureCue && hasMarginCue && (hasRankingCue || hasCalculationCue); } function hasVatPeriodInspectionBridgeSignal(text) { const normalized = String(text ?? "").trim().toLowerCase(); diff --git a/llm_normalizer/backend/dist/services/addressInventoryIntentSignals.js b/llm_normalizer/backend/dist/services/addressInventoryIntentSignals.js index 709f8db..a1ecd63 100644 --- a/llm_normalizer/backend/dist/services/addressInventoryIntentSignals.js +++ b/llm_normalizer/backend/dist/services/addressInventoryIntentSignals.js @@ -32,7 +32,8 @@ function hasInventoryMarginRankingSignal(text) { const hasNomenclatureCue = /(?:номенклатур|товар|позици|ассортимент|sku|item|product|goods)/iu.test(normalized); const hasMarginCue = /(?:прибыл|марж|рентаб|наценк|себестоим|выручк|profit|margin|profitability|gross\s+spread|cogs)/iu.test(normalized); const hasRankingCue = /(?:высок|низк|топ|сам(?:ая|ый|ое|ые|ой|ого|ому|ым|ых|ую)|больш|меньш|ранж|рейтинг|max|min|high|low|top|rank|best|worst)/iu.test(normalized); - return hasNomenclatureCue && hasMarginCue && hasRankingCue; + const hasCalculationCue = /(?:посчита\p{L}*|рассчита\p{L}*|расч[её]т\p{L}*|расчита\p{L}*|понять|calculate|compute)/iu.test(normalized); + return hasNomenclatureCue && hasMarginCue && (hasRankingCue || hasCalculationCue); } function hasInventoryOnHandSignal(text) { const hasColloquialStockSnapshotCue = /(?:что|С‡[еёо])\s+(?:Сѓ\s+нас\s+)?РЅР°\s+склад(?:Рµ|Сѓ|РѕРј|ах)(?=$|[\s,.;:!?])/iu.test(text); diff --git a/llm_normalizer/backend/dist/services/addressQueryClassifier.js b/llm_normalizer/backend/dist/services/addressQueryClassifier.js index 9877dbe..8c2d143 100644 --- a/llm_normalizer/backend/dist/services/addressQueryClassifier.js +++ b/llm_normalizer/backend/dist/services/addressQueryClassifier.js @@ -17,6 +17,11 @@ const ADDRESS_ACTION_TOKENS = [ "показ", "проверь", "провер", + "посчитай", + "посчитать", + "рассчитай", + "рассчитать", + "понять", "чекни", "чекн", "глянь", @@ -102,6 +107,10 @@ const ADDRESS_ENTITY_TOKENS = [ "чек", "доход", "выруч", + "прибыл", + "марж", + "рентаб", + "себестоим", "сделк", "бюджет", "топ", @@ -402,6 +411,14 @@ function detectAddressQuestionMode(userMessage) { const hasFollowupSignal = hasAddressFollowupSignal(text); const hasSelectedObjectInventoryFollowup = hasSelectedObjectInventoryFollowupSignal(text); const hasAccountCode = hasAccountCodeAnchor(text); + const hasInventoryProfitabilitySignal = (0, inventoryLifecycleCueHelpers_1.hasInventoryProfitabilityCue)(text); + if (hasInventoryProfitabilitySignal && hasAddressEntity && !hasDeepReasoning) { + return { + mode: "address_query", + confidence: "high", + reasons: ["inventory_profitability_signal_detected", "address_entity_detected"] + }; + } if (hasAddressAction && (hasAddressEntity || hasAccountCode) && !hasDeepReasoning) { return { mode: "address_query", diff --git a/llm_normalizer/backend/dist/services/address_runtime/inventoryReplyBuilders.js b/llm_normalizer/backend/dist/services/address_runtime/inventoryReplyBuilders.js index 1d284a6..8b9d398 100644 --- a/llm_normalizer/backend/dist/services/address_runtime/inventoryReplyBuilders.js +++ b/llm_normalizer/backend/dist/services/address_runtime/inventoryReplyBuilders.js @@ -92,6 +92,12 @@ function asksForInventoryMarginBasis(userMessage) { const text = String(userMessage ?? "").toLowerCase(); return (/(?:из\s+чего|как\s+(?:ты\s+)?(?:это\s+)?посчитал|какие\s+поля|чего\s+не\s+хватает|не\s+хватает|точн(?:ой|ая|ую)?\s+марж|basis|source|fields|calculated|missing)/iu.test(text) && /(?:марж|прибыл|себестоимост|выручк|margin|profit|cogs|revenue)/iu.test(text)); } +function asksInventoryMarginFromPaymentOrBank(userMessage) { + const text = String(userMessage ?? "").toLowerCase(); + return (/(?:марж|прибыл|рентаб|profit|margin)/iu.test(text) && + /(?:товар|номенклатур|inventory|item|sku)/iu.test(text) && + /(?:банк|банковск|выписк|плат[её]ж|оплат|payment|bank|statement)/iu.test(text)); +} function inventoryRowItemLabel(row, deps) { return deps.summarizeInventoryTraceRows([row]).item; } @@ -471,6 +477,26 @@ function composeInventoryReply(intent, rows, options, deps) { const totalSpread = totalRevenue - totalCostProxy; const topMarginEntry = highMargin[0] ?? null; const marginBasisRequested = asksForInventoryMarginBasis(options.userMessage); + const paymentOrBankFalseSourceRequested = asksInventoryMarginFromPaymentOrBank(options.userMessage); + if (paymentOrBankFalseSourceRequested) { + const lines = [ + "По оплатам и банку такой показатель нельзя честно подтвердить: платежи показывают денежный поток и факт оплаты, а не связь реализации с себестоимостью по номенклатуре." + ]; + (0, inventoryReplyPresentation_1.appendInventoryBulletSection)(lines, "Корректная база для маржинальности:", [ + "выручка реализации по номенклатуре;", + "себестоимостная база реализации по той же номенклатуре;", + "валовая разница и процент валовой маржи." + ]); + (0, inventoryReplyPresentation_1.appendInventoryBulletSection)(lines, "Что можно сделать дальше:", [ + `посчитать управленческий рейтинг по выручке и себестоимостной базе за период ${periodLabel};`, + "отдельно сверить оплаты и банк как денежный поток, но не использовать их как расчетную базу." + ]); + (0, inventoryReplyPresentation_1.appendInventoryBulletSection)(lines, "Граница ответа:", [ + "оплаты могут помочь сверить поступление денег, но сами по себе не подтверждают валовую прибыль по товарам;", + "строгий бухгалтерский расчет требует проводок реализации и себестоимости, а не только банковских движений." + ]); + return (0, replyContracts_1.buildFactualSummaryReply)(lines, (0, replyContracts_1.buildConfirmedBalanceSemantics)("medium", false)); + } if (confirmedEntries.length === 0) { const costBaseRowsRequested = asksForInventoryCostBaseRows(options.userMessage); const lines = [ diff --git a/llm_normalizer/backend/src/services/addressIntentResolver.ts b/llm_normalizer/backend/src/services/addressIntentResolver.ts index b7323a9..1e4e681 100644 --- a/llm_normalizer/backend/src/services/addressIntentResolver.ts +++ b/llm_normalizer/backend/src/services/addressIntentResolver.ts @@ -2167,7 +2167,11 @@ function hasNomenclatureMarginRankingSignal(text: string): boolean { /(?:высок|низк|топ|сам(?:ая|ый|ое|ые|ой|ого|ому|ым|ых|ую)|больш|меньш|ранж|рейтинг|max|min|high|low|top|rank|best|worst)/iu.test( normalized ); - return hasNomenclatureCue && hasMarginCue && hasRankingCue; + const hasCalculationCue = + /(?:посчита\p{L}*|рассчита\p{L}*|расч[её]т\p{L}*|расчита\p{L}*|понять|calculate|compute)/iu.test( + normalized + ); + return hasNomenclatureCue && hasMarginCue && (hasRankingCue || hasCalculationCue); } function hasVatPeriodInspectionBridgeSignal(text: string): boolean { diff --git a/llm_normalizer/backend/src/services/addressInventoryIntentSignals.ts b/llm_normalizer/backend/src/services/addressInventoryIntentSignals.ts index dd1c9f8..2a4f537 100644 --- a/llm_normalizer/backend/src/services/addressInventoryIntentSignals.ts +++ b/llm_normalizer/backend/src/services/addressInventoryIntentSignals.ts @@ -53,7 +53,11 @@ function hasInventoryMarginRankingSignal(text: string): boolean { /(?:высок|низк|топ|сам(?:ая|ый|ое|ые|ой|ого|ому|ым|ых|ую)|больш|меньш|ранж|рейтинг|max|min|high|low|top|rank|best|worst)/iu.test( normalized ); - return hasNomenclatureCue && hasMarginCue && hasRankingCue; + const hasCalculationCue = + /(?:посчита\p{L}*|рассчита\p{L}*|расч[её]т\p{L}*|расчита\p{L}*|понять|calculate|compute)/iu.test( + normalized + ); + return hasNomenclatureCue && hasMarginCue && (hasRankingCue || hasCalculationCue); } function hasInventoryOnHandSignal(text: string): boolean { diff --git a/llm_normalizer/backend/src/services/addressQueryClassifier.ts b/llm_normalizer/backend/src/services/addressQueryClassifier.ts index bbf7338..fab3b99 100644 --- a/llm_normalizer/backend/src/services/addressQueryClassifier.ts +++ b/llm_normalizer/backend/src/services/addressQueryClassifier.ts @@ -22,6 +22,11 @@ const ADDRESS_ACTION_TOKENS = [ "показ", "проверь", "провер", + "посчитай", + "посчитать", + "рассчитай", + "рассчитать", + "понять", "чекни", "чекн", "глянь", @@ -108,6 +113,10 @@ const ADDRESS_ENTITY_TOKENS = [ "чек", "доход", "выруч", + "прибыл", + "марж", + "рентаб", + "себестоим", "сделк", "бюджет", "топ", @@ -427,6 +436,15 @@ export function detectAddressQuestionMode(userMessage: string): AddressModeDetec const hasFollowupSignal = hasAddressFollowupSignal(text); const hasSelectedObjectInventoryFollowup = hasSelectedObjectInventoryFollowupSignal(text); const hasAccountCode = hasAccountCodeAnchor(text); + const hasInventoryProfitabilitySignal = hasInventoryProfitabilityCue(text); + + if (hasInventoryProfitabilitySignal && hasAddressEntity && !hasDeepReasoning) { + return { + mode: "address_query", + confidence: "high", + reasons: ["inventory_profitability_signal_detected", "address_entity_detected"] + }; + } if (hasAddressAction && (hasAddressEntity || hasAccountCode) && !hasDeepReasoning) { return { diff --git a/llm_normalizer/backend/src/services/address_runtime/inventoryReplyBuilders.ts b/llm_normalizer/backend/src/services/address_runtime/inventoryReplyBuilders.ts index 71e71ec..8558aff 100644 --- a/llm_normalizer/backend/src/services/address_runtime/inventoryReplyBuilders.ts +++ b/llm_normalizer/backend/src/services/address_runtime/inventoryReplyBuilders.ts @@ -179,6 +179,15 @@ function asksForInventoryMarginBasis(userMessage: string | null | undefined): bo ); } +function asksInventoryMarginFromPaymentOrBank(userMessage: string | null | undefined): boolean { + const text = String(userMessage ?? "").toLowerCase(); + return ( + /(?:марж|прибыл|рентаб|profit|margin)/iu.test(text) && + /(?:товар|номенклатур|inventory|item|sku)/iu.test(text) && + /(?:банк|банковск|выписк|плат[её]ж|оплат|payment|bank|statement)/iu.test(text) + ); +} + interface InventoryMarginRankingEntry { item: string; revenue: number; @@ -649,6 +658,26 @@ export function composeInventoryReply( const totalSpread = totalRevenue - totalCostProxy; const topMarginEntry = highMargin[0] ?? null; const marginBasisRequested = asksForInventoryMarginBasis(options.userMessage); + const paymentOrBankFalseSourceRequested = asksInventoryMarginFromPaymentOrBank(options.userMessage); + if (paymentOrBankFalseSourceRequested) { + const lines = [ + "По оплатам и банку такой показатель нельзя честно подтвердить: платежи показывают денежный поток и факт оплаты, а не связь реализации с себестоимостью по номенклатуре." + ]; + appendInventoryBulletSection(lines, "Корректная база для маржинальности:", [ + "выручка реализации по номенклатуре;", + "себестоимостная база реализации по той же номенклатуре;", + "валовая разница и процент валовой маржи." + ]); + appendInventoryBulletSection(lines, "Что можно сделать дальше:", [ + `посчитать управленческий рейтинг по выручке и себестоимостной базе за период ${periodLabel};`, + "отдельно сверить оплаты и банк как денежный поток, но не использовать их как расчетную базу." + ]); + appendInventoryBulletSection(lines, "Граница ответа:", [ + "оплаты могут помочь сверить поступление денег, но сами по себе не подтверждают валовую прибыль по товарам;", + "строгий бухгалтерский расчет требует проводок реализации и себестоимости, а не только банковских движений." + ]); + return buildFactualSummaryReply(lines, buildConfirmedBalanceSemantics("medium", false)); + } if (confirmedEntries.length === 0) { const costBaseRowsRequested = asksForInventoryCostBaseRows(options.userMessage); const lines: string[] = [ diff --git a/llm_normalizer/backend/tests/addressInventoryIntentSignals.test.ts b/llm_normalizer/backend/tests/addressInventoryIntentSignals.test.ts index 828dfed..0a585ba 100644 --- a/llm_normalizer/backend/tests/addressInventoryIntentSignals.test.ts +++ b/llm_normalizer/backend/tests/addressInventoryIntentSignals.test.ts @@ -27,6 +27,23 @@ describe("addressInventoryIntentSignals", () => { expect(result?.reasons).toContain("inventory_margin_ranking_signal_detected"); }); + it("classifies calculate-margin nomenclature wording with false-source guards as margin ranking", () => { + const result = resolveInventoryAddressIntent( + "\u041f\u043e\u0441\u0447\u0438\u0442\u0430\u0439 \u043c\u0430\u0440\u0436\u0438\u043d\u0430\u043b\u044c\u043d\u043e\u0441\u0442\u044c \u0442\u043e\u0432\u0430\u0440\u043d\u043e\u0439 \u043d\u043e\u043c\u0435\u043d\u043a\u043b\u0430\u0442\u0443\u0440\u044b \u0437\u0430 2020 \u0433\u043e\u0434, \u043d\u0435 \u041e\u0421 \u0438 \u043d\u0435 \u0430\u043c\u043e\u0440\u0442\u0438\u0437\u0430\u0446\u0438\u044e." + ); + + expect(result?.intent).toBe("inventory_margin_ranking_for_nomenclature"); + expect(result?.reasons).toContain("inventory_margin_ranking_signal_detected"); + }); + + it("keeps payment-bank false-source wording in margin contour", () => { + const result = resolveAddressIntent( + "\u041c\u043e\u0436\u043d\u043e \u0431\u044b\u0441\u0442\u0440\u043e \u043f\u043e\u043d\u044f\u0442\u044c \u043c\u0430\u0440\u0436\u0438\u043d\u0430\u043b\u044c\u043d\u043e\u0441\u0442\u044c \u0442\u043e\u0432\u0430\u0440\u043e\u0432 \u0437\u0430 2020 \u0433\u043e\u0434 \u043f\u043e \u043e\u043f\u043b\u0430\u0442\u0430\u043c \u0438 \u0431\u0430\u043d\u043a\u0443?" + ); + + expect(result.intent).toBe("inventory_margin_ranking_for_nomenclature"); + }); + it("classifies selected-object purchase provenance wording through the extracted inventory owner", () => { const result = resolveInventoryAddressIntent("selected object supplier provenance"); diff --git a/llm_normalizer/backend/tests/addressQueryRuntimeM23.test.ts b/llm_normalizer/backend/tests/addressQueryRuntimeM23.test.ts index 7837904..e881557 100644 --- a/llm_normalizer/backend/tests/addressQueryRuntimeM23.test.ts +++ b/llm_normalizer/backend/tests/addressQueryRuntimeM23.test.ts @@ -131,6 +131,15 @@ describe("address query shape classifier", () => { expect(result.mode).toBe("address_query"); }); + it("keeps calculate margin wording in address lane before bank false-source cues can steal it", () => { + const result = detectAddressQuestionMode( + "\u041c\u043e\u0436\u043d\u043e \u0431\u044b\u0441\u0442\u0440\u043e \u043f\u043e\u043d\u044f\u0442\u044c \u043c\u0430\u0440\u0436\u0438\u043d\u0430\u043b\u044c\u043d\u043e\u0441\u0442\u044c \u0442\u043e\u0432\u0430\u0440\u043e\u0432 \u0437\u0430 2020 \u0433\u043e\u0434 \u043f\u043e \u043e\u043f\u043b\u0430\u0442\u0430\u043c \u0438 \u0431\u0430\u043d\u043a\u0443?" + ); + + expect(result.mode).toBe("address_query"); + expect(result.reasons).toContain("inventory_profitability_signal_detected"); + }); + it("extracts item anchor for inventory provenance questions", () => { const filters = extractAddressFilters( "От какого поставщика куплен товар Шкаф картотечный?", diff --git a/llm_normalizer/backend/tests/addressReplyBuildersRegression.test.ts b/llm_normalizer/backend/tests/addressReplyBuildersRegression.test.ts index 0065cd3..8cae26b 100644 --- a/llm_normalizer/backend/tests/addressReplyBuildersRegression.test.ts +++ b/llm_normalizer/backend/tests/addressReplyBuildersRegression.test.ts @@ -579,4 +579,66 @@ describe("address reply builders regressions", () => { expect(result?.text).not.toContain("входящих денежных поступлений"); expect(result?.text).not.toContain("амортизац"); }); + + it("answers payment-bank margin false-source questions as a boundary before any ranking", () => { + const result = composeInventoryReply( + "inventory_margin_ranking_for_nomenclature", + [ + { + amount: 1000, + quantity: 1, + item: "Товар A", + period: "2020-05-20", + registrator: "Реализация товаров" + } as any, + { + amount: 400, + quantity: 1, + item: "Товар A", + period: "2020-01-10", + registrator: "Поступление товаров" + } as any + ], + { + userMessage: + "Можно быстро понять маржинальность товаров за 2020 год по оплатам и банку?", + periodFrom: "2020-01-01", + periodTo: "2020-12-31" + }, + { + resolvePayablesAsOfDate: () => "2020-12-31", + buildInventoryOnHandAggregate: () => [], + uniqueStrings: (values: string[]) => Array.from(new Set(values)), + formatDateRu: (value: string) => value, + formatNumberWithDots: (value: number, fractionDigits = 0) => value.toFixed(fractionDigits), + formatMoneyRub: (value: number) => `${value} ₽`, + isInventoryPurchaseMovement: (row: any) => String(row.registrator ?? "").includes("Поступление"), + summarizeInventoryTraceRows: (rows: any[]) => ({ + item: rows[0]?.item ?? null, + warehouses: [], + organizations: [], + counterparties: [], + documents: [], + firstPeriod: null, + lastPeriod: null, + totalAmount: 0 + }), + formatInventoryTraceRows: () => [], + hasInventoryPurchaseDateActionFocus: () => false, + inventoryTraceDateLabel: () => "", + extractInventoryCounterpartyCandidates: () => [], + buildInventoryAgingByItemAggregate: () => [], + formatInventoryAgingRows: () => [], + isInventorySaleMovement: (row: any) => String(row.registrator ?? "").includes("Реализация") + } + ); + + expect(result?.text.split("\n")[0]).toContain("По оплатам и банку"); + expect(result?.text.split("\n")[0]).toContain("нельзя честно подтвердить"); + expect(result?.text).toContain("выручка реализации"); + expect(result?.text).toContain("себестоимостная база"); + expect(result?.text).toContain("Что можно сделать дальше"); + expect(result?.text).not.toContain("Самая маржинальная позиция"); + expect(result?.text).not.toMatch(/(?:оплат[аы]|банк|payment_document).{0,80}(?:источник|достаточ|посчитал|марж[ау])/iu); + }); }); diff --git a/scripts/domain_case_loop.py b/scripts/domain_case_loop.py index bb432e7..ed06c4f 100644 --- a/scripts/domain_case_loop.py +++ b/scripts/domain_case_loop.py @@ -256,6 +256,9 @@ GUARDED_INSUFFICIENCY_PRIMARY_MARKERS = ( "\u0442\u043e\u0447\u043d\u044b\u0435", "\u043d\u0435 \u043f\u043e\u0434\u0442\u0432\u0435\u0440\u0436\u0434\u0435\u043d", "\u043d\u0435 \u043f\u043e\u0434\u0442\u0432\u0435\u0440\u0436\u0434\u0451\u043d", + "\u043d\u0435 \u043f\u043e\u0434\u0442\u0432\u0435\u0440\u0436\u0434\u0430", + "\u043d\u0435\u043b\u044c\u0437\u044f \u0447\u0435\u0441\u0442\u043d\u043e \u043f\u043e\u0434\u0442\u0432\u0435\u0440\u0434", + "\u043d\u0435\u043b\u044c\u0437\u044f \u043a\u043e\u0440\u0440\u0435\u043a\u0442\u043d\u043e \u043e\u043f\u0440\u0435\u0434\u0435\u043b", ) GUARDED_INSUFFICIENCY_LIMITATION_MARKERS = ( "\u043f\u0440\u0435\u0434\u0432\u0430\u0440\u0438\u0442\u0435\u043b\u044c\u043d", @@ -265,12 +268,16 @@ GUARDED_INSUFFICIENCY_LIMITATION_MARKERS = ( "\u043d\u0435 \u043f\u043e\u0434\u0442\u0432\u0435\u0440\u0436\u0434\u0451\u043d\u043d\u043e\u0435 \u0441\u0430\u043b\u044c\u0434\u043e", "\u043d\u0435 \u0434\u043e\u043a\u0430\u0437\u044b\u0432\u0430\u0435\u0442 \u043e\u0441\u0442\u0430\u0442\u043e\u043a", "\u043d\u0435 \u0444\u0438\u043d\u0430\u043b\u044c\u043d\u044b\u0439 \u0440\u0435\u0435\u0441\u0442\u0440", + "\u0433\u0440\u0430\u043d\u0438\u0446\u0430 \u043e\u0442\u0432\u0435\u0442\u0430", + "\u0440\u0430\u0441\u0447\u0435\u0442\u043d\u0443\u044e \u0431\u0430\u0437\u0443", + "\u0440\u0430\u0441\u0447\u0451\u0442\u043d\u0443\u044e \u0431\u0430\u0437\u0443", ) GUARDED_INSUFFICIENCY_RESULT_MODES = {"heuristic_candidates"} GUARDED_INSUFFICIENCY_TRUTH_MODES = {"limited"} GUARDED_INSUFFICIENCY_ANSWER_SHAPES = {"limited_with_reason"} BUSINESS_EXPECTED_RESULT_MODES = { "clarification_required", + "honest_boundary_with_next_action", "limited_accounting_answer", "evidence_or_honest_boundary", "ranking_or_limited_accounting_answer", @@ -966,6 +973,23 @@ def is_margin_profitability_step(step_output: dict[str, Any]) -> bool: question = str(step_output.get("question_resolved") or step_output.get("question_template") or "") if is_nomenclature_margin_context(step_output, question): return True + margin_context_values = [ + str(step_output.get("scenario_id") or ""), + str(step_output.get("target_id") or ""), + str(step_output.get("fix_goal") or ""), + str(step_output.get("business_mismatch") or ""), + str(step_output.get("minimal_patch_direction") or ""), + *normalize_string_list(step_output.get("signals")), + ] + margin_context = " ".join(margin_context_values).casefold() + if ( + "inventory_margin_ranking_for_nomenclature" in margin_context + or "inventory_inventory_margin_ranking_for_nomenclature" in margin_context + or "margin_false_source" in margin_context + or "payment_false_source" in margin_context + or ("margin" in margin_context and ("оплат" in margin_context or "банк" in margin_context)) + ): + return True tokens = [ str(step_output.get("expected_business_answer_contract") or ""), str(step_output.get("required_answer_contract") or ""), @@ -978,6 +1002,8 @@ def derive_repair_issue_code(step_output: dict[str, Any], problem_type: str) -> violated = normalize_string_list(step_output.get("violated_invariants")) if "domain_leak_accounting_route" in violated and is_margin_profitability_step(step_output): return "margin_domain_leak_accounting_route" + if is_margin_profitability_step(step_output) and problem_type in {"route_gap", "capability_gap", "evidence_gap"}: + return "margin_domain_leak_accounting_route" for issue_code in ( "technical_garbage_in_answer", "business_direct_answer_missing", @@ -2141,6 +2167,36 @@ def is_nomenclature_margin_context(step_state: dict[str, Any], question: str) -> return has_subject and has_margin_signal and has_rank_signal +def is_margin_false_source_boundary_answer(step_state: dict[str, Any], question: str, assistant_text: str) -> bool: + tags = set(normalize_string_list(step_state.get("semantic_tags"))) + question_text = _review_text(question) + answer_text = _review_text(assistant_text) + has_false_source_question = ( + "payment_false_source" in tags + or ( + ("марж" in question_text or "прибыл" in question_text) + and ("товар" in question_text or "номенклатур" in question_text) + and ("оплат" in question_text or "банк" in question_text) + ) + ) + if not has_false_source_question: + return False + rejects_source = any( + marker in answer_text + for marker in ( + "нельзя", + "не подтвержд", + "не подтвержда", + "не использовать", + "не является", + "не расчет", + "не расчёт", + ) + ) + names_correct_basis = "выруч" in answer_text and "себестоим" in answer_text + return rejects_source and names_correct_basis + + def build_business_first_review(step_state: dict[str, Any]) -> dict[str, Any]: question = str(step_state.get("question_resolved") or step_state.get("question_template") or "").strip() assistant_text = str(step_state.get("assistant_text") or "") @@ -2171,11 +2227,18 @@ def build_business_first_review(step_state: dict[str, Any]) -> dict[str, Any]: limited_answer = _has_any_marker(assistant_text, BUSINESS_LIMITED_ANSWER_MARKERS) has_next_action = _has_any_marker(assistant_text, BUSINESS_NEXT_ACTION_MARKERS) nomenclature_margin_context = is_nomenclature_margin_context(step_state, question) - wrong_margin_domain_hits = ( + raw_wrong_margin_domain_hits = ( _marker_hits(assistant_text, NOMENCLATURE_MARGIN_WRONG_DOMAIN_ANSWER_MARKERS) if nomenclature_margin_context else [] ) + if raw_wrong_margin_domain_hits and is_margin_false_source_boundary_answer(step_state, question, assistant_text): + allowed_false_source_boundary_hits = {"банковск", "списание с расчетного", "списание с расчётного"} + wrong_margin_domain_hits = [ + hit for hit in raw_wrong_margin_domain_hits if hit not in allowed_false_source_boundary_hits + ] + else: + wrong_margin_domain_hits = raw_wrong_margin_domain_hits margin_contract_hits = ( _marker_hits(assistant_text, NOMENCLATURE_MARGIN_EXPECTED_ANSWER_MARKERS) if nomenclature_margin_context @@ -2440,6 +2503,21 @@ def business_expected_result_mode_matches(expected_result_mode: str, step_state: and reply_type in {"partial_coverage", "factual", "factual_with_explanation"} ) + if expected_result_mode == "honest_boundary_with_next_action": + business_review = step_state.get("business_first_review") if isinstance(step_state.get("business_first_review"), dict) else {} + return ( + clean_business_review + and bool(assistant_text) + and bool(business_review.get("next_action_present")) + and ( + truth_mode in GUARDED_INSUFFICIENCY_TRUTH_MODES + or answer_shape in GUARDED_INSUFFICIENCY_ANSWER_SHAPES + or step_state.get("balance_confirmed") is False + or is_margin_false_source_boundary_answer(step_state, str(step_state.get("question_resolved") or ""), assistant_text) + ) + and reply_type in {"partial_coverage", "factual", "factual_with_explanation"} + ) + if expected_result_mode == "ranking_or_limited_accounting_answer": return ( clean_business_review @@ -4357,6 +4435,16 @@ def normalize_analyst_priority_repair_target(raw_target: dict[str, Any], index: if not root_cause_layers: root_cause_layers = [problem_type] issue_code = str(raw_target.get("issue_code") or problem_type or "other").strip() + issue_probe = { + **raw_target, + "scenario_id": scenario_id, + "target_id": f"{scenario_id}:{step_id}", + "problem_type": problem_type, + "root_cause_layers": root_cause_layers, + "fix_goal": fix_goal, + } + if issue_code in {"route_gap", "capability_gap", "evidence_gap"} and is_margin_profitability_step(issue_probe): + issue_code = "margin_domain_leak_accounting_route" catalog_entry = issue_catalog_entry(issue_code) return { "issue_code": issue_code, @@ -5212,6 +5300,18 @@ def build_issue_catalog_snapshot(repair_targets: dict[str, Any], catalog: dict[s } +def detector_evidence_paths_for_target(target: dict[str, Any]) -> list[str]: + explicit = normalize_string_list(target.get("evidence_paths")) + if explicit: + return explicit + refs = target.get("artifact_refs") if isinstance(target.get("artifact_refs"), dict) else {} + step_state_path = str(refs.get("step_state_json") or "").strip() + if not step_state_path: + return [] + step_state = Path(step_state_path) + return [str(step_state.with_name("output.md")), str(step_state.with_name("turn.json"))] + + def build_detector_candidates(repair_targets: dict[str, Any], catalog: dict[str, Any] | None = None) -> dict[str, Any]: source = catalog if isinstance(catalog, dict) else load_issue_catalog() issues = source.get("issues") if isinstance(source.get("issues"), dict) else {} @@ -5225,6 +5325,7 @@ def build_detector_candidates(repair_targets: dict[str, Any], catalog: dict[str, detectors = normalize_string_list(entry.get("detectors")) if not detectors and issue_code: detectors = [f"{issue_code}_detector"] + evidence_paths = detector_evidence_paths_for_target(target) for detector in detectors: key = (issue_code, detector) if key in seen: @@ -5236,7 +5337,7 @@ def build_detector_candidates(repair_targets: dict[str, Any], catalog: dict[str, "detector": detector, "severity": target.get("severity"), "sample_target_id": target.get("target_id"), - "evidence_paths": target.get("evidence_paths") or [], + "evidence_paths": evidence_paths, } ) return {