From 0fb5190cbd7d443d10886eba8409e8affe3e8c8e Mon Sep 17 00:00:00 2001 From: dctouch Date: Sat, 18 Apr 2026 23:53:20 +0300 Subject: [PATCH] =?UTF-8?q?=D0=90=D1=80=D1=85=D0=B8=D1=82=D0=B5=D0=BA?= =?UTF-8?q?=D1=82=D1=83=D1=80=D0=B0:=20=D0=B2=D1=8B=D0=BD=D0=B5=D1=81?= =?UTF-8?q?=D1=82=D0=B8=20grounded=20answer-inspection=20=D0=B2=20shared?= =?UTF-8?q?=20policy=20=D0=B8=20=D0=BE=D0=B1=D0=BD=D0=BE=D0=B2=D0=B8=D1=82?= =?UTF-8?q?=D1=8C=20=D1=81=D1=82=D0=B0=D1=82=D1=83=D1=81=20turnaround?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../08 - current_status_audit_2026-04-17.md | 53 ++++++++++--------- ...ontinuity_stabilization_plan_2026-04-17.md | 9 ++++ ..._multidomain_readiness_audit_2026-04-18.md | 36 +++++++++++-- .../11 - architecture_turnaround/README.md | 31 +++++++---- .../assistantLivingChatRuntimeAdapter.js | 7 +-- .../services/assistantMemoryRecapPolicy.js | 29 ++++++++++ .../assistantLivingChatRuntimeAdapter.ts | 10 ++-- .../services/assistantMemoryRecapPolicy.ts | 40 ++++++++++++++ .../assistantLivingChatRuntimeAdapter.test.ts | 17 +++--- .../tests/assistantMemoryRecapPolicy.test.ts | 37 +++++++++++++ 10 files changed, 215 insertions(+), 54 deletions(-) diff --git a/docs/ARCH/11 - architecture_turnaround/08 - current_status_audit_2026-04-17.md b/docs/ARCH/11 - architecture_turnaround/08 - current_status_audit_2026-04-17.md index bcfffdd..0ac7249 100644 --- a/docs/ARCH/11 - architecture_turnaround/08 - current_status_audit_2026-04-17.md +++ b/docs/ARCH/11 - architecture_turnaround/08 - current_status_audit_2026-04-17.md @@ -16,22 +16,24 @@ It is the current-state audit that answers: This snapshot is based on: -- `graphify-out/GRAPH_REPORT.md` rebuilt on `2026-04-17` +- `graphify-out/GRAPH_REPORT.md` rebuilt on `2026-04-18` - current owner modules in `llm_normalizer/backend/src/services/` - current scenario acceptance scripts under `scripts/` - current AGENT semantic source catalog under `docs/orchestration/` - 
live replay comparison between: - - `address_truth_harness_phase5_company_selection_and_activity_age_live_20260417_iter6` - - `address_truth_harness_phase7_meta_domain_mix_live_20260417` + - `address_truth_harness_phase12_wider_saved_session_pool_live_20260418_rerun10` + - `address_truth_harness_phase14_counterparty_tail_resume_live_20260418_rerun2` + - `address_truth_harness_phase15_answer_inspection_followup_live_20260418_rerun7` - [10 - regression_breakpoint_analysis_2026-04-17.md](./10%20-%20regression_breakpoint_analysis_2026-04-17.md) +- [11 - continuity_stabilization_plan_2026-04-17.md](./11%20-%20continuity_stabilization_plan_2026-04-17.md) ## Graph Snapshot Latest graph rebuild: -- `5312 nodes` -- `11408 edges` -- `136 communities` +- `5352 nodes` +- `11506 edges` +- `134 communities` Most relevant current god nodes for turnaround `11`: @@ -130,9 +132,9 @@ This is enough to build targeted semantic packs that are not single-domain toy s ## Honest Phase Status -Turnaround implementation progress: `~88%` +Turnaround implementation progress: `~92%` -Pre-expansion readiness: `~62%` +Pre-expansion readiness: `~64%` This split is intentional. @@ -158,7 +160,7 @@ Reason: ### Phase 2. State And Transition Contracts -Status: `74%` +Status: `84%` Reason: @@ -168,9 +170,9 @@ Reason: Remaining debt: -- there is still no single authoritative continuity contract for live mixed sessions; +- there is still no single authoritative continuity contract for every live mixed session path; - continuity priority is still split across route policy, transition policy, recap policy, navigation state, and coordinator glue; -- the mixed replay can still lose the active frame and fall into clarification or wrong-family reuse. +- the flagship repaired chains are now stable, but more non-flagship saved-session paths still need proof before this phase can be treated as expansion-safe. ### Phase 3. Capability Contracts @@ -211,7 +213,7 @@ Remaining debt: ### Phase 5. 
AssistantService Extraction -Status: `78%` +Status: `81%` Reason: @@ -240,7 +242,7 @@ Remaining debt: ### Phase 7. Scenario Acceptance As Primary Gate -Status: `68%` +Status: `79%` Reason: @@ -250,25 +252,26 @@ Reason: Remaining debt: -- narrow packs and seam tests are much stronger than before, but mixed saved-session runtime is still under-protected; -- the current phase7 mixed replay fails on continuity-critical edges, so acceptance cannot yet be treated as expansion-safe; -- coverage breadth should continue to grow as new domain slices are hardened, but stability of the existing mixed runtime is the immediate gate. +- narrow packs and seam tests are much stronger than before, and the main saved-session flagship replay family is now green; +- acceptance is no longer failing on the original continuity-critical edges, but replay breadth is still under the intended multi-domain blast radius; +- coverage breadth should continue to grow across additional mixed trajectories before expansion is treated as low-risk. ## Current Breakpoint Evidence -The strongest current architectural reading is not "a few routes regressed". +The strongest current architectural reading is no longer "the system still collapses on the original mixed continuity breakpoint". -It is: +It is now: -- `phase5_company_selection_and_activity_age` is green end-to-end; -- `phase7_meta_domain_mix` still fails on root inventory, selected-object continuity, same-date restore, and cross-domain same-date pivot; -- therefore the core capability families still exist, but mixed-session continuity authority is not stable. 
+- `phase12_wider_saved_session_pool` is green end-to-end on the broader flagship saved-session family; +- `phase14_counterparty_tail_resume` is green on a different late-session counterparty/inventory/activity contour; +- `phase15_answer_inspection_followup` is green on grounded self-correction plus neighboring VAT bridge continuity; +- therefore the original collapse has been materially repaired, and the main remaining risk has shifted from acute failure to incomplete generalization. In practical terms, the active breakpoint is now: -- owner extraction has progressed far enough to split decisions across multiple layers; -- saved-session mixed runtime still does not have one governing continuity authority; -- clarification and recap can therefore outrank or outrun the real grounded business thread. +- owner extraction has progressed far enough to make the system materially safer than before; +- saved-session mixed runtime still does not have one fully final governing authority consumed uniformly across every hot path; +- replay breadth is now the main honest gate before multi-domain expansion, not the old flagship continuity collapse itself. ## What Has Improved Relative To The Original State @@ -304,7 +307,7 @@ The project now has real route, transition, recap, and boundary owners, but the from one governing contract. -This is the highest-risk debt before domain expansion. +This is still the highest-risk debt before domain expansion, even though the repaired replay families are now green. ### 2. 
`assistantService.ts` is still too large diff --git a/docs/ARCH/11 - architecture_turnaround/11 - continuity_stabilization_plan_2026-04-17.md b/docs/ARCH/11 - architecture_turnaround/11 - continuity_stabilization_plan_2026-04-17.md index f0c2a00..9030fee 100644 --- a/docs/ARCH/11 - architecture_turnaround/11 - continuity_stabilization_plan_2026-04-17.md +++ b/docs/ARCH/11 - architecture_turnaround/11 - continuity_stabilization_plan_2026-04-17.md @@ -391,6 +391,15 @@ Still open after the accepted phase12 replay: - self-correction over a previous exact answer can now coexist with selected-object continuity instead of breaking the session into unsupported chat; - the neighboring bridge `selected-item trace -> VAT on purchase date` remains alive after the inspection turn, which proves that answer inspection no longer tears down the active business frame; - live replay `address_truth_harness_phase15_answer_inspection_followup_live_20260418_rerun5` is accepted `9/9`, which is the critical proof that this inspection-follow-up contour now survives as a real saved-session path instead of a one-off manual rescue. 
+- the next living-chat convergence pass now removes one more local owner from the grounded contextual reply layer: + - answer-inspection follow-ups are no longer interpreted only inside `assistantLivingChatRuntimeAdapter` via its own local selected-object reply builder; + - `assistantMemoryRecapPolicy` now owns all three grounded contextual deterministic reply classes used by living-chat: + - inventory-history capability follow-up; + - memory recap over grounded address context; + - answer inspection over the previous grounded selected-object answer; + - `resolveAssistantLivingChatMemoryContext(...)` now carries explicit `contextualAnswerInspectionFollowup` plus the grounded address debug that should be inspected, so living-chat reads one shared policy context instead of reconstructing this class from raw mode reason and direct continuity fields inline; + - this matters because living-chat is now less of a hidden parallel owner of grounded session semantics, and future answer-inspection / recap / capability follow-up fixes can land in one shared policy seam instead of splitting again across adapter-local builders; + - targeted recap and living-chat runtime tests stay green after this move, and live replay `address_truth_harness_phase15_answer_inspection_followup_live_20260418_rerun7` remains accepted `9/9`, which is the critical proof that the policy convergence did not reopen the phase15 contour. 
## Next Execution Slice (2026-04-18) diff --git a/docs/ARCH/11 - architecture_turnaround/13 - pre_multidomain_readiness_audit_2026-04-18.md b/docs/ARCH/11 - architecture_turnaround/13 - pre_multidomain_readiness_audit_2026-04-18.md index 8c76d7d..2ebacf0 100644 --- a/docs/ARCH/11 - architecture_turnaround/13 - pre_multidomain_readiness_audit_2026-04-18.md +++ b/docs/ARCH/11 - architecture_turnaround/13 - pre_multidomain_readiness_audit_2026-04-18.md @@ -21,11 +21,20 @@ Current verdict: - safe for continued hardening and controlled domain-by-domain expansion under replay gates; - not yet safe for wide parallel multi-agent domain expansion. +Current confidence snapshot: + +- turnaround implementation progress: `~92%` +- exit-from-danger-zone readiness: `~84%` +- pre-multidomain readiness: `~64%` +- latest graph snapshot: `5352 nodes`, `11506 edges`, `134 communities` + ## What Is Already True The following claims are now supported by code plus live replay evidence: -- phase7-phase11 mixed/manual replays are accepted on the repaired hot paths; +- phase12 flagship wider saved-session replay is accepted end-to-end; +- phase14 counterparty-tail late-session replay is accepted end-to-end; +- phase15 answer-inspection replay is accepted end-to-end; - continuity on validated inventory / VAT / counterparty / company-authority chains is materially stronger than before; - user-facing meta answers are significantly cleaner and no longer dominated by technical garbage; - the assistant no longer depends on the old ambient monolith behavior on the validated seams; @@ -52,6 +61,11 @@ This is much better than the old implicit monolith, but it still means: - the system relies on multiple synchronized interpretations of context instead of one final runtime authority object. +The important nuance now is: + +- this is no longer causing the original flagship collapse on the validated packs; +- it is still the main architectural reason broad future expansion remains risky. + ### 2. 
Core orchestration remains too concentrated The main pressure centers are still heavy: @@ -88,6 +102,12 @@ It is not yet broad enough for the intended next stage: This is the single biggest reason not to declare the architecture expansion-ready yet. +The current acceptance baseline is now strong enough to justify continued hardening without panic. + +It is not yet broad enough to justify saying: + +- "the orchestration layer is already platform-grade for many new domains at once." + ## Readiness Assessment ### Safe right now @@ -115,7 +135,7 @@ The system should not be considered ready for the next level until all of the fo ## Recommended Next Execution Sequence -### Pass 12. Continuity authority completion +### Pass 16. Continuity authority completion Goal: @@ -125,7 +145,7 @@ Target: - transition / route / clarification should consume one continuity snapshot before making divergent decisions. -### Pass 13. Wider saved-session acceptance pool +### Pass 17. Wider saved-session acceptance pool Goal: @@ -135,7 +155,7 @@ Target: - several saved sessions covering inventory, VAT, counterparty, payables/receivables, meta interrupts, and cross-domain pivots. -### Pass 14. Human answer shaping cleanup +### Pass 18. Human answer shaping cleanup Goal: @@ -145,7 +165,7 @@ Target: - product-quality business answers on already-correct truth paths. -### Pass 15. Coordinator pressure reduction +### Pass 19. Coordinator pressure reduction Goal: @@ -168,3 +188,9 @@ The correct reading is: - collapse averted; - stabilization real; - expansion still gated. + +More precisely: + +- the project has moved out of the dangerous regression zone; +- the next risk is not the old collapse repeating immediately, but overestimating general readiness before enough non-flagship saved-session proof exists; +- the right move is to keep hardening architecture-first and widen replay proof before calling the system multi-domain ready. 
diff --git a/docs/ARCH/11 - architecture_turnaround/README.md b/docs/ARCH/11 - architecture_turnaround/README.md index 3da2dd5..3da0280 100644 --- a/docs/ARCH/11 - architecture_turnaround/README.md +++ b/docs/ARCH/11 - architecture_turnaround/README.md @@ -45,21 +45,34 @@ It now documents a turnaround that is already operational in code, already mater Current honest status: -- turnaround implementation progress: `~90%` -- exit-from-danger-zone readiness: `~78%` -- pre-multidomain readiness: `~58%` -- graph snapshot after latest rebuild: `5339 nodes`, `11476 edges`, `134 communities` +- turnaround implementation progress: `~92%` +- exit-from-danger-zone readiness: `~84%` +- pre-multidomain readiness: `~64%` +- graph snapshot after latest rebuild: `5352 nodes`, `11506 edges`, `134 communities` - current breakpoint: - the validated hot paths are no longer structurally broken; - - but mixed continuity is still not governed by one fully central runtime authority; - - wider saved-session proof is still too narrow for low-risk multi-domain rollout; - - answer shaping is still heavier and more template-driven than the target product feel. + - flagship continuity collapse is no longer the primary risk; + - the main remaining risk is incomplete convergence toward one true runtime authority plus replay breadth still below the intended multi-domain blast radius; + - product shaping is now secondary debt, not the primary blocker. 
- main remaining architectural pressure: - no single fully authoritative continuity contract consumed by all hot runtime owners - residual coordinator/legacy pressure inside `assistantService.ts` - central domain-intent pressure inside `resolveAddressIntent()` + - replay breadth still narrower than the intended multi-domain rollout surface - remaining answer-semantics pressure inside `composeStage.ts` / `answerComposer.ts` +Latest live proof now includes: + +- `address_truth_harness_phase12_wider_saved_session_pool_live_20260418_rerun10` accepted `20/20` +- `address_truth_harness_phase14_counterparty_tail_resume_live_20260418_rerun2` accepted `10/10` +- `address_truth_harness_phase15_answer_inspection_followup_live_20260418_rerun7` accepted `9/9` + +Current architectural reading: + +- the system is already materially past the dangerous regression breakpoint; +- it is now safe for continued architecture hardening and controlled domain-by-domain enablement under replay gates; +- it is still not safe to declare broad low-risk multi-domain expansion. + For the detailed audit, current percentages, and remaining debt, read: - [08 - current_status_audit_2026-04-17.md](./08%20-%20current_status_audit_2026-04-17.md) @@ -118,13 +131,13 @@ and start being described as: - "a stateful exact-data assistant with explicit transition contracts and isolated truth gating." -As of `2026-04-18`, the project is already materially closer to the target description and no longer in the same acute collapse state, but mixed-session continuity is still not governed by one runtime authority strongly enough to justify low-risk multi-domain expansion. +As of `2026-04-18`, the project is already materially closer to the target description and is no longer in the same acute collapse state. The remaining blocker is no longer the original continuity failure itself, but the unfinished convergence toward one runtime authority plus still-insufficient replay breadth for low-risk multi-domain expansion. 
The biggest remaining blockers are: - split continuity ownership across route / transition / recap / coordinator glue; - saved-session acceptance still too narrow compared with the intended domain-expansion blast radius; -- clarification precedence still too strong in mixed sessions; +- clarification precedence is much better than before, but still not yet proven widely enough outside the repaired replay family; - residual `assistantService` overload; - central intent pressure in `resolveAddressIntent()`; - remaining answer-semantics pressure in `composeStage.ts` and `answerComposer.ts`. diff --git a/llm_normalizer/backend/dist/services/assistantLivingChatRuntimeAdapter.js b/llm_normalizer/backend/dist/services/assistantLivingChatRuntimeAdapter.js index 0c7096e..9b91c8c 100644 --- a/llm_normalizer/backend/dist/services/assistantLivingChatRuntimeAdapter.js +++ b/llm_normalizer/backend/dist/services/assistantLivingChatRuntimeAdapter.js @@ -142,9 +142,10 @@ async function runAssistantLivingChatRuntime(input) { }); const contextualInventoryHistoryCapabilityFollowup = memoryRecapContext.contextualInventoryHistoryCapabilityFollowup; const contextualMemoryRecapFollowup = memoryRecapContext.contextualMemoryRecapFollowup; + const contextualAnswerInspectionFollowup = memoryRecapContext.contextualAnswerInspectionFollowup; const lastGroundedInventoryAddressDebug = memoryRecapContext.lastGroundedInventoryAddressDebug; const lastMemoryAddressDebug = memoryRecapContext.lastMemoryAddressDebug; - const contextualAnswerInspectionFollowup = String(input.modeDecision?.reason ?? 
"") === "answer_inspection_followup_detected"; + const lastAnswerInspectionAddressDebug = memoryRecapContext.lastAnswerInspectionAddressDebug; if (capabilityMetaQuery && (destructiveSignal || dangerSignal)) { chatText = input.buildAssistantSafetyRefusalReply(); livingChatSource = "deterministic_safety_refusal"; @@ -210,8 +211,8 @@ async function runAssistantLivingChatRuntime(input) { livingChatSource = "deterministic_memory_recap_contract"; } else if (contextualAnswerInspectionFollowup) { - chatText = buildSelectedObjectAnswerInspectionReply({ - addressDebug: continuitySnapshot.lastGroundedItemAddressDebug ?? continuitySnapshot.lastGroundedAddressDebug, + chatText = (0, assistantMemoryRecapPolicy_1.buildSelectedObjectAnswerInspectionReply)({ + addressDebug: lastAnswerInspectionAddressDebug, toNonEmptyString: input.toNonEmptyString }); livingChatSource = "deterministic_answer_inspection_contract"; diff --git a/llm_normalizer/backend/dist/services/assistantMemoryRecapPolicy.js b/llm_normalizer/backend/dist/services/assistantMemoryRecapPolicy.js index b45f0af..fc2e782 100644 --- a/llm_normalizer/backend/dist/services/assistantMemoryRecapPolicy.js +++ b/llm_normalizer/backend/dist/services/assistantMemoryRecapPolicy.js @@ -3,6 +3,7 @@ Object.defineProperty(exports, "__esModule", { value: true }); exports.buildInventoryHistoryCapabilityFollowupReply = buildInventoryHistoryCapabilityFollowupReply; exports.buildAddressMemoryRecapReply = buildAddressMemoryRecapReply; +exports.buildSelectedObjectAnswerInspectionReply = buildSelectedObjectAnswerInspectionReply; exports.resolveAssistantLivingChatMemoryContext = resolveAssistantLivingChatMemoryContext; exports.createAssistantMemoryRecapPolicy = createAssistantMemoryRecapPolicy; const assistantContinuityPolicy_1 = require("./assistantContinuityPolicy"); @@ -175,9 +176,33 @@ function buildAddressMemoryRecapReply(input) { } return "Да, помню предыдущий адресный контур. 
Могу кратко напомнить, что мы уже подтвердили, или сразу продолжить следующий шаг."; } +function buildSelectedObjectAnswerInspectionReply(input) { + const contextFacts = (0, assistantContinuityPolicy_1.resolveAddressDebugContextFacts)(input.addressDebug, input.toNonEmptyString); + const itemLabel = contextFacts.item ?? "эта позиция"; + const detectedIntent = String(input.addressDebug?.detected_intent ?? ""); + if (detectedIntent === "inventory_sale_trace_for_item") { + return [ + `Да, если так прозвучало, это ошибка чтения ответа. «${itemLabel}» здесь не контрагент, а сама позиция, по которой мы смотрели продажу.`, + "В предыдущем ответе я показывал документы выбытия по этой позиции. Покупатель в доступных данных отдельно не выделен, поэтому назвать контрагента-покупателя я там не мог.", + "Если хочешь, следующим шагом могу отдельно проверить, можно ли вытащить покупателя по связанным документам реализации." + ].join(" "); + } + if (detectedIntent === "inventory_purchase_provenance_for_item" || + detectedIntent === "inventory_purchase_documents_for_item") { + return [ + `Да, если так прозвучало, это ошибка чтения ответа. «${itemLabel}» здесь не контрагент, а сама позиция / номенклатура.`, + "В предыдущем ответе речь шла о закупке этой позиции: я перечислял поставщиков или закупочные документы по ней, а не называл саму позицию контрагентом." + ].join(" "); + } + return [ + `Да, если так прозвучало, это ошибка чтения ответа. «${itemLabel}» здесь не контрагент, а выбранный объект разбора.`, + "Я сейчас уточняю именно смысл предыдущего grounded-ответа по этой позиции, а не запускаю новый адресный поиск." + ].join(" "); +} function resolveAssistantLivingChatMemoryContext(input) { const contextualInventoryHistoryCapabilityFollowup = String(input.modeDecisionReason ?? "") === "inventory_history_capability_followup_detected"; const contextualMemoryRecapFollowup = String(input.modeDecisionReason ?? 
"") === "memory_recap_followup_detected"; + const contextualAnswerInspectionFollowup = String(input.modeDecisionReason ?? "") === "answer_inspection_followup_detected"; const continuity = (0, assistantContinuityPolicy_1.resolveAssistantContinuitySnapshot)({ sessionItems: input.sessionItems, toNonEmptyString @@ -185,10 +210,14 @@ function resolveAssistantLivingChatMemoryContext(input) { return { contextualInventoryHistoryCapabilityFollowup, contextualMemoryRecapFollowup, + contextualAnswerInspectionFollowup, lastGroundedInventoryAddressDebug: contextualInventoryHistoryCapabilityFollowup ? continuity.lastGroundedInventoryAddressDebug : null, lastMemoryAddressDebug: contextualMemoryRecapFollowup + ? continuity.lastGroundedItemAddressDebug ?? continuity.lastGroundedAddressDebug + : null, + lastAnswerInspectionAddressDebug: contextualAnswerInspectionFollowup ? continuity.lastGroundedItemAddressDebug ?? continuity.lastGroundedAddressDebug : null }; diff --git a/llm_normalizer/backend/src/services/assistantLivingChatRuntimeAdapter.ts b/llm_normalizer/backend/src/services/assistantLivingChatRuntimeAdapter.ts index dc2bafd..d7811f9 100644 --- a/llm_normalizer/backend/src/services/assistantLivingChatRuntimeAdapter.ts +++ b/llm_normalizer/backend/src/services/assistantLivingChatRuntimeAdapter.ts @@ -1,5 +1,6 @@ import { buildAddressMemoryRecapReply as buildAddressMemoryRecapReplyFromPolicy, + buildSelectedObjectAnswerInspectionReply as buildSelectedObjectAnswerInspectionReplyFromPolicy, buildInventoryHistoryCapabilityFollowupReply as buildInventoryHistoryCapabilityFollowupReplyFromPolicy, resolveAssistantLivingChatMemoryContext } from "./assistantMemoryRecapPolicy"; @@ -242,10 +243,11 @@ export async function runAssistantLivingChatRuntime( const contextualInventoryHistoryCapabilityFollowup = memoryRecapContext.contextualInventoryHistoryCapabilityFollowup; const contextualMemoryRecapFollowup = memoryRecapContext.contextualMemoryRecapFollowup; + const 
contextualAnswerInspectionFollowup = + memoryRecapContext.contextualAnswerInspectionFollowup; const lastGroundedInventoryAddressDebug = memoryRecapContext.lastGroundedInventoryAddressDebug; const lastMemoryAddressDebug = memoryRecapContext.lastMemoryAddressDebug; - const contextualAnswerInspectionFollowup = - String(input.modeDecision?.reason ?? "") === "answer_inspection_followup_detected"; + const lastAnswerInspectionAddressDebug = memoryRecapContext.lastAnswerInspectionAddressDebug; if (capabilityMetaQuery && (destructiveSignal || dangerSignal)) { chatText = input.buildAssistantSafetyRefusalReply(); @@ -308,8 +310,8 @@ export async function runAssistantLivingChatRuntime( activeOrganization = scopedOrganization ?? activeOrganization; livingChatSource = "deterministic_memory_recap_contract"; } else if (contextualAnswerInspectionFollowup) { - chatText = buildSelectedObjectAnswerInspectionReply({ - addressDebug: continuitySnapshot.lastGroundedItemAddressDebug ?? continuitySnapshot.lastGroundedAddressDebug, + chatText = buildSelectedObjectAnswerInspectionReplyFromPolicy({ + addressDebug: lastAnswerInspectionAddressDebug, toNonEmptyString: input.toNonEmptyString }); livingChatSource = "deterministic_answer_inspection_contract"; diff --git a/llm_normalizer/backend/src/services/assistantMemoryRecapPolicy.ts b/llm_normalizer/backend/src/services/assistantMemoryRecapPolicy.ts index 779f34b..12640ba 100644 --- a/llm_normalizer/backend/src/services/assistantMemoryRecapPolicy.ts +++ b/llm_normalizer/backend/src/services/assistantMemoryRecapPolicy.ts @@ -34,8 +34,10 @@ export interface ResolveAssistantLivingChatMemoryContextInput { export interface AssistantLivingChatMemoryContext { contextualInventoryHistoryCapabilityFollowup: boolean; contextualMemoryRecapFollowup: boolean; + contextualAnswerInspectionFollowup: boolean; lastGroundedInventoryAddressDebug: Record | null; lastMemoryAddressDebug: Record | null; + lastAnswerInspectionAddressDebug: Record | null; } export 
interface AssistantMemoryRecapPolicyDeps { @@ -256,6 +258,38 @@ export function buildAddressMemoryRecapReply(input: { return "Да, помню предыдущий адресный контур. Могу кратко напомнить, что мы уже подтвердили, или сразу продолжить следующий шаг."; } +export function buildSelectedObjectAnswerInspectionReply(input: { + addressDebug: Record | null; + toNonEmptyString: (value: unknown) => string | null; +}): string { + const contextFacts = resolveAddressDebugContextFacts(input.addressDebug, input.toNonEmptyString); + const itemLabel = contextFacts.item ?? "эта позиция"; + const detectedIntent = String(input.addressDebug?.detected_intent ?? ""); + + if (detectedIntent === "inventory_sale_trace_for_item") { + return [ + `Да, если так прозвучало, это ошибка чтения ответа. «${itemLabel}» здесь не контрагент, а сама позиция, по которой мы смотрели продажу.`, + "В предыдущем ответе я показывал документы выбытия по этой позиции. Покупатель в доступных данных отдельно не выделен, поэтому назвать контрагента-покупателя я там не мог.", + "Если хочешь, следующим шагом могу отдельно проверить, можно ли вытащить покупателя по связанным документам реализации." + ].join(" "); + } + + if ( + detectedIntent === "inventory_purchase_provenance_for_item" || + detectedIntent === "inventory_purchase_documents_for_item" + ) { + return [ + `Да, если так прозвучало, это ошибка чтения ответа. «${itemLabel}» здесь не контрагент, а сама позиция / номенклатура.`, + "В предыдущем ответе речь шла о закупке этой позиции: я перечислял поставщиков или закупочные документы по ней, а не называл саму позицию контрагентом." + ].join(" "); + } + + return [ + `Да, если так прозвучало, это ошибка чтения ответа. «${itemLabel}» здесь не контрагент, а выбранный объект разбора.`, + "Я сейчас уточняю именно смысл предыдущего grounded-ответа по этой позиции, а не запускаю новый адресный поиск." 
+ ].join(" "); +} + export function resolveAssistantLivingChatMemoryContext( input: ResolveAssistantLivingChatMemoryContextInput ): AssistantLivingChatMemoryContext { @@ -263,6 +297,8 @@ export function resolveAssistantLivingChatMemoryContext( String(input.modeDecisionReason ?? "") === "inventory_history_capability_followup_detected"; const contextualMemoryRecapFollowup = String(input.modeDecisionReason ?? "") === "memory_recap_followup_detected"; + const contextualAnswerInspectionFollowup = + String(input.modeDecisionReason ?? "") === "answer_inspection_followup_detected"; const continuity = resolveAssistantContinuitySnapshot({ sessionItems: input.sessionItems, toNonEmptyString @@ -270,10 +306,14 @@ export function resolveAssistantLivingChatMemoryContext( return { contextualInventoryHistoryCapabilityFollowup, contextualMemoryRecapFollowup, + contextualAnswerInspectionFollowup, lastGroundedInventoryAddressDebug: contextualInventoryHistoryCapabilityFollowup ? continuity.lastGroundedInventoryAddressDebug : null, lastMemoryAddressDebug: contextualMemoryRecapFollowup + ? continuity.lastGroundedItemAddressDebug ?? continuity.lastGroundedAddressDebug + : null, + lastAnswerInspectionAddressDebug: contextualAnswerInspectionFollowup ? continuity.lastGroundedItemAddressDebug ?? continuity.lastGroundedAddressDebug : null }; diff --git a/llm_normalizer/backend/tests/assistantLivingChatRuntimeAdapter.test.ts b/llm_normalizer/backend/tests/assistantLivingChatRuntimeAdapter.test.ts index 7d4d1f8..69d5a2e 100644 --- a/llm_normalizer/backend/tests/assistantLivingChatRuntimeAdapter.test.ts +++ b/llm_normalizer/backend/tests/assistantLivingChatRuntimeAdapter.test.ts @@ -16,11 +16,8 @@ function buildRuntimeInput(overrides: Record = {}) { addressRuntimeMeta: null, traceIdFactory: () => "chat-trace-fixed", toNonEmptyString: (value: unknown) => { - if (typeof value !== "string") { - return null; - } - const trimmed = value.trim(); - return trimmed.length > 0 ? 
trimmed : null; + const text = String(value ?? "").trim(); + return text.length > 0 ? text : null; }, mergeKnownOrganizations: (values: unknown[]) => Array.from( @@ -161,7 +158,11 @@ describe("assistant living chat runtime adapter", () => { expect(output.debug?.living_chat_response_source).toBe("deterministic_smalltalk_with_proactive_scope_offer"); expect(output.debug?.living_chat_proactive_scope_offer_applied).toBe(true); expect(output.debug?.living_chat_data_scope_probe_org_count).toBe(3); - expect(output.debug?.assistant_known_organizations).toEqual(["ООО Альтернатива Плюс", "ООО Лайсвуд", "РАЙМ"]); + expect(output.debug?.assistant_known_organizations).toEqual([ + "ООО Альтернатива Плюс", + "ООО Лайсвуд", + "РАЙМ" + ]); }); it("does not add proactive organization offer after the session already has assistant context", async () => { @@ -220,6 +221,7 @@ describe("assistant living chat runtime adapter", () => { expect(output.debug?.living_chat_response_source).toBe("deterministic_memory_recap_contract"); expect(executeLlmChat).not.toHaveBeenCalled(); }); + it("uses continuity-backed active organization for organization-fact boundary even when session scope is empty", async () => { const executeLlmChat = vi.fn(async () => "raw-llm"); const input = buildRuntimeInput({ @@ -255,6 +257,7 @@ describe("assistant living chat runtime adapter", () => { expect(output.debug?.living_chat_continuity_active_organization).toBe("ООО Альтернатива Плюс"); expect(executeLlmChat).not.toHaveBeenCalled(); }); + it("builds deterministic answer inspection reply over grounded selected-object sale trace", async () => { const executeLlmChat = vi.fn(async () => "raw-llm"); const input = buildRuntimeInput({ @@ -283,9 +286,7 @@ describe("assistant living chat runtime adapter", () => { const output = await runAssistantLivingChatRuntime(input); expect(output.handled).toBe(true); - expect(output.chatText).toContain("не контрагент"); expect(output.chatText).toContain("Рабочая станция универсального 
специалиста"); - expect(output.chatText).toContain("Покупатель"); expect(output.debug?.living_chat_response_source).toBe("deterministic_answer_inspection_contract"); expect(executeLlmChat).not.toHaveBeenCalled(); }); diff --git a/llm_normalizer/backend/tests/assistantMemoryRecapPolicy.test.ts b/llm_normalizer/backend/tests/assistantMemoryRecapPolicy.test.ts index d4ea23a..b783b9e 100644 --- a/llm_normalizer/backend/tests/assistantMemoryRecapPolicy.test.ts +++ b/llm_normalizer/backend/tests/assistantMemoryRecapPolicy.test.ts @@ -1,6 +1,7 @@ import { describe, expect, it } from "vitest"; import { buildAddressMemoryRecapReply, + buildSelectedObjectAnswerInspectionReply, createAssistantMemoryRecapPolicy, resolveAssistantLivingChatMemoryContext } from "../src/services/assistantMemoryRecapPolicy"; @@ -241,4 +242,40 @@ describe("assistantMemoryRecapPolicy", () => { expect(reply).toContain("разобрали, кто поставлял"); expect(reply).toContain("подняли документы закупки"); }); + + it("resolves grounded answer inspection from shared memory context", () => { + const context = resolveAssistantLivingChatMemoryContext({ + modeDecisionReason: "answer_inspection_followup_detected", + sessionItems: [ + { + role: "assistant", + debug: { + execution_lane: "address_query", + answer_grounding_check: { + status: "grounded" + }, + detected_intent: "inventory_sale_trace_for_item", + extracted_filters: { + item: "Рабочая станция", + organization: "ООО Альтернатива Плюс", + as_of_date: "2016-03-31" + } + } + } + ] + }); + + const reply = buildSelectedObjectAnswerInspectionReply({ + addressDebug: context.lastAnswerInspectionAddressDebug, + toNonEmptyString: (value: unknown) => { + const text = String(value ?? "").trim(); + return text.length > 0 ? text : null; + } + }); + + expect(context.contextualAnswerInspectionFollowup).toBe(true); + expect(reply).toContain("не контрагент"); + expect(reply).toContain("Рабочая станция"); + expect(reply).toContain("Покупатель"); + }); });