diff --git a/AGENTS.md b/AGENTS.md index fb6baf2..f4d13b6 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -11,6 +11,15 @@ - `10` = high-risk architecture/runtime change with broad blast radius and mandatory close validation. - The score must reflect real project risk, not optimism, and should help the user decide how much manual attention and replay validation the change deserves. +## closeout_risk_reporting_rule +- After applying fixes, always provide `Потенциал регресса на текущем этапе: X%`. +- After applying fixes, always provide `Необходимость жирного ручного прогона: X%`. +- These two lines must be emitted together with the change-risk score and the ready commit title in every close-out. +- Both percentages must use an integer scale from `0%` to `100%`. +- `Потенциал регресса на текущем этапе` must reflect the real probability that nearby or not-yet-covered contours can regress at the current stabilization stage. +- `Необходимость жирного ручного прогона` must reflect how strongly the current change still needs a broad manual reality-check beyond unit tests, narrow replay, and build verification. +- The percentages must be honest, architecture-aware, and useful for deciding whether the current pass is safe enough to trust without additional human validation. + ## graphify This project has a graphify knowledge graph at graphify-out/. diff --git a/docs/ARCH/11 - architecture_turnaround/11 - continuity_stabilization_plan_2026-04-17.md b/docs/ARCH/11 - architecture_turnaround/11 - continuity_stabilization_plan_2026-04-17.md index ba0a867..cb2debc 100644 --- a/docs/ARCH/11 - architecture_turnaround/11 - continuity_stabilization_plan_2026-04-17.md +++ b/docs/ARCH/11 - architecture_turnaround/11 - continuity_stabilization_plan_2026-04-17.md @@ -335,6 +335,12 @@ Still open after the accepted phase12 replay: - this matters because mixed follow-up questions that pivot after assistant-side company fixation no longer depend on whether the previous address debug happened to still carry `organization` in its own extracted filters; - targeted transition regression now protects the case where grounded history is empty but assistant-side organization authority is already present; - wide saved-session replay `address_truth_harness_phase12_wider_saved_session_pool_live_20260418_rerun5` remains accepted `20/20`, which is the critical proof that this transition-layer convergence did not reopen the broader continuity path. +- the next active-context convergence pass now removes one more duplicate `address debug -> item/date/organization` parser from memory-recap and living-chat follow-up builders: + - `assistantContinuityPolicy` now exposes one shared helper for `item`, `organization`, and `scopedDate` extraction from grounded address debug, including root-frame fallback; + - `assistantMemoryRecapPolicy` now consumes this shared helper instead of manually rebuilding the same context from `extracted_filters` and `address_root_frame_context` in multiple places; + - this matters because deterministic memory-recap and historical-inventory capability replies now depend on the same context interpretation as the rest of continuity policy, rather than on a separate local parser that could drift on root-frame-only turns; + - targeted continuity / memory-recap / living-chat tests now protect the root-frame fallback path explicitly; + - wide saved-session replay `address_truth_harness_phase12_wider_saved_session_pool_live_20260418_rerun6` remains accepted `20/20`, which is the critical proof that this context-helper convergence did not reopen the broader living-chat continuity path. ## Next Execution Slice (2026-04-18) diff --git a/llm_normalizer/backend/dist/services/assistantContinuityPolicy.js b/llm_normalizer/backend/dist/services/assistantContinuityPolicy.js index 9316e63..820d4d5 100644 --- a/llm_normalizer/backend/dist/services/assistantContinuityPolicy.js +++ b/llm_normalizer/backend/dist/services/assistantContinuityPolicy.js @@ -5,6 +5,7 @@ exports.readAddressDebugFilters = readAddressDebugFilters; exports.readAddressDebugItem = readAddressDebugItem; exports.readAddressDebugOrganization = readAddressDebugOrganization; exports.readAddressDebugScopedDate = readAddressDebugScopedDate; +exports.resolveAddressDebugContextFacts = resolveAddressDebugContextFacts; exports.buildInventoryRootFrameFromAddressDebug = buildInventoryRootFrameFromAddressDebug; exports.isGroundedAddressDebug = isGroundedAddressDebug; exports.resolveAssistantContinuitySnapshot = resolveAssistantContinuitySnapshot; @@ -59,6 +60,13 @@ function readAddressDebugScopedDate(debug) { formatIsoDateForReply(rootFrameContext?.as_of_date) ?? formatIsoDateForReply(extractedFilters?.period_to)); } +function resolveAddressDebugContextFacts(debug, toNonEmptyString = fallbackToNonEmptyString) { + return { + item: readAddressDebugItem(debug, toNonEmptyString), + organization: readAddressDebugOrganization(debug, toNonEmptyString), + scopedDate: readAddressDebugScopedDate(debug) + }; +} function buildInventoryRootFrameFromAddressDebug(debug, toNonEmptyString = fallbackToNonEmptyString) { if (!debug || typeof debug !== "object") { return null; diff --git a/llm_normalizer/backend/dist/services/assistantMemoryRecapPolicy.js b/llm_normalizer/backend/dist/services/assistantMemoryRecapPolicy.js index 4761720..b45f0af 100644 --- a/llm_normalizer/backend/dist/services/assistantMemoryRecapPolicy.js +++ b/llm_normalizer/backend/dist/services/assistantMemoryRecapPolicy.js @@ -31,16 +31,9 @@ function hasExplicitRecapPromptSignal(samples) { return samples.some((sample) => /(?:что\s+мы\s+.*(?:обсуждали|выяснили)|что\s+уже\s+выяснили|что\s+уже\s+поняли|напомни\s+что\s+мы)/iu.test(sample)); } function buildInventoryHistoryCapabilityFollowupReply(input) { - const rootFrameContext = input.addressDebug?.address_root_frame_context && - typeof input.addressDebug.address_root_frame_context === "object" - ? input.addressDebug.address_root_frame_context - : null; - const extractedFilters = (0, assistantContinuityPolicy_1.readAddressDebugFilters)(input.addressDebug); - const organization = input.organization ?? - input.toNonEmptyString(rootFrameContext?.organization) ?? - input.toNonEmptyString(extractedFilters?.organization); - const lastAsOfDate = (0, assistantContinuityPolicy_1.formatIsoDateForReply)(rootFrameContext?.as_of_date) ?? - (0, assistantContinuityPolicy_1.formatIsoDateForReply)(extractedFilters?.as_of_date); + const contextFacts = (0, assistantContinuityPolicy_1.resolveAddressDebugContextFacts)(input.addressDebug, input.toNonEmptyString); + const organization = input.organization ?? contextFacts.organization; + const lastAsOfDate = contextFacts.scopedDate; const organizationPart = organization ? ` по компании «${organization}»` : ""; const referenceLine = lastAsOfDate ? `Да, могу. Сейчас мы уже смотрели складской срез${organizationPart} на ${lastAsOfDate}.` @@ -65,7 +58,7 @@ function normalizeRecapIdentity(value) { } function buildRecapFactLine(input) { const detectedIntent = String(input.debug?.detected_intent ?? ""); - const scopedDate = (0, assistantContinuityPolicy_1.readAddressDebugScopedDate)(input.debug); + const scopedDate = (0, assistantContinuityPolicy_1.resolveAddressDebugContextFacts)(input.debug).scopedDate; const itemPart = input.item ? `по позиции «${input.item}»` : null; const organizationPart = input.organization ? `по компании «${input.organization}»` : null; const datePart = scopedDate ? ` на ${scopedDate}` : ""; @@ -115,8 +108,9 @@ function collectRecentRecapFacts(input) { if (!(0, assistantContinuityPolicy_1.isGroundedAddressDebug)(item.debug, input.toNonEmptyString)) { continue; } - const debugItem = (0, assistantContinuityPolicy_1.readAddressDebugItem)(item.debug, input.toNonEmptyString); - const debugOrganization = (0, assistantContinuityPolicy_1.readAddressDebugOrganization)(item.debug, input.toNonEmptyString); + const debugContext = (0, assistantContinuityPolicy_1.resolveAddressDebugContextFacts)(item.debug, input.toNonEmptyString); + const debugItem = debugContext.item; + const debugOrganization = debugContext.organization; const itemMatches = currentItemKey ? normalizeRecapIdentity(debugItem) === currentItemKey : false; const organizationMatches = currentOrganizationKey ? normalizeRecapIdentity(debugOrganization) === currentOrganizationKey @@ -144,18 +138,10 @@ function collectRecentRecapFacts(input) { return facts.reverse(); } function buildAddressMemoryRecapReply(input) { - const extractedFilters = input.addressDebug?.extracted_filters && typeof input.addressDebug.extracted_filters === "object" - ? input.addressDebug.extracted_filters - : null; - const rootFrameContext = input.addressDebug?.address_root_frame_context && - typeof input.addressDebug.address_root_frame_context === "object" - ? input.addressDebug.address_root_frame_context - : null; - const item = (0, assistantContinuityPolicy_1.readAddressDebugItem)(input.addressDebug, input.toNonEmptyString); - const organization = input.organization ?? - input.toNonEmptyString(extractedFilters?.organization) ?? - input.toNonEmptyString(rootFrameContext?.organization); - const scopedDate = (0, assistantContinuityPolicy_1.readAddressDebugScopedDate)(input.addressDebug); + const contextFacts = (0, assistantContinuityPolicy_1.resolveAddressDebugContextFacts)(input.addressDebug, input.toNonEmptyString); + const item = contextFacts.item; + const organization = input.organization ?? contextFacts.organization; + const scopedDate = contextFacts.scopedDate; const recapFacts = collectRecentRecapFacts({ sessionItems: input.sessionItems, item, diff --git a/llm_normalizer/backend/src/services/assistantContinuityPolicy.ts b/llm_normalizer/backend/src/services/assistantContinuityPolicy.ts index b5189a5..5730ec2 100644 --- a/llm_normalizer/backend/src/services/assistantContinuityPolicy.ts +++ b/llm_normalizer/backend/src/services/assistantContinuityPolicy.ts @@ -27,6 +27,12 @@ export interface AssistantContinuitySnapshot { hasGroundedInventoryContext: boolean; } +export interface AssistantAddressDebugContextFacts { + item: string | null; + organization: string | null; + scopedDate: string | null; +} + export interface AssistantOrganizationAuthorityInput { sessionItems?: unknown[]; sessionKnownOrganizations?: unknown[]; @@ -118,6 +124,17 @@ export function readAddressDebugScopedDate(debug: Record | null ); } +export function resolveAddressDebugContextFacts( + debug: Record | null, + toNonEmptyString: (value: unknown) => string | null = fallbackToNonEmptyString +): AssistantAddressDebugContextFacts { + return { + item: readAddressDebugItem(debug, toNonEmptyString), + organization: readAddressDebugOrganization(debug, toNonEmptyString), + scopedDate: readAddressDebugScopedDate(debug) + }; +} + export function buildInventoryRootFrameFromAddressDebug( debug: Record | null, toNonEmptyString: (value: unknown) => string | null = fallbackToNonEmptyString diff --git a/llm_normalizer/backend/src/services/assistantMemoryRecapPolicy.ts b/llm_normalizer/backend/src/services/assistantMemoryRecapPolicy.ts index 87ba6ed..779f34b 100644 --- a/llm_normalizer/backend/src/services/assistantMemoryRecapPolicy.ts +++ b/llm_normalizer/backend/src/services/assistantMemoryRecapPolicy.ts @@ -1,12 +1,8 @@ // @ts-nocheck import { - formatIsoDateForReply, isGroundedAddressDebug, - readAddressDebugFilters, - readAddressDebugItem, - readAddressDebugOrganization, - readAddressDebugScopedDate, + resolveAddressDebugContextFacts, resolveAssistantContinuitySnapshot } from "./assistantContinuityPolicy"; @@ -92,19 +88,9 @@ export function buildInventoryHistoryCapabilityFollowupReply(input: { addressDebug: Record | null; toNonEmptyString: (value: unknown) => string | null; }): string { - const rootFrameContext = - input.addressDebug?.address_root_frame_context && - typeof input.addressDebug.address_root_frame_context === "object" - ? (input.addressDebug.address_root_frame_context as Record) - : null; - const extractedFilters = readAddressDebugFilters(input.addressDebug); - const organization = - input.organization ?? - input.toNonEmptyString(rootFrameContext?.organization) ?? - input.toNonEmptyString(extractedFilters?.organization); - const lastAsOfDate = - formatIsoDateForReply(rootFrameContext?.as_of_date) ?? - formatIsoDateForReply(extractedFilters?.as_of_date); + const contextFacts = resolveAddressDebugContextFacts(input.addressDebug, input.toNonEmptyString); + const organization = input.organization ?? contextFacts.organization; + const lastAsOfDate = contextFacts.scopedDate; const organizationPart = organization ? ` по компании «${organization}»` : ""; const referenceLine = lastAsOfDate ? `Да, могу. Сейчас мы уже смотрели складской срез${organizationPart} на ${lastAsOfDate}.` @@ -135,7 +121,7 @@ function buildRecapFactLine(input: { organization: string | null; }): string | null { const detectedIntent = String(input.debug?.detected_intent ?? ""); - const scopedDate = readAddressDebugScopedDate(input.debug); + const scopedDate = resolveAddressDebugContextFacts(input.debug).scopedDate; const itemPart = input.item ? `по позиции «${input.item}»` : null; const organizationPart = input.organization ? `по компании «${input.organization}»` : null; const datePart = scopedDate ? ` на ${scopedDate}` : ""; @@ -192,8 +178,9 @@ function collectRecentRecapFacts(input: { if (!isGroundedAddressDebug(item.debug, input.toNonEmptyString)) { continue; } - const debugItem = readAddressDebugItem(item.debug, input.toNonEmptyString); - const debugOrganization = readAddressDebugOrganization(item.debug, input.toNonEmptyString); + const debugContext = resolveAddressDebugContextFacts(item.debug, input.toNonEmptyString); + const debugItem = debugContext.item; + const debugOrganization = debugContext.organization; const itemMatches = currentItemKey ? normalizeRecapIdentity(debugItem) === currentItemKey : false; const organizationMatches = currentOrganizationKey ? normalizeRecapIdentity(debugOrganization) === currentOrganizationKey @@ -228,21 +215,10 @@ export function buildAddressMemoryRecapReply(input: { sessionItems?: unknown[]; toNonEmptyString: (value: unknown) => string | null; }): string { - const extractedFilters = - input.addressDebug?.extracted_filters && typeof input.addressDebug.extracted_filters === "object" - ? (input.addressDebug.extracted_filters as Record) - : null; - const rootFrameContext = - input.addressDebug?.address_root_frame_context && - typeof input.addressDebug.address_root_frame_context === "object" - ? (input.addressDebug.address_root_frame_context as Record) - : null; - const item = readAddressDebugItem(input.addressDebug, input.toNonEmptyString); - const organization = - input.organization ?? - input.toNonEmptyString(extractedFilters?.organization) ?? - input.toNonEmptyString(rootFrameContext?.organization); - const scopedDate = readAddressDebugScopedDate(input.addressDebug); + const contextFacts = resolveAddressDebugContextFacts(input.addressDebug, input.toNonEmptyString); + const item = contextFacts.item; + const organization = input.organization ?? contextFacts.organization; + const scopedDate = contextFacts.scopedDate; const recapFacts = collectRecentRecapFacts({ sessionItems: input.sessionItems, item, diff --git a/llm_normalizer/backend/tests/assistantContinuityPolicy.test.ts b/llm_normalizer/backend/tests/assistantContinuityPolicy.test.ts index 58f41c6..5904179 100644 --- a/llm_normalizer/backend/tests/assistantContinuityPolicy.test.ts +++ b/llm_normalizer/backend/tests/assistantContinuityPolicy.test.ts @@ -1,5 +1,8 @@ import { describe, expect, it } from "vitest"; -import { resolveAssistantOrganizationAuthority } from "../src/services/assistantContinuityPolicy"; +import { + resolveAddressDebugContextFacts, + resolveAssistantOrganizationAuthority +} from "../src/services/assistantContinuityPolicy"; describe("assistantContinuityPolicy organization authority", () => { it("prefers explicit assistant organization authority over older grounded continuity and merges known organizations once", () => { @@ -55,4 +58,21 @@ describe("assistantContinuityPolicy organization authority", () => { ]); expect(authority.organizationClarificationSelectionFromScope).toBe("Org Selected"); }); + + it("reads item, organization and scoped date from root-frame fallback when direct filters are missing", () => { + const facts = resolveAddressDebugContextFacts({ + anchor_type: "item", + anchor_value_resolved: "Рабочая станция", + address_root_frame_context: { + organization: 'ООО "Альтернатива Плюс"', + as_of_date: "2020-03-31" + } + }); + + expect(facts).toEqual({ + item: "Рабочая станция", + organization: 'ООО "Альтернатива Плюс"', + scopedDate: "31.03.2020" + }); + }); }); diff --git a/llm_normalizer/data/eval_cases/assistant_saved_session_runtime_job-ZZ7WuoccOO.json b/llm_normalizer/data/eval_cases/assistant_saved_session_runtime_job-ZZ7WuoccOO.json new file mode 100644 index 0000000..d5b4a0a --- /dev/null +++ b/llm_normalizer/data/eval_cases/assistant_saved_session_runtime_job-ZZ7WuoccOO.json @@ -0,0 +1,120 @@ +{ + "suite_id": "assistant_saved_session_runtime_job-ZZ7WuoccOO", + "suite_version": "0.1.0", + "schema_version": "assistant_saved_session_runtime_v0_1", + "title": "БОЛЬШОЙ ОБЩИЙ Ручная сессия 16.04.2026, 21:26:06", + "scenario_count": 1, + "case_ids": [ + "SAVED-001" + ], + "cases": [ + { + "case_id": "SAVED-001", + "scenario_tag": "saved_user_sessions_runtime", + "title": "БОЛЬШОЙ ОБЩИЙ Ручная сессия 16.04.2026, 21:26:06", + "question_type": "followup", + "broadness_level": "medium", + "turns": [ + { + "user_message": "приветик - че как там дела" + }, + { + "user_message": "расскажи что можешь интересного" + }, + { + "user_message": "кайф - что там на складе по остаткам?" + }, + { + "user_message": "АЛЬТЕРНАТИВА" + }, + { + "user_message": "а исторические остатки на другие даты умеешь?" + }, + { + "user_message": "давай на июль 2017" + }, + { + "user_message": "март 2016" + }, + { + "user_message": "По выбранному объекту \"Рабочая станция универсального специалиста (индивидуальное изготовление)\": где взяли это?" + }, + { + "user_message": "а кому продали?" + }, + { + "user_message": "у тебя написано кто контрагент: рабочая станция - это ошибка?" + }, + { + "user_message": "ндс можешь прикинуть на дату покупки рабочей станции?" + }, + { + "user_message": "а какой ндс мы должны сгрузить на март 2020?" + }, + { + "user_message": "прикинь какой ндс нам надо заплатить на февраль 2017" + }, + { + "user_message": "кто у нас самый доходный клиент за все время" + }, + { + "user_message": "кто нам должен денег на май 2017" + }, + { + "user_message": "а какой ндс мы должны примерно заплатить за этот период?" + }, + { + "user_message": "мы должны комуто денег на сегодня?" + }, + { + "user_message": "а нам?" + }, + { + "user_message": "какой у нас самый доходный год" + }, + { + "user_message": "а за 2017 мы скок заработали?" + }, + { + "user_message": "сколько вообще денег мы заработали за все время?" + }, + { + "user_message": "ты умеешь считать дельту по договорам?" + }, + { + "user_message": "по чепурнову покажи все доки" + }, + { + "user_message": "а по свк" + }, + { + "user_message": "а сейчас у нас есть что на складе?" + }, + { + "user_message": "что нам отгружал чепурнов? какой товар или услугу?" + }, + { + "user_message": "какие остатки на складе на сегодня" + }, + { + "user_message": "остатки на март 2016" + }, + { + "user_message": "хвосты покажи по счету 60 на август 2022" + }, + { + "user_message": "Есть ли остатки товара, которые закупались очень давно" + }, + { + "user_message": "Какие конкретно номенклатуры формируют остаток по складу на май 2020" + }, + { + "user_message": "а по Альтернативе Плюс сколько лет активности в базе 1С?" + }, + { + "user_message": "Как ты оценишь деятельность компании?" + } + ] + } + ] +} \ No newline at end of file