diff --git a/docs/ARCH/11 - architecture_turnaround/11 - continuity_stabilization_plan_2026-04-17.md b/docs/ARCH/11 - architecture_turnaround/11 - continuity_stabilization_plan_2026-04-17.md index 84565da..3e618f6 100644 --- a/docs/ARCH/11 - architecture_turnaround/11 - continuity_stabilization_plan_2026-04-17.md +++ b/docs/ARCH/11 - architecture_turnaround/11 - continuity_stabilization_plan_2026-04-17.md @@ -315,6 +315,16 @@ Still open after the accepted phase12 replay: - this matters because route, living-chat, data-scope, and early session-scope bootstrap are now closer to the same organization merge order instead of keeping a separate pre-route branch with its own drift risk; - targeted tests now explicitly protect the case where assistant-side continuity authority is present in prior assistant debug even when legacy history helpers are empty; - live replay `address_truth_harness_phase12_wider_saved_session_pool_live_20260418_rerun4` remains accepted `20/20`, which is the critical proof that this authority convergence did not reopen the wider saved-session path. 
+- the next architecture pass exposed and fixed a deeper hybrid-investigation contract break outside the flagship address chain: + - phase13 replay showed that hybrid anomaly/tails questions were no longer failing only on follow-up continuity; the root entry itself had stopped entering the hybrid lane because deep-turn normalization was calling the normalizer with `promptVersion=address_query_runtime_v1`; + - the model was already returning valid `normalized_query_v2_0_2` payloads for these questions, but the runtime validated them as legacy `v1`, discarded the parsed normalized object, and then dropped into claim-bound clarification with empty `fragments / route_summary`; + - deep-turn normalization now forces an actual normalizer contract (`promptVersion=normalizer_v2_0_2`, `schemaVersion=v2_0_2`) unless the caller already supplied a normalizer-family prompt explicitly; + - this is a real architecture fix, not a case patch: deep-turn no longer inherits a user-facing address runtime prompt as if it were a normalizer schema contract; + - targeted deep-turn normalization tests now protect this seam directly, including the case where an address-lane prompt would previously have caused silent `v1` validation and total fragment loss; + - live replay `address_truth_harness_phase13_hybrid_followup_authority_live_20260418_rerun3` is now accepted: + - root supplier tails anomaly questions re-enter `hybrid_store_plus_live` with grounded fragments and non-empty deterministic route summaries; + - narrowing follow-up for `2020-06 / account 60` now keeps hybrid/batch routing instead of collapsing into empty clarification; + - the broader hybrid investigation contour is therefore back under explicit runtime authority rather than ambient luck. ## Next Execution Slice (2026-04-18) @@ -330,13 +340,15 @@ This next slice should be executed in the following order: 1. Finish continuity authority convergence in the hot runtime path. 2. 
Widen saved-session replay coverage beyond the already repaired flagship chains. -3. Tighten human answer shaping on long exact answers without reintroducing template drift. -4. Only after that, begin controlled domain-by-domain expansion toward the multi-domain stage. +3. Prove that non-flagship hybrid/deep-turn contours survive the same runtime contracts instead of silently inheriting wrong prompt/schema assumptions. +4. Only after that, continue secondary answer-shaping cleanup where it materially affects acceptance. +5. Only after that, begin controlled domain-by-domain expansion toward the multi-domain stage. Current explicit goals for this slice: - fewer owners independently reconstruct `active context`; - more replay breadth before any large expansion claim; +- fewer hidden runtime contract mismatches between entry prompts, schema validation, and deep-turn orchestration; - cleaner user-facing business answers on already-correct truth paths; - lower risk that new domains multiply orchestration chaos faster than capability growth. diff --git a/docs/orchestration/address_truth_harness_phase13_hybrid_followup_authority.json b/docs/orchestration/address_truth_harness_phase13_hybrid_followup_authority.json new file mode 100644 index 0000000..e0dc3d5 --- /dev/null +++ b/docs/orchestration/address_truth_harness_phase13_hybrid_followup_authority.json @@ -0,0 +1,125 @@ +{ + "schema_version": "domain_truth_harness_spec_v1", + "scenario_id": "address_truth_harness_phase13_hybrid_followup_authority", + "domain": "address_phase13_hybrid_followup_authority", + "title": "Phase 13 hybrid investigation follow-up authority replay", + "description": "Focused AGENT replay for the non-flagship hybrid investigation contour. 
The scenario validates that anomaly investigation roots stay in the hybrid lane, narrowing follow-ups preserve investigation state instead of drifting into address live queries, period-close impact follow-ups reuse the active anomaly frame, and transliterated supplier-tail wording still enters the intended hybrid contour.", + "bindings": {}, + "steps": [ + { + "step_id": "step_01_supplier_tails_root", + "title": "Root supplier tails investigation enters the hybrid investigation lane", + "question": "Разложи хвосты по поставщикам: где разрыв между оплатой и документами выглядит системным.", + "allowed_reply_types": [ + "factual_with_explanation", + "factual", + "partial_coverage" + ], + "required_answer_patterns_any": [ + "(?i)разрыв|хвост|поставщик|оплат|документ", + "(?i)системн|проблем" + ], + "forbidden_answer_patterns": [ + "(?i)mcp fetch failed", + "(?i)address_query_runtime_v1", + "(?i)live-?запрос в v1", + "(?i)tool_gate_reason", + "(?i)address_mode" + ], + "criticality": "critical", + "semantic_tags": [ + "hybrid_investigation_root", + "settlements_60_62", + "anomaly_probe" + ] + }, + { + "step_id": "step_02_narrow_to_june_2020_account_60", + "title": "Narrowing follow-up keeps the same investigation instead of drifting into address live lane", + "question": "Сузь до периода 2020-06 и счета 60, покажи только случаи с максимальным риском закрытия периода.", + "allowed_reply_types": [ + "factual_with_explanation", + "factual", + "partial_coverage" + ], + "required_answer_patterns_any": [ + "(?i)2020", + "(?i)июн|06", + "(?i)счет 60|60", + "(?i)риск закрытия периода|закрыти[ея] периода|максимальн" + ], + "forbidden_answer_patterns": [ + "(?i)mcp fetch failed", + "(?i)live-?запрос в v1", + "(?i)адресн(ый|ого) live", + "(?i)выберите организац", + "(?i)tool_gate_reason", + "(?i)address_mode" + ], + "criticality": "critical", + "semantic_tags": [ + "hybrid_investigation_followup", + "period_narrowing", + "account_60", + "period_close_risk" + ] + }, + { + 
"step_id": "step_03_period_close_impact_followup", + "title": "Period-close impact follow-up reuses the active anomaly frame", + "question": "Что в текущих аномалиях сильнее всего повлияет на закрытие периода, если ничего не исправлять?", + "allowed_reply_types": [ + "factual_with_explanation", + "factual", + "partial_coverage", + "clarification_required" + ], + "required_answer_patterns_any": [ + "(?i)закрыти[ея] периода|период", + "(?i)аномал|риск|сильнее всего|наибольш" + ], + "forbidden_answer_patterns": [ + "(?i)нужны дополнительные уточнения по периоду или объекту проверки", + "(?i)domain_or_scope_unclear", + "(?i)insufficient_specificity", + "(?i)mcp fetch failed", + "(?i)tool_gate_reason", + "(?i)address_mode" + ], + "criticality": "critical", + "semantic_tags": [ + "hybrid_investigation_followup", + "period_close_impact", + "context_reuse" + ] + }, + { + "step_id": "step_04_translit_supplier_tails_root", + "title": "Transliterated supplier tails wording still resolves into the hybrid contour", + "question": "Prover schet 60 za 2020-06, gde taili postavshikov i kakie dokumenty ne zakryvayut oplaty.", + "allowed_reply_types": [ + "factual_with_explanation", + "factual", + "partial_coverage" + ], + "required_answer_patterns_any": [ + "(?i)60", + "(?i)2020", + "(?i)документ|оплат|поставщик|хвост|разрыв|tail" + ], + "forbidden_answer_patterns": [ + "(?i)mcp fetch failed", + "(?i)live-?запрос в v1", + "(?i)выберите организац", + "(?i)tool_gate_reason", + "(?i)address_mode" + ], + "criticality": "important", + "semantic_tags": [ + "hybrid_investigation_root", + "translit_wording", + "account_60" + ] + } + ] +} diff --git a/llm_normalizer/backend/dist/services/assistantDeepTurnNormalizationRuntimeAdapter.js b/llm_normalizer/backend/dist/services/assistantDeepTurnNormalizationRuntimeAdapter.js index f679adf..5515cb6 100644 --- a/llm_normalizer/backend/dist/services/assistantDeepTurnNormalizationRuntimeAdapter.js +++ 
b/llm_normalizer/backend/dist/services/assistantDeepTurnNormalizationRuntimeAdapter.js @@ -1,6 +1,24 @@ "use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.buildAssistantDeepTurnNormalizationRuntime = buildAssistantDeepTurnNormalizationRuntime; +const routeHintAdapter_1 = require("./routeHintAdapter"); +function resolveDeepTurnNormalizerPromptVersion(promptVersion) { + const normalized = String(promptVersion ?? "").trim().toLowerCase(); + if (normalized === "normalizer_v2_0_2" || + normalized === "normalizer_v2_0_1" || + normalized === "normalizer_v2" || + normalized === "normalizer_v1") { + return String(promptVersion); + } + return "normalizer_v2_0_2"; +} +function canSynthesizeRouteSummary(normalized) { + if (!normalized || typeof normalized !== "object") { + return false; + } + const source = normalized; + return Array.isArray(source.fragments); +} async function buildAssistantDeepTurnNormalizationRuntime(input) { const investigationState = input.sessionInvestigationState; const canUseFollowupBinding = input.featureInvestigationStateV1 && @@ -25,7 +43,8 @@ async function buildAssistantDeepTurnNormalizationRuntime(input) { baseUrl: input.payload.baseUrl, temperature: input.payload.temperature, maxOutputTokens: input.payload.maxOutputTokens, - promptVersion: input.payload.promptVersion ?? "address_query_runtime_v1", + promptVersion: resolveDeepTurnNormalizerPromptVersion(input.payload.promptVersion), + schemaVersion: "v2_0_2", systemPrompt: input.payload.systemPrompt, developerPrompt: input.payload.developerPrompt, domainPrompt: input.payload.domainPrompt, @@ -35,9 +54,18 @@ async function buildAssistantDeepTurnNormalizationRuntime(input) { useMock: Boolean(input.payload.useMock) }; const normalized = await input.normalize(normalizePayload); + const normalizedPayload = normalized.normalized; + const synthesizedRouteSummary = normalized.route_hint_summary ?? + (normalizedPayload && canSynthesizeRouteSummary(normalizedPayload) ? 
(0, routeHintAdapter_1.toRouteHintSummary)(normalizedPayload) : null); + const normalizedWithRouteSummary = synthesizedRouteSummary === normalized.route_hint_summary + ? normalized + : { + ...normalized, + route_hint_summary: synthesizedRouteSummary + }; return { followupBinding, normalizePayload, - normalized + normalized: normalizedWithRouteSummary }; } diff --git a/llm_normalizer/backend/src/services/assistantDeepTurnNormalizationRuntimeAdapter.ts b/llm_normalizer/backend/src/services/assistantDeepTurnNormalizationRuntimeAdapter.ts index 65c5416..f8258b7 100644 --- a/llm_normalizer/backend/src/services/assistantDeepTurnNormalizationRuntimeAdapter.ts +++ b/llm_normalizer/backend/src/services/assistantDeepTurnNormalizationRuntimeAdapter.ts @@ -2,6 +2,7 @@ import type { AssistantMessageRequestPayload } from "../types/assistant"; import type { NormalizeRequestPayload, NormalizeResponsePayload } from "../types/normalizer"; import type { InvestigationStateWithProblemUnits } from "../types/stage2ProblemUnits"; import type { AssistantFollowupUsage } from "./assistantFollowupUsage"; +import { toRouteHintSummary } from "./routeHintAdapter"; export interface AssistantDeepTurnFollowupBinding { normalizedQuestion: string; @@ -29,6 +30,27 @@ export interface BuildAssistantDeepTurnNormalizationRuntimeOutput { normalized: NormalizeResponsePayload; } +function resolveDeepTurnNormalizerPromptVersion(promptVersion: NormalizeRequestPayload["promptVersion"]): string { + const normalized = String(promptVersion ?? 
"").trim().toLowerCase(); + if ( + normalized === "normalizer_v2_0_2" || + normalized === "normalizer_v2_0_1" || + normalized === "normalizer_v2" || + normalized === "normalizer_v1" + ) { + return String(promptVersion); + } + return "normalizer_v2_0_2"; +} + +function canSynthesizeRouteSummary(normalized: NormalizeResponsePayload["normalized"]): boolean { + if (!normalized || typeof normalized !== "object") { + return false; + } + const source = normalized as unknown as Record<string, unknown>; + return Array.isArray(source.fragments); +} + export async function buildAssistantDeepTurnNormalizationRuntime( input: BuildAssistantDeepTurnNormalizationRuntimeInput ): Promise<BuildAssistantDeepTurnNormalizationRuntimeOutput> { @@ -58,7 +80,8 @@ export async function buildAssistantDeepTurnNormalizationRuntime( baseUrl: input.payload.baseUrl, temperature: input.payload.temperature, maxOutputTokens: input.payload.maxOutputTokens, - promptVersion: input.payload.promptVersion ?? "address_query_runtime_v1", + promptVersion: resolveDeepTurnNormalizerPromptVersion(input.payload.promptVersion), + schemaVersion: "v2_0_2", systemPrompt: input.payload.systemPrompt, developerPrompt: input.payload.developerPrompt, domainPrompt: input.payload.domainPrompt, @@ -69,10 +92,21 @@ export async function buildAssistantDeepTurnNormalizationRuntime( }; const normalized = await input.normalize(normalizePayload); + const normalizedPayload = normalized.normalized; + const synthesizedRouteSummary = + normalized.route_hint_summary ?? + (normalizedPayload && canSynthesizeRouteSummary(normalizedPayload) ? toRouteHintSummary(normalizedPayload) : null); + const normalizedWithRouteSummary: NormalizeResponsePayload = + synthesizedRouteSummary === normalized.route_hint_summary + ? 
normalized + : { + ...normalized, + route_hint_summary: synthesizedRouteSummary + }; return { followupBinding, normalizePayload, - normalized + normalized: normalizedWithRouteSummary }; } diff --git a/llm_normalizer/backend/tests/assistantDeepTurnNormalizationRuntimeAdapter.test.ts b/llm_normalizer/backend/tests/assistantDeepTurnNormalizationRuntimeAdapter.test.ts index de4d526..4eb44a9 100644 --- a/llm_normalizer/backend/tests/assistantDeepTurnNormalizationRuntimeAdapter.test.ts +++ b/llm_normalizer/backend/tests/assistantDeepTurnNormalizationRuntimeAdapter.test.ts @@ -66,6 +66,7 @@ describe("assistant deep turn normalization runtime adapter", () => { temperature: 0.2, maxOutputTokens: 333, promptVersion: "normalizer_v2_0_2", + schemaVersion: "v2_0_2", systemPrompt: "sys", developerPrompt: "dev", domainPrompt: "dom", @@ -123,7 +124,8 @@ describe("assistant deep turn normalization runtime adapter", () => { baseUrl: undefined, temperature: undefined, maxOutputTokens: undefined, - promptVersion: "address_query_runtime_v1", + promptVersion: "normalizer_v2_0_2", + schemaVersion: "v2_0_2", systemPrompt: undefined, developerPrompt: undefined, domainPrompt: undefined, @@ -142,4 +144,110 @@ describe("assistant deep turn normalization runtime adapter", () => { usage: null }); }); + + it("synthesizes route summary from normalized payload when normalize omits it", async () => { + const normalize = vi.fn(async () => ({ + trace_id: "trace-3", + ok: true, + normalized: { + schema_version: "normalized_query_v2_0_2", + user_message_raw: "Разложи хвосты по поставщикам", + message_in_scope: true, + scope_confidence: "high", + contains_multiple_tasks: false, + discarded_fragments: [], + global_notes: { + needs_clarification: false, + clarification_reason: null + }, + fragments: [ + { + fragment_id: "F1", + raw_fragment_text: "Разложи хвосты по поставщикам: где разрыв между оплатой и документами выглядит системным.", + normalized_fragment_text: + "Разложи хвосты по поставщикам: где 
разрыв между оплатой и документами выглядит системным.", + domain_relevance: "in_scope", + business_scope: "company_specific_accounting", + entity_hints: ["контрагент", "документ"], + account_hints: ["60"], + document_hints: ["документ"], + register_hints: [], + time_scope: { + type: "missing", + value: null, + confidence: "low" + }, + flags: { + has_multi_entity_scope: true, + asks_for_chain_explanation: true, + asks_for_ranking_or_top: false, + asks_for_period_summary: false, + asks_for_rule_check: false, + asks_for_anomaly_scan: true, + asks_for_exact_object_trace: false, + asks_for_evidence: true, + mentions_period_close_context: false + }, + semantic_hints: { + scope_target_kind: "none", + scope_target_text: null, + date_scope_kind: "missing", + self_scope_detected: false, + selected_object_scope_detected: false + }, + candidate_labels: ["cross_entity", "anomaly_probe"], + confidence: "medium", + execution_readiness: "executable_with_soft_assumptions", + clarification_reason: null, + soft_assumption_used: ["problem_scan_mode_enabled"], + route_status: "routed", + no_route_reason: null + } + ] + }, + route_hint_summary: null, + raw_model_output: {}, + validation: { passed: true, errors: [] }, + usage: { input_tokens: 1, output_tokens: 1, total_tokens: 2 }, + latency_ms: 1, + prompt_version: "normalizer_v2_0_2", + schema_version: "normalized_query_v2_0_2", + request_count_for_case: 1 + })); + + const runtime = await buildAssistantDeepTurnNormalizationRuntime({ + userMessage: "Разложи хвосты по поставщикам", + payload: { + llmProvider: "openai", + promptVersion: "address_query_runtime_v1", + useMock: true + }, + featureInvestigationStateV1: false, + featureStateFollowupBindingV1: false, + sessionInvestigationState: null, + buildFollowupStateBinding: vi.fn(), + normalize + }); + + expect(runtime.normalized.route_hint_summary).toEqual( + expect.objectContaining({ + mode: "deterministic_v2", + fallback: expect.objectContaining({ + type: "none" + }), + decisions: [ + 
expect.objectContaining({ + fragment_id: "F1", + route: "hybrid_store_plus_live" + }) + ] + }) + ); + expect(normalize).toHaveBeenCalledWith( + expect.objectContaining({ + promptVersion: "normalizer_v2_0_2", + schemaVersion: "v2_0_2" + }) + ); + }); }); diff --git a/llm_normalizer/data/eval_cases/assistant_saved_session_runtime_job-_tMDIcKCq-.json b/llm_normalizer/data/eval_cases/assistant_saved_session_runtime_job-_tMDIcKCq-.json new file mode 100644 index 0000000..498b763 --- /dev/null +++ b/llm_normalizer/data/eval_cases/assistant_saved_session_runtime_job-_tMDIcKCq-.json @@ -0,0 +1,120 @@ +{ + "suite_id": "assistant_saved_session_runtime_job-_tMDIcKCq-", + "suite_version": "0.1.0", + "schema_version": "assistant_saved_session_runtime_v0_1", + "title": "БОЛЬШОЙ ОБЩИЙ Ручная сессия 16.04.2026, 21:26:06", + "scenario_count": 1, + "case_ids": [ + "SAVED-001" + ], + "cases": [ + { + "case_id": "SAVED-001", + "scenario_tag": "saved_user_sessions_runtime", + "title": "БОЛЬШОЙ ОБЩИЙ Ручная сессия 16.04.2026, 21:26:06", + "question_type": "followup", + "broadness_level": "medium", + "turns": [ + { + "user_message": "приветик - че как там дела" + }, + { + "user_message": "расскажи что можешь интересного" + }, + { + "user_message": "кайф - что там на складе по остаткам?" + }, + { + "user_message": "АЛЬТЕРНАТИВА" + }, + { + "user_message": "а исторические остатки на другие даты умеешь?" + }, + { + "user_message": "давай на июль 2017" + }, + { + "user_message": "март 2016" + }, + { + "user_message": "По выбранному объекту \"Рабочая станция универсального специалиста (индивидуальное изготовление)\": где взяли это?" + }, + { + "user_message": "а кому продали?" + }, + { + "user_message": "у тебя написано кто контрагент: рабочая станция - это ошибка?" + }, + { + "user_message": "ндс можешь прикинуть на дату покупки рабочей станции?" + }, + { + "user_message": "а какой ндс мы должны сгрузить на март 2020?" 
+ }, + { + "user_message": "прикинь какой ндс нам надо заплатить на февраль 2017" + }, + { + "user_message": "кто у нас самый доходный клиент за все время" + }, + { + "user_message": "кто нам должен денег на май 2017" + }, + { + "user_message": "а какой ндс мы должны примерно заплатить за этот период?" + }, + { + "user_message": "мы должны комуто денег на сегодня?" + }, + { + "user_message": "а нам?" + }, + { + "user_message": "какой у нас самый доходный год" + }, + { + "user_message": "а за 2017 мы скок заработали?" + }, + { + "user_message": "сколько вообще денег мы заработали за все время?" + }, + { + "user_message": "ты умеешь считать дельту по договорам?" + }, + { + "user_message": "по чепурнову покажи все доки" + }, + { + "user_message": "а по свк" + }, + { + "user_message": "а сейчас у нас есть что на складе?" + }, + { + "user_message": "что нам отгружал чепурнов? какой товар или услугу?" + }, + { + "user_message": "какие остатки на складе на сегодня" + }, + { + "user_message": "остатки на март 2016" + }, + { + "user_message": "хвосты покажи по счету 60 на август 2022" + }, + { + "user_message": "Есть ли остатки товара, которые закупались очень давно" + }, + { + "user_message": "Какие конкретно номенклатуры формируют остаток по складу на май 2020" + }, + { + "user_message": "а по Альтернативе Плюс сколько лет активности в базе 1С?" + }, + { + "user_message": "Как ты оценишь деятельность компании?" + } + ] + } + ] +} \ No newline at end of file