From 963f0aa3729717ea62745c5f382d183b07e6e912 Mon Sep 17 00:00:00 2001 From: dctouch Date: Sun, 12 Apr 2026 01:24:46 +0300 Subject: [PATCH] =?UTF-8?q?=D0=93=D0=9B=D0=9E=D0=91=D0=90=D0=9B=D0=AC?= =?UTF-8?q?=D0=9D=D0=AB=D0=99=20=D0=A0=D0=95=D0=A4=D0=90=D0=9A=D0=A2=D0=9E?= =?UTF-8?q?=D0=A0=D0=98=D0=9D=D0=93=20=D0=90=D0=A0=D0=A5=D0=98=D0=A2=D0=95?= =?UTF-8?q?=D0=9A=D0=A2=D0=A3=D0=A0=D0=AB=20-=20=D0=AD=D1=82=D0=B0=D0=BF?= =?UTF-8?q?=204.5:=20=D0=B4=D0=BE=D0=B1=D0=B0=D0=B2=D0=BB=D0=B5=D0=BD=20?= =?UTF-8?q?=D0=B0=D1=83=D0=B4=D0=B8=D1=82=20=D0=BA=D0=BE=D0=BD=D1=82=D1=80?= =?UTF-8?q?=D0=B0=D0=BA=D1=82=D0=B0=20Stage=204=20=D0=B2=20debug/log=20?= =?UTF-8?q?=D0=B8=20=D0=B7=D0=B0=D0=BA=D1=80=D1=8B=D1=82=20=D1=80=D0=B0?= =?UTF-8?q?=D1=81=D1=88=D0=B8=D1=80=D0=B5=D0=BD=D0=BD=D1=8B=D0=B9=20qualit?= =?UTF-8?q?y-loop?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/TECH/1CLLMARCH-FACT.md | 32 +++++++++++- .../assistantDebugPayloadAssembler.js | 3 ++ .../services/assistantDeepTurnPackaging.js | 1 + .../services/assistantMessageLogAssembler.js | 3 ++ .../assistantStage4AnswerContractAudit.js | 38 ++++++++++++++ .../assistantDebugPayloadAssembler.ts | 4 ++ .../services/assistantDeepTurnPackaging.ts | 1 + .../services/assistantMessageLogAssembler.ts | 3 ++ .../assistantStage4AnswerContractAudit.ts | 51 +++++++++++++++++++ llm_normalizer/backend/src/types/assistant.ts | 10 ++++ .../assistantDebugPayloadAssembler.test.ts | 23 +++++++++ .../assistantMessageLogAssembler.test.ts | 9 +++- 12 files changed, 176 insertions(+), 2 deletions(-) create mode 100644 llm_normalizer/backend/dist/services/assistantStage4AnswerContractAudit.js create mode 100644 llm_normalizer/backend/src/services/assistantStage4AnswerContractAudit.ts diff --git a/docs/TECH/1CLLMARCH-FACT.md b/docs/TECH/1CLLMARCH-FACT.md index 65b8aa4..c40456f 100644 --- a/docs/TECH/1CLLMARCH-FACT.md +++ b/docs/TECH/1CLLMARCH-FACT.md @@ -2679,7 +2679,37 @@ Implemented in current pass (Stage 4.3 contract consistency hardening, 2026-04-1 - focused Stage 4 contract pack passed: `4 files / 14 tests` (`assistantStage4AnswerContractShape`, `assistantBoundaryFallbackReply`, `assistantSoftPolicyReply`, `assistantAnswerPolicyV11`); - `npm --prefix llm_normalizer/backend run build` passed. -Status: In progress (Stage 4.1-4.3 completed; continue with quality loop on real runs/manual comments) +Implemented in current pass (Stage 4.4 quality-loop verification on real/manual scenarios, 2026-04-12): +1. Ran expanded regression gate aligned with Stage 3 exit + Stage 4 contract checks: + - `addressQueryRuntimeM23` + - `assistantWave17RunRegression20260411` + - `assistantWave18ManualCommentsRegression` + - `assistantLivingRouter` + - `assistantLivingChatMode` + - `assistantSoftPolicyReply` + - `assistantBoundaryFallbackReply` + - `assistantAnswerPolicyV11` + - `assistantSemanticExtractionContract` + - `assistantStage4AnswerContractShape` +2. Validation snapshot: + - expanded quality-loop pack passed: `10 files / 359 tests`; + - `npm --prefix llm_normalizer/backend run build` passed. + +Implemented in current pass (Stage 4.5 contract observability in debug/log, 2026-04-12): +1. Added Stage 4 answer-contract audit module: + - `assistantStage4AnswerContractAudit.ts` + - computes presence/missing of required Stage 4 blocks and detects legacy section leakage. +2. Integrated audit into deep-lane observability artifacts: + - `debug.answer_contract_stage4_v1` + - processed log details `answer_contract_stage4_v1` +3. Added focused unit coverage: + - `assistantDebugPayloadAssembler.test.ts` (positive and non-Stage4 legacy case) + - `assistantMessageLogAssembler.test.ts` +4. Validation snapshot: + - focused assembler pack passed: `2 files / 5 tests`; + - `npm --prefix llm_normalizer/backend run build` passed. + +Status: In progress (Stage 4.1-4.5 completed; continue with focused wave/manual-comment quality backlog) ## Stage 5 (P3): Quality Loop Driven By GUI Markup diff --git a/llm_normalizer/backend/dist/services/assistantDebugPayloadAssembler.js b/llm_normalizer/backend/dist/services/assistantDebugPayloadAssembler.js index bea494a..fba2ce4 100644 --- a/llm_normalizer/backend/dist/services/assistantDebugPayloadAssembler.js +++ b/llm_normalizer/backend/dist/services/assistantDebugPayloadAssembler.js @@ -1,6 +1,7 @@ "use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.buildDeepAnalysisDebugPayload = buildDeepAnalysisDebugPayload; +const assistantStage4AnswerContractAudit_1 = require("./assistantStage4AnswerContractAudit"); function toAnalysisContext(input) { if (!input.active) { return null; @@ -15,6 +16,7 @@ function toAnalysisContext(input) { } function buildDeepAnalysisDebugPayload(input) { const analysisContext = toAnalysisContext(input.runtimeAnalysisContext); + const answerContractStage4Audit = (0, assistantStage4AnswerContractAudit_1.buildStage4AnswerContractAuditV1)(input.assistantReply); return { trace_id: input.traceId, prompt_version: input.promptVersion, @@ -86,6 +88,7 @@ function buildDeepAnalysisDebugPayload(input) { orchestration_contract_v1: input.addressRuntimeMetaForDeep?.orchestrationContract ?? null, assistant_outcome_class_v1: input.outcomeClassV1, assistant_orchestration_contracts_v1: input.assistantOrchestrationContractsV1, + answer_contract_stage4_v1: answerContractStage4Audit, answer_structure_v11: input.answerStructureV11, investigation_state_snapshot: input.investigationStateSnapshot, normalized: input.normalizedPayload diff --git a/llm_normalizer/backend/dist/services/assistantDeepTurnPackaging.js b/llm_normalizer/backend/dist/services/assistantDeepTurnPackaging.js index 8372b8b..9bd2c10 100644 --- a/llm_normalizer/backend/dist/services/assistantDeepTurnPackaging.js +++ b/llm_normalizer/backend/dist/services/assistantDeepTurnPackaging.js @@ -81,6 +81,7 @@ function assembleAssistantDeepTurnPackaging(input) { outcomeClassV1: contractsBundleV1.outcomeClassV1, assistantOrchestrationContractsV1: contractsBundleV1.assistantOrchestrationContractsV1, answerStructureV11: deepAnswerArtifacts.answerStructureV11, + assistantReply: deepAnswerArtifacts.safeAssistantReply, investigationStateSnapshot: input.investigationStateSnapshot, normalizedPayload: normalizedPayload }); diff --git a/llm_normalizer/backend/dist/services/assistantMessageLogAssembler.js b/llm_normalizer/backend/dist/services/assistantMessageLogAssembler.js index 28e113e..be54dd5 100644 --- a/llm_normalizer/backend/dist/services/assistantMessageLogAssembler.js +++ b/llm_normalizer/backend/dist/services/assistantMessageLogAssembler.js @@ -1,6 +1,7 @@ "use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.buildDeepAnalysisProcessedLogDetails = buildDeepAnalysisProcessedLogDetails; +const assistantStage4AnswerContractAudit_1 = require("./assistantStage4AnswerContractAudit"); function toAnalysisContext(input) { if (!input.active) { return null; @@ -22,6 +23,7 @@ function resolveCoverageStatus(coverageReport) { } function buildDeepAnalysisProcessedLogDetails(input) { const analysisContext = toAnalysisContext(input.runtimeAnalysisContext); + const answerContractStage4Audit = (0, assistantStage4AnswerContractAudit_1.buildStage4AnswerContractAuditV1)(input.assistantReply); return { session_id: input.sessionId, message_id: input.messageId, @@ -92,6 +94,7 @@ function buildDeepAnalysisProcessedLogDetails(input) { : {}), assistant_outcome_class_v1: input.outcomeClassV1, assistant_orchestration_contracts_v1: input.assistantOrchestrationContractsV1, + answer_contract_stage4_v1: answerContractStage4Audit, answer_structure_v11: input.answerStructureV11, investigation_state_snapshot: input.investigationStateSnapshot, fallback_type: input.compositionDebug.fallback_type, diff --git a/llm_normalizer/backend/dist/services/assistantStage4AnswerContractAudit.js b/llm_normalizer/backend/dist/services/assistantStage4AnswerContractAudit.js new file mode 100644 index 0000000..78bfce6 --- /dev/null +++ b/llm_normalizer/backend/dist/services/assistantStage4AnswerContractAudit.js @@ -0,0 +1,38 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.buildStage4AnswerContractAuditV1 = buildStage4AnswerContractAuditV1; +const STAGE4_EXPECTED_BLOCKS = [ + "Коротко", + "Что именно проверено", + "Что найдено", + "Что пока не доказано" +]; +const STAGE4_NEXT_STEP_BLOCKS = ["Что проверить первым", "Что могу сделать сейчас"]; +const LEGACY_BLOCKS = [ + "Что сломано", + "Почему это похоже на проблему", + "На чем это основано", + "Ограничения" +]; +function hasBlock(reply, title) { + return reply.includes(`${title}:`); +} +function buildStage4AnswerContractAuditV1(assistantReply) { + const reply = String(assistantReply ?? ""); + const expected = [...STAGE4_EXPECTED_BLOCKS, "Что проверить первым/Что могу сделать сейчас"]; + const present = STAGE4_EXPECTED_BLOCKS.filter((title) => hasBlock(reply, title)); + const hasPrimaryNextStep = STAGE4_NEXT_STEP_BLOCKS.some((title) => hasBlock(reply, title)); + if (hasPrimaryNextStep) { + present.push("Что проверить первым/Что могу сделать сейчас"); + } + const missing = expected.filter((title) => !present.includes(title)); + const legacyPresent = LEGACY_BLOCKS.filter((title) => hasBlock(reply, title)); + return { + schema_version: "stage4_answer_contract_audit_v1", + stage4_blocks_expected: expected, + stage4_blocks_present: present, + stage4_blocks_missing: missing, + legacy_blocks_present: legacyPresent, + is_stage4_shape: missing.length === 0 && legacyPresent.length === 0 + }; +} diff --git a/llm_normalizer/backend/src/services/assistantDebugPayloadAssembler.ts b/llm_normalizer/backend/src/services/assistantDebugPayloadAssembler.ts index 8510c84..a274820 100644 --- a/llm_normalizer/backend/src/services/assistantDebugPayloadAssembler.ts +++ b/llm_normalizer/backend/src/services/assistantDebugPayloadAssembler.ts @@ -23,6 +23,7 @@ import type { GroundedAnswerEligibilityAudit, TemporalGuardAudit } from "./assistantRuntimeGuards"; +import { buildStage4AnswerContractAuditV1 } from "./assistantStage4AnswerContractAudit"; type RetrievalStatusItem = AssistantDebugPayload["retrieval_status"][number]; @@ -75,6 +76,7 @@ export interface DeepAnalysisDebugPayloadInput { outcomeClassV1: AssistantOutcomeClassV1; assistantOrchestrationContractsV1: AssistantContractsBundleV1["assistantOrchestrationContractsV1"]; answerStructureV11: AnswerStructureV11 | null; + assistantReply: string; investigationStateSnapshot: InvestigationStateWithProblemUnits | null; normalizedPayload: NormalizeResponsePayload["normalized"]; } @@ -94,6 +96,7 @@ function toAnalysisContext(input: DeepAnalysisDebugPayloadInput["runtimeAnalysis export function buildDeepAnalysisDebugPayload(input: DeepAnalysisDebugPayloadInput): AssistantDebugPayload { const analysisContext = toAnalysisContext(input.runtimeAnalysisContext); + const answerContractStage4Audit = buildStage4AnswerContractAuditV1(input.assistantReply); return { trace_id: input.traceId, prompt_version: input.promptVersion, @@ -165,6 +168,7 @@ export function buildDeepAnalysisDebugPayload(input: DeepAnalysisDebugPayloadInp orchestration_contract_v1: input.addressRuntimeMetaForDeep?.orchestrationContract ?? null, assistant_outcome_class_v1: input.outcomeClassV1, assistant_orchestration_contracts_v1: input.assistantOrchestrationContractsV1, + answer_contract_stage4_v1: answerContractStage4Audit, answer_structure_v11: input.answerStructureV11, investigation_state_snapshot: input.investigationStateSnapshot, normalized: input.normalizedPayload diff --git a/llm_normalizer/backend/src/services/assistantDeepTurnPackaging.ts b/llm_normalizer/backend/src/services/assistantDeepTurnPackaging.ts index 2794554..8170de0 100644 --- a/llm_normalizer/backend/src/services/assistantDeepTurnPackaging.ts +++ b/llm_normalizer/backend/src/services/assistantDeepTurnPackaging.ts @@ -216,6 +216,7 @@ export function assembleAssistantDeepTurnPackaging(input: AssistantDeepTurnPacka outcomeClassV1: contractsBundleV1.outcomeClassV1, assistantOrchestrationContractsV1: contractsBundleV1.assistantOrchestrationContractsV1, answerStructureV11: deepAnswerArtifacts.answerStructureV11, + assistantReply: deepAnswerArtifacts.safeAssistantReply, investigationStateSnapshot: input.investigationStateSnapshot, normalizedPayload: normalizedPayload }); diff --git a/llm_normalizer/backend/src/services/assistantMessageLogAssembler.ts b/llm_normalizer/backend/src/services/assistantMessageLogAssembler.ts index af51cf5..2878d6b 100644 --- a/llm_normalizer/backend/src/services/assistantMessageLogAssembler.ts +++ b/llm_normalizer/backend/src/services/assistantMessageLogAssembler.ts @@ -28,6 +28,7 @@ import type { GroundedAnswerEligibilityAudit, TemporalGuardAudit } from "./assistantRuntimeGuards"; +import { buildStage4AnswerContractAuditV1 } from "./assistantStage4AnswerContractAudit"; export interface DeepAnalysisMessageLogDetailsInput { sessionId: string; @@ -110,6 +111,7 @@ export type DeepAnalysisLogDetails = Record; export function buildDeepAnalysisProcessedLogDetails(input: DeepAnalysisMessageLogDetailsInput): DeepAnalysisLogDetails { const analysisContext = toAnalysisContext(input.runtimeAnalysisContext); + const answerContractStage4Audit = buildStage4AnswerContractAuditV1(input.assistantReply); return { session_id: input.sessionId, message_id: input.messageId, @@ -180,6 +182,7 @@ export function buildDeepAnalysisProcessedLogDetails(input: DeepAnalysisMessageL : {}), assistant_outcome_class_v1: input.outcomeClassV1, assistant_orchestration_contracts_v1: input.assistantOrchestrationContractsV1, + answer_contract_stage4_v1: answerContractStage4Audit, answer_structure_v11: input.answerStructureV11, investigation_state_snapshot: input.investigationStateSnapshot, fallback_type: input.compositionDebug.fallback_type, diff --git a/llm_normalizer/backend/src/services/assistantStage4AnswerContractAudit.ts b/llm_normalizer/backend/src/services/assistantStage4AnswerContractAudit.ts new file mode 100644 index 0000000..57cf4fd --- /dev/null +++ b/llm_normalizer/backend/src/services/assistantStage4AnswerContractAudit.ts @@ -0,0 +1,51 @@ +export interface Stage4AnswerContractAuditV1 { + schema_version: "stage4_answer_contract_audit_v1"; + stage4_blocks_expected: string[]; + stage4_blocks_present: string[]; + stage4_blocks_missing: string[]; + legacy_blocks_present: string[]; + is_stage4_shape: boolean; +} + +const STAGE4_EXPECTED_BLOCKS = [ + "Коротко", + "Что именно проверено", + "Что найдено", + "Что пока не доказано" +] as const; + +const STAGE4_NEXT_STEP_BLOCKS = ["Что проверить первым", "Что могу сделать сейчас"] as const; + +const LEGACY_BLOCKS = [ + "Что сломано", + "Почему это похоже на проблему", + "На чем это основано", + "Ограничения" +] as const; + +function hasBlock(reply: string, title: string): boolean { + return reply.includes(`${title}:`); +} + +export function buildStage4AnswerContractAuditV1(assistantReply: string): Stage4AnswerContractAuditV1 { + const reply = String(assistantReply ?? ""); + const expected: string[] = [...STAGE4_EXPECTED_BLOCKS, "Что проверить первым/Что могу сделать сейчас"]; + + const present: string[] = STAGE4_EXPECTED_BLOCKS.filter((title) => hasBlock(reply, title)); + const hasPrimaryNextStep = STAGE4_NEXT_STEP_BLOCKS.some((title) => hasBlock(reply, title)); + if (hasPrimaryNextStep) { + present.push("Что проверить первым/Что могу сделать сейчас"); + } + + const missing = expected.filter((title) => !present.includes(title)); + const legacyPresent = LEGACY_BLOCKS.filter((title) => hasBlock(reply, title)); + + return { + schema_version: "stage4_answer_contract_audit_v1", + stage4_blocks_expected: expected, + stage4_blocks_present: present, + stage4_blocks_missing: missing, + legacy_blocks_present: legacyPresent, + is_stage4_shape: missing.length === 0 && legacyPresent.length === 0 + }; +} diff --git a/llm_normalizer/backend/src/types/assistant.ts b/llm_normalizer/backend/src/types/assistant.ts index 6d1e86b..2a7f96f 100644 --- a/llm_normalizer/backend/src/types/assistant.ts +++ b/llm_normalizer/backend/src/types/assistant.ts @@ -271,6 +271,15 @@ export interface GroundedAnswerEligibilityGuardDebug { reason_codes: string[]; } +export interface Stage4AnswerContractAuditV1 { + schema_version: "stage4_answer_contract_audit_v1"; + stage4_blocks_expected: string[]; + stage4_blocks_present: string[]; + stage4_blocks_missing: string[]; + legacy_blocks_present: string[]; + is_stage4_shape: boolean; +} + export interface RbpLiveRouteAuditDebug { claim_type: "prove_rbp_tail_state"; required_live_calls: string[]; @@ -470,6 +479,7 @@ export interface AssistantDebugPayload { problem_units_used_count?: number; problem_answer_mode?: AssistantProblemAnswerMode; problem_unit_ids_used?: string[]; + answer_contract_stage4_v1?: Stage4AnswerContractAuditV1; answer_structure_v11: AnswerStructureV11 | null; investigation_state_snapshot: InvestigationStateWithProblemUnits | null; normalized: NormalizeResponsePayload["normalized"]; diff --git a/llm_normalizer/backend/tests/assistantDebugPayloadAssembler.test.ts b/llm_normalizer/backend/tests/assistantDebugPayloadAssembler.test.ts index 2c296c6..210d6ea 100644 --- a/llm_normalizer/backend/tests/assistantDebugPayloadAssembler.test.ts +++ b/llm_normalizer/backend/tests/assistantDebugPayloadAssembler.test.ts @@ -90,6 +90,13 @@ function baseInput() { outcomeClassV1: "FULLY_ANSWERED", assistantOrchestrationContractsV1: { query_frame: {}, execution_plan: {}, evidence_bundle: {}, coverage: {} }, answerStructureV11: { schema_version: "answer_structure_v1_1" }, + assistantReply: [ + "Коротко: Признак проблемы подтвержден частично.", + "Что именно проверено:\n- Проверен учетный контур.", + "Что найдено:\n- Есть признак разрыва цепочки.", + "Что пока не доказано:\n- Не хватает части подтверждений.", + "Что проверить первым:\n- Уточнить период." + ].join("\n\n"), investigationStateSnapshot: { status: "active" }, normalizedPayload: { schema_version: "normalized_query_v2_0_2" } }; @@ -108,6 +115,7 @@ describe("assistant debug payload assembler", () => { expect(payload.problem_unit_ids_used).toEqual(["pu-1", "pu-2"]); expect(payload.address_llm_predecompose_applied).toBe(true); expect(payload.assistant_outcome_class_v1).toBe("FULLY_ANSWERED"); + expect(payload.answer_contract_stage4_v1?.is_stage4_shape).toBe(true); }); it("omits optional fields when they are not provided", () => { @@ -127,4 +135,19 @@ describe("assistant debug payload assembler", () => { expect(payload.address_llm_predecompose_applied).toBe(false); expect(payload.address_llm_predecompose_contract).toBeNull(); }); + + it("marks non-stage4 answer shapes in contract audit", () => { + const input = baseInput(); + input.assistantReply = [ + "Коротко: Есть проблема.", + "Что сломано:\n- Разрыв перехода.", + "Ограничения:\n- Частичная опора." + ].join("\n\n"); + + const payload = buildDeepAnalysisDebugPayload(input); + + expect(payload.answer_contract_stage4_v1?.is_stage4_shape).toBe(false); + expect(payload.answer_contract_stage4_v1?.legacy_blocks_present).toContain("Что сломано"); + expect(payload.answer_contract_stage4_v1?.legacy_blocks_present).toContain("Ограничения"); + }); }); diff --git a/llm_normalizer/backend/tests/assistantMessageLogAssembler.test.ts b/llm_normalizer/backend/tests/assistantMessageLogAssembler.test.ts index 7a904c5..19a5b03 100644 --- a/llm_normalizer/backend/tests/assistantMessageLogAssembler.test.ts +++ b/llm_normalizer/backend/tests/assistantMessageLogAssembler.test.ts @@ -89,7 +89,13 @@ function baseInput() { assistantOrchestrationContractsV1: { query_frame: {}, execution_plan: {}, evidence_bundle: {}, coverage: {} }, answerStructureV11: { schema_version: "answer_structure_v1_1" }, investigationStateSnapshot: { status: "active" }, - assistantReply: "ok", + assistantReply: [ + "Коротко: Признак проблемы подтвержден частично.", + "Что именно проверено:\n- Проверен учетный контур.", + "Что найдено:\n- Есть признак разрыва цепочки.", + "Что пока не доказано:\n- Не хватает части подтверждений.", + "Что проверить первым:\n- Уточнить период." + ].join("\n\n"), traceId: "trace-1" }; } @@ -104,6 +110,7 @@ describe("assistant message log assembler", () => { }); expect(details.problem_unit_ids_used).toEqual(["pu-1"]); expect(details.reply_type).toBe("factual"); + expect((details.answer_contract_stage4_v1 as { is_stage4_shape?: boolean } | undefined)?.is_stage4_shape).toBe(true); }); it("marks partial coverage and omits optional sections when empty", () => {