ГЛОБАЛЬНЫЙ РЕФАКТОРИНГ АРХИТЕКТУРЫ - Этап 4.5: добавлен аудит контракта Stage 4 в debug/log и закрыт расширенный quality-loop
This commit is contained in:
parent
88a7da4f0a
commit
963f0aa372
|
|
@ -2679,7 +2679,37 @@ Implemented in current pass (Stage 4.3 contract consistency hardening, 2026-04-1
|
|||
- focused Stage 4 contract pack passed: `4 files / 14 tests` (`assistantStage4AnswerContractShape`, `assistantBoundaryFallbackReply`, `assistantSoftPolicyReply`, `assistantAnswerPolicyV11`);
|
||||
- `npm --prefix llm_normalizer/backend run build` passed.
|
||||
|
||||
Status: In progress (Stage 4.1-4.3 completed; continue with quality loop on real runs/manual comments)
|
||||
Implemented in current pass (Stage 4.4 quality-loop verification on real/manual scenarios, 2026-04-12):
|
||||
1. Ran expanded regression gate aligned with Stage 3 exit + Stage 4 contract checks:
|
||||
- `addressQueryRuntimeM23`
|
||||
- `assistantWave17RunRegression20260411`
|
||||
- `assistantWave18ManualCommentsRegression`
|
||||
- `assistantLivingRouter`
|
||||
- `assistantLivingChatMode`
|
||||
- `assistantSoftPolicyReply`
|
||||
- `assistantBoundaryFallbackReply`
|
||||
- `assistantAnswerPolicyV11`
|
||||
- `assistantSemanticExtractionContract`
|
||||
- `assistantStage4AnswerContractShape`
|
||||
2. Validation snapshot:
|
||||
- expanded quality-loop pack passed: `10 files / 359 tests`;
|
||||
- `npm --prefix llm_normalizer/backend run build` passed.
|
||||
|
||||
Implemented in current pass (Stage 4.5 contract observability in debug/log, 2026-04-12):
|
||||
1. Added Stage 4 answer-contract audit module:
|
||||
- `assistantStage4AnswerContractAudit.ts`
|
||||
- reports which required Stage 4 blocks are present and which are missing, and detects legacy section leakage.
|
||||
2. Integrated audit into deep-lane observability artifacts:
|
||||
- `debug.answer_contract_stage4_v1`
|
||||
- processed log details `answer_contract_stage4_v1`
|
||||
3. Added focused unit coverage:
|
||||
- `assistantDebugPayloadAssembler.test.ts` (positive and non-Stage4 legacy case)
|
||||
- `assistantMessageLogAssembler.test.ts`
|
||||
4. Validation snapshot:
|
||||
- focused assembler pack passed: `2 files / 5 tests`;
|
||||
- `npm --prefix llm_normalizer/backend run build` passed.
|
||||
|
||||
Status: In progress (Stage 4.1-4.5 completed; continue with focused wave/manual-comment quality backlog)
|
||||
|
||||
## Stage 5 (P3): Quality Loop Driven By GUI Markup
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
"use strict";
|
||||
Object.defineProperty(exports, "__esModule", { value: true });
|
||||
exports.buildDeepAnalysisDebugPayload = buildDeepAnalysisDebugPayload;
|
||||
const assistantStage4AnswerContractAudit_1 = require("./assistantStage4AnswerContractAudit");
|
||||
function toAnalysisContext(input) {
|
||||
if (!input.active) {
|
||||
return null;
|
||||
|
|
@ -15,6 +16,7 @@ function toAnalysisContext(input) {
|
|||
}
|
||||
function buildDeepAnalysisDebugPayload(input) {
|
||||
const analysisContext = toAnalysisContext(input.runtimeAnalysisContext);
|
||||
const answerContractStage4Audit = (0, assistantStage4AnswerContractAudit_1.buildStage4AnswerContractAuditV1)(input.assistantReply);
|
||||
return {
|
||||
trace_id: input.traceId,
|
||||
prompt_version: input.promptVersion,
|
||||
|
|
@ -86,6 +88,7 @@ function buildDeepAnalysisDebugPayload(input) {
|
|||
orchestration_contract_v1: input.addressRuntimeMetaForDeep?.orchestrationContract ?? null,
|
||||
assistant_outcome_class_v1: input.outcomeClassV1,
|
||||
assistant_orchestration_contracts_v1: input.assistantOrchestrationContractsV1,
|
||||
answer_contract_stage4_v1: answerContractStage4Audit,
|
||||
answer_structure_v11: input.answerStructureV11,
|
||||
investigation_state_snapshot: input.investigationStateSnapshot,
|
||||
normalized: input.normalizedPayload
|
||||
|
|
|
|||
|
|
@ -81,6 +81,7 @@ function assembleAssistantDeepTurnPackaging(input) {
|
|||
outcomeClassV1: contractsBundleV1.outcomeClassV1,
|
||||
assistantOrchestrationContractsV1: contractsBundleV1.assistantOrchestrationContractsV1,
|
||||
answerStructureV11: deepAnswerArtifacts.answerStructureV11,
|
||||
assistantReply: deepAnswerArtifacts.safeAssistantReply,
|
||||
investigationStateSnapshot: input.investigationStateSnapshot,
|
||||
normalizedPayload: normalizedPayload
|
||||
});
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
"use strict";
|
||||
Object.defineProperty(exports, "__esModule", { value: true });
|
||||
exports.buildDeepAnalysisProcessedLogDetails = buildDeepAnalysisProcessedLogDetails;
|
||||
const assistantStage4AnswerContractAudit_1 = require("./assistantStage4AnswerContractAudit");
|
||||
function toAnalysisContext(input) {
|
||||
if (!input.active) {
|
||||
return null;
|
||||
|
|
@ -22,6 +23,7 @@ function resolveCoverageStatus(coverageReport) {
|
|||
}
|
||||
function buildDeepAnalysisProcessedLogDetails(input) {
|
||||
const analysisContext = toAnalysisContext(input.runtimeAnalysisContext);
|
||||
const answerContractStage4Audit = (0, assistantStage4AnswerContractAudit_1.buildStage4AnswerContractAuditV1)(input.assistantReply);
|
||||
return {
|
||||
session_id: input.sessionId,
|
||||
message_id: input.messageId,
|
||||
|
|
@ -92,6 +94,7 @@ function buildDeepAnalysisProcessedLogDetails(input) {
|
|||
: {}),
|
||||
assistant_outcome_class_v1: input.outcomeClassV1,
|
||||
assistant_orchestration_contracts_v1: input.assistantOrchestrationContractsV1,
|
||||
answer_contract_stage4_v1: answerContractStage4Audit,
|
||||
answer_structure_v11: input.answerStructureV11,
|
||||
investigation_state_snapshot: input.investigationStateSnapshot,
|
||||
fallback_type: input.compositionDebug.fallback_type,
|
||||
|
|
|
|||
|
|
@ -0,0 +1,38 @@
|
|||
"use strict";
|
||||
Object.defineProperty(exports, "__esModule", { value: true });
|
||||
exports.buildStage4AnswerContractAuditV1 = buildStage4AnswerContractAuditV1;
|
||||
// Section titles (stored without the trailing colon) that the audit expects
// to find in a Stage 4 answer.
const STAGE4_EXPECTED_BLOCKS = ["Коротко", "Что именно проверено", "Что найдено", "Что пока не доказано"];

// Alternative titles for the next-step section; the audit accepts either one.
const STAGE4_NEXT_STEP_BLOCKS = [
    "Что проверить первым",
    "Что могу сделать сейчас",
];

// Section titles the audit reports as legacy when they show up in a reply.
const LEGACY_BLOCKS = ["Что сломано", "Почему это похоже на проблему", "На чем это основано", "Ограничения"];
|
||||
/**
 * Reports whether `reply` contains a section heading named `title`.
 *
 * A heading is the title immediately followed by a colon at the start of a
 * line. Leading whitespace and markdown decoration (`#`, `>`, `*`, `_`, `-`,
 * `•`) are ignored, so `## Title:`, `**Title:**` and `- Title:` still count.
 * Anchoring to the line start keeps a title that is merely quoted inside body
 * text from being reported as a section.
 *
 * @param {string} reply - Assistant reply text to scan.
 * @param {string} title - Section title without the trailing colon.
 * @returns {boolean} True when some line of `reply` opens with `title:`.
 */
function hasBlock(reply, title) {
    const heading = `${title}:`;
    return reply
        .split(/\r?\n/)
        .some((line) => line.replace(/^[\s#>*_•-]+/, "").startsWith(heading));
}
|
||||
/**
 * Builds the Stage 4 answer-contract audit for a single assistant reply.
 *
 * The reply is coerced to a string (null/undefined become ""), then checked
 * with `hasBlock` for each expected Stage 4 title, for either of the
 * next-step titles, and for each legacy title.
 *
 * @param {string} assistantReply - Reply text to audit.
 * @returns {object} Audit record: expected / present / missing Stage 4 blocks,
 *   legacy blocks found, and `is_stage4_shape`, which is true only when
 *   nothing is missing and no legacy block is present.
 */
function buildStage4AnswerContractAuditV1(assistantReply) {
    // The two next-step titles are alternatives and are reported under one combined label.
    const nextStepLabel = "Что проверить первым/Что могу сделать сейчас";
    const text = String(assistantReply ?? "");
    const containsHeading = (heading) => hasBlock(text, heading);

    const expectedBlocks = [...STAGE4_EXPECTED_BLOCKS, nextStepLabel];

    const presentBlocks = STAGE4_EXPECTED_BLOCKS.filter(containsHeading);
    if (STAGE4_NEXT_STEP_BLOCKS.some(containsHeading)) {
        presentBlocks.push(nextStepLabel);
    }

    const presentLookup = new Set(presentBlocks);
    const missingBlocks = expectedBlocks.filter((heading) => !presentLookup.has(heading));
    const legacyBlocks = LEGACY_BLOCKS.filter(containsHeading);

    return {
        schema_version: "stage4_answer_contract_audit_v1",
        stage4_blocks_expected: expectedBlocks,
        stage4_blocks_present: presentBlocks,
        stage4_blocks_missing: missingBlocks,
        legacy_blocks_present: legacyBlocks,
        is_stage4_shape: !(missingBlocks.length > 0 || legacyBlocks.length > 0)
    };
}
|
||||
|
|
@ -23,6 +23,7 @@ import type {
|
|||
GroundedAnswerEligibilityAudit,
|
||||
TemporalGuardAudit
|
||||
} from "./assistantRuntimeGuards";
|
||||
import { buildStage4AnswerContractAuditV1 } from "./assistantStage4AnswerContractAudit";
|
||||
|
||||
type RetrievalStatusItem = AssistantDebugPayload["retrieval_status"][number];
|
||||
|
||||
|
|
@ -75,6 +76,7 @@ export interface DeepAnalysisDebugPayloadInput {
|
|||
outcomeClassV1: AssistantOutcomeClassV1;
|
||||
assistantOrchestrationContractsV1: AssistantContractsBundleV1["assistantOrchestrationContractsV1"];
|
||||
answerStructureV11: AnswerStructureV11 | null;
|
||||
assistantReply: string;
|
||||
investigationStateSnapshot: InvestigationStateWithProblemUnits | null;
|
||||
normalizedPayload: NormalizeResponsePayload["normalized"];
|
||||
}
|
||||
|
|
@ -94,6 +96,7 @@ function toAnalysisContext(input: DeepAnalysisDebugPayloadInput["runtimeAnalysis
|
|||
|
||||
export function buildDeepAnalysisDebugPayload(input: DeepAnalysisDebugPayloadInput): AssistantDebugPayload {
|
||||
const analysisContext = toAnalysisContext(input.runtimeAnalysisContext);
|
||||
const answerContractStage4Audit = buildStage4AnswerContractAuditV1(input.assistantReply);
|
||||
return {
|
||||
trace_id: input.traceId,
|
||||
prompt_version: input.promptVersion,
|
||||
|
|
@ -165,6 +168,7 @@ export function buildDeepAnalysisDebugPayload(input: DeepAnalysisDebugPayloadInp
|
|||
orchestration_contract_v1: input.addressRuntimeMetaForDeep?.orchestrationContract ?? null,
|
||||
assistant_outcome_class_v1: input.outcomeClassV1,
|
||||
assistant_orchestration_contracts_v1: input.assistantOrchestrationContractsV1,
|
||||
answer_contract_stage4_v1: answerContractStage4Audit,
|
||||
answer_structure_v11: input.answerStructureV11,
|
||||
investigation_state_snapshot: input.investigationStateSnapshot,
|
||||
normalized: input.normalizedPayload
|
||||
|
|
|
|||
|
|
@ -216,6 +216,7 @@ export function assembleAssistantDeepTurnPackaging(input: AssistantDeepTurnPacka
|
|||
outcomeClassV1: contractsBundleV1.outcomeClassV1,
|
||||
assistantOrchestrationContractsV1: contractsBundleV1.assistantOrchestrationContractsV1,
|
||||
answerStructureV11: deepAnswerArtifacts.answerStructureV11,
|
||||
assistantReply: deepAnswerArtifacts.safeAssistantReply,
|
||||
investigationStateSnapshot: input.investigationStateSnapshot,
|
||||
normalizedPayload: normalizedPayload
|
||||
});
|
||||
|
|
|
|||
|
|
@ -28,6 +28,7 @@ import type {
|
|||
GroundedAnswerEligibilityAudit,
|
||||
TemporalGuardAudit
|
||||
} from "./assistantRuntimeGuards";
|
||||
import { buildStage4AnswerContractAuditV1 } from "./assistantStage4AnswerContractAudit";
|
||||
|
||||
export interface DeepAnalysisMessageLogDetailsInput {
|
||||
sessionId: string;
|
||||
|
|
@ -110,6 +111,7 @@ export type DeepAnalysisLogDetails = Record<string, unknown>;
|
|||
|
||||
export function buildDeepAnalysisProcessedLogDetails(input: DeepAnalysisMessageLogDetailsInput): DeepAnalysisLogDetails {
|
||||
const analysisContext = toAnalysisContext(input.runtimeAnalysisContext);
|
||||
const answerContractStage4Audit = buildStage4AnswerContractAuditV1(input.assistantReply);
|
||||
return {
|
||||
session_id: input.sessionId,
|
||||
message_id: input.messageId,
|
||||
|
|
@ -180,6 +182,7 @@ export function buildDeepAnalysisProcessedLogDetails(input: DeepAnalysisMessageL
|
|||
: {}),
|
||||
assistant_outcome_class_v1: input.outcomeClassV1,
|
||||
assistant_orchestration_contracts_v1: input.assistantOrchestrationContractsV1,
|
||||
answer_contract_stage4_v1: answerContractStage4Audit,
|
||||
answer_structure_v11: input.answerStructureV11,
|
||||
investigation_state_snapshot: input.investigationStateSnapshot,
|
||||
fallback_type: input.compositionDebug.fallback_type,
|
||||
|
|
|
|||
|
|
@ -0,0 +1,51 @@
|
|||
/** Result of auditing one assistant reply against the Stage 4 answer contract. */
export interface Stage4AnswerContractAuditV1 {
    /** Fixed discriminator identifying this audit schema. */
    schema_version: "stage4_answer_contract_audit_v1";
    /** Labels of every block the audit looks for. */
    stage4_blocks_expected: Array<string>;
    /** Expected labels that were found in the reply. */
    stage4_blocks_present: Array<string>;
    /** Expected labels that were not found in the reply. */
    stage4_blocks_missing: Array<string>;
    /** Legacy section titles that were found in the reply. */
    legacy_blocks_present: Array<string>;
    /** True when nothing is missing and no legacy block is present. */
    is_stage4_shape: boolean;
}
|
||||
|
||||
// Section titles (stored without the trailing colon) that the audit expects
// to find in a Stage 4 answer.
const STAGE4_EXPECTED_BLOCKS = ["Коротко", "Что именно проверено", "Что найдено", "Что пока не доказано"] as const;

// Alternative titles for the next-step section; the audit accepts either one.
const STAGE4_NEXT_STEP_BLOCKS = [
    "Что проверить первым",
    "Что могу сделать сейчас",
] as const;

// Section titles the audit reports as legacy when they show up in a reply.
const LEGACY_BLOCKS = ["Что сломано", "Почему это похоже на проблему", "На чем это основано", "Ограничения"] as const;
|
||||
|
||||
/**
 * Reports whether `reply` contains a section heading named `title`.
 *
 * A heading is the title immediately followed by a colon at the start of a
 * line. Leading whitespace and markdown decoration (`#`, `>`, `*`, `_`, `-`,
 * `•`) are ignored, so `## Title:`, `**Title:**` and `- Title:` still count.
 * Anchoring to the line start keeps a title that is merely quoted inside body
 * text from being reported as a section.
 *
 * @param reply - Assistant reply text to scan.
 * @param title - Section title without the trailing colon.
 * @returns True when some line of `reply` opens with `title:`.
 */
function hasBlock(reply: string, title: string): boolean {
    const heading = `${title}:`;
    return reply
        .split(/\r?\n/)
        .some((line) => line.replace(/^[\s#>*_•-]+/, "").startsWith(heading));
}
|
||||
|
||||
/**
 * Builds the Stage 4 answer-contract audit for a single assistant reply.
 *
 * The reply is coerced to a string (null/undefined become ""), then checked
 * with `hasBlock` for each expected Stage 4 title, for either of the
 * next-step titles, and for each legacy title.
 *
 * @param assistantReply - Reply text to audit.
 * @returns Audit record: expected / present / missing Stage 4 blocks, legacy
 *   blocks found, and `is_stage4_shape`, which is true only when nothing is
 *   missing and no legacy block is present.
 */
export function buildStage4AnswerContractAuditV1(assistantReply: string): Stage4AnswerContractAuditV1 {
    // The two next-step titles are alternatives and are reported under one combined label.
    const nextStepLabel = "Что проверить первым/Что могу сделать сейчас";
    const text = String(assistantReply ?? "");
    const containsHeading = (heading: string): boolean => hasBlock(text, heading);

    const expectedBlocks: string[] = [...STAGE4_EXPECTED_BLOCKS, nextStepLabel];

    const presentBlocks: string[] = STAGE4_EXPECTED_BLOCKS.filter(containsHeading);
    if (STAGE4_NEXT_STEP_BLOCKS.some(containsHeading)) {
        presentBlocks.push(nextStepLabel);
    }

    const presentLookup = new Set<string>(presentBlocks);
    const missingBlocks = expectedBlocks.filter((heading) => !presentLookup.has(heading));
    const legacyBlocks = LEGACY_BLOCKS.filter(containsHeading);

    return {
        schema_version: "stage4_answer_contract_audit_v1",
        stage4_blocks_expected: expectedBlocks,
        stage4_blocks_present: presentBlocks,
        stage4_blocks_missing: missingBlocks,
        legacy_blocks_present: legacyBlocks,
        is_stage4_shape: !(missingBlocks.length > 0 || legacyBlocks.length > 0)
    };
}
|
||||
|
|
@ -271,6 +271,15 @@ export interface GroundedAnswerEligibilityGuardDebug {
|
|||
reason_codes: string[];
|
||||
}
|
||||
|
||||
/** Shape of the Stage 4 answer-contract audit attached to the debug payload. */
export interface Stage4AnswerContractAuditV1 {
    /** Fixed discriminator identifying this audit schema. */
    schema_version: "stage4_answer_contract_audit_v1";
    /** Labels of every block the audit looks for. */
    stage4_blocks_expected: Array<string>;
    /** Expected labels that were found in the reply. */
    stage4_blocks_present: Array<string>;
    /** Expected labels that were not found in the reply. */
    stage4_blocks_missing: Array<string>;
    /** Legacy section titles that were found in the reply. */
    legacy_blocks_present: Array<string>;
    /** Whether the reply has the Stage 4 shape. */
    is_stage4_shape: boolean;
}
|
||||
|
||||
export interface RbpLiveRouteAuditDebug {
|
||||
claim_type: "prove_rbp_tail_state";
|
||||
required_live_calls: string[];
|
||||
|
|
@ -470,6 +479,7 @@ export interface AssistantDebugPayload {
|
|||
problem_units_used_count?: number;
|
||||
problem_answer_mode?: AssistantProblemAnswerMode;
|
||||
problem_unit_ids_used?: string[];
|
||||
answer_contract_stage4_v1?: Stage4AnswerContractAuditV1;
|
||||
answer_structure_v11: AnswerStructureV11 | null;
|
||||
investigation_state_snapshot: InvestigationStateWithProblemUnits | null;
|
||||
normalized: NormalizeResponsePayload["normalized"];
|
||||
|
|
|
|||
|
|
@ -90,6 +90,13 @@ function baseInput() {
|
|||
outcomeClassV1: "FULLY_ANSWERED",
|
||||
assistantOrchestrationContractsV1: { query_frame: {}, execution_plan: {}, evidence_bundle: {}, coverage: {} },
|
||||
answerStructureV11: { schema_version: "answer_structure_v1_1" },
|
||||
assistantReply: [
|
||||
"Коротко: Признак проблемы подтвержден частично.",
|
||||
"Что именно проверено:\n- Проверен учетный контур.",
|
||||
"Что найдено:\n- Есть признак разрыва цепочки.",
|
||||
"Что пока не доказано:\n- Не хватает части подтверждений.",
|
||||
"Что проверить первым:\n- Уточнить период."
|
||||
].join("\n\n"),
|
||||
investigationStateSnapshot: { status: "active" },
|
||||
normalizedPayload: { schema_version: "normalized_query_v2_0_2" }
|
||||
};
|
||||
|
|
@ -108,6 +115,7 @@ describe("assistant debug payload assembler", () => {
|
|||
expect(payload.problem_unit_ids_used).toEqual(["pu-1", "pu-2"]);
|
||||
expect(payload.address_llm_predecompose_applied).toBe(true);
|
||||
expect(payload.assistant_outcome_class_v1).toBe("FULLY_ANSWERED");
|
||||
expect(payload.answer_contract_stage4_v1?.is_stage4_shape).toBe(true);
|
||||
});
|
||||
|
||||
it("omits optional fields when they are not provided", () => {
|
||||
|
|
@ -127,4 +135,19 @@ describe("assistant debug payload assembler", () => {
|
|||
expect(payload.address_llm_predecompose_applied).toBe(false);
|
||||
expect(payload.address_llm_predecompose_contract).toBeNull();
|
||||
});
|
||||
|
||||
// A reply made of the summary plus two legacy sections: the audit must reject
// the Stage 4 shape and list both legacy titles.
it("marks non-stage4 answer shapes in contract audit", () => {
    const legacyReply = [
        "Коротко: Есть проблема.",
        "Что сломано:\n- Разрыв перехода.",
        "Ограничения:\n- Частичная опора."
    ].join("\n\n");
    const input = baseInput();
    input.assistantReply = legacyReply;

    const audit = buildDeepAnalysisDebugPayload(input).answer_contract_stage4_v1;

    expect(audit?.is_stage4_shape).toBe(false);
    expect(audit?.legacy_blocks_present).toContain("Что сломано");
    expect(audit?.legacy_blocks_present).toContain("Ограничения");
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -89,7 +89,13 @@ function baseInput() {
|
|||
assistantOrchestrationContractsV1: { query_frame: {}, execution_plan: {}, evidence_bundle: {}, coverage: {} },
|
||||
answerStructureV11: { schema_version: "answer_structure_v1_1" },
|
||||
investigationStateSnapshot: { status: "active" },
|
||||
assistantReply: "ok",
|
||||
assistantReply: [
|
||||
"Коротко: Признак проблемы подтвержден частично.",
|
||||
"Что именно проверено:\n- Проверен учетный контур.",
|
||||
"Что найдено:\n- Есть признак разрыва цепочки.",
|
||||
"Что пока не доказано:\n- Не хватает части подтверждений.",
|
||||
"Что проверить первым:\n- Уточнить период."
|
||||
].join("\n\n"),
|
||||
traceId: "trace-1"
|
||||
};
|
||||
}
|
||||
|
|
@ -104,6 +110,7 @@ describe("assistant message log assembler", () => {
|
|||
});
|
||||
expect(details.problem_unit_ids_used).toEqual(["pu-1"]);
|
||||
expect(details.reply_type).toBe("factual");
|
||||
expect((details.answer_contract_stage4_v1 as { is_stage4_shape?: boolean } | undefined)?.is_stage4_shape).toBe(true);
|
||||
});
|
||||
|
||||
it("marks partial coverage and omits optional sections when empty", () => {
|
||||
|
|
|
|||
Loading…
Reference in New Issue