NODEDC_1C/llm_normalizer/backend/tests/assistantAnswerPolicyV11.te...

296 lines
11 KiB
TypeScript

import request from "supertest";
import { afterEach, describe, expect, it, vi } from "vitest";
import { composeAssistantAnswer } from "../src/services/answerComposer";
import type { UnifiedRetrievalResult } from "../src/types/assistant";
/** Names of the environment variables (assistant feature flags) that this suite overrides. */
const FLAG_KEYS = [
  "FEATURE_ASSISTANT_ANSWER_POLICY_V11",
  "FEATURE_ASSISTANT_BROAD_GUARD_V1",
  "FEATURE_ASSISTANT_MIN_EVIDENCE_GATE_V1",
  "FEATURE_ASSISTANT_ANTI_GENERIC_RANKING_GUARD_V1"
] as const;

/**
 * Snapshot of every flag's value captured when this module is loaded.
 * A flag that was not set at that moment is stored as `undefined`.
 */
const ORIGINAL_FLAGS: Record<string, string | undefined> = {};
for (const flagName of FLAG_KEYS) {
  ORIGINAL_FLAGS[flagName] = process.env[flagName];
}
/**
 * Puts every flag listed in FLAG_KEYS back to the value recorded in ORIGINAL_FLAGS.
 * Flags that had no recorded value are removed from `process.env` instead of being assigned.
 */
function restoreFlags(): void {
  FLAG_KEYS.forEach((flagName) => {
    const saved = ORIGINAL_FLAGS[flagName];
    if (saved !== undefined) {
      process.env[flagName] = saved;
      return;
    }
    // Deleting (rather than assigning) keeps the variable genuinely unset.
    delete process.env[flagName];
  });
}
/**
 * Writes the four feature flags into `process.env`, resets the vitest module
 * registry, and then imports the server module to build an app instance.
 *
 * @param flags "0"/"1" value for each of the four feature-flag variables.
 * @returns Whatever `createApp()` from `../src/server` returns.
 */
async function createAppWithFlags(flags: {
  answerPolicy: "0" | "1";
  broad: "0" | "1";
  evidenceGate: "0" | "1";
  antiGeneric: "0" | "1";
}) {
  const { answerPolicy, broad, evidenceGate, antiGeneric } = flags;
  Object.assign(process.env, {
    FEATURE_ASSISTANT_ANSWER_POLICY_V11: answerPolicy,
    FEATURE_ASSISTANT_BROAD_GUARD_V1: broad,
    FEATURE_ASSISTANT_MIN_EVIDENCE_GATE_V1: evidenceGate,
    FEATURE_ASSISTANT_ANTI_GENERIC_RANKING_GUARD_V1: antiGeneric
  });
  // The registry is reset before the dynamic import so the server module is loaded after the flags are set.
  vi.resetModules();
  const serverModule = await import("../src/server");
  return serverModule.createApp();
}
/**
 * Returns the first entry of `body.debug.retrieval_results` whose `route`
 * is not the string "no_route".
 *
 * Entries that are not objects (including `null`) are skipped, so a malformed
 * payload yields `null` instead of a TypeError or a non-record return value.
 * An entry without a `route` property counts as routed.
 *
 * @param body Parsed response body.
 * @returns The first routed entry, or `null` when `debug` or
 *   `retrieval_results` is missing, not an array, or has no routed entry.
 */
function firstRoutedResult(body: Record<string, unknown>): Record<string, unknown> | null {
  const debug = body.debug;
  if (typeof debug !== "object" || debug === null) {
    return null;
  }
  const candidates = (debug as { retrieval_results?: unknown }).retrieval_results;
  if (!Array.isArray(candidates)) {
    return null;
  }
  for (const candidate of candidates as unknown[]) {
    if (typeof candidate !== "object" || candidate === null) {
      continue;
    }
    const entry = candidate as Record<string, unknown>;
    if (String(entry.route ?? "") !== "no_route") {
      return entry;
    }
  }
  return null;
}
describe.sequential("assistant answer policy v1.1", () => {
  /** Flag preset with the answer policy and all three guard flags set to "1". */
  const ALL_FLAGS_ON = {
    answerPolicy: "1",
    broad: "1",
    evidenceGate: "1",
    antiGeneric: "1"
  } as const;

  /**
   * Builds an app under the given flags and posts a single mock-mode message
   * to the assistant endpoint, resolving with the HTTP response.
   */
  async function askAssistant(flags: Parameters<typeof createAppWithFlags>[0], userMessage: string) {
    const app = await createAppWithFlags(flags);
    const response = await request(app).post("/api/assistant/message").send({
      useMock: true,
      promptVersion: "normalizer_v2_0_2",
      user_message: userMessage
    });
    return response;
  }

  // Each test mutates process.env and the module registry, so both are reset afterwards.
  afterEach(() => {
    restoreFlags();
    vi.resetModules();
  });

  // Focused question: the reply must carry the three section headings and must not be
  // marked as failing the minimum-evidence check.
  it("keeps focused grounded answer direct and useful", async () => {
    const res = await askAssistant(
      ALL_FLAGS_ON,
      "Проверь счет 97 за 2020-06 по документам и выдели отклонения."
    );

    expect(res.status).toBe(200);
    expect(["factual_with_explanation", "partial_coverage"]).toContain(res.body.reply_type);

    const replyText = String(res.body.assistant_reply);
    for (const heading of ["Коротко:", "Что сломано:", "Ограничения:"]) {
      expect(replyText).toContain(heading);
    }

    const mechanism = res.body.debug?.answer_structure_v11?.mechanism_block;
    expect(mechanism).toBeTruthy();
    expect(["grounded", "limited", "unresolved"]).toContain(mechanism?.status);

    const routedSummary = (firstRoutedResult(res.body)?.summary as Record<string, unknown>) ?? {};
    expect(routedSummary.minimum_evidence_failed).not.toBe(true);
  });

  // Broad question: expects a partial_coverage reply with non-empty limitations and recommended actions.
  it("renders broad partial answer with explicit limitations and concrete next steps", async () => {
    const res = await askAssistant(
      ALL_FLAGS_ON,
      "Покажи в целом общую картину и топ рисков по документам за июнь 2020."
    );

    expect(res.status).toBe(200);
    expect(res.body.reply_type).toBe("partial_coverage");

    const replyText = String(res.body.assistant_reply);
    expect(replyText).toContain("Ограничения:");
    expect(replyText).toContain("Что проверить первым:");

    const answerStructure = res.body.debug?.answer_structure_v11;
    const summaryText = answerStructure?.answer_summary;
    expect(typeof summaryText).toBe("string");
    expect(String(summaryText).length).toBeGreaterThan(15);

    const limitations = answerStructure?.uncertainty_block?.limitations;
    expect(Array.isArray(limitations)).toBe(true);
    expect(limitations?.length).toBeGreaterThan(0);

    const actions = answerStructure?.next_step_block?.recommended_actions;
    expect(Array.isArray(actions)).toBe(true);
    expect(actions?.length).toBeGreaterThan(0);
  });

  // Under-specified question: expects clarification_required with at least one
  // clarification question that matches the domain-term pattern below.
  it("uses domain-specific clarification prompts when support is insufficient", async () => {
    const res = await askAssistant(ALL_FLAGS_ON, "Что не так по документ #123?");

    expect(res.status).toBe(200);
    expect(res.body.reply_type).toBe("clarification_required");

    const clarifications =
      res.body.debug?.answer_structure_v11?.next_step_block?.clarification_questions ?? [];
    expect(Array.isArray(clarifications)).toBe(true);
    expect(clarifications.length).toBeGreaterThan(0);

    const domainTermPattern =
      /period|account|document|counterparty|период|счет|документ|контрагент|пер|РґРѕРєСѓРј/i;
    const mentionsDomainTerm = clarifications.some((item: string) =>
      domainTermPattern.test(String(item))
    );
    expect(mentionsDomainTerm).toBe(true);

    const replyText = String(res.body.assistant_reply);
    expect(replyText).toContain("Что проверить первым:");
    expect(replyText).toMatch(/уточните|период|счет|документ|контрагент/i);
  });

  // Calls the composer directly with one synthetic evidence record whose mechanism_note is null
  // and whose limitation carries the reason code "missing_mechanism".
  it("does not fabricate mechanism when mechanism_note is unresolved", () => {
    const fragmentId = "F1";
    const requirementId = "R1";
    const riskRoute = "store_feature_risk";
    const documentId = "doc-weak-1";
    const period = "2020-06";
    const snapshotNamespace = "snapshot_2020";
    const syntheticReason = "synthetic-test";

    const retrievalResult: UnifiedRetrievalResult = {
      fragment_id: fragmentId,
      requirement_ids: [requirementId],
      route: riskRoute,
      status: "ok",
      result_type: "list",
      items: [{ source_entity: "Document", source_id: documentId }],
      summary: {
        broad_query_detected: false,
        broad_result_flag: false,
        minimum_evidence_failed: false,
        narrowing_strength: "strong"
      },
      evidence: [
        {
          evidence_id: "ev-weak",
          claim_ref: "requirement:R1",
          source_type: "retrieval_item",
          source_ref: {
            schema_version: "evidence_source_ref_v1",
            namespace: snapshotNamespace,
            entity: "document",
            id: documentId,
            period,
            canonical_ref: "evidence_source_ref_v1|snapshot_2020|document|doc-weak-1|2020-06"
          },
          pointer: {
            fragment_id: fragmentId,
            route: riskRoute,
            source: {
              namespace: snapshotNamespace,
              entity: "document",
              id: documentId,
              period
            },
            locator: {
              field_path: "risk_score",
              item_index: 0
            }
          },
          evidence_kind: "anomaly_signal",
          mechanism_note: null,
          confidence: "low",
          limitation: {
            reason_code: "missing_mechanism",
            note: "Mechanism could not be resolved."
          },
          payload: {
            risk_score: 1
          }
        }
      ],
      why_included: [syntheticReason],
      selection_reason: [syntheticReason],
      risk_factors: [],
      business_interpretation: [],
      confidence: "low",
      limitations: ["Weak mechanism evidence."],
      errors: []
    };

    const composed = composeAssistantAnswer({
      userMessage: "Проверь риск по документу doc-weak-1 за 2020-06.",
      routeSummary: {
        mode: "deterministic_v2",
        message_in_scope: true,
        scope_confidence: "high",
        planner: {
          total_fragments: 1,
          in_scope_fragments: 1,
          out_of_scope_fragments: 0,
          discarded_fragments: 0,
          contains_multiple_tasks: false
        },
        decisions: [],
        fallback: {
          type: "none",
          message: null
        }
      },
      retrievalResults: [retrievalResult],
      requirements: [
        {
          requirement_id: requirementId,
          source_fragment_id: fragmentId,
          requirement_text: "Проверить риск документа",
          subject_tokens: ["документ"],
          status: "covered",
          route: riskRoute
        }
      ],
      coverageReport: {
        requirements_total: 1,
        requirements_covered: 1,
        requirements_uncovered: [],
        requirements_partially_covered: [],
        clarification_needed_for: [],
        out_of_scope_requirements: []
      },
      groundingCheck: {
        status: "grounded",
        route_subject_match: true,
        missing_requirements: [],
        reasons: [],
        why_included_summary: [syntheticReason],
        selection_reason_summary: [syntheticReason]
      },
      enableAnswerPolicyV11: true
    });

    const mechanism = composed.answer_structure_v11?.mechanism_block;
    expect(mechanism?.status).toBe("unresolved");
    expect(mechanism?.mechanism_notes).toEqual([]);
    expect(mechanism?.limitation_reason_codes).toContain("missing_mechanism");

    expect(composed.assistant_reply).toContain("Ограничения:");
    // Internal field names must not appear in the user-facing reply text.
    expect(composed.assistant_reply).not.toMatch(/mechanism_note|source_ref|canonical_ref|route|profile/i);
  });

  // Sends the same question with the policy flag "0" and then "1"; only the second reply
  // may contain the "Что сломано:" heading, and the two replies must differ.
  it("preserves legacy reply path when policy flag is OFF", async () => {
    const question = "Проверь счет 97 за 2020-06 по документам и выдели отклонения.";

    const legacy = await askAssistant({ ...ALL_FLAGS_ON, answerPolicy: "0" }, question);
    expect(legacy.status).toBe(200);
    const legacyReply = String(legacy.body.assistant_reply);
    expect(legacyReply).not.toContain("Что сломано:");

    const policy = await askAssistant(ALL_FLAGS_ON, question);
    expect(policy.status).toBe(200);
    const policyReply = String(policy.body.assistant_reply);
    expect(policyReply).toContain("Что сломано:");
    expect(policyReply).not.toBe(legacyReply);
  });
});