// Source: NODEDC_1C/llm_normalizer/backend/tests/assistantBroadGuard.test.ts
// (repository viewer metadata: 150 lines, 5.4 KiB, TypeScript)

import request from "supertest";
import { afterEach, describe, expect, it, vi } from "vitest";
// Names of the environment variables whose values are snapshotted below.
const FLAG_KEYS = [
  "FEATURE_ASSISTANT_BROAD_GUARD_V1",
  "FEATURE_ASSISTANT_MIN_EVIDENCE_GATE_V1",
  "FEATURE_ASSISTANT_ANTI_GENERIC_RANKING_GUARD_V1"
] as const;

// Snapshot of each variable as it was when this module was evaluated.
// A variable that was unset is recorded with the value `undefined`
// (the key is still present in the snapshot).
const ORIGINAL_FLAGS: Record<string, string | undefined> = {};
for (const flagName of FLAG_KEYS) {
  ORIGINAL_FLAGS[flagName] = process.env[flagName];
}
/**
 * Puts every environment variable listed in FLAG_KEYS back to the value
 * recorded in ORIGINAL_FLAGS.
 *
 * A variable whose recorded value is `undefined` is deleted from
 * `process.env` instead of being assigned.
 */
function restoreFlags(): void {
  FLAG_KEYS.forEach((flagName) => {
    const saved = ORIGINAL_FLAGS[flagName];
    if (saved !== undefined) {
      process.env[flagName] = saved;
      return;
    }
    // Assigning `undefined` would not unset the variable, so delete it.
    delete process.env[flagName];
  });
}
/**
 * Writes the three guard feature flags into `process.env`, resets the Vitest
 * module registry, then dynamically imports `../src/server` and returns the
 * result of its `createApp()`.
 *
 * The order matters: the flags are set before the import runs.
 * NOTE(review): presumably the server module reads these flags when it is
 * evaluated, which is why the registry is reset first — confirm against
 * `../src/server`.
 *
 * @param flags "0" or "1" for each of the three guard flags.
 * @returns Whatever `createApp()` from `../src/server` returns.
 */
async function createAppWithFlags(flags: {
  broad: "0" | "1";
  evidenceGate: "0" | "1";
  antiGeneric: "0" | "1";
}) {
  const { broad, evidenceGate, antiGeneric } = flags;
  Object.assign(process.env, {
    FEATURE_ASSISTANT_BROAD_GUARD_V1: broad,
    FEATURE_ASSISTANT_MIN_EVIDENCE_GATE_V1: evidenceGate,
    FEATURE_ASSISTANT_ANTI_GENERIC_RANKING_GUARD_V1: antiGeneric
  });

  vi.resetModules();

  const serverModule = await import("../src/server");
  // Call through a detached reference, exactly as a destructured import would.
  const buildApp = serverModule.createApp;
  return buildApp();
}
/**
 * Finds the first entry of `body.debug.retrieval_results` whose `route`
 * is anything other than the literal "no_route".
 *
 * - A missing or non-object `debug`, or a `retrieval_results` that is not an
 *   array, is treated as "no results" and yields `null`.
 * - Entries that are not objects (for example `null`) are skipped instead of
 *   being dereferenced, so a malformed payload produces `null` rather than a
 *   TypeError.
 * - An entry with no `route` property still counts as routed, because only
 *   the exact string "no_route" is excluded.
 *
 * @param body Parsed response body.
 * @returns The first matching entry (same object reference), or `null`.
 */
function firstRoutedResult(body: Record<string, unknown>): Record<string, unknown> | null {
  const debug = body.debug;
  if (typeof debug !== "object" || debug === null) {
    return null;
  }

  const results = (debug as { retrieval_results?: unknown }).retrieval_results;
  if (!Array.isArray(results)) {
    return null;
  }

  for (const entry of results as unknown[]) {
    if (typeof entry !== "object" || entry === null) {
      continue;
    }
    const candidate = entry as Record<string, unknown>;
    if (String(candidate.route ?? "") !== "no_route") {
      return candidate;
    }
  }
  return null;
}
// End-to-end checks of POST /api/assistant/message in mock mode with the three
// guard feature flags switched on or off. The flags live in process.env, so
// the original values are restored and the module registry is reset after
// every test.
describe.sequential("assistant broad guard", () => {
  type GuardFlags = Parameters<typeof createAppWithFlags>[0];
  type Json = Record<string, unknown>;

  const GUARDS_ON: GuardFlags = { broad: "1", evidenceGate: "1", antiGeneric: "1" };
  const GUARDS_OFF: GuardFlags = { broad: "0", evidenceGate: "0", antiGeneric: "0" };

  // The same question is sent with guards ON and with guards OFF.
  const BROAD_OVERVIEW_QUESTION = "Покажи в целом общую картину и топ рисков по документам за июнь 2020.";

  /**
   * Boots an app with `flags`, posts one mock-mode message, and performs the
   * two assertions every test starts with: HTTP 200 and a routed result.
   * Returns the response, the routed result and its `summary` (or `{}` when
   * the summary is absent).
   */
  async function exchange(flags: GuardFlags, userMessage: string) {
    const app = await createAppWithFlags(flags);
    const response = await request(app).post("/api/assistant/message").send({
      useMock: true,
      promptVersion: "normalizer_v2_0_2",
      user_message: userMessage
    });
    expect(response.status).toBe(200);

    const routed = firstRoutedResult(response.body);
    expect(routed).toBeTruthy();

    const summary: Json = (routed?.summary as Json) ?? {};
    return { response, routed, summary };
  }

  afterEach(() => {
    restoreFlags();
    vi.resetModules();
  });

  it("keeps focused queries from degrading under broad guard", async () => {
    const { response, summary } = await exchange(
      GUARDS_ON,
      "Проверь НДС по счету 19 за 2020-06 и рискованные записи по документам."
    );

    expect(summary.broad_guard_applied).toBe(false);
    expect(summary.minimum_evidence_failed).toBe(false);
    expect(response.body.reply_type).not.toBe("clarification_required");
  });

  it("degrades broad ranking output to partial instead of deceptively strong factual", async () => {
    const { response, routed, summary } = await exchange(GUARDS_ON, BROAD_OVERVIEW_QUESTION);

    expect(routed?.route).toBe("batch_refresh_then_store");
    expect(summary.broad_guard_applied).toBe(true);
    expect(summary.minimum_evidence_failed).toBe(true);
    expect(summary.anti_generic_guard_applied).toBe(true);
    expect(summary.broad_result_flag).toBe(true);
    expect(["partial_coverage", "clarification_required"]).toContain(String(response.body.reply_type));
    expect(response.body.reply_type).not.toBe("factual_with_explanation");
  });

  it("returns clarification when broad query has insufficient support", async () => {
    const { response, routed, summary } = await exchange(GUARDS_ON, "Что не так по документ #123?");

    expect(routed?.route).toBe("live_mcp_drilldown");
    expect(summary.broad_guard_applied).toBe(true);
    expect(summary.minimum_evidence_failed).toBe(true);
    expect(summary.broad_result_flag).toBe(true);
    expect(summary.degraded_to).toBe("clarification");
    expect(response.body.reply_type).toBe("clarification_required");
  });

  it("supports legacy behavior when broad guard flags are OFF", async () => {
    const { summary } = await exchange(GUARDS_OFF, BROAD_OVERVIEW_QUESTION);

    // With the flags off, the guard fields are expected to be absent.
    expect(summary.broad_guard_applied).toBeUndefined();
    expect(summary.minimum_evidence_failed).toBeUndefined();
    expect(summary.anti_generic_guard_applied).toBeUndefined();
  });
});