// File-viewer header captured with the source (not part of the module): 150 lines, 5.4 KiB, TypeScript.
import request from "supertest";
|
|
import { afterEach, describe, expect, it, vi } from "vitest";
|
|
|
|
/**
 * Names of the environment variables that the tests in this file overwrite.
 * Declared `as const` so the element type is the union of the three literals.
 */
const FLAG_KEYS = [
  "FEATURE_ASSISTANT_BROAD_GUARD_V1",
  "FEATURE_ASSISTANT_MIN_EVIDENCE_GATE_V1",
  "FEATURE_ASSISTANT_ANTI_GENERIC_RANKING_GUARD_V1"
] as const;

/**
 * Snapshot of each flag's value in `process.env`, taken once when this module
 * is evaluated. A flag that was unset at that moment is recorded as
 * `undefined`, which lets `restoreFlags` tell "unset" apart from "set".
 */
const ORIGINAL_FLAGS: Record<string, string | undefined> = Object.fromEntries(
  FLAG_KEYS.map((key) => [key, process.env[key]])
);
|
|
|
|
/**
 * Puts every variable listed in `FLAG_KEYS` back to the value recorded in
 * `ORIGINAL_FLAGS`.
 *
 * A flag whose recorded value is `undefined` is deleted from `process.env`
 * rather than assigned, because assigning `undefined` to an env entry would
 * store the string "undefined" instead of clearing it.
 */
function restoreFlags(): void {
  for (const key of FLAG_KEYS) {
    const original = ORIGINAL_FLAGS[key];
    if (original === undefined) {
      delete process.env[key];
    } else {
      process.env[key] = original;
    }
  }
}
|
|
|
|
/**
 * Writes the three guard flags into `process.env`, resets the vitest module
 * registry, then freshly imports `../src/server` and returns the result of
 * its `createApp()`.
 *
 * The env writes happen before `vi.resetModules()` and the dynamic import so
 * that the re-evaluated server module observes the new values.
 * NOTE(review): this presumes the server reads the flags at import or
 * app-creation time; confirm against `../src/server`.
 *
 * @param flags - "0" or "1" for each of the broad guard, minimum-evidence
 *   gate and anti-generic ranking guard flags.
 * @returns Whatever `createApp()` from `../src/server` returns.
 */
async function createAppWithFlags(flags: {
  broad: "0" | "1";
  evidenceGate: "0" | "1";
  antiGeneric: "0" | "1";
}) {
  process.env.FEATURE_ASSISTANT_BROAD_GUARD_V1 = flags.broad;
  process.env.FEATURE_ASSISTANT_MIN_EVIDENCE_GATE_V1 = flags.evidenceGate;
  process.env.FEATURE_ASSISTANT_ANTI_GENERIC_RANKING_GUARD_V1 = flags.antiGeneric;

  // Drop cached modules so the import below re-evaluates the server module.
  vi.resetModules();
  const { createApp } = await import("../src/server");
  return createApp();
}
|
|
|
|
/**
 * Returns the first entry of `body.debug.retrieval_results` whose `route`,
 * converted to a string, is not "no_route".
 *
 * An entry with a missing or nullish `route` counts as routed, because it is
 * compared as the empty string. Entries that are `null` or not objects are
 * skipped instead of being dereferenced, so a malformed payload yields `null`
 * (or the next valid entry) rather than a TypeError.
 *
 * @param body - Parsed response body of the assistant endpoint.
 * @returns The first routed entry, or `null` when `debug.retrieval_results`
 *   is absent, is not an array, or contains no routed entry.
 */
function firstRoutedResult(body: Record<string, unknown>): Record<string, unknown> | null {
  const debug = body.debug as { retrieval_results?: unknown } | null | undefined;
  const results: unknown = debug?.retrieval_results;
  if (!Array.isArray(results)) {
    return null;
  }

  for (const entry of results) {
    if (typeof entry !== "object" || entry === null) {
      continue;
    }
    const candidate = entry as Record<string, unknown>;
    if (String(candidate.route ?? "") !== "no_route") {
      return candidate;
    }
  }
  return null;
}
|
|
|
|
/**
 * Integration tests for the assistant's broad-query guard flags.
 *
 * Each case sets the three feature flags, builds a fresh app through
 * `createAppWithFlags`, posts one message to `/api/assistant/message` with
 * `useMock: true`, and inspects the first routed retrieval result's `summary`
 * together with the top-level `reply_type`.
 *
 * The suite is declared with `describe.sequential` because every case mutates
 * `process.env` and resets the module registry.
 */
describe.sequential("assistant broad guard", () => {
  afterEach(() => {
    // Undo the env changes made by createAppWithFlags and drop the server
    // module that was imported under those flags.
    restoreFlags();
    vi.resetModules();
  });

  it("keeps focused queries from degrading under broad guard", async () => {
    const app = await createAppWithFlags({
      broad: "1",
      evidenceGate: "1",
      antiGeneric: "1"
    });

    // Message (ru): roughly "Check VAT on account 19 for 2020-06 and risky document entries."
    const response = await request(app).post("/api/assistant/message").send({
      useMock: true,
      promptVersion: "normalizer_v2_0_2",
      user_message: "Проверь НДС по счету 19 за 2020-06 и рискованные записи по документам."
    });

    expect(response.status).toBe(200);
    const routed = firstRoutedResult(response.body);
    expect(routed).toBeTruthy();

    const summary = (routed?.summary as Record<string, unknown>) ?? {};
    expect(summary.broad_guard_applied).toBe(false);
    expect(summary.minimum_evidence_failed).toBe(false);
    expect(response.body.reply_type).not.toBe("clarification_required");
  });

  it("degrades broad ranking output to partial instead of deceptively strong factual", async () => {
    const app = await createAppWithFlags({
      broad: "1",
      evidenceGate: "1",
      antiGeneric: "1"
    });

    // Message (ru): roughly "Show the overall picture and top document risks for June 2020."
    const response = await request(app).post("/api/assistant/message").send({
      useMock: true,
      promptVersion: "normalizer_v2_0_2",
      user_message: "Покажи в целом общую картину и топ рисков по документам за июнь 2020."
    });

    expect(response.status).toBe(200);
    const routed = firstRoutedResult(response.body);
    expect(routed).toBeTruthy();
    expect(routed?.route).toBe("batch_refresh_then_store");

    const summary = (routed?.summary as Record<string, unknown>) ?? {};
    expect(summary.broad_guard_applied).toBe(true);
    expect(summary.minimum_evidence_failed).toBe(true);
    expect(summary.anti_generic_guard_applied).toBe(true);
    expect(summary.broad_result_flag).toBe(true);
    // Either degraded reply type is accepted; a full factual reply is not.
    expect(["partial_coverage", "clarification_required"]).toContain(String(response.body.reply_type));
    expect(response.body.reply_type).not.toBe("factual_with_explanation");
  });

  it("returns clarification when broad query has insufficient support", async () => {
    const app = await createAppWithFlags({
      broad: "1",
      evidenceGate: "1",
      antiGeneric: "1"
    });

    // Message (ru): roughly "What is wrong with document #123?"
    const response = await request(app).post("/api/assistant/message").send({
      useMock: true,
      promptVersion: "normalizer_v2_0_2",
      user_message: "Что не так по документ #123?"
    });

    expect(response.status).toBe(200);
    const routed = firstRoutedResult(response.body);
    expect(routed).toBeTruthy();
    expect(routed?.route).toBe("live_mcp_drilldown");

    const summary = (routed?.summary as Record<string, unknown>) ?? {};
    expect(summary.broad_guard_applied).toBe(true);
    expect(summary.minimum_evidence_failed).toBe(true);
    expect(summary.broad_result_flag).toBe(true);
    expect(summary.degraded_to).toBe("clarification");
    expect(response.body.reply_type).toBe("clarification_required");
  });

  it("supports legacy behavior when broad guard flags are OFF", async () => {
    const app = await createAppWithFlags({
      broad: "0",
      evidenceGate: "0",
      antiGeneric: "0"
    });

    // Same broad message as the ranking case above, but with every guard disabled.
    const response = await request(app).post("/api/assistant/message").send({
      useMock: true,
      promptVersion: "normalizer_v2_0_2",
      user_message: "Покажи в целом общую картину и топ рисков по документам за июнь 2020."
    });

    expect(response.status).toBe(200);
    const routed = firstRoutedResult(response.body);
    expect(routed).toBeTruthy();

    // With the flags off, the guard fields are expected to be absent from the
    // summary altogether, not present with a value of false.
    const summary = (routed?.summary as Record<string, unknown>) ?? {};
    expect(summary.broad_guard_applied).toBeUndefined();
    expect(summary.minimum_evidence_failed).toBeUndefined();
    expect(summary.anti_generic_guard_applied).toBeUndefined();
  });
});
|