NODEDC_1C/llm_normalizer/backend/tests/assistantProblemUnitRuntime...

150 lines
6.0 KiB
TypeScript

import request from "supertest";
import { afterEach, describe, expect, it, vi } from "vitest";
/**
 * Names of the environment feature flags this suite overrides while building
 * the app, and therefore has to restore afterwards.
 */
const FLAG_KEYS = [
  "FEATURE_ASSISTANT_PROBLEM_UNITS_V1",
  "FEATURE_ASSISTANT_ANSWER_POLICY_V11",
  "FEATURE_ASSISTANT_BROAD_GUARD_V1",
  "FEATURE_ASSISTANT_MIN_EVIDENCE_GATE_V1",
  "FEATURE_ASSISTANT_ANTI_GENERIC_RANKING_GUARD_V1"
] as const;

/**
 * Snapshot of every flag in FLAG_KEYS, taken once when this module is
 * evaluated. A flag that was not set at that moment is stored as `undefined`.
 */
const ORIGINAL_FLAGS: Record<string, string | undefined> = {};
for (const flagName of FLAG_KEYS) {
  ORIGINAL_FLAGS[flagName] = process.env[flagName];
}
/**
 * Puts every flag listed in FLAG_KEYS back to the value recorded in
 * ORIGINAL_FLAGS. A flag with no recorded value is deleted from
 * `process.env` instead of being assigned, so it ends up unset again.
 */
function restoreFlags(): void {
  for (const flagName of FLAG_KEYS) {
    const snapshot = ORIGINAL_FLAGS[flagName];
    if (snapshot !== undefined) {
      process.env[flagName] = snapshot;
      continue;
    }
    delete process.env[flagName];
  }
}
/**
 * Creates an app instance with the problem-units flag set to `flagValue` and
 * the four companion assistant flags forced to "1".
 *
 * The Vitest module registry is reset before `../src/server` is imported, so
 * the import is evaluated afresh — presumably so modules that read the flags
 * at load time see the values written here.
 *
 * @param flagValue - Value written to FEATURE_ASSISTANT_PROBLEM_UNITS_V1.
 * @returns Whatever `createApp()` from `../src/server` returns.
 */
async function createAppWithProblemUnitsFlag(flagValue: "0" | "1") {
  Object.assign(process.env, {
    FEATURE_ASSISTANT_PROBLEM_UNITS_V1: flagValue,
    FEATURE_ASSISTANT_ANSWER_POLICY_V11: "1",
    FEATURE_ASSISTANT_BROAD_GUARD_V1: "1",
    FEATURE_ASSISTANT_MIN_EVIDENCE_GATE_V1: "1",
    FEATURE_ASSISTANT_ANTI_GENERIC_RANKING_GUARD_V1: "1"
  });
  vi.resetModules();
  const serverModule = await import("../src/server");
  return serverModule.createApp();
}
/**
 * Picks the routed retrieval results out of an assistant response body.
 *
 * @param body - Parsed response payload; the list is read from
 *   `body.debug.retrieval_results`.
 * @returns The entries whose `route` is anything other than "no_route"
 *   (a missing or null `route` is kept). Returns an empty array when the
 *   list is absent or is not an array.
 */
function routedRetrievalResults(body: Record<string, unknown>): Record<string, unknown>[] {
  const debugSection = body.debug as { retrieval_results?: unknown } | undefined;
  const candidates = debugSection?.retrieval_results;
  if (!Array.isArray(candidates)) {
    return [];
  }
  const entries = candidates as Record<string, unknown>[];
  return entries.filter((entry) => String(entry.route ?? "") !== "no_route");
}
/**
 * Rollout checks for the assistant problem-unit layer, driven through
 * POST /api/assistant/message with `useMock: true`.
 *
 * Each test builds its own app via createAppWithProblemUnitsFlag, which
 * mutates `process.env`; the afterEach hook restores the flags and resets the
 * module registry. The suite is declared sequential — presumably because of
 * that shared process-wide state.
 */
describe.sequential("assistant problem-unit runtime rollout", () => {
  // Problem-unit types the flag-ON test accepts; any other observed type fails it.
  const knownProblemUnitTypes: string[] = [
    "document_conflict",
    "broken_chain_segment",
    "lifecycle_anomaly_node",
    "unresolved_settlement_cluster",
    "period_risk_cluster",
    "cross_branch_inconsistency_cluster"
  ];

  // Reads `score` from a loosely typed payload field. The field is `unknown`,
  // so it has to be narrowed before property access under strict type checking.
  const scoreOf = (field: unknown): unknown =>
    (field as { score?: unknown } | null | undefined)?.score;

  afterEach(() => {
    restoreFlags();
    vi.resetModules();
  });

  it("emits problem-unit layer on problem-heavy scenarios when flag is ON", async () => {
    const app = await createAppWithProblemUnitsFlag("1");
    const cases = [
      {
        tag: "chain",
        user_message: "Разложи цепочку документов и оплат по контрагентам за 2020-06, где разрыв механизма закрытия."
      },
      {
        tag: "anomaly",
        user_message: "Разложи lifecycle по счету 97 за 2020-06 и покажи аномалии списания по последовательности."
      },
      {
        tag: "contradiction",
        user_message: "Проверь НДС за 2020-06: где противоречия между документами, проводками и регистрами."
      },
      {
        tag: "period_risk",
        user_message: "Разложи по счетам 51 и 60 за 2020-06, что создаёт риск закрытия периода и где разрывы цепочки."
      }
    ];
    const observedTypes = new Set<string>();
    let scenariosWithProblemUnits = 0;

    for (const scenario of cases) {
      const response = await request(app).post("/api/assistant/message").send({
        useMock: true,
        promptVersion: "normalizer_v2_0_2",
        user_message: scenario.user_message
      });
      expect(response.status).toBe(200);

      const routed = routedRetrievalResults(response.body as Record<string, unknown>);
      expect(routed.length).toBeGreaterThan(0);

      const withProblemUnits = routed.filter(
        (item) => Array.isArray(item.problem_units) && item.problem_units.length > 0
      );
      // A scenario may legitimately produce no problem units; the suite only
      // requires that at least one scenario does (asserted after the loop).
      if (withProblemUnits.length === 0) {
        continue;
      }
      scenariosWithProblemUnits += 1;

      for (const result of withProblemUnits) {
        const summary = (result.summary ?? {}) as Record<string, unknown>;
        const problemSummary = (result.problem_unit_summary ?? {}) as Record<string, unknown>;
        // Assert the shape first so a missing field fails as an assertion
        // rather than as a TypeError on `.length` below.
        expect(Array.isArray(result.candidate_evidence)).toBe(true);
        const candidateEvidence = result.candidate_evidence as Array<Record<string, unknown>>;
        const problemUnits = result.problem_units as Array<Record<string, unknown>>;

        expect(summary.problem_units_enabled).toBe(true);
        expect(summary.candidate_evidence_count).toBe(candidateEvidence.length);
        expect(summary.problem_units_count).toBe(problemUnits.length);
        expect(Array.isArray(summary.problem_unit_types)).toBe(true);
        expect(typeof summary.problem_unit_duplicate_collapses).toBe("number");
        expect(problemSummary.units_total).toBe(problemUnits.length);

        for (const unit of problemUnits) {
          expect(typeof unit.problem_unit_id).toBe("string");
          expect(typeof unit.problem_unit_type).toBe("string");
          expect(typeof unit.mechanism_summary).toBe("string");
          expect(typeof scoreOf(unit.severity)).toBe("number");
          expect(typeof scoreOf(unit.confidence)).toBe("number");
          observedTypes.add(String(unit.problem_unit_type));
        }
      }
    }

    expect(scenariosWithProblemUnits).toBeGreaterThan(0);
    expect(observedTypes.size).toBeGreaterThan(0);
    // Checked one by one so a failure names the unexpected type.
    for (const observedType of observedTypes) {
      expect(knownProblemUnitTypes).toContain(observedType);
    }
  });

  it("does not emit problem-unit layer when flag is OFF", async () => {
    const app = await createAppWithProblemUnitsFlag("0");
    const response = await request(app).post("/api/assistant/message").send({
      useMock: true,
      promptVersion: "normalizer_v2_0_2",
      user_message: "Разложи цепочку документов и оплат по контрагентам за 2020-06."
    });
    expect(response.status).toBe(200);

    const routed = routedRetrievalResults(response.body as Record<string, unknown>);
    expect(routed.length).toBeGreaterThan(0);

    for (const result of routed) {
      expect(result.raw_entities).toBeUndefined();
      expect(result.candidate_evidence).toBeUndefined();
      expect(result.problem_units).toBeUndefined();
      expect(result.problem_unit_summary).toBeUndefined();
      // `summary` may be absent; an absent summary also means the marker is not set.
      const summary = result.summary as Record<string, unknown> | undefined;
      expect(summary?.problem_units_enabled).toBeUndefined();
    }
  });
});