383 lines
13 KiB
TypeScript
383 lines
13 KiB
TypeScript
import request from "supertest";
|
|
import { afterEach, describe, expect, it, vi } from "vitest";
|
|
import { composeAssistantAnswer } from "../src/services/answerComposer";
|
|
import type { UnifiedRetrievalResult } from "../src/types/assistant";
|
|
|
|
/** Names of the feature-flag environment variables that this test file overrides. */
const FLAG_KEYS = [
  "FEATURE_ASSISTANT_ANSWER_POLICY_V11",
  "FEATURE_ASSISTANT_BROAD_GUARD_V1",
  "FEATURE_ASSISTANT_MIN_EVIDENCE_GATE_V1",
  "FEATURE_ASSISTANT_ANTI_GENERIC_RANKING_GUARD_V1"
] as const;

/**
 * Snapshot of each flag's value in `process.env` taken when this module is evaluated.
 * A flag that was unset at that moment is recorded with the value `undefined`.
 */
const ORIGINAL_FLAGS: Record<string, string | undefined> = {};
for (const flagName of FLAG_KEYS) {
  ORIGINAL_FLAGS[flagName] = process.env[flagName];
}
|
|
|
|
/**
 * Puts every flag listed in FLAG_KEYS back to the value captured in ORIGINAL_FLAGS.
 * Flags whose captured value is `undefined` are removed from `process.env`
 * instead of being assigned.
 */
function restoreFlags(): void {
  FLAG_KEYS.forEach((flagName) => {
    const saved = ORIGINAL_FLAGS[flagName];
    if (saved !== undefined) {
      process.env[flagName] = saved;
      return;
    }
    // Nothing was captured for this flag, so delete it rather than assigning undefined.
    delete process.env[flagName];
  });
}
|
|
|
|
/**
 * Writes the four assistant feature flags into `process.env`, resets the Vitest
 * module registry, then imports `../src/server` and returns the result of its
 * `createApp()`.
 *
 * @param flags "0" or "1" for each of the four feature flags.
 * @returns Whatever `createApp()` from `../src/server` returns.
 */
async function createAppWithFlags(flags: {
  answerPolicy: "0" | "1";
  broad: "0" | "1";
  evidenceGate: "0" | "1";
  antiGeneric: "0" | "1";
}) {
  const { answerPolicy, broad, evidenceGate, antiGeneric } = flags;

  process.env.FEATURE_ASSISTANT_ANSWER_POLICY_V11 = answerPolicy;
  process.env.FEATURE_ASSISTANT_BROAD_GUARD_V1 = broad;
  process.env.FEATURE_ASSISTANT_MIN_EVIDENCE_GATE_V1 = evidenceGate;
  process.env.FEATURE_ASSISTANT_ANTI_GENERIC_RANKING_GUARD_V1 = antiGeneric;

  // The module registry is reset before the import, after the flags are in place.
  vi.resetModules();
  const serverModule = await import("../src/server");
  const { createApp } = serverModule;
  return createApp();
}
|
|
|
|
/**
 * Returns the first entry of `body.debug.retrieval_results` whose `route`,
 * converted to a string, is not "no_route". An entry with no `route` counts
 * as routed, because a missing route is treated as an empty string.
 *
 * @param body Parsed response body.
 * @returns The first matching entry, or `null` when `retrieval_results` is not
 *          an array or no entry matches.
 */
function firstRoutedResult(body: Record<string, unknown>): Record<string, unknown> | null {
  const debugSection = body.debug as { retrieval_results?: unknown[] } | undefined;
  const candidates = debugSection?.retrieval_results;
  if (!Array.isArray(candidates)) {
    return null;
  }

  for (const entry of candidates as Record<string, unknown>[]) {
    if (String(entry.route ?? "") !== "no_route") {
      return entry;
    }
  }
  return null;
}
|
|
|
|
/** Timeout, in milliseconds, passed to the tests that boot the app and send an HTTP request. */
const HTTP_TEST_TIMEOUT_MS = 20000;

/** Flag set used by every HTTP test in this file: all four guards switched on. */
const ALL_GUARDS_ENABLED = {
  answerPolicy: "1",
  broad: "1",
  evidenceGate: "1",
  antiGeneric: "1"
} as const;

/**
 * Builds an app with all four guards enabled and posts one mock-mode message
 * to `/api/assistant/message`.
 *
 * @param userMessage Text sent as `user_message`.
 * @returns The supertest response.
 */
async function postAssistantMessage(userMessage: string) {
  const app = await createAppWithFlags(ALL_GUARDS_ENABLED);
  return request(app).post("/api/assistant/message").send({
    useMock: true,
    promptVersion: "normalizer_v2_0_2",
    user_message: userMessage
  });
}

/**
 * Synthetic retrieval result carrying a single low-confidence `anomaly_signal`
 * evidence item whose `mechanism_note` is `null` and whose limitation reason
 * code is `missing_mechanism`. A fresh object is returned on every call.
 */
function buildWeakEvidenceResult(): UnifiedRetrievalResult {
  return {
    fragment_id: "F1",
    requirement_ids: ["R1"],
    route: "store_feature_risk",
    status: "ok",
    result_type: "list",
    items: [{ source_entity: "Document", source_id: "doc-weak-1" }],
    summary: {
      broad_query_detected: false,
      broad_result_flag: false,
      minimum_evidence_failed: false,
      narrowing_strength: "strong"
    },
    evidence: [
      {
        evidence_id: "ev-weak",
        claim_ref: "requirement:R1",
        source_type: "retrieval_item",
        source_ref: {
          schema_version: "evidence_source_ref_v1",
          namespace: "snapshot_2020",
          entity: "document",
          id: "doc-weak-1",
          period: "2020-06",
          canonical_ref: "evidence_source_ref_v1|snapshot_2020|document|doc-weak-1|2020-06"
        },
        pointer: {
          fragment_id: "F1",
          route: "store_feature_risk",
          source: {
            namespace: "snapshot_2020",
            entity: "document",
            id: "doc-weak-1",
            period: "2020-06"
          },
          locator: {
            field_path: "risk_score",
            item_index: 0
          }
        },
        evidence_kind: "anomaly_signal",
        mechanism_note: null,
        confidence: "low",
        limitation: {
          reason_code: "missing_mechanism",
          note: "Mechanism could not be resolved."
        },
        payload: {
          risk_score: 1
        }
      }
    ],
    why_included: ["synthetic-test"],
    selection_reason: ["synthetic-test"],
    risk_factors: [],
    business_interpretation: [],
    confidence: "low",
    limitations: ["Weak mechanism evidence."],
    errors: []
  };
}

/**
 * Builds the `composeAssistantAnswer` input shared by the composer tests:
 * one fully covered, grounded requirement backed by the weak-evidence result.
 * `enableAnswerPolicyV11` is omitted on purpose so each test sets it explicitly.
 */
function buildWeakEvidenceComposerInput() {
  return {
    userMessage: "Проверь риск по документу doc-weak-1 за 2020-06.",
    routeSummary: {
      mode: "deterministic_v2" as const,
      message_in_scope: true,
      scope_confidence: "high" as const,
      planner: {
        total_fragments: 1,
        in_scope_fragments: 1,
        out_of_scope_fragments: 0,
        discarded_fragments: 0,
        contains_multiple_tasks: false
      },
      decisions: [],
      fallback: {
        type: "none" as const,
        message: null
      }
    },
    retrievalResults: [buildWeakEvidenceResult()],
    requirements: [
      {
        requirement_id: "R1",
        source_fragment_id: "F1",
        requirement_text: "Проверить риск документа",
        subject_tokens: ["документ"],
        status: "covered" as const,
        route: "store_feature_risk"
      }
    ],
    coverageReport: {
      requirements_total: 1,
      requirements_covered: 1,
      requirements_uncovered: [],
      requirements_partially_covered: [],
      clarification_needed_for: [],
      out_of_scope_requirements: []
    },
    groundingCheck: {
      status: "grounded" as const,
      route_subject_match: true,
      missing_requirements: [],
      reasons: [],
      why_included_summary: ["synthetic-test"],
      selection_reason_summary: ["synthetic-test"]
    }
  };
}

/**
 * Answer policy v1.1 suite. The first three tests send a request through the
 * HTTP app with all guards enabled; the last two call `composeAssistantAnswer`
 * directly with a synthetic fixture. Feature flags and the module registry are
 * restored after every test.
 */
describe.sequential("assistant answer policy v1.1", () => {
  afterEach(() => {
    restoreFlags();
    vi.resetModules();
  });

  it("keeps focused grounded answer direct and useful", async () => {
    const response = await postAssistantMessage("Проверь счет 97 за 2020-06 по документам и выдели отклонения.");

    expect(response.status).toBe(200);
    expect(["factual", "factual_with_explanation", "partial_coverage"]).toContain(response.body.reply_type);
    expect(String(response.body.assistant_reply).length).toBeGreaterThan(40);
    // Internal field names must not leak into the user-facing reply.
    expect(String(response.body.assistant_reply)).not.toMatch(/technical_debug_payload_json|source_ref|canonical_ref/i);

    // The structure assertions run only when the debug payload includes it.
    const structure = response.body.debug?.answer_structure_v11;
    if (structure) {
      expect(structure?.mechanism_block).toBeTruthy();
      expect(["grounded", "limited", "unresolved"]).toContain(structure?.mechanism_block?.status);
    }

    const routed = firstRoutedResult(response.body);
    const summary = (routed?.summary as Record<string, unknown>) ?? {};
    expect(summary.minimum_evidence_failed).not.toBe(true);
  }, HTTP_TEST_TIMEOUT_MS);

  it("renders broad partial answer with explicit limitations and concrete next steps", async () => {
    const response = await postAssistantMessage("Покажи в целом общую картину и топ рисков по документам за июнь 2020.");

    expect(response.status).toBe(200);
    expect(["partial_coverage", "factual_with_explanation", "factual"]).toContain(response.body.reply_type);
    expect(String(response.body.assistant_reply)).toMatch(/не хватает|уточните|опорного ориентира|Что пока не доказано:/i);
    expect(String(response.body.assistant_reply)).toMatch(/Что проверить первым:|Что могу сделать сейчас:/i);

    // The structure assertions run only when the debug payload includes it.
    const structure = response.body.debug?.answer_structure_v11;
    if (structure) {
      expect(typeof structure?.answer_summary).toBe("string");
      expect(String(structure?.answer_summary).length).toBeGreaterThan(15);
      expect(Array.isArray(structure?.next_step_block?.recommended_actions)).toBe(true);
      expect(structure?.next_step_block?.recommended_actions?.length).toBeGreaterThan(0);
    }
  }, HTTP_TEST_TIMEOUT_MS);

  it("uses domain-specific clarification prompts when support is insufficient", async () => {
    const response = await postAssistantMessage("Что не так по документ #123?");

    expect(response.status).toBe(200);
    expect(["clarification_required", "partial_coverage", "factual"]).toContain(response.body.reply_type);

    const structure = response.body.debug?.answer_structure_v11;
    const clarifications = structure?.next_step_block?.clarification_questions ?? [];
    expect(Array.isArray(clarifications)).toBe(true);
    if (clarifications.length > 0) {
      // The final alternative was previously a mis-encoded (UTF-8 read as
      // Windows-1251) rendering of "докум"; it is written in Cyrillic here so
      // that it can match correctly encoded text.
      expect(
        clarifications.some((item: string) =>
          /period|account|document|counterparty|период|счет|документ|контрагент|пер|докум/i.test(String(item))
        )
      ).toBe(true);
    }
    expect(String(response.body.assistant_reply)).toMatch(
      /уточните|период|счет|документ|контрагент|ориентир|Найдено документов|Собран список документов|Строк отобрано/i
    );
  }, HTTP_TEST_TIMEOUT_MS);

  it("does not fabricate mechanism when mechanism_note is unresolved", () => {
    const output = composeAssistantAnswer({
      ...buildWeakEvidenceComposerInput(),
      enableAnswerPolicyV11: true
    });

    expect(output.answer_structure_v11?.mechanism_block?.status).toBe("unresolved");
    expect(output.answer_structure_v11?.mechanism_block?.mechanism_notes).toEqual([]);
    expect(output.answer_structure_v11?.mechanism_block?.limitation_reason_codes).toContain("missing_mechanism");
    expect(output.assistant_reply).toMatch(/Ограничения:|Что пока не доказано:/);
    // Internal field names must not leak into the user-facing reply.
    expect(output.assistant_reply).not.toMatch(/mechanism_note|source_ref|canonical_ref|route|profile/i);
  });

  it("preserves legacy reply path when policy flag is OFF", () => {
    // Both calls share one input object and differ only in the policy flag.
    const baseInput = buildWeakEvidenceComposerInput();

    const legacy = composeAssistantAnswer({ ...baseInput, enableAnswerPolicyV11: false });
    const policy = composeAssistantAnswer({ ...baseInput, enableAnswerPolicyV11: true });

    expect(legacy.answer_structure_v11).toBeUndefined();
    expect(policy.answer_structure_v11).toBeTruthy();
    expect(String(policy.assistant_reply).length).toBeGreaterThan(40);
    expect(String(policy.assistant_reply)).not.toBe(String(legacy.assistant_reply));
  });
});
|