import request from "supertest";
import { afterEach, describe, expect, it, vi } from "vitest";
import { composeAssistantAnswer } from "../src/services/answerComposer";
import type { UnifiedRetrievalResult } from "../src/types/assistant";

/** Environment variables (feature flags) that this suite overwrites and must restore. */
const FLAG_KEYS = [
  "FEATURE_ASSISTANT_ANSWER_POLICY_V11",
  "FEATURE_ASSISTANT_BROAD_GUARD_V1",
  "FEATURE_ASSISTANT_MIN_EVIDENCE_GATE_V1",
  "FEATURE_ASSISTANT_ANTI_GENERIC_RANKING_GUARD_V1"
] as const;

/** Snapshot of the flag values taken when this module is loaded, keyed by variable name. */
const ORIGINAL_FLAGS: Record<string, string | undefined> = Object.fromEntries(
  FLAG_KEYS.map((key) => [key, process.env[key]])
);

/** Per-test timeout (ms) passed to the tests that go through the HTTP endpoint. */
const HTTP_TEST_TIMEOUT_MS = 20000;

/**
 * Puts every variable in FLAG_KEYS back to its snapshot value.
 * Variables that were unset in the snapshot are deleted rather than assigned,
 * because assigning `undefined` to `process.env` stores the string "undefined".
 */
function restoreFlags(): void {
  for (const key of FLAG_KEYS) {
    const original = ORIGINAL_FLAGS[key];
    if (original === undefined) {
      delete process.env[key];
    } else {
      process.env[key] = original;
    }
  }
}

/**
 * Writes the four feature flags into `process.env`, resets the Vitest module
 * registry and then dynamically imports `../src/server`, returning `createApp()`.
 * The reset happens before the import so the server module is loaded fresh
 * after the flags were written (presumably it reads them at load time — verify
 * against the server module).
 *
 * @param flags "0"/"1" value for each of the four feature flags.
 * @returns Whatever `createApp()` from `../src/server` returns.
 */
async function createAppWithFlags(flags: {
  answerPolicy: "0" | "1";
  broad: "0" | "1";
  evidenceGate: "0" | "1";
  antiGeneric: "0" | "1";
}) {
  process.env.FEATURE_ASSISTANT_ANSWER_POLICY_V11 = flags.answerPolicy;
  process.env.FEATURE_ASSISTANT_BROAD_GUARD_V1 = flags.broad;
  process.env.FEATURE_ASSISTANT_MIN_EVIDENCE_GATE_V1 = flags.evidenceGate;
  process.env.FEATURE_ASSISTANT_ANTI_GENERIC_RANKING_GUARD_V1 = flags.antiGeneric;
  vi.resetModules();
  const { createApp } = await import("../src/server");
  return createApp();
}

/**
 * Returns the first entry of `body.debug.retrieval_results` whose `route`
 * (stringified, missing treated as "") is not "no_route".
 *
 * @param body Parsed response body.
 * @returns The matching entry, or `null` when the list is absent, not an array,
 *          or contains no such entry.
 */
function firstRoutedResult(body: Record<string, unknown>): Record<string, unknown> | null {
  const candidates = (body.debug as { retrieval_results?: unknown[] } | undefined)?.retrieval_results;
  const retrieval = Array.isArray(candidates) ? (candidates as Record<string, unknown>[]) : [];
  return retrieval.find((item) => String(item.route ?? "") !== "no_route") ?? null;
}

/**
 * Builds an app with all four flags set to "1" and POSTs one mock-mode message
 * to `/api/assistant/message`.
 *
 * @param userMessage Text sent as `user_message`.
 * @returns The supertest response.
 */
async function postAssistantMessage(userMessage: string) {
  const app = await createAppWithFlags({ answerPolicy: "1", broad: "1", evidenceGate: "1", antiGeneric: "1" });
  return request(app).post("/api/assistant/message").send({
    useMock: true,
    promptVersion: "normalizer_v2_0_2",
    user_message: userMessage
  });
}

/**
 * Synthetic retrieval result with a single low-confidence evidence item whose
 * `mechanism_note` is null and whose limitation reason code is
 * "missing_mechanism". A fresh object is returned on every call.
 */
function buildWeakMechanismResult(): UnifiedRetrievalResult {
  return {
    fragment_id: "F1",
    requirement_ids: ["R1"],
    route: "store_feature_risk",
    status: "ok",
    result_type: "list",
    items: [{ source_entity: "Document", source_id: "doc-weak-1" }],
    summary: {
      broad_query_detected: false,
      broad_result_flag: false,
      minimum_evidence_failed: false,
      narrowing_strength: "strong"
    },
    evidence: [
      {
        evidence_id: "ev-weak",
        claim_ref: "requirement:R1",
        source_type: "retrieval_item",
        source_ref: {
          schema_version: "evidence_source_ref_v1",
          namespace: "snapshot_2020",
          entity: "document",
          id: "doc-weak-1",
          period: "2020-06",
          canonical_ref: "evidence_source_ref_v1|snapshot_2020|document|doc-weak-1|2020-06"
        },
        pointer: {
          fragment_id: "F1",
          route: "store_feature_risk",
          source: {
            namespace: "snapshot_2020",
            entity: "document",
            id: "doc-weak-1",
            period: "2020-06"
          },
          locator: { field_path: "risk_score", item_index: 0 }
        },
        evidence_kind: "anomaly_signal",
        mechanism_note: null,
        confidence: "low",
        limitation: {
          reason_code: "missing_mechanism",
          note: "Mechanism could not be resolved."
        },
        payload: { risk_score: 1 }
      }
    ],
    why_included: ["synthetic-test"],
    selection_reason: ["synthetic-test"],
    risk_factors: [],
    business_interpretation: [],
    confidence: "low",
    limitations: ["Weak mechanism evidence."],
    errors: []
  };
}

/**
 * Builds the `composeAssistantAnswer` input shared by the composer-level tests:
 * one in-scope fragment, one covered requirement, a "grounded" grounding check
 * and the weak-mechanism retrieval result. `enableAnswerPolicyV11` is left out
 * on purpose so each test sets it explicitly.
 */
function buildWeakMechanismInput() {
  return {
    userMessage: "Проверь риск по документу doc-weak-1 за 2020-06.",
    routeSummary: {
      mode: "deterministic_v2" as const,
      message_in_scope: true,
      scope_confidence: "high" as const,
      planner: {
        total_fragments: 1,
        in_scope_fragments: 1,
        out_of_scope_fragments: 0,
        discarded_fragments: 0,
        contains_multiple_tasks: false
      },
      decisions: [],
      fallback: { type: "none" as const, message: null }
    },
    retrievalResults: [buildWeakMechanismResult()],
    requirements: [
      {
        requirement_id: "R1",
        source_fragment_id: "F1",
        requirement_text: "Проверить риск документа",
        subject_tokens: ["документ"],
        status: "covered" as const,
        route: "store_feature_risk"
      }
    ],
    coverageReport: {
      requirements_total: 1,
      requirements_covered: 1,
      requirements_uncovered: [],
      requirements_partially_covered: [],
      clarification_needed_for: [],
      out_of_scope_requirements: []
    },
    groundingCheck: {
      status: "grounded" as const,
      route_subject_match: true,
      missing_requirements: [],
      reasons: [],
      why_included_summary: ["synthetic-test"],
      selection_reason_summary: ["synthetic-test"]
    }
  };
}

// Sequential because every HTTP test mutates process.env and the module registry.
describe.sequential("assistant answer policy v1.1", () => {
  afterEach(() => {
    restoreFlags();
    vi.resetModules();
  });

  it(
    "keeps focused grounded answer direct and useful",
    async () => {
      const response = await postAssistantMessage("Проверь счет 97 за 2020-06 по документам и выдели отклонения.");

      expect(response.status).toBe(200);
      expect(["factual", "factual_with_explanation", "partial_coverage"]).toContain(response.body.reply_type);
      expect(String(response.body.assistant_reply).length).toBeGreaterThan(40);
      // Internal field names must not leak into the user-facing reply.
      expect(String(response.body.assistant_reply)).not.toMatch(/technical_debug_payload_json|source_ref|canonical_ref/i);

      // The structured payload is only asserted on when the response carries it.
      const structure = response.body.debug?.answer_structure_v11;
      if (structure) {
        expect(structure.mechanism_block).toBeTruthy();
        expect(["grounded", "limited", "unresolved"]).toContain(structure.mechanism_block?.status);
      }

      const routed = firstRoutedResult(response.body);
      const summary = (routed?.summary as Record<string, unknown> | undefined) ?? {};
      expect(summary.minimum_evidence_failed).not.toBe(true);
    },
    HTTP_TEST_TIMEOUT_MS
  );

  it(
    "renders broad partial answer with explicit limitations and concrete next steps",
    async () => {
      const response = await postAssistantMessage(
        "Покажи в целом общую картину и топ рисков по документам за июнь 2020."
      );

      expect(response.status).toBe(200);
      expect(["partial_coverage", "factual_with_explanation", "factual"]).toContain(response.body.reply_type);
      expect(String(response.body.assistant_reply)).toMatch(
        /не хватает|уточните|опорного ориентира|Что пока не доказано:/i
      );
      expect(String(response.body.assistant_reply)).toMatch(/Что проверить первым:|Что могу сделать сейчас:/i);

      const structure = response.body.debug?.answer_structure_v11;
      if (structure) {
        expect(typeof structure.answer_summary).toBe("string");
        expect(String(structure.answer_summary).length).toBeGreaterThan(15);
        expect(Array.isArray(structure.next_step_block?.recommended_actions)).toBe(true);
        expect(structure.next_step_block?.recommended_actions?.length).toBeGreaterThan(0);
      }
    },
    HTTP_TEST_TIMEOUT_MS
  );

  it(
    "uses domain-specific clarification prompts when support is insufficient",
    async () => {
      const response = await postAssistantMessage("Что не так по документ #123?");

      expect(response.status).toBe(200);
      expect(["clarification_required", "partial_coverage", "factual"]).toContain(response.body.reply_type);

      const structure = response.body.debug?.answer_structure_v11;
      const clarifications = structure?.next_step_block?.clarification_questions ?? [];
      expect(Array.isArray(clarifications)).toBe(true);
      if (clarifications.length > 0) {
        // NOTE(review): the last alternative looks like the Cyrillic fragment "докум"
        // mis-decoded through a single-byte code page. It is kept verbatim here;
        // confirm whether it should be replaced with the correctly encoded text.
        expect(
          clarifications.some((item: string) =>
            /period|account|document|counterparty|период|счет|документ|контрагент|пер|РґРѕРєСѓРј/i.test(String(item))
          )
        ).toBe(true);
      }

      expect(String(response.body.assistant_reply)).toMatch(
        /уточните|период|счет|документ|контрагент|ориентир|Найдено документов|Собран список документов|Строк отобрано/i
      );
    },
    HTTP_TEST_TIMEOUT_MS
  );

  it("does not fabricate mechanism when mechanism_note is unresolved", () => {
    const output = composeAssistantAnswer({ ...buildWeakMechanismInput(), enableAnswerPolicyV11: true });

    expect(output.answer_structure_v11?.mechanism_block?.status).toBe("unresolved");
    expect(output.answer_structure_v11?.mechanism_block?.mechanism_notes).toEqual([]);
    expect(output.answer_structure_v11?.mechanism_block?.limitation_reason_codes).toContain("missing_mechanism");
    expect(output.assistant_reply).toMatch(/Ограничения:|Что пока не доказано:/);
    expect(output.assistant_reply).not.toMatch(/mechanism_note|source_ref|canonical_ref|route|profile/i);
  });

  it("preserves legacy reply path when policy flag is OFF", () => {
    // Both calls share one input object; only the policy switch differs.
    const baseInput = buildWeakMechanismInput();
    const legacy = composeAssistantAnswer({ ...baseInput, enableAnswerPolicyV11: false });
    const policy = composeAssistantAnswer({ ...baseInput, enableAnswerPolicyV11: true });

    expect(legacy.answer_structure_v11).toBeUndefined();
    expect(policy.answer_structure_v11).toBeTruthy();
    expect(String(policy.assistant_reply).length).toBeGreaterThan(40);
    expect(String(policy.assistant_reply)).not.toBe(String(legacy.assistant_reply));
  });
});