// NODEDC_1C/llm_normalizer/backend/tests/assistantAnswerPolicyV11.te...

import request from "supertest";
import { afterEach, describe, expect, it, vi } from "vitest";
import { composeAssistantAnswer } from "../src/services/answerComposer";
import type { UnifiedRetrievalResult } from "../src/types/assistant";

/** Names of the environment feature flags that tests in this file overwrite. */
const FLAG_KEYS = [
  "FEATURE_ASSISTANT_ANSWER_POLICY_V11",
  "FEATURE_ASSISTANT_BROAD_GUARD_V1",
  "FEATURE_ASSISTANT_MIN_EVIDENCE_GATE_V1",
  "FEATURE_ASSISTANT_ANTI_GENERIC_RANKING_GUARD_V1"
] as const;

/**
 * Value of every flag in FLAG_KEYS as it was when this module was loaded.
 * A flag that was not set is recorded as `undefined`.
 */
const ORIGINAL_FLAGS: Record<string, string | undefined> = {};
for (const flagName of FLAG_KEYS) {
  ORIGINAL_FLAGS[flagName] = process.env[flagName];
}

/**
 * Puts every flag listed in FLAG_KEYS back to the value recorded in
 * ORIGINAL_FLAGS. Flags that had no recorded value are removed from
 * process.env instead of being assigned.
 */
function restoreFlags(): void {
  FLAG_KEYS.forEach((flagName) => {
    const saved = ORIGINAL_FLAGS[flagName];
    if (saved !== undefined) {
      process.env[flagName] = saved;
      return;
    }
    // Node stringifies values assigned to process.env, so an unset flag must be deleted, not set to undefined.
    delete process.env[flagName];
  });
}

/**
 * Writes the four assistant feature flags into process.env, resets vitest's
 * module registry, then imports ../src/server and returns the result of its
 * createApp().
 *
 * @param flags "0"/"1" value for each of the four feature flags.
 * @returns Whatever createApp() from ../src/server returns.
 */
async function createAppWithFlags(flags: {
  answerPolicy: "0" | "1";
  broad: "0" | "1";
  evidenceGate: "0" | "1";
  antiGeneric: "0" | "1";
}) {
  const overrides: ReadonlyArray<readonly [string, "0" | "1"]> = [
    ["FEATURE_ASSISTANT_ANSWER_POLICY_V11", flags.answerPolicy],
    ["FEATURE_ASSISTANT_BROAD_GUARD_V1", flags.broad],
    ["FEATURE_ASSISTANT_MIN_EVIDENCE_GATE_V1", flags.evidenceGate],
    ["FEATURE_ASSISTANT_ANTI_GENERIC_RANKING_GUARD_V1", flags.antiGeneric]
  ];
  for (const [envName, value] of overrides) {
    process.env[envName] = value;
  }

  // The registry is reset before the import so the server module is loaded again after the env change.
  // NOTE(review): presumably the server reads these flags at import time — confirm.
  vi.resetModules();
  const serverModule = await import("../src/server");
  const buildApp = serverModule.createApp;
  return buildApp();
}

/**
 * Returns the first entry of `body.debug.retrieval_results` whose `route`
 * (stringified, missing treated as "") is not "no_route".
 *
 * The response body is untyped JSON, so every level is checked at runtime:
 * a missing/non-object `debug`, a non-array `retrieval_results`, and entries
 * that are not objects (e.g. `null`) are skipped instead of causing a
 * TypeError on property access.
 *
 * @param body Parsed response body.
 * @returns The first routed entry, or `null` when there is none.
 */
function firstRoutedResult(body: Record<string, unknown>): Record<string, unknown> | null {
  const debug = body.debug;
  if (typeof debug !== "object" || debug === null) {
    return null;
  }

  const candidates: unknown = (debug as { retrieval_results?: unknown }).retrieval_results;
  if (!Array.isArray(candidates)) {
    return null;
  }

  for (const candidate of candidates as unknown[]) {
    // Only object entries can carry a route; anything else is not a retrieval result.
    if (typeof candidate !== "object" || candidate === null) {
      continue;
    }
    const entry = candidate as Record<string, unknown>;
    if (String(entry.route ?? "") !== "no_route") {
      return entry;
    }
  }
  return null;
}

describe.sequential("assistant answer policy v1.1", () => {
  // After every test: put the feature flags back and reset vitest's module registry.
  afterEach(function restoreFlagsAndModules() {
    restoreFlags();
    vi.resetModules();
  });

  // Focused question (account + period) with all four flags on: expects a 200 with a
  // non-trivial reply that does not contain internal field names, and no
  // minimum_evidence_failed marker on the first routed retrieval result.
  it("keeps focused grounded answer direct and useful", async () => {
    const app = await createAppWithFlags({ answerPolicy: "1", broad: "1", evidenceGate: "1", antiGeneric: "1" });

    const payload = {
      useMock: true,
      promptVersion: "normalizer_v2_0_2",
      user_message: "Проверь счет 97 за 2020-06 по документам и выдели отклонения."
    };
    const response = await request(app).post("/api/assistant/message").send(payload);

    expect(response.status).toBe(200);
    expect(["factual", "factual_with_explanation", "partial_coverage"]).toContain(response.body.reply_type);

    const replyText = String(response.body.assistant_reply);
    expect(replyText.length).toBeGreaterThan(40);
    expect(replyText).not.toMatch(/technical_debug_payload_json|source_ref|canonical_ref/i);

    // The structured debug block is only checked when the response includes it.
    const structure = response.body.debug?.answer_structure_v11;
    if (structure) {
      const mechanismBlock = structure.mechanism_block;
      expect(mechanismBlock).toBeTruthy();
      expect(["grounded", "limited", "unresolved"]).toContain(mechanismBlock?.status);
    }

    const routed = firstRoutedResult(response.body);
    const summary = (routed?.summary ?? {}) as Record<string, unknown>;
    expect(summary.minimum_evidence_failed).not.toBe(true);
  }, 20000);

  // Broad "overall picture" question with all four flags on: the reply text must mention
  // what is missing/unproven and offer a next step; when the structured debug block is
  // present it must carry a summary and at least one recommended action.
  it("renders broad partial answer with explicit limitations and concrete next steps", async () => {
    const app = await createAppWithFlags({ answerPolicy: "1", broad: "1", evidenceGate: "1", antiGeneric: "1" });

    const payload = {
      useMock: true,
      promptVersion: "normalizer_v2_0_2",
      user_message: "Покажи в целом общую картину и топ рисков по документам за июнь 2020."
    };
    const response = await request(app).post("/api/assistant/message").send(payload);

    expect(response.status).toBe(200);
    expect(["partial_coverage", "factual_with_explanation", "factual"]).toContain(response.body.reply_type);

    const replyText = String(response.body.assistant_reply);
    expect(replyText).toMatch(/не хватает|уточните|опорного ориентира|Что пока не доказано:/i);
    expect(replyText).toMatch(/Что проверить первым:|Что могу сделать сейчас:/i);

    const structure = response.body.debug?.answer_structure_v11;
    if (!structure) {
      // Nothing further to verify when the debug structure is absent.
      return;
    }

    const summaryText = structure.answer_summary;
    expect(typeof summaryText).toBe("string");
    expect(String(summaryText).length).toBeGreaterThan(15);

    const actions = structure.next_step_block?.recommended_actions;
    expect(Array.isArray(actions)).toBe(true);
    expect(actions?.length).toBeGreaterThan(0);
  }, 20000);

  // Under-specified question about a single document: any clarification questions in the
  // debug structure must mention a domain anchor (period/account/document/counterparty),
  // and the reply text must either ask for such an anchor or report what was found.
  it("uses domain-specific clarification prompts when support is insufficient", async () => {
    const app = await createAppWithFlags({ answerPolicy: "1", broad: "1", evidenceGate: "1", antiGeneric: "1" });

    const payload = {
      useMock: true,
      promptVersion: "normalizer_v2_0_2",
      user_message: "Что не так по документ #123?"
    };
    const response = await request(app).post("/api/assistant/message").send(payload);

    expect(response.status).toBe(200);
    expect(["clarification_required", "partial_coverage", "factual"]).toContain(response.body.reply_type);

    const structure = response.body.debug?.answer_structure_v11;
    const clarifications = structure?.next_step_block?.clarification_questions ?? [];
    expect(Array.isArray(clarifications)).toBe(true);

    // NOTE(review): the last two alternatives look like "пер" / "докум" whose UTF-8 bytes were
    // decoded as Windows-1251 — confirm whether matching mis-encoded output is intentional.
    const domainAnchorPattern =
      /period|account|document|counterparty|период|счет|документ|контрагент|РїРµСЂ|РґРѕРєСѓРј/i;
    if (clarifications.length > 0) {
      const mentionsDomainAnchor = clarifications.some((item: string) => domainAnchorPattern.test(String(item)));
      expect(mentionsDomainAnchor).toBe(true);
    }

    const replyText = String(response.body.assistant_reply);
    expect(replyText).toMatch(
      /уточните|период|счет|документ|контрагент|ориентир|Найдено документов|Собран список документов|Строк отобрано/i
    );
  }, 20000);

  /**
   * Builds one successful "store_feature_risk" retrieval result whose single
   * evidence item has `mechanism_note: null`, low confidence and a
   * "missing_mechanism" limitation. Shared by the two composer tests below so
   * both run against the same fixture.
   */
  function buildUnresolvedMechanismResult(): UnifiedRetrievalResult {
    return {
      fragment_id: "F1",
      requirement_ids: ["R1"],
      route: "store_feature_risk",
      status: "ok",
      result_type: "list",
      items: [{ source_entity: "Document", source_id: "doc-weak-1" }],
      summary: {
        broad_query_detected: false,
        broad_result_flag: false,
        minimum_evidence_failed: false,
        narrowing_strength: "strong"
      },
      evidence: [
        {
          evidence_id: "ev-weak",
          claim_ref: "requirement:R1",
          source_type: "retrieval_item",
          source_ref: {
            schema_version: "evidence_source_ref_v1",
            namespace: "snapshot_2020",
            entity: "document",
            id: "doc-weak-1",
            period: "2020-06",
            canonical_ref: "evidence_source_ref_v1|snapshot_2020|document|doc-weak-1|2020-06"
          },
          pointer: {
            fragment_id: "F1",
            route: "store_feature_risk",
            source: {
              namespace: "snapshot_2020",
              entity: "document",
              id: "doc-weak-1",
              period: "2020-06"
            },
            locator: {
              field_path: "risk_score",
              item_index: 0
            }
          },
          evidence_kind: "anomaly_signal",
          mechanism_note: null,
          confidence: "low",
          limitation: {
            reason_code: "missing_mechanism",
            note: "Mechanism could not be resolved."
          },
          payload: {
            risk_score: 1
          }
        }
      ],
      why_included: ["synthetic-test"],
      selection_reason: ["synthetic-test"],
      risk_factors: [],
      business_interpretation: [],
      confidence: "low",
      limitations: ["Weak mechanism evidence."],
      errors: []
    };
  }

  /**
   * Builds the composeAssistantAnswer input around the unresolved-mechanism
   * fixture: one in-scope fragment, one covered requirement, grounded check.
   * `enableAnswerPolicyV11` is left out on purpose; each test adds it.
   * The return type is inferred and the `as const` assertions keep the string
   * fields as literals, so the object can be spread into the call.
   */
  function buildUnresolvedMechanismInput() {
    return {
      userMessage: "Проверь риск по документу doc-weak-1 за 2020-06.",
      routeSummary: {
        mode: "deterministic_v2" as const,
        message_in_scope: true,
        scope_confidence: "high" as const,
        planner: {
          total_fragments: 1,
          in_scope_fragments: 1,
          out_of_scope_fragments: 0,
          discarded_fragments: 0,
          contains_multiple_tasks: false
        },
        decisions: [],
        fallback: {
          type: "none" as const,
          message: null
        }
      },
      retrievalResults: [buildUnresolvedMechanismResult()],
      requirements: [
        {
          requirement_id: "R1",
          source_fragment_id: "F1",
          requirement_text: "Проверить риск документа",
          subject_tokens: ["документ"],
          status: "covered" as const,
          route: "store_feature_risk"
        }
      ],
      coverageReport: {
        requirements_total: 1,
        requirements_covered: 1,
        requirements_uncovered: [],
        requirements_partially_covered: [],
        clarification_needed_for: [],
        out_of_scope_requirements: []
      },
      groundingCheck: {
        status: "grounded" as const,
        route_subject_match: true,
        missing_requirements: [],
        reasons: [],
        why_included_summary: ["synthetic-test"],
        selection_reason_summary: ["synthetic-test"]
      }
    };
  }

  // With the policy on and no mechanism note in the evidence, the structured answer must
  // report the mechanism as unresolved with no notes, surface the limitation code, mention
  // limitations in the reply, and keep internal field names out of the reply text.
  it("does not fabricate mechanism when mechanism_note is unresolved", () => {
    const output = composeAssistantAnswer({
      ...buildUnresolvedMechanismInput(),
      enableAnswerPolicyV11: true
    });

    expect(output.answer_structure_v11?.mechanism_block?.status).toBe("unresolved");
    expect(output.answer_structure_v11?.mechanism_block?.mechanism_notes).toEqual([]);
    expect(output.answer_structure_v11?.mechanism_block?.limitation_reason_codes).toContain("missing_mechanism");
    expect(output.assistant_reply).toMatch(/Ограничения:|Что пока не доказано:/);
    expect(output.assistant_reply).not.toMatch(/mechanism_note|source_ref|canonical_ref|route|profile/i);
  });

  // Same input composed twice, differing only in the policy switch: the legacy call must not
  // emit answer_structure_v11, the policy call must, and the two reply texts must differ.
  it("preserves legacy reply path when policy flag is OFF", () => {
    // Built once so both calls receive the same fixture objects.
    const baseInput = buildUnresolvedMechanismInput();

    const legacy = composeAssistantAnswer({ ...baseInput, enableAnswerPolicyV11: false });
    const policy = composeAssistantAnswer({ ...baseInput, enableAnswerPolicyV11: true });

    expect(legacy.answer_structure_v11).toBeUndefined();
    expect(policy.answer_structure_v11).toBeTruthy();
    expect(String(policy.assistant_reply).length).toBeGreaterThan(40);
    expect(String(policy.assistant_reply)).not.toBe(String(legacy.assistant_reply));
  });
});