NODEDC_1C/llm_normalizer/backend/tests/assistantAnswerLeakageGuard...

137 lines
4.4 KiB
TypeScript

import { describe, expect, it } from "vitest";
import { composeAssistantAnswer } from "../src/services/answerComposer";
import type { UnifiedRetrievalResult } from "../src/types/assistant";
/**
 * Builds the route-summary fixture handed to `composeAssistantAnswer` as
 * `routeSummary` in this test file.
 *
 * The fixture describes a single in-scope fragment with no routing decisions
 * and no fallback. A new object graph is created on every call, so a test may
 * mutate the result without affecting other callers.
 *
 * @returns A literal-typed route summary object.
 */
function buildRouteSummary() {
  // Fragment counters: exactly one fragment, and it is in scope.
  const planner = {
    total_fragments: 1,
    in_scope_fragments: 1,
    out_of_scope_fragments: 0,
    discarded_fragments: 0,
    contains_multiple_tasks: false
  };

  // No fallback was triggered for this message.
  const fallback = {
    type: "none" as const,
    message: null
  };

  return {
    mode: "deterministic_v2" as const,
    message_in_scope: true,
    scope_confidence: "high" as const,
    planner,
    decisions: [],
    fallback
  };
}
// Leakage guard: the human-readable reply must not expose raw technical
// references, while the structured answer must still carry them.
describe("assistant answer leakage guard", () => {
  // Identifiers reused throughout the synthetic fixture below.
  const DOCUMENT_ID = "c921c08a-c117-11ea-a2e2-00155d012600";
  const FRAGMENT_ID = "F1";
  const REQUIREMENT_ID = "R1";
  const ROUTE = "store_feature_risk";
  const NAMESPACE = "snapshot_2020";
  const PERIOD = "2020-06";
  const SYNTHETIC_NOTE = "synthetic-test";

  // Technical field names that must never surface in the reply text.
  const TECHNICAL_FIELD_NAMES = /source_ref|canonical_ref|fragment_id|entity_id|guid|uuid/i;
  // Any UUID-shaped token (8-4-4-4-12 hex groups).
  const UUID_SHAPED_TOKEN = /\b[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\b/i;
  // The reply is expected to contain at least one of these Russian word stems.
  const EXPECTED_REPLY_WORDING = /опор|документ|проводк|проблем/i;

  it("removes raw technical refs from assistant reply but keeps structured refs in answer structure", () => {
    // One retrieval result with a single item and a single evidence record,
    // both pointing at the same document id.
    const retrievalFixture: UnifiedRetrievalResult = {
      fragment_id: FRAGMENT_ID,
      requirement_ids: [REQUIREMENT_ID],
      route: ROUTE,
      status: "ok",
      result_type: "list",
      items: [
        {
          source_entity: "Document",
          source_id: DOCUMENT_ID,
          risk_score: 4
        }
      ],
      summary: {
        broad_query_detected: false,
        broad_result_flag: false,
        minimum_evidence_failed: false,
        narrowing_strength: "strong"
      },
      evidence: [
        {
          evidence_id: "ev-1",
          claim_ref: "requirement:R1",
          source_type: "retrieval_item",
          source_ref: {
            schema_version: "evidence_source_ref_v1",
            namespace: NAMESPACE,
            entity: "document",
            id: DOCUMENT_ID,
            period: PERIOD,
            canonical_ref:
              "evidence_source_ref_v1|snapshot_2020|document|c921c08a-c117-11ea-a2e2-00155d012600|2020-06"
          },
          pointer: {
            fragment_id: FRAGMENT_ID,
            route: ROUTE,
            source: {
              namespace: NAMESPACE,
              entity: "document",
              id: DOCUMENT_ID,
              period: PERIOD
            },
            locator: {
              field_path: "risk_score",
              item_index: 0
            }
          },
          evidence_kind: "anomaly_signal",
          mechanism_note: null,
          confidence: "medium",
          limitation: {
            reason_code: "weak_source_mapping",
            note: null
          },
          payload: {
            risk_score: 4
          }
        }
      ],
      why_included: [SYNTHETIC_NOTE],
      selection_reason: [SYNTHETIC_NOTE],
      risk_factors: ["document_conflict"],
      business_interpretation: [SYNTHETIC_NOTE],
      confidence: "medium",
      limitations: ["Weak source mapping evidence."],
      errors: []
    };

    // Compose the answer with both answer-policy flags switched on.
    const output = composeAssistantAnswer({
      userMessage: "Проверь документный риск по счету 60 за 2020-06.",
      routeSummary: buildRouteSummary(),
      retrievalResults: [retrievalFixture],
      requirements: [
        {
          requirement_id: REQUIREMENT_ID,
          source_fragment_id: FRAGMENT_ID,
          requirement_text: "Проверить документный риск",
          subject_tokens: ["account_60", "document", "period_2020_06"],
          status: "covered",
          route: ROUTE
        }
      ],
      coverageReport: {
        requirements_total: 1,
        requirements_covered: 1,
        requirements_uncovered: [],
        requirements_partially_covered: [],
        clarification_needed_for: [],
        out_of_scope_requirements: []
      },
      groundingCheck: {
        status: "grounded",
        route_subject_match: true,
        missing_requirements: [],
        reasons: [],
        why_included_summary: [SYNTHETIC_NOTE],
        selection_reason_summary: [SYNTHETIC_NOTE]
      },
      enableAnswerPolicyV11: true,
      enableProblemCentricAnswerV1: true
    });

    const reply = output.assistant_reply;

    // The reply text must be free of technical field names, UUID-shaped
    // tokens, and the canonical-ref schema prefix.
    expect(reply).not.toMatch(TECHNICAL_FIELD_NAMES);
    expect(reply).not.toMatch(UUID_SHAPED_TOKEN);
    expect(reply).not.toContain("evidence_source_ref_v1|");

    // The reply must still contain one of the expected wording stems.
    expect(reply).toMatch(EXPECTED_REPLY_WORDING);

    // Structured source refs must remain available in the answer structure.
    expect(output.answer_structure_v11?.evidence_block.source_refs?.length).toBeGreaterThan(0);
  });
});