// Listing header: NODEDC_1C/llm_normalizer/backend/tests/assistantProblemCentricAnsw... (path is truncated in the original listing)
// Listing metadata: 471 lines, 17 KiB, TypeScript

import { describe, expect, it } from "vitest";
import { composeAssistantAnswer } from "../src/services/answerComposer";
import type { AnswerGroundingCheck, RequirementCoverageReport, UnifiedRetrievalResult } from "../src/types/assistant";
import type { ProblemUnit, ProblemUnitSummary } from "../src/types/stage2ProblemUnits";
/**
 * Builds the synthetic route summary fixture: deterministic routing, message
 * in scope with high confidence, a single in-scope fragment, no routing
 * decisions and no fallback.
 *
 * @returns A fresh object on every call, so callers may mutate it freely.
 */
function buildRouteSummary() {
  // Planner counters describe exactly one fragment, and that fragment is in scope.
  const planner = {
    total_fragments: 1,
    in_scope_fragments: 1,
    out_of_scope_fragments: 0,
    discarded_fragments: 0,
    contains_multiple_tasks: false
  };

  // "none" is pinned with `as const` so the literal type survives inference.
  const fallback = {
    type: "none" as const,
    message: null
  };

  return {
    mode: "deterministic_v2" as const,
    message_in_scope: true,
    scope_confidence: "high" as const,
    planner,
    decisions: [],
    fallback
  };
}
/**
 * Builds a coverage report fixture for the single requirement "R1".
 *
 * @param partial When true, R1 counts as not covered and is listed both as
 *   uncovered and as partially covered; when false (the default) R1 is fully
 *   covered and both lists are empty.
 * @returns A fresh coverage report with one requirement in total.
 */
function buildCoverage(partial = false): RequirementCoverageReport {
  if (partial) {
    return {
      requirements_total: 1,
      requirements_covered: 0,
      requirements_uncovered: ["R1"],
      requirements_partially_covered: ["R1"],
      clarification_needed_for: [],
      out_of_scope_requirements: []
    };
  }

  return {
    requirements_total: 1,
    requirements_covered: 1,
    requirements_uncovered: [],
    requirements_partially_covered: [],
    clarification_needed_for: [],
    out_of_scope_requirements: []
  };
}
/**
 * Builds a grounding-check fixture with the given status.
 *
 * Only the "partial" status carries a missing requirement ("R1") and a reason;
 * every other status yields empty `missing_requirements` and `reasons`.
 *
 * @param status Grounding status to echo back in the fixture.
 * @returns A fresh grounding check whose route/subject match is always true.
 */
function buildGrounding(status: AnswerGroundingCheck["status"]): AnswerGroundingCheck {
  const isPartial = status === "partial";

  return {
    status,
    route_subject_match: true,
    missing_requirements: isPartial ? ["R1"] : [],
    reasons: isPartial ? ["Coverage is partial for problem-focused analysis."] : [],
    why_included_summary: ["synthetic-test"],
    selection_reason_summary: ["synthetic-test"]
  };
}
/**
 * Builds a synthetic problem unit fixture.
 *
 * Only the id, unit type, mechanism summary and the severity/confidence grades
 * vary; every other field (title, defect class, affected entities, failed
 * edge, period impact, evidence pack, backlinks) is a fixed constant.
 *
 * @param input.id Value stored as `problem_unit_id`.
 * @param input.type Value stored as `problem_unit_type`.
 * @param input.confidenceGrade Confidence grade; also determines the confidence score.
 * @param input.severityGrade Severity grade; also determines the severity score.
 * @param input.mechanism Value stored as `mechanism_summary`.
 * @returns A fresh problem unit object.
 */
function buildProblemUnit(input: {
  id: string;
  type: ProblemUnit["problem_unit_type"];
  confidenceGrade: "low" | "medium" | "high";
  severityGrade: "low" | "medium" | "high";
  mechanism: string;
}): ProblemUnit {
  // Grade -> numeric score: high = 0.8, medium = 0.6, anything else = 0.3.
  const scoreForGrade = (grade: "low" | "medium" | "high"): number => {
    switch (grade) {
      case "high":
        return 0.8;
      case "medium":
        return 0.6;
      default:
        return 0.3;
    }
  };

  const { id, type, confidenceGrade, severityGrade, mechanism } = input;

  return {
    schema_version: "problem_unit_v0_1",
    problem_unit_id: id,
    problem_unit_type: type,
    title: "Broken chain segment detected",
    mechanism_summary: mechanism,
    business_defect_class: "failed_edge:payment_to_settlement",
    severity: {
      score: scoreForGrade(severityGrade),
      grade: severityGrade
    },
    confidence: {
      score: scoreForGrade(confidenceGrade),
      grade: confidenceGrade
    },
    affected_entities: ["Document:DOC-1", "Counterparty:CP-1"],
    affected_documents: ["Document:DOC-1"],
    affected_postings: ["Posting:POST-1"],
    affected_accounts: ["60"],
    affected_counterparties: ["Counterparty:CP-1"],
    affected_contracts: ["Contract:CTR-1"],
    failed_expected_edge: "payment_to_settlement",
    period_impact: {
      is_period_sensitive: true,
      impact_class: "close_risk"
    },
    evidence_pack: ["cand-1"],
    entity_backlinks: [{ entity: "Document", id: "DOC-1" }],
    snapshot_limitations: []
  };
}
/**
 * Aggregates a list of problem units into a summary fixture.
 *
 * Counts units per type, per severity grade and per confidence grade, and
 * lists the distinct unit types in first-seen order. The first distinct type
 * becomes `primary_unit_type` (null when the list is empty).
 * `duplicate_collapses` is always reported as 0.
 *
 * @param units Problem units to summarise; the array is not modified.
 * @returns A fresh summary object.
 */
function buildProblemSummary(units: ProblemUnit[]): ProblemUnitSummary {
  const typeCounts: Partial<Record<ProblemUnit["problem_unit_type"], number>> = {};
  const severityCounts = { low: 0, medium: 0, high: 0 };
  const confidenceCounts = { low: 0, medium: 0, high: 0 };
  // A Set keeps insertion order, so distinct types come out in first-seen order.
  const distinctTypes = new Set<ProblemUnit["problem_unit_type"]>();

  units.forEach((unit) => {
    const unitType = unit.problem_unit_type;
    distinctTypes.add(unitType);
    typeCounts[unitType] = (typeCounts[unitType] ?? 0) + 1;
    severityCounts[unit.severity.grade] += 1;
    confidenceCounts[unit.confidence.grade] += 1;
  });

  const orderedTypes = Array.from(distinctTypes);

  return {
    schema_version: "problem_unit_summary_v0_1",
    units_total: units.length,
    duplicate_collapses: 0,
    unit_types: orderedTypes,
    type_distribution: typeCounts,
    severity_distribution: severityCounts,
    confidence_distribution: confidenceCounts,
    primary_unit_type: orderedTypes[0] ?? null
  };
}
/**
 * Builds a synthetic retrieval result fixture for fragment "F1" / requirement "R1".
 *
 * The result always contains one retrieval item and one evidence entry that
 * points at document DOC-1 in namespace snapshot_2020 for period 2020-06.
 * The inputs only steer the summary flags, the confidence, the limitation
 * details and the attached problem units (plus the summary derived from them
 * via `buildProblemSummary`).
 *
 * @param input.broad Stored as both `broad_query_detected` and `broad_result_flag`.
 * @param input.minimumEvidenceFailed Stored as `summary.minimum_evidence_failed`.
 * @param input.degradedTo Stored as `summary.degraded_to`.
 * @param input.narrowing Stored as `summary.narrowing_strength`.
 * @param input.confidence Used for both the result and its single evidence entry.
 * @param input.limitationReason When non-null, the evidence entry carries a
 *   limitation with this reason code and the result lists one synthetic
 *   limitation; "missing_mechanism" additionally nulls the mechanism note.
 * @param input.problemUnits Attached as-is and summarised.
 * @returns A fresh retrieval result object.
 */
function buildRetrievalResult(input: {
  broad: boolean;
  minimumEvidenceFailed: boolean;
  degradedTo: "partial" | "clarification" | null;
  narrowing: "weak" | "medium" | "strong";
  confidence: UnifiedRetrievalResult["confidence"];
  limitationReason: "missing_mechanism" | "weak_source_mapping" | null;
  problemUnits: ProblemUnit[];
}): UnifiedRetrievalResult {
  const { broad, minimumEvidenceFailed, degradedTo, narrowing, confidence, limitationReason, problemUnits } = input;

  const unitSummary = buildProblemSummary(problemUnits);

  // Evidence without a known mechanism carries no mechanism note at all.
  const mechanismNote = limitationReason === "missing_mechanism" ? null : "failed_edge=payment_to_settlement";

  // Evidence-level limitation mirrors the reason code; absent when there is no reason.
  const evidenceLimitation =
    limitationReason === null
      ? null
      : {
          reason_code: limitationReason,
          note: null
        };

  // Result-level limitations are a fixed synthetic message whenever a reason is given.
  const resultLimitations = limitationReason ? ["Synthetic limitation for weak evidence."] : [];

  return {
    fragment_id: "F1",
    requirement_ids: ["R1"],
    route: "hybrid_store_plus_live",
    status: "ok",
    result_type: "chain",
    items: [
      {
        counterparty_id: "CP-1",
        operations_count: 4,
        document_refs_count: 2
      }
    ],
    raw_entities: [],
    candidate_evidence: [],
    problem_units: problemUnits,
    problem_unit_summary: unitSummary,
    summary: {
      broad_query_detected: broad,
      broad_result_flag: broad,
      minimum_evidence_failed: minimumEvidenceFailed,
      degraded_to: degradedTo,
      narrowing_strength: narrowing
    },
    evidence: [
      {
        evidence_id: "ev-1",
        claim_ref: "requirement:R1",
        source_type: "retrieval_item",
        source_ref: {
          schema_version: "evidence_source_ref_v1",
          namespace: "snapshot_2020",
          entity: "Document",
          id: "DOC-1",
          period: "2020-06",
          canonical_ref: "evidence_source_ref_v1|snapshot_2020|document|doc-1|2020-06"
        },
        pointer: {
          fragment_id: "F1",
          route: "hybrid_store_plus_live",
          source: {
            namespace: "snapshot_2020",
            entity: "Document",
            id: "DOC-1",
            period: "2020-06"
          },
          locator: {
            field_path: "risk_score",
            item_index: 0
          }
        },
        evidence_kind: "mechanism_link",
        mechanism_note: mechanismNote,
        confidence,
        limitation: evidenceLimitation,
        payload: {
          risk_score: 4
        }
      }
    ],
    why_included: ["synthetic-test"],
    selection_reason: ["synthetic-test"],
    risk_factors: ["broken_chain"],
    business_interpretation: ["synthetic-test"],
    confidence,
    limitations: resultLimitations,
    errors: []
  };
}
describe("assistant problem-centric answer mode v1", () => {
  // Query shape reused by the broad scenarios: weak narrowing, result degraded to "partial".
  const broadQuery = {
    broad: true,
    minimumEvidenceFailed: false,
    degradedTo: "partial",
    narrowing: "weak"
  } as const;

  // Query shape reused by the focused scenarios: strong narrowing, nothing degraded.
  const focusedQuery = {
    broad: false,
    minimumEvidenceFailed: false,
    degradedTo: null,
    narrowing: "strong"
  } as const;

  it("uses problem-centric answer mode on problem-heavy case when flag is ON", () => {
    // Broad query, partial coverage, one medium-confidence broken-chain unit, flag ON.
    const brokenChainUnit = buildProblemUnit({
      id: "pu-1",
      type: "broken_chain_segment",
      confidenceGrade: "medium",
      severityGrade: "high",
      mechanism: "Mechanism candidate: failed_edge:payment_to_settlement."
    });
    const retrievalResult = buildRetrievalResult({
      ...broadQuery,
      confidence: "medium",
      limitationReason: null,
      problemUnits: [brokenChainUnit]
    });

    const composed = composeAssistantAnswer({
      userMessage: "Покажи разрывы цепочки и хвосты по расчетам за 2020-06.",
      routeSummary: buildRouteSummary(),
      retrievalResults: [retrievalResult],
      requirements: [
        {
          requirement_id: "R1",
          source_fragment_id: "F1",
          requirement_text: "Проверить дефекты цепочки",
          subject_tokens: ["chain", "account_60"],
          status: "covered",
          route: "hybrid_store_plus_live"
        }
      ],
      coverageReport: buildCoverage(true),
      groundingCheck: buildGrounding("partial"),
      enableAnswerPolicyV11: true,
      enableProblemCentricAnswerV1: true
    });

    expect(composed.problem_centric_answer_applied).toBe(true);
    expect(composed.problem_answer_mode).toBe("stage2_problem_centric_v1");
    expect(composed.problem_units_used_count).toBeGreaterThan(0);
    expect(composed.problem_unit_ids_used).toContain("pu-1");
    expect(composed.answer_structure_v11?.answer_summary).toContain("problem-centric");
  });

  it("falls back to Stage 1 path for the same case when problem-centric flag is OFF", () => {
    // Identical inputs to the flag-ON scenario above; only the feature flag differs.
    const brokenChainUnit = buildProblemUnit({
      id: "pu-1",
      type: "broken_chain_segment",
      confidenceGrade: "medium",
      severityGrade: "high",
      mechanism: "Mechanism candidate: failed_edge:payment_to_settlement."
    });
    const retrievalResult = buildRetrievalResult({
      ...broadQuery,
      confidence: "medium",
      limitationReason: null,
      problemUnits: [brokenChainUnit]
    });

    const composed = composeAssistantAnswer({
      userMessage: "Покажи разрывы цепочки и хвосты по расчетам за 2020-06.",
      routeSummary: buildRouteSummary(),
      retrievalResults: [retrievalResult],
      requirements: [
        {
          requirement_id: "R1",
          source_fragment_id: "F1",
          requirement_text: "Проверить дефекты цепочки",
          subject_tokens: ["chain", "account_60"],
          status: "covered",
          route: "hybrid_store_plus_live"
        }
      ],
      coverageReport: buildCoverage(true),
      groundingCheck: buildGrounding("partial"),
      enableAnswerPolicyV11: true,
      enableProblemCentricAnswerV1: false
    });

    expect(composed.problem_centric_answer_applied).toBe(false);
    expect(composed.problem_answer_mode).toBe("stage1_policy_v11");
    expect(composed.answer_structure_v11?.answer_summary).not.toContain("problem-centric");
  });

  it("keeps focused grounded case on Stage 1 path even when problem-centric flag is ON", () => {
    // Focused query, full coverage, high-confidence unit with no limitation, flag ON.
    const confirmedUnit = buildProblemUnit({
      id: "pu-1",
      type: "broken_chain_segment",
      confidenceGrade: "high",
      severityGrade: "high",
      mechanism: "Mechanism candidate: failed_edge:payment_to_settlement."
    });
    const retrievalResult = buildRetrievalResult({
      ...focusedQuery,
      confidence: "high",
      limitationReason: null,
      problemUnits: [confirmedUnit]
    });

    const composed = composeAssistantAnswer({
      userMessage: "Проверь счет 60 за 2020-06 по конкретному контрагенту и покажи подтвержденный дефект.",
      routeSummary: buildRouteSummary(),
      retrievalResults: [retrievalResult],
      requirements: [
        {
          requirement_id: "R1",
          source_fragment_id: "F1",
          requirement_text: "Проверить конкретный дефект",
          subject_tokens: ["account_60", "counterparty", "document"],
          status: "covered",
          route: "hybrid_store_plus_live"
        }
      ],
      coverageReport: buildCoverage(false),
      groundingCheck: buildGrounding("grounded"),
      enableAnswerPolicyV11: true,
      enableProblemCentricAnswerV1: true
    });

    expect(composed.problem_centric_answer_applied).toBe(false);
    expect(composed.problem_answer_mode).toBe("stage1_policy_v11");
    expect(composed.reply_type).toBe("factual_with_explanation");
  });

  it("enables problem-centric mode on mixed focused case when weak mechanism signals are present", () => {
    // Focused query and full coverage, but the evidence is limited by a missing mechanism.
    const documentConflictUnit = buildProblemUnit({
      id: "pu-doc-1",
      type: "document_conflict",
      confidenceGrade: "medium",
      severityGrade: "medium",
      mechanism: "Mechanism candidate: document_conflict_in_chain."
    });
    const retrievalResult = buildRetrievalResult({
      ...focusedQuery,
      confidence: "medium",
      limitationReason: "missing_mechanism",
      problemUnits: [documentConflictUnit]
    });

    const composed = composeAssistantAnswer({
      userMessage: "Проверь конфликт документа по счету 60 за 2020-06 и оцени влияние.",
      routeSummary: buildRouteSummary(),
      retrievalResults: [retrievalResult],
      requirements: [
        {
          requirement_id: "R1",
          source_fragment_id: "F1",
          requirement_text: "Проверить конфликт документа",
          subject_tokens: ["account_60", "document"],
          status: "covered",
          route: "hybrid_store_plus_live"
        }
      ],
      coverageReport: buildCoverage(false),
      groundingCheck: buildGrounding("grounded"),
      enableAnswerPolicyV11: true,
      enableProblemCentricAnswerV1: true
    });

    expect(composed.problem_centric_answer_applied).toBe(true);
    expect(composed.problem_answer_mode).toBe("stage2_problem_centric_v1");
    expect(composed.problem_units_used_count).toBeGreaterThan(0);
  });

  it("does not expose raw technical refs in primary problem-centric text", () => {
    // Broad query, partial coverage, one period-risk unit, flag ON.
    const periodRiskUnit = buildProblemUnit({
      id: "pu-1",
      type: "period_risk_cluster",
      confidenceGrade: "medium",
      severityGrade: "high",
      mechanism: "Mechanism candidate: failed_edge:payment_to_settlement."
    });
    const retrievalResult = buildRetrievalResult({
      ...broadQuery,
      confidence: "medium",
      limitationReason: null,
      problemUnits: [periodRiskUnit]
    });

    const composed = composeAssistantAnswer({
      userMessage: "Оцени влияние проблем по расчетам на закрытие периода.",
      routeSummary: buildRouteSummary(),
      retrievalResults: [retrievalResult],
      requirements: [
        {
          requirement_id: "R1",
          source_fragment_id: "F1",
          requirement_text: "Оценить влияние на закрытие периода",
          subject_tokens: ["period", "account_60"],
          status: "covered",
          route: "hybrid_store_plus_live"
        }
      ],
      coverageReport: buildCoverage(true),
      groundingCheck: buildGrounding("partial"),
      enableAnswerPolicyV11: true,
      enableProblemCentricAnswerV1: true
    });

    expect(composed.problem_centric_answer_applied).toBe(true);

    // Only the text before the "Evidence block:" marker is checked for raw references.
    const [primaryText] = String(composed.assistant_reply).split("Evidence block:");
    expect(primaryText).not.toContain("evidence_source_ref_v1|");
    expect(primaryText).not.toContain("cand-");
  });

  it("produces limited answer for weak problem units without false overclaim", () => {
    // Broad query, partial coverage, low-confidence unit, evidence limited by a missing mechanism.
    const weakUnit = buildProblemUnit({
      id: "pu-weak-1",
      type: "broken_chain_segment",
      confidenceGrade: "low",
      severityGrade: "low",
      mechanism: "Mechanism is currently inferred at baseline level for broken_chain_segment."
    });
    const retrievalResult = buildRetrievalResult({
      ...broadQuery,
      confidence: "low",
      limitationReason: "missing_mechanism",
      problemUnits: [weakUnit]
    });

    const composed = composeAssistantAnswer({
      userMessage: "Покажи проблемные зоны по расчетам без детализации.",
      routeSummary: buildRouteSummary(),
      retrievalResults: [retrievalResult],
      requirements: [
        {
          requirement_id: "R1",
          source_fragment_id: "F1",
          requirement_text: "Выделить проблемные зоны",
          subject_tokens: ["anomaly"],
          status: "covered",
          route: "hybrid_store_plus_live"
        }
      ],
      coverageReport: buildCoverage(true),
      groundingCheck: buildGrounding("partial"),
      enableAnswerPolicyV11: true,
      enableProblemCentricAnswerV1: true
    });

    expect(composed.problem_centric_answer_applied).toBe(true);
    expect(composed.answer_structure_v11?.mechanism_block.status).not.toBe("grounded");
    expect(composed.answer_structure_v11?.uncertainty_block.limitations.join(" ")).toMatch(/limited|огранич/i);
    expect(composed.answer_structure_v11?.direct_answer).toMatch(/limited|confidence=low|огр|пред/i);
  });
});