/*
 * Repository web-viewer residue captured together with this file (apparently not part of the test module itself):
 *
 * NODEDC_1C/llm_normalizer/backend/tests/assistantProblemCentricAnsw...
 *
 * 470 lines
 * 16 KiB
 * TypeScript
 * Raw Blame History
 *
 * This file contains ambiguous Unicode characters
 *
 * This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
 */

import { describe, expect, it } from "vitest";
import { composeAssistantAnswer } from "../src/services/answerComposer";
import type { AnswerGroundingCheck, RequirementCoverageReport, UnifiedRetrievalResult } from "../src/types/assistant";
import type { ProblemUnit, ProblemUnitSummary } from "../src/types/stage2ProblemUnits";
/**
 * Creates a fresh route-summary fixture: deterministic_v2 mode, message in scope
 * with high confidence, exactly one in-scope fragment, no decisions and no fallback.
 */
function buildRouteSummary() {
  // Planner counters describing a single, fully in-scope fragment.
  const planner = {
    total_fragments: 1,
    in_scope_fragments: 1,
    out_of_scope_fragments: 0,
    discarded_fragments: 0,
    contains_multiple_tasks: false
  };
  // "none" fallback: nothing was rerouted and there is no fallback message.
  const fallback = {
    type: "none" as const,
    message: null
  };
  return {
    mode: "deterministic_v2" as const,
    message_in_scope: true,
    scope_confidence: "high" as const,
    planner,
    decisions: [],
    fallback
  };
}
/**
 * Creates a coverage-report fixture for the single requirement "R1".
 *
 * @param partial When true, R1 is listed as both uncovered and partially covered
 *                and the covered count is 0; otherwise R1 counts as covered.
 * @returns A new report object; the two requirement lists are separate arrays.
 */
function buildCoverage(partial = false): RequirementCoverageReport {
  // Produces a new list on each call so the report fields never share an array.
  const openRequirements = (): string[] => (partial ? ["R1"] : []);
  const coveredCount = partial ? 0 : 1;
  return {
    requirements_total: 1,
    requirements_covered: coveredCount,
    requirements_uncovered: openRequirements(),
    requirements_partially_covered: openRequirements(),
    clarification_needed_for: [],
    out_of_scope_requirements: []
  };
}
/**
 * Creates a grounding-check fixture for the given status.
 *
 * Only the "partial" status yields a missing requirement ("R1") and a reason;
 * every other status produces empty lists for both.
 *
 * @param status Grounding status copied verbatim into the result.
 */
function buildGrounding(status: AnswerGroundingCheck["status"]): AnswerGroundingCheck {
  const isPartial = status === "partial";
  const missingRequirements = isPartial ? ["R1"] : [];
  const reasons = isPartial ? ["Coverage is partial for problem-focused analysis."] : [];
  return {
    status,
    route_subject_match: true,
    missing_requirements: missingRequirements,
    reasons,
    why_included_summary: ["synthetic-test"],
    selection_reason_summary: ["synthetic-test"]
  };
}
/**
 * Creates a synthetic problem unit for a broken payment-to-settlement edge.
 *
 * Only the id, unit type, confidence/severity grades and mechanism summary vary;
 * all remaining fields are fixed fixture values (DOC-1, CP-1, CTR-1, account 60, ...).
 *
 * @param input.id              Value for `problem_unit_id`.
 * @param input.type            Value for `problem_unit_type`.
 * @param input.confidenceGrade Confidence grade; also determines the confidence score.
 * @param input.severityGrade   Severity grade; also determines the severity score.
 * @param input.mechanism       Value for `mechanism_summary`.
 */
function buildProblemUnit(input: {
  id: string;
  type: ProblemUnit["problem_unit_type"];
  confidenceGrade: "low" | "medium" | "high";
  severityGrade: "low" | "medium" | "high";
  mechanism: string;
}): ProblemUnit {
  // Grade-to-score mapping shared by severity and confidence: high 0.8, medium 0.6, otherwise 0.3.
  const scoreFor = (grade: "low" | "medium" | "high"): number => {
    switch (grade) {
      case "high":
        return 0.8;
      case "medium":
        return 0.6;
      default:
        return 0.3;
    }
  };
  const { id, type, confidenceGrade, severityGrade, mechanism } = input;
  return {
    schema_version: "problem_unit_v0_1",
    problem_unit_id: id,
    problem_unit_type: type,
    title: "Broken chain segment detected",
    mechanism_summary: mechanism,
    business_defect_class: "failed_edge:payment_to_settlement",
    severity: { score: scoreFor(severityGrade), grade: severityGrade },
    confidence: { score: scoreFor(confidenceGrade), grade: confidenceGrade },
    affected_entities: ["Document:DOC-1", "Counterparty:CP-1"],
    affected_documents: ["Document:DOC-1"],
    affected_postings: ["Posting:POST-1"],
    affected_accounts: ["60"],
    affected_counterparties: ["Counterparty:CP-1"],
    affected_contracts: ["Contract:CTR-1"],
    failed_expected_edge: "payment_to_settlement",
    period_impact: {
      is_period_sensitive: true,
      impact_class: "close_risk"
    },
    evidence_pack: ["cand-1"],
    entity_backlinks: [{ entity: "Document", id: "DOC-1" }],
    snapshot_limitations: []
  };
}
/**
 * Aggregates a list of problem units into a summary fixture.
 *
 * Counts units per type, per severity grade and per confidence grade, and lists
 * the distinct unit types in first-seen order. `primary_unit_type` is the first
 * distinct type, or null when the list is empty. `duplicate_collapses` is always 0.
 *
 * @param units Problem units to summarise.
 */
function buildProblemSummary(units: ProblemUnit[]): ProblemUnitSummary {
  type UnitType = ProblemUnit["problem_unit_type"];
  const seenTypes = new Set<UnitType>();
  const perType: Partial<Record<UnitType, number>> = {};
  const perSeverity = { low: 0, medium: 0, high: 0 };
  const perConfidence = { low: 0, medium: 0, high: 0 };

  // Single pass: remember each type once (insertion order) and bump the three counters.
  units.forEach(({ problem_unit_type: unitType, severity, confidence }) => {
    seenTypes.add(unitType);
    perType[unitType] = (perType[unitType] ?? 0) + 1;
    perSeverity[severity.grade] += 1;
    perConfidence[confidence.grade] += 1;
  });

  const distinctTypes = Array.from(seenTypes);
  return {
    schema_version: "problem_unit_summary_v0_1",
    units_total: units.length,
    duplicate_collapses: 0,
    unit_types: distinctTypes,
    type_distribution: perType,
    severity_distribution: perSeverity,
    confidence_distribution: perConfidence,
    primary_unit_type: distinctTypes[0] ?? null
  };
}
/**
 * Creates a synthetic retrieval result for fragment F1 / requirement R1 on the
 * "hybrid_store_plus_live" route, carrying a single evidence record for DOC-1.
 *
 * @param input.broad                 Sets both `broad_query_detected` and `broad_result_flag`.
 * @param input.minimumEvidenceFailed Copied into `summary.minimum_evidence_failed`.
 * @param input.degradedTo            Copied into `summary.degraded_to`.
 * @param input.narrowing             Copied into `summary.narrowing_strength`.
 * @param input.confidence            Used for both the result and its evidence record.
 * @param input.limitationReason      When non-null, attaches a limitation to the evidence and a
 *                                    limitation note to the result; "missing_mechanism" also
 *                                    nulls the evidence's mechanism note.
 * @param input.problemUnits          Problem units attached to the result and summarised via
 *                                    `buildProblemSummary`.
 */
function buildRetrievalResult(input: {
  broad: boolean;
  minimumEvidenceFailed: boolean;
  degradedTo: "partial" | "clarification" | null;
  narrowing: "weak" | "medium" | "strong";
  confidence: UnifiedRetrievalResult["confidence"];
  limitationReason: "missing_mechanism" | "weak_source_mapping" | null;
  problemUnits: ProblemUnit[];
}): UnifiedRetrievalResult {
  const { broad, minimumEvidenceFailed, degradedTo, narrowing, confidence, limitationReason, problemUnits } = input;

  const unitSummary = buildProblemSummary(problemUnits);

  // The mechanism note is dropped only for the "missing_mechanism" limitation.
  const mechanismNote = limitationReason === "missing_mechanism" ? null : "failed_edge=payment_to_settlement";
  // Evidence-level limitation mirrors the reason code; there is never a note.
  const evidenceLimitation =
    limitationReason === null
      ? null
      : {
          reason_code: limitationReason,
          note: null
        };
  // Result-level limitation text is present whenever any reason was supplied.
  const limitationNotes = limitationReason ? ["Synthetic limitation for weak evidence."] : [];

  return {
    fragment_id: "F1",
    requirement_ids: ["R1"],
    route: "hybrid_store_plus_live",
    status: "ok",
    result_type: "chain",
    items: [
      {
        counterparty_id: "CP-1",
        operations_count: 4,
        document_refs_count: 2
      }
    ],
    raw_entities: [],
    candidate_evidence: [],
    problem_units: problemUnits,
    problem_unit_summary: unitSummary,
    summary: {
      broad_query_detected: broad,
      broad_result_flag: broad,
      minimum_evidence_failed: minimumEvidenceFailed,
      degraded_to: degradedTo,
      narrowing_strength: narrowing
    },
    evidence: [
      {
        evidence_id: "ev-1",
        claim_ref: "requirement:R1",
        source_type: "retrieval_item",
        source_ref: {
          schema_version: "evidence_source_ref_v1",
          namespace: "snapshot_2020",
          entity: "Document",
          id: "DOC-1",
          period: "2020-06",
          canonical_ref: "evidence_source_ref_v1|snapshot_2020|document|doc-1|2020-06"
        },
        pointer: {
          fragment_id: "F1",
          route: "hybrid_store_plus_live",
          source: {
            namespace: "snapshot_2020",
            entity: "Document",
            id: "DOC-1",
            period: "2020-06"
          },
          locator: {
            field_path: "risk_score",
            item_index: 0
          }
        },
        evidence_kind: "mechanism_link",
        mechanism_note: mechanismNote,
        confidence,
        limitation: evidenceLimitation,
        payload: {
          risk_score: 4
        }
      }
    ],
    why_included: ["synthetic-test"],
    selection_reason: ["synthetic-test"],
    risk_factors: ["broken_chain"],
    business_interpretation: ["synthetic-test"],
    confidence,
    limitations: limitationNotes,
    errors: []
  };
}
describe("assistant problem-centric answer mode v1", () => {
it("uses problem-centric answer mode on problem-heavy case when flag is ON", () => {
  // Broad, weakly narrowed retrieval degraded to "partial", carrying one broken-chain unit.
  const retrievalResult = buildRetrievalResult({
    broad: true,
    minimumEvidenceFailed: false,
    degradedTo: "partial",
    narrowing: "weak",
    confidence: "medium",
    limitationReason: null,
    problemUnits: [
      buildProblemUnit({
        id: "pu-1",
        type: "broken_chain_segment",
        confidenceGrade: "medium",
        severityGrade: "high",
        mechanism: "Mechanism candidate: failed_edge:payment_to_settlement."
      })
    ]
  });

  const composed = composeAssistantAnswer({
    userMessage: "Покажи разрывы цепочки и хвосты по расчетам за 2020-06.",
    routeSummary: buildRouteSummary(),
    retrievalResults: [retrievalResult],
    requirements: [
      {
        requirement_id: "R1",
        source_fragment_id: "F1",
        requirement_text: "Проверить дефекты цепочки",
        subject_tokens: ["chain", "account_60"],
        status: "covered",
        route: "hybrid_store_plus_live"
      }
    ],
    coverageReport: buildCoverage(true),
    groundingCheck: buildGrounding("partial"),
    enableAnswerPolicyV11: true,
    enableProblemCentricAnswerV1: true
  });

  // Problem-centric mode must be reported, and it must actually consume unit pu-1.
  expect(composed.problem_centric_answer_applied).toBe(true);
  expect(composed.problem_answer_mode).toBe("stage2_problem_centric_v1");
  expect(composed.problem_units_used_count).toBeGreaterThan(0);
  expect(composed.problem_unit_ids_used).toContain("pu-1");
  expect(composed.answer_structure_v11?.answer_summary).toContain("problem-centric");
});
it("falls back to Stage 1 path for the same case when problem-centric flag is OFF", () => {
  // Same broad/partial fixture as the flag-ON case; only the feature flag differs below.
  const retrievalResult = buildRetrievalResult({
    broad: true,
    minimumEvidenceFailed: false,
    degradedTo: "partial",
    narrowing: "weak",
    confidence: "medium",
    limitationReason: null,
    problemUnits: [
      buildProblemUnit({
        id: "pu-1",
        type: "broken_chain_segment",
        confidenceGrade: "medium",
        severityGrade: "high",
        mechanism: "Mechanism candidate: failed_edge:payment_to_settlement."
      })
    ]
  });

  const composed = composeAssistantAnswer({
    userMessage: "Покажи разрывы цепочки и хвосты по расчетам за 2020-06.",
    routeSummary: buildRouteSummary(),
    retrievalResults: [retrievalResult],
    requirements: [
      {
        requirement_id: "R1",
        source_fragment_id: "F1",
        requirement_text: "Проверить дефекты цепочки",
        subject_tokens: ["chain", "account_60"],
        status: "covered",
        route: "hybrid_store_plus_live"
      }
    ],
    coverageReport: buildCoverage(true),
    groundingCheck: buildGrounding("partial"),
    enableAnswerPolicyV11: true,
    enableProblemCentricAnswerV1: false
  });

  // With the flag off the answer must stay on the Stage 1 policy path.
  expect(composed.problem_centric_answer_applied).toBe(false);
  expect(composed.problem_answer_mode).toBe("stage1_policy_v11");
  expect(composed.answer_structure_v11?.answer_summary).not.toContain("problem-centric");
});
it("keeps focused grounded case on Stage 1 path even when problem-centric flag is ON", () => {
  // Narrow, strongly narrowed, high-confidence retrieval with no degradation or limitation.
  const retrievalResult = buildRetrievalResult({
    broad: false,
    minimumEvidenceFailed: false,
    degradedTo: null,
    narrowing: "strong",
    confidence: "high",
    limitationReason: null,
    problemUnits: [
      buildProblemUnit({
        id: "pu-1",
        type: "broken_chain_segment",
        confidenceGrade: "high",
        severityGrade: "high",
        mechanism: "Mechanism candidate: failed_edge:payment_to_settlement."
      })
    ]
  });

  const composed = composeAssistantAnswer({
    userMessage: "Проверь счет 60 за 2020-06 по конкретному контрагенту и покажи подтвержденный дефект.",
    routeSummary: buildRouteSummary(),
    retrievalResults: [retrievalResult],
    requirements: [
      {
        requirement_id: "R1",
        source_fragment_id: "F1",
        requirement_text: "Проверить конкретный дефект",
        subject_tokens: ["account_60", "counterparty", "document"],
        status: "covered",
        route: "hybrid_store_plus_live"
      }
    ],
    coverageReport: buildCoverage(false),
    groundingCheck: buildGrounding("grounded"),
    enableAnswerPolicyV11: true,
    enableProblemCentricAnswerV1: true
  });

  // Flag is on, yet the fully covered and grounded case is expected to keep the Stage 1 reply.
  expect(composed.problem_centric_answer_applied).toBe(false);
  expect(composed.problem_answer_mode).toBe("stage1_policy_v11");
  expect(composed.reply_type).toBe("factual_with_explanation");
});
it("enables problem-centric mode on mixed focused case when weak mechanism signals are present", () => {
  // Focused retrieval (not broad, strong narrowing) whose evidence lacks a mechanism note.
  const retrievalResult = buildRetrievalResult({
    broad: false,
    minimumEvidenceFailed: false,
    degradedTo: null,
    narrowing: "strong",
    confidence: "medium",
    limitationReason: "missing_mechanism",
    problemUnits: [
      buildProblemUnit({
        id: "pu-doc-1",
        type: "document_conflict",
        confidenceGrade: "medium",
        severityGrade: "medium",
        mechanism: "Mechanism candidate: document_conflict_in_chain."
      })
    ]
  });

  const composed = composeAssistantAnswer({
    userMessage: "Проверь конфликт документа по счету 60 за 2020-06 и оцени влияние.",
    routeSummary: buildRouteSummary(),
    retrievalResults: [retrievalResult],
    requirements: [
      {
        requirement_id: "R1",
        source_fragment_id: "F1",
        requirement_text: "Проверить конфликт документа",
        subject_tokens: ["account_60", "document"],
        status: "covered",
        route: "hybrid_store_plus_live"
      }
    ],
    coverageReport: buildCoverage(false),
    groundingCheck: buildGrounding("grounded"),
    enableAnswerPolicyV11: true,
    enableProblemCentricAnswerV1: true
  });

  expect(composed.problem_centric_answer_applied).toBe(true);
  expect(composed.problem_answer_mode).toBe("stage2_problem_centric_v1");
  expect(composed.problem_units_used_count).toBeGreaterThan(0);
});
it("does not expose raw technical refs in primary problem-centric text", () => {
  // Broad/partial retrieval with a period-risk unit; the fixture's evidence carries a
  // canonical ref ("evidence_source_ref_v1|...") and the unit an evidence pack id ("cand-1").
  const retrievalResult = buildRetrievalResult({
    broad: true,
    minimumEvidenceFailed: false,
    degradedTo: "partial",
    narrowing: "weak",
    confidence: "medium",
    limitationReason: null,
    problemUnits: [
      buildProblemUnit({
        id: "pu-1",
        type: "period_risk_cluster",
        confidenceGrade: "medium",
        severityGrade: "high",
        mechanism: "Mechanism candidate: failed_edge:payment_to_settlement."
      })
    ]
  });

  const composed = composeAssistantAnswer({
    userMessage: "Оцени влияние проблем по расчетам на закрытие периода.",
    routeSummary: buildRouteSummary(),
    retrievalResults: [retrievalResult],
    requirements: [
      {
        requirement_id: "R1",
        source_fragment_id: "F1",
        requirement_text: "Оценить влияние на закрытие периода",
        subject_tokens: ["period", "account_60"],
        status: "covered",
        route: "hybrid_store_plus_live"
      }
    ],
    coverageReport: buildCoverage(true),
    groundingCheck: buildGrounding("partial"),
    enableAnswerPolicyV11: true,
    enableProblemCentricAnswerV1: true
  });

  expect(composed.problem_centric_answer_applied).toBe(true);

  // Only the text before the "Evidence block:" marker is checked for leaked identifiers.
  const [primaryText] = String(composed.assistant_reply).split("Evidence block:");
  expect(primaryText).not.toContain("evidence_source_ref_v1|");
  expect(primaryText).not.toContain("cand-");
});
it("produces limited answer for weak problem units without false overclaim", () => {
  // Low-confidence, low-severity unit on a broad/partial retrieval whose evidence
  // is flagged with the "missing_mechanism" limitation.
  const units = [
    buildProblemUnit({
      id: "pu-weak-1",
      type: "broken_chain_segment",
      confidenceGrade: "low",
      severityGrade: "low",
      mechanism: "Mechanism is currently inferred at baseline level for broken_chain_segment."
    })
  ];
  const retrieval = buildRetrievalResult({
    broad: true,
    minimumEvidenceFailed: false,
    degradedTo: "partial",
    narrowing: "weak",
    confidence: "low",
    limitationReason: "missing_mechanism",
    problemUnits: units
  });
  const output = composeAssistantAnswer({
    userMessage: "Покажи проблемные зоны по расчетам без детализации.",
    routeSummary: buildRouteSummary(),
    retrievalResults: [retrieval],
    requirements: [
      {
        requirement_id: "R1",
        source_fragment_id: "F1",
        requirement_text: "Выделить проблемные зоны",
        subject_tokens: ["anomaly"],
        status: "covered",
        route: "hybrid_store_plus_live"
      }
    ],
    coverageReport: buildCoverage(true),
    groundingCheck: buildGrounding("partial"),
    enableAnswerPolicyV11: true,
    enableProblemCentricAnswerV1: true
  });
  expect(output.problem_centric_answer_applied).toBe(true);
  expect(output.answer_structure_v11?.mechanism_block.status).not.toBe("grounded");
  expect(output.answer_structure_v11?.uncertainty_block.limitations.join(" ")).toMatch(/limited|огранич/i);
  // The pattern previously carried two mis-encoded alternatives (U+FFFD replacement-character
  // residue of lost Cyrillic text). They could only match corrupted output, so they were
  // removed; the intact stems below are the remaining "limited"-style markers.
  expect(output.answer_structure_v11?.direct_answer).toMatch(/limited|огр|пред/i);
});
});