NODEDC_1C/llm_normalizer/backend/tests/assistantWave6ProblemFirstA...

324 lines
12 KiB
TypeScript
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import { describe, expect, it } from "vitest";
import { composeAssistantAnswer } from "../src/services/answerComposer";
import type { AnswerGroundingCheck, RequirementCoverageReport, UnifiedRetrievalResult } from "../src/types/assistant";
import type { ProblemUnit } from "../src/types/stage2ProblemUnits";
/**
 * Builds the route summary fixture handed to the answer composer.
 *
 * The fixture describes a single in-scope fragment handled in
 * "deterministic_v2" mode with high scope confidence, no routing decisions
 * and no fallback. A fresh object graph is created on every call.
 */
function buildRouteSummary() {
  // Planner counters: exactly one fragment, and it is in scope.
  const planner = {
    total_fragments: 1,
    in_scope_fragments: 1,
    out_of_scope_fragments: 0,
    discarded_fragments: 0,
    contains_multiple_tasks: false
  };

  // No fallback was triggered for this message.
  const fallback = {
    type: "none" as const,
    message: null
  };

  return {
    mode: "deterministic_v2" as const,
    message_in_scope: true,
    scope_confidence: "high" as const,
    planner,
    decisions: [],
    fallback
  };
}
/**
 * Builds a requirement coverage report fixture for the single requirement "R1".
 *
 * @param partial When true (the default) "R1" is listed as both uncovered and
 *   partially covered and the covered counter is 0; when false "R1" counts as
 *   covered and both lists are empty.
 * @returns A fresh coverage report with no clarification or out-of-scope entries.
 */
function buildCoverage(partial = true): RequirementCoverageReport {
  if (!partial) {
    // Fully covered variant: nothing left open.
    return {
      requirements_total: 1,
      requirements_covered: 1,
      requirements_uncovered: [],
      requirements_partially_covered: [],
      clarification_needed_for: [],
      out_of_scope_requirements: []
    };
  }

  // Partial variant: the only requirement is still open.
  return {
    requirements_total: 1,
    requirements_covered: 0,
    requirements_uncovered: ["R1"],
    requirements_partially_covered: ["R1"],
    clarification_needed_for: [],
    out_of_scope_requirements: []
  };
}
/**
 * Builds an answer grounding check fixture.
 *
 * @param status Grounding status to report; defaults to "partial".
 * @returns A grounding check whose route subject matches. Only for the
 *   "partial" status does it list "R1" as a missing requirement together with
 *   a single explanatory reason; for any other status both lists are empty.
 */
function buildGrounding(status: AnswerGroundingCheck["status"] = "partial"): AnswerGroundingCheck {
  const isPartial = status === "partial";

  const missingRequirements: string[] = [];
  const reasons: string[] = [];
  if (isPartial) {
    missingRequirements.push("R1");
    reasons.push("Coverage is partial for problem-first answer contract.");
  }

  return {
    status,
    route_subject_match: true,
    missing_requirements: missingRequirements,
    reasons,
    why_included_summary: [],
    selection_reason_summary: []
  };
}
/**
 * Builds a problem unit fixture around a fixed document/posting pair.
 *
 * Only the id, unit type, defect class, affected account and (optionally) the
 * lifecycle domain vary between fixtures; severity is always high (0.76) and
 * confidence always medium (0.52).
 *
 * @param input.id Value for `problem_unit_id`.
 * @param input.type Value for `problem_unit_type`.
 * @param input.defect Used for `business_defect_class`, `failed_expected_edge`
 *   and embedded into `mechanism_summary`.
 * @param input.account The single entry of `affected_accounts`.
 * @param input.lifecycleDomain When truthy, emitted as `lifecycle_domain`;
 *   otherwise the key is omitted entirely.
 * @returns A fresh problem unit object.
 */
function buildProblemUnit(input: {
  id: string;
  type: ProblemUnit["problem_unit_type"];
  defect: string;
  account: string;
  lifecycleDomain?: ProblemUnit["lifecycle_domain"];
}): ProblemUnit {
  const { id, type, defect, account, lifecycleDomain } = input;

  // The lifecycle key must be absent (not undefined) when no domain is given.
  const lifecyclePart = lifecycleDomain ? { lifecycle_domain: lifecycleDomain } : {};

  return {
    schema_version: "problem_unit_v0_1",
    problem_unit_id: id,
    problem_unit_type: type,
    title: "Problem unit",
    mechanism_summary: `Mechanism candidate: ${defect}.`,
    business_defect_class: defect,
    severity: { score: 0.76, grade: "high" },
    confidence: { score: 0.52, grade: "medium" },
    affected_entities: ["Document:DOC-1", "Posting:POST-1"],
    affected_documents: ["Document:DOC-1"],
    affected_postings: ["Posting:POST-1"],
    affected_accounts: [account],
    affected_counterparties: ["Counterparty:CP-1"],
    affected_contracts: ["Contract:CTR-1"],
    failed_expected_edge: defect,
    period_impact: { is_period_sensitive: true, impact_class: "close_risk" },
    evidence_pack: ["cand-1"],
    entity_backlinks: [{ entity: "Document", id: "DOC-1" }],
    snapshot_limitations: [],
    ...lifecyclePart
  };
}
/**
 * Builds a unified retrieval result fixture for fragment "F1" / requirement
 * "R1" on the "hybrid_store_plus_live" route.
 *
 * The fixture carries one retrieved document item, a broad/weakly narrowed
 * summary with a semantic profile, one mechanism-link evidence record, and the
 * supplied problem units together with a summary derived from them.
 *
 * @param units Problem units to attach; they also drive `problem_unit_summary`.
 * @param extras Optional overrides spread last, so any key given here replaces
 *   the fixture default.
 * @returns A fresh retrieval result object.
 */
function buildRetrieval(units: ProblemUnit[], extras?: Partial<UnifiedRetrievalResult>): UnifiedRetrievalResult {
  // Count units per type so the distribution agrees with `unit_types` even
  // when the caller passes units of different types.
  const typeDistribution: Record<string, number> = {};
  for (const unit of units) {
    typeDistribution[unit.problem_unit_type] = (typeDistribution[unit.problem_unit_type] ?? 0) + 1;
  }
  if (units.length === 0) {
    // Keep the historical placeholder entry for an empty unit list.
    typeDistribution["broken_chain_segment"] = 0;
  }

  return {
    fragment_id: "F1",
    requirement_ids: ["R1"],
    route: "hybrid_store_plus_live",
    status: "ok",
    result_type: "chain",
    items: [
      {
        source_entity: "Document",
        source_id: "DOC-1",
        counterparty_id: "CP-1"
      }
    ],
    summary: {
      broad_query_detected: true,
      broad_result_flag: true,
      minimum_evidence_failed: false,
      degraded_to: "partial",
      narrowing_strength: "weak",
      semantic_profile: {
        domain_scope: ["bank_settlement"],
        account_scope: ["60"],
        relation_patterns: ["payment_to_settlement"]
      }
    },
    evidence: [
      {
        evidence_id: "ev-1",
        claim_ref: "requirement:R1",
        source_type: "retrieval_item",
        source_ref: {
          schema_version: "evidence_source_ref_v1",
          namespace: "snapshot_2020",
          entity: "document",
          id: "DOC-1",
          period: "2020-06",
          canonical_ref: "evidence_source_ref_v1|snapshot_2020|document|doc-1|2020-06"
        },
        pointer: {
          fragment_id: "F1",
          route: "hybrid_store_plus_live",
          source: {
            namespace: "snapshot_2020",
            entity: "document",
            id: "DOC-1",
            period: "2020-06"
          },
          locator: {
            field_path: "risk_score",
            item_index: 0
          }
        },
        evidence_kind: "mechanism_link",
        mechanism_note: "failed_edge:payment_to_settlement",
        confidence: "medium",
        limitation: {
          reason_code: "weak_source_mapping",
          note: null
        },
        payload: {
          risk_score: 4
        }
      }
    ],
    problem_units: units,
    problem_unit_summary: {
      schema_version: "problem_unit_summary_v0_1",
      units_total: units.length,
      duplicate_collapses: 0,
      unit_types: units.map((unit) => unit.problem_unit_type),
      type_distribution: typeDistribution,
      // Every unit is booked as high severity / medium confidence, which is
      // what buildProblemUnit produces.
      severity_distribution: {
        low: 0,
        medium: 0,
        high: units.length
      },
      confidence_distribution: {
        low: 0,
        medium: units.length,
        high: 0
      },
      primary_unit_type: units[0]?.problem_unit_type ?? null
    },
    why_included: ["semantic retrieval profile", "route=hybrid_store_plus_live"],
    selection_reason: ["domain_scope + relation_patterns + route profile"],
    risk_factors: ["broken_chain", "closure_risk"],
    business_interpretation: ["problem-first signal"],
    confidence: "medium",
    limitations: ["Evidence is snapshot-only and may lag source-of-record."],
    errors: [],
    ...extras
  };
}
/**
 * Runs the answer composer for one user message against a single retrieval
 * result, using the shared fixtures of this suite.
 *
 * The call always supplies requirement "R1", a partial coverage report, a
 * partial grounding check, and enables the answer-policy v1.1, problem-centric
 * and lifecycle answer flags.
 *
 * @param userMessage The user's question as passed to the composer.
 * @param retrieval The only retrieval result handed to the composer.
 * @returns Whatever `composeAssistantAnswer` returns for these inputs.
 */
function composeCase(userMessage: string, retrieval: UnifiedRetrievalResult) {
  const routeSummary = buildRouteSummary();
  const coverageReport = buildCoverage(true);
  const groundingCheck = buildGrounding("partial");

  return composeAssistantAnswer({
    userMessage,
    routeSummary,
    retrievalResults: [retrieval],
    requirements: [
      {
        requirement_id: "R1",
        source_fragment_id: "F1",
        requirement_text: "Проверить проблемный механизм",
        subject_tokens: ["chain"],
        status: "covered",
        route: "hybrid_store_plus_live"
      }
    ],
    coverageReport,
    groundingCheck,
    enableAnswerPolicyV11: true,
    enableProblemCentricAnswerV1: true,
    enableLifecycleAnswerV1: true
  });
}
/**
 * Extracts the body of a titled section from a composed reply.
 *
 * A section starts right after `<title>:` (matched case-insensitively) and
 * runs up to the next known section heading followed by a colon, or to the end
 * of the text when no further heading follows.
 *
 * @param text The full reply text; null/undefined is treated as empty.
 * @param title Section heading to look for, without the trailing colon.
 * @returns The trimmed section body, or "" when the heading is not found.
 */
function extractSection(text: string, title: string): string {
  // Escape regex metacharacters so headings are matched literally.
  const escapeForRegExp = (raw: string): string => raw.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");

  // Headings that terminate the section being extracted.
  const knownHeadings = [
    "Коротко",
    "Что именно проверено",
    "Что найдено",
    "Что пока не доказано",
    "Что проверить первым",
    "Что могу сделать сейчас"
  ];
  const headingAlternation = knownHeadings.map((heading) => escapeForRegExp(heading)).join("|");

  // Lazy capture, bounded by a lookahead for the next heading or end of text.
  const sectionPattern = new RegExp(
    escapeForRegExp(title) + ":([\\s\\S]*?)(?=(?:" + headingAlternation + "):|$)",
    "i"
  );

  const found = sectionPattern.exec(String(text ?? ""));
  if (found === null) {
    return "";
  }
  return found[1]?.trim() ?? "";
}
// Contract tests for the problem-first answer layout produced by
// composeAssistantAnswer: no internal vocabulary in the reply, mechanism-first
// findings, de-duplicated bullets, explicit limitations and a bounded length.
describe("assistant wave6 problem-first answer contract", () => {
  // Internal routing/profile identifiers must never reach the user.
  it("enforces leakage guard in direct user-facing answer", () => {
    const units = [buildProblemUnit({ id: "pu-1", type: "broken_chain_segment", defect: "failed_edge:payment_to_settlement", account: "60" })];
    const output = composeCase("Покажи проблему по расчетам.", buildRetrieval(units));
    expect(output.assistant_reply).not.toMatch(
      /graph_|domain_scope|relation_patterns|route|profile|hybrid_store_plus_live|store_canonical|semantic_profile|lifecycle_defect_type/i
    );
  });

  // The findings section must describe the mechanism, not list raw entities.
  it("keeps narrative mechanism-first and avoids entity-list direct answer", () => {
    const units = [buildProblemUnit({ id: "pu-1", type: "broken_chain_segment", defect: "failed_edge:payment_to_settlement", account: "60" })];
    const output = composeCase("Проверь по 60 счету, где разрыв.", buildRetrieval(units));
    const brokenSection = extractSection(output.assistant_reply, "Что найдено");
    expect(brokenSection).toMatch(/не подтвержден|разрыв|зависл|закрыти/i);
    // The `m` flag makes `^` anchor at every line, so each bullet of the
    // section is checked rather than only the first one.
    expect(brokenSection).not.toMatch(/^\s*-\s*(Document|Record|Entity)\b/im);
  });

  it("does not expose route/profile explanation in user-facing text", () => {
    const units = [buildProblemUnit({ id: "pu-1", type: "document_conflict", defect: "posting_mismatch", account: "60" })];
    const output = composeCase("Где конфликт документа и проводки?", buildRetrieval(units));
    expect(output.assistant_reply).not.toMatch(/route|profile|semantic|domain_scope|relation_patterns|typed_domain_path/i);
  });

  // Two units pointing at the same mechanism must not yield repeated bullets.
  it("collapses duplicate problem lines for the same mechanism", () => {
    const units = [
      buildProblemUnit({ id: "pu-1", type: "broken_chain_segment", defect: "failed_edge:payment_to_settlement", account: "60" }),
      buildProblemUnit({ id: "pu-2", type: "unresolved_settlement_cluster", defect: "payment_to_settlement", account: "60" })
    ];
    const output = composeCase("Проверь хвост по расчетам.", buildRetrieval(units));
    const brokenSection = extractSection(output.assistant_reply, "Что найдено");
    const bulletLines = brokenSection
      .split(/\r?\n/g)
      .map((line) => line.trim())
      .filter((line) => line.startsWith("- "));
    // Compare bullets modulo whitespace and letter case.
    const normalized = bulletLines.map((line) => line.replace(/\s+/g, " ").trim().toLowerCase());
    const dedupedCount = new Set(normalized).size;
    expect(bulletLines.length).toBeGreaterThan(0);
    expect(dedupedCount).toBe(bulletLines.length);
  });

  // The user message names no period, so the limitations section must say so.
  it("shows explicit limitation when period is missing", () => {
    const units = [buildProblemUnit({ id: "pu-1", type: "lifecycle_anomaly_node", defect: "missing_expected_transition", account: "97", lifecycleDomain: "deferred_expense" })];
    const output = composeCase("Проверь по 97 счету зависание списания.", buildRetrieval(units));
    const limitationsSection = extractSection(output.assistant_reply, "Что пока не доказано");
    expect(limitationsSection).toMatch(/период/i);
  });

  // Each case must produce the fixed section layout, mention its domain,
  // stay under 1800 characters and contain no internal vocabulary.
  it("returns short accountant-readable answers for P0 domains without technical dump", () => {
    const cases: Array<{
      message: string;
      retrieval: UnifiedRetrievalResult;
      domainHint: RegExp;
    }> = [
      {
        message: "Проверь хвосты по расчетам 60/62.",
        retrieval: buildRetrieval([
          buildProblemUnit({ id: "pu-60", type: "broken_chain_segment", defect: "failed_edge:payment_to_settlement", account: "60" })
        ]),
        domainHint: /расчет|оплат|закрыти/i
      },
      {
        message: "Проверь НДС-цепочку по документу.",
        retrieval: buildRetrieval([
          buildProblemUnit({ id: "pu-vat", type: "cross_branch_inconsistency_cluster", defect: "invoice_linked", account: "68", lifecycleDomain: "vat_flow" })
        ]),
        domainHint: /ндс|регистр|книг/i
      },
      {
        message: "Проверь закрытие месяца и затраты 20-44.",
        retrieval: buildRetrieval([
          buildProblemUnit({ id: "pu-close", type: "period_risk_cluster", defect: "close_operation_runs", account: "20", lifecycleDomain: "period_close" })
        ]),
        domainHint: /закрыти|месяц|затрат/i
      }
    ];
    for (const testCase of cases) {
      const output = composeCase(testCase.message, testCase.retrieval);
      expect(output.assistant_reply).toMatch(testCase.domainHint);
      expect(output.assistant_reply).toContain("Коротко:");
      expect(output.assistant_reply).toContain("Что именно проверено:");
      expect(output.assistant_reply).toContain("Что найдено:");
      expect(output.assistant_reply).not.toContain("Почему это похоже на проблему:");
      expect(output.assistant_reply).not.toContain("На чем это основано:");
      expect(output.assistant_reply).toContain("Что пока не доказано:");
      expect(output.assistant_reply).toContain("Что проверить первым:");
      expect(output.assistant_reply.length).toBeLessThan(1800);
      expect(output.assistant_reply).not.toMatch(/graph_|domain_scope|relation_patterns|semantic_profile|route|profile/i);
    }
  });
});