NODEDC_1C/llm_normalizer/backend/tests/assistantWave6ProblemFirstA...

320 lines
12 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import { describe, expect, it } from "vitest";
import { composeAssistantAnswer } from "../src/services/answerComposer";
import type { AnswerGroundingCheck, RequirementCoverageReport, UnifiedRetrievalResult } from "../src/types/assistant";
import type { ProblemUnit } from "../src/types/stage2ProblemUnits";
/**
 * Builds the route-summary fixture used by every composed case in this file:
 * deterministic mode, an in-scope message, exactly one in-scope fragment,
 * no routing decisions and no fallback.
 *
 * @returns A fresh object on every call, so callers may mutate it freely.
 */
function buildRouteSummary() {
  // Planner counters: a single fragment, fully in scope.
  const planner = {
    total_fragments: 1,
    in_scope_fragments: 1,
    out_of_scope_fragments: 0,
    discarded_fragments: 0,
    contains_multiple_tasks: false
  };

  // No fallback was triggered for this route.
  const fallback = {
    type: "none" as const,
    message: null
  };

  return {
    mode: "deterministic_v2" as const,
    message_in_scope: true,
    scope_confidence: "high" as const,
    planner,
    decisions: [],
    fallback
  };
}
/**
 * Builds a coverage-report fixture for the single requirement "R1".
 *
 * @param partial When true (the default) "R1" is listed as both uncovered and
 *   partially covered and the covered count is 0; when false the requirement
 *   counts as covered and both lists are empty.
 * @returns A fresh report object with independent array instances.
 */
function buildCoverage(partial = true): RequirementCoverageReport {
  // Produces a new array per call so the two list fields never share a reference.
  const pendingRequirementIds = (): string[] => (partial ? ["R1"] : []);

  return {
    requirements_total: 1,
    requirements_covered: partial ? 0 : 1,
    requirements_uncovered: pendingRequirementIds(),
    requirements_partially_covered: pendingRequirementIds(),
    clarification_needed_for: [],
    out_of_scope_requirements: []
  };
}
/**
 * Builds a grounding-check fixture.
 *
 * @param status Grounding status to report; defaults to "partial".
 * @returns A check whose `missing_requirements` and `reasons` are populated
 *   only when `status` is "partial", and are empty arrays otherwise.
 */
function buildGrounding(status: AnswerGroundingCheck["status"] = "partial"): AnswerGroundingCheck {
  const isPartial = status === "partial";

  return {
    status,
    route_subject_match: true,
    missing_requirements: isPartial ? ["R1"] : [],
    reasons: isPartial ? ["Coverage is partial for problem-first answer contract."] : [],
    why_included_summary: [],
    selection_reason_summary: []
  };
}
/**
 * Builds a problem-unit fixture with fixed severity (0.76 / high), fixed
 * confidence (0.52 / medium) and a fixed set of affected document, posting,
 * counterparty and contract references.
 *
 * @param input.id Value for `problem_unit_id`.
 * @param input.type Value for `problem_unit_type`.
 * @param input.defect Used for `business_defect_class` and
 *   `failed_expected_edge`, and embedded in `mechanism_summary`.
 * @param input.account The single entry of `affected_accounts`.
 * @param input.lifecycleDomain Optional; `lifecycle_domain` is added to the
 *   result only when this value is truthy.
 * @returns A fresh problem unit.
 */
function buildProblemUnit(input: {
  id: string;
  type: ProblemUnit["problem_unit_type"];
  defect: string;
  account: string;
  lifecycleDomain?: ProblemUnit["lifecycle_domain"];
}): ProblemUnit {
  const { id, type: unitType, defect, account, lifecycleDomain } = input;

  const documentRef = "Document:DOC-1";
  const postingRef = "Posting:POST-1";

  // Spread last so the key is absent (not `undefined`) when no domain is given.
  const lifecyclePatch = lifecycleDomain ? { lifecycle_domain: lifecycleDomain } : {};

  return {
    schema_version: "problem_unit_v0_1",
    problem_unit_id: id,
    problem_unit_type: unitType,
    title: "Problem unit",
    mechanism_summary: `Mechanism candidate: ${defect}.`,
    business_defect_class: defect,
    severity: { score: 0.76, grade: "high" },
    confidence: { score: 0.52, grade: "medium" },
    affected_entities: [documentRef, postingRef],
    affected_documents: [documentRef],
    affected_postings: [postingRef],
    affected_accounts: [account],
    affected_counterparties: ["Counterparty:CP-1"],
    affected_contracts: ["Contract:CTR-1"],
    failed_expected_edge: defect,
    period_impact: { is_period_sensitive: true, impact_class: "close_risk" },
    evidence_pack: ["cand-1"],
    entity_backlinks: [{ entity: "Document", id: "DOC-1" }],
    snapshot_limitations: [],
    ...lifecyclePatch
  };
}
/**
 * Builds a retrieval-result fixture for fragment "F1" / requirement "R1" on
 * the "hybrid_store_plus_live" route, carrying one retrieval item, one
 * evidence record and the supplied problem units.
 *
 * @param units Problem units to attach; also drives `problem_unit_summary`.
 * @param extras Optional overrides, spread over the defaults so any key given
 *   here replaces the fixture value.
 * @returns A fresh retrieval result.
 */
function buildRetrieval(units: ProblemUnit[], extras?: Partial<UnifiedRetrievalResult>): UnifiedRetrievalResult {
  const unitCount = units.length;
  const leadType = units[0]?.problem_unit_type;

  const defaults: UnifiedRetrievalResult = {
    fragment_id: "F1",
    requirement_ids: ["R1"],
    route: "hybrid_store_plus_live",
    status: "ok",
    result_type: "chain",
    items: [
      {
        source_entity: "Document",
        source_id: "DOC-1",
        counterparty_id: "CP-1"
      }
    ],
    summary: {
      broad_query_detected: true,
      broad_result_flag: true,
      minimum_evidence_failed: false,
      degraded_to: "partial",
      narrowing_strength: "weak",
      semantic_profile: {
        domain_scope: ["bank_settlement"],
        account_scope: ["60"],
        relation_patterns: ["payment_to_settlement"]
      }
    },
    evidence: [
      {
        evidence_id: "ev-1",
        claim_ref: "requirement:R1",
        source_type: "retrieval_item",
        source_ref: {
          schema_version: "evidence_source_ref_v1",
          namespace: "snapshot_2020",
          entity: "document",
          id: "DOC-1",
          period: "2020-06",
          canonical_ref: "evidence_source_ref_v1|snapshot_2020|document|doc-1|2020-06"
        },
        pointer: {
          fragment_id: "F1",
          route: "hybrid_store_plus_live",
          source: {
            namespace: "snapshot_2020",
            entity: "document",
            id: "DOC-1",
            period: "2020-06"
          },
          locator: {
            field_path: "risk_score",
            item_index: 0
          }
        },
        evidence_kind: "mechanism_link",
        mechanism_note: "failed_edge:payment_to_settlement",
        confidence: "medium",
        limitation: {
          reason_code: "weak_source_mapping",
          note: null
        },
        payload: {
          risk_score: 4
        }
      }
    ],
    problem_units: units,
    problem_unit_summary: {
      schema_version: "problem_unit_summary_v0_1",
      units_total: unitCount,
      duplicate_collapses: 0,
      unit_types: units.map((unit) => unit.problem_unit_type),
      // NOTE(review): every unit is attributed to the first unit's type (or to
      // "broken_chain_segment" when the list is empty), so mixed-type inputs
      // are not counted per type here.
      type_distribution: {
        [leadType ?? "broken_chain_segment"]: unitCount
      },
      // All units are reported as high severity / medium confidence.
      severity_distribution: {
        low: 0,
        medium: 0,
        high: unitCount
      },
      confidence_distribution: {
        low: 0,
        medium: unitCount,
        high: 0
      },
      primary_unit_type: leadType ?? null
    },
    why_included: ["semantic retrieval profile", "route=hybrid_store_plus_live"],
    selection_reason: ["domain_scope + relation_patterns + route profile"],
    risk_factors: ["broken_chain", "closure_risk"],
    business_interpretation: ["problem-first signal"],
    confidence: "medium",
    limitations: ["Evidence is snapshot-only and may lag source-of-record."],
    errors: []
  };

  // Caller overrides win over the defaults.
  return { ...defaults, ...extras };
}
/**
 * Runs `composeAssistantAnswer` for one user message and one retrieval result,
 * using the shared fixtures of this file: the default route summary, a partial
 * coverage report, a "partial" grounding check, and a single requirement "R1".
 * The answer-policy v1.1, problem-centric v1 and lifecycle v1 flags are all on.
 *
 * @param userMessage The user's message text passed to the composer.
 * @param retrieval The only entry of `retrievalResults`.
 * @returns Whatever `composeAssistantAnswer` returns.
 */
function composeCase(userMessage: string, retrieval: UnifiedRetrievalResult) {
  const routeSummary = buildRouteSummary();
  const coverageReport = buildCoverage(true);
  const groundingCheck = buildGrounding("partial");

  return composeAssistantAnswer({
    userMessage,
    routeSummary,
    retrievalResults: [retrieval],
    requirements: [
      {
        requirement_id: "R1",
        source_fragment_id: "F1",
        requirement_text: "Проверить проблемный механизм",
        subject_tokens: ["chain"],
        // The requirement itself is marked covered, while the coverage and
        // grounding fixtures above report a partial state.
        status: "covered",
        route: "hybrid_store_plus_live"
      }
    ],
    coverageReport,
    groundingCheck,
    enableAnswerPolicyV11: true,
    enableProblemCentricAnswerV1: true,
    enableLifecycleAnswerV1: true
  });
}
/**
 * Extracts the body of one titled section from a composed reply.
 *
 * The body starts right after the first case-insensitive occurrence of
 * `<title>:` and runs up to the next known section heading followed by a
 * colon, or to the end of the text when no further heading follows.
 *
 * @param text Full reply text; `null`/`undefined` are treated as empty.
 * @param title Section heading without the trailing colon; matched literally.
 * @returns The trimmed section body, or "" when the heading is not found.
 */
function extractSection(text: string, title: string): string {
  // Escapes regex metacharacters so headings are matched as plain text.
  const escapeForRegExp = (raw: string): string => raw.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");

  // Headings that terminate the section being captured.
  const sectionHeadings = [
    "Коротко",
    "Что сломано",
    "Почему это похоже на проблему",
    "На чем это основано",
    "Что проверить первым",
    "Ограничения"
  ];
  const boundaryPattern = sectionHeadings.map((heading) => escapeForRegExp(heading)).join("|");

  // Lazy capture so the body stops at the nearest following heading.
  const sectionRe = new RegExp(`${escapeForRegExp(title)}:([\\s\\S]*?)(?=(?:${boundaryPattern}):|$)`, "i");

  const found = sectionRe.exec(String(text ?? ""));
  if (found === null) {
    return "";
  }
  const body = found[1];
  return body === undefined ? "" : body.trim();
}
/**
 * Contract tests for the problem-first answer layout produced by
 * `composeAssistantAnswer` (exercised through `composeCase`): the reply must
 * not leak internal routing/profile vocabulary, must describe the broken
 * mechanism rather than list entities, must collapse duplicate mechanisms,
 * must state a period limitation, and must stay short and fully sectioned.
 */
describe("assistant wave6 problem-first answer contract", () => {
  it("enforces leakage guard in direct user-facing answer", () => {
    const units = [buildProblemUnit({ id: "pu-1", type: "broken_chain_segment", defect: "failed_edge:payment_to_settlement", account: "60" })];
    const output = composeCase("Покажи проблему по расчетам.", buildRetrieval(units));
    // Internal identifiers present in the retrieval fixture must not surface in the reply.
    expect(output.assistant_reply).not.toMatch(
      /graph_|domain_scope|relation_patterns|route|profile|hybrid_store_plus_live|store_canonical|semantic_profile|lifecycle_defect_type/i
    );
  });

  it("keeps narrative mechanism-first and avoids entity-list direct answer", () => {
    const units = [buildProblemUnit({ id: "pu-1", type: "broken_chain_segment", defect: "failed_edge:payment_to_settlement", account: "60" })];
    const output = composeCase("Проверь по 60 счету, где разрыв.", buildRetrieval(units));
    const brokenSection = extractSection(output.assistant_reply, "Что сломано");
    expect(brokenSection).toMatch(/не подтвержден|разрыв|зависл|закрыти/i);
    // The `m` flag makes `^` anchor at every line, so an entity-style bullet is
    // rejected on any line of the section, not only on its first line.
    expect(brokenSection).not.toMatch(/^\s*-\s*(Document|Record|Entity)\b/im);
  });

  it("does not expose route/profile explanation in user-facing text", () => {
    const units = [buildProblemUnit({ id: "pu-1", type: "document_conflict", defect: "posting_mismatch", account: "60" })];
    const output = composeCase("Где конфликт документа и проводки?", buildRetrieval(units));
    expect(output.assistant_reply).not.toMatch(/route|profile|semantic|domain_scope|relation_patterns|typed_domain_path/i);
  });

  it("collapses duplicate problem lines for the same mechanism", () => {
    // Two units of different types that both refer to the payment_to_settlement edge.
    const units = [
      buildProblemUnit({ id: "pu-1", type: "broken_chain_segment", defect: "failed_edge:payment_to_settlement", account: "60" }),
      buildProblemUnit({ id: "pu-2", type: "unresolved_settlement_cluster", defect: "payment_to_settlement", account: "60" })
    ];
    const output = composeCase("Проверь хвост по расчетам.", buildRetrieval(units));
    const brokenSection = extractSection(output.assistant_reply, "Что сломано");
    // Count only bullet lines ("- ...") inside the section.
    const bulletLines = brokenSection
      .split(/\r?\n/g)
      .map((line) => line.trim())
      .filter((line) => line.startsWith("- "));
    expect(bulletLines.length).toBe(1);
  });

  it("shows explicit limitation when period is missing", () => {
    const units = [buildProblemUnit({ id: "pu-1", type: "lifecycle_anomaly_node", defect: "missing_expected_transition", account: "97", lifecycleDomain: "deferred_expense" })];
    // The user message names no period, so the limitations section must mention it.
    const output = composeCase("Проверь по 97 счету зависание списания.", buildRetrieval(units));
    const limitationsSection = extractSection(output.assistant_reply, "Ограничения");
    expect(limitationsSection).toMatch(/период/i);
  });

  it("returns short accountant-readable answers for P0 domains without technical dump", () => {
    // One case per domain: settlements (60/62), VAT flow, period close.
    const cases: Array<{
      message: string;
      retrieval: UnifiedRetrievalResult;
      domainHint: RegExp;
    }> = [
      {
        message: "Проверь хвосты по расчетам 60/62.",
        retrieval: buildRetrieval([
          buildProblemUnit({ id: "pu-60", type: "broken_chain_segment", defect: "failed_edge:payment_to_settlement", account: "60" })
        ]),
        domainHint: /расчет|оплат|закрыти/i
      },
      {
        message: "Проверь НДС-цепочку по документу.",
        retrieval: buildRetrieval([
          buildProblemUnit({ id: "pu-vat", type: "cross_branch_inconsistency_cluster", defect: "invoice_linked", account: "68", lifecycleDomain: "vat_flow" })
        ]),
        domainHint: /ндс|регистр|книг/i
      },
      {
        message: "Проверь закрытие месяца и затраты 20-44.",
        retrieval: buildRetrieval([
          buildProblemUnit({ id: "pu-close", type: "period_risk_cluster", defect: "close_operation_runs", account: "20", lifecycleDomain: "period_close" })
        ]),
        domainHint: /закрыти|месяц|затрат/i
      }
    ];
    for (const testCase of cases) {
      const output = composeCase(testCase.message, testCase.retrieval);
      // Domain wording, all six section headings, a length cap, and no internal vocabulary.
      expect(output.assistant_reply).toMatch(testCase.domainHint);
      expect(output.assistant_reply).toContain("Коротко:");
      expect(output.assistant_reply).toContain("Что сломано:");
      expect(output.assistant_reply).toContain("Почему это похоже на проблему:");
      expect(output.assistant_reply).toContain("На чем это основано:");
      expect(output.assistant_reply).toContain("Что проверить первым:");
      expect(output.assistant_reply).toContain("Ограничения:");
      expect(output.assistant_reply.length).toBeLessThan(1800);
      expect(output.assistant_reply).not.toMatch(/graph_|domain_scope|relation_patterns|semantic_profile|route|profile/i);
    }
  });
});