NODEDC_1C/llm_normalizer/backend/tests/problemUnitAssembler.test.ts

191 lines
6.3 KiB
TypeScript

import { describe, expect, it } from "vitest";
import type { EvidenceItem } from "../src/types/stage1Contracts";
import type { ProblemUnit } from "../src/types/stage2ProblemUnits";
import {
assembleProblemUnits,
buildCandidateEvidence,
clusterCandidateEvidence,
collapseDuplicates,
detectProblemUnitType
} from "../src/services/problemUnitAssembler";
/**
 * Test fixture factory: produces an `EvidenceItem` with fixed boilerplate so
 * each test only has to state what varies.
 *
 * @param input.evidenceId Value copied into `evidence_id`.
 * @param input.sourceId Document id used in `source_ref.id`, `pointer.source.id`
 *   and (lower-cased) inside `source_ref.canonical_ref`.
 * @param input.payload Optional payload passed through by reference; defaults to `{}`.
 * @param input.confidence Optional confidence grade; defaults to `"medium"`.
 * @returns An evidence item whose remaining fields are hard-coded constants.
 */
function buildEvidence(input: {
  evidenceId: string;
  sourceId: string;
  payload?: Record<string, unknown>;
  confidence?: "high" | "medium" | "low";
}): EvidenceItem {
  const { evidenceId, sourceId } = input;

  // Coordinates repeated in `source_ref` and `pointer.source`. They are spread
  // into each location below so the two objects never share a reference.
  const origin = {
    namespace: "snapshot_2020",
    entity: "Document",
    id: sourceId,
    period: "2020-06"
  } as const;

  // Only the id segment of the canonical ref is lower-cased.
  const canonicalRef = `evidence_source_ref_v1|snapshot_2020|document|${sourceId.toLowerCase()}|2020-06`;

  return {
    evidence_id: evidenceId,
    claim_ref: "requirement:R1",
    source_type: "retrieval_item",
    source_ref: {
      schema_version: "evidence_source_ref_v1",
      ...origin,
      canonical_ref: canonicalRef
    },
    pointer: {
      fragment_id: "F1",
      route: "hybrid_store_plus_live",
      source: { ...origin },
      locator: { field_path: "risk_score", item_index: 0 }
    },
    evidence_kind: "mechanism_link",
    mechanism_note: null,
    confidence: input.confidence ?? "medium",
    limitation: null,
    payload: input.payload ?? {}
  };
}
/**
 * Scaffold-level checks for the problem-unit assembler: candidate building,
 * clustering, unit-type detection, duplicate collapsing, and the end-to-end
 * `assembleProblemUnits` entry point. All evidence comes from `buildEvidence`.
 */
describe("problemUnitAssembler scaffold", () => {
  it("groups candidate evidence by route/source/pattern signature", () => {
    // Two items on DOC-1 carrying the same failed edge; each call gets its own payload object.
    const brokenEdgeEvidence = ["ev-1", "ev-2"].map((evidenceId) =>
      buildEvidence({
        evidenceId,
        sourceId: "DOC-1",
        payload: {
          failed_expected_edge: "statement_to_document"
        }
      })
    );
    // A third item on a different document with an anomaly pattern instead of a failed edge.
    const lifecycleEvidence = buildEvidence({
      evidenceId: "ev-3",
      sourceId: "DOC-2",
      payload: {
        anomaly_patterns: ["lifecycle_gap"]
      }
    });
    const evidence = [...brokenEdgeEvidence, lifecycleEvidence];

    const candidates = buildCandidateEvidence(evidence, "hybrid_store_plus_live");
    const firstCandidate = candidates[0];
    expect(firstCandidate.candidate_id).toBe("cand-ev-1");
    expect(firstCandidate.relation_pattern_hits).toContain("failed_edge:statement_to_document");
    expect(firstCandidate.entity_backlinks.length).toBeGreaterThan(0);

    // Expect exactly two clusters, one of which holds two candidates
    // (presumably the two DOC-1 items that share a failed edge).
    const clusters = clusterCandidateEvidence(candidates);
    expect(clusters.length).toBe(2);
    const hasPairCluster = clusters.some(({ candidates: members }) => members.length === 2);
    expect(hasPairCluster).toBe(true);
  });

  it("detects baseline problem unit type from anomaly hints", () => {
    const lifecycleEvidence = buildEvidence({
      evidenceId: "ev-lifecycle",
      sourceId: "DOC-LC",
      payload: {
        anomaly_patterns: ["lifecycle_gap"]
      }
    });
    const candidates = buildCandidateEvidence([lifecycleEvidence], "store_feature_risk");

    // A single evidence item is fed in; only the first cluster is inspected.
    const clusters = clusterCandidateEvidence(candidates);
    const detectedType = detectProblemUnitType(clusters[0]);
    expect(detectedType).toBe("lifecycle_anomaly_node");
  });

  it("collapses duplicate problem units by signature", () => {
    // Builds a broken-chain unit for DOC-1; only the fields passed in `variant`
    // differ between the two units under test. Every call returns fresh arrays.
    const brokenChainUnit = (variant: {
      id: string;
      summary: string;
      severity: ProblemUnit["severity"];
      confidence: ProblemUnit["confidence"];
      evidencePack: string[];
    }): ProblemUnit => ({
      schema_version: "problem_unit_v0_1",
      problem_unit_id: variant.id,
      problem_unit_type: "broken_chain_segment",
      title: "broken",
      mechanism_summary: variant.summary,
      business_defect_class: "failed_edge:statement_to_document",
      severity: variant.severity,
      confidence: variant.confidence,
      affected_entities: ["Document:DOC-1"],
      affected_documents: ["Document:DOC-1"],
      affected_postings: [],
      affected_accounts: [],
      affected_counterparties: [],
      affected_contracts: [],
      failed_expected_edge: "statement_to_document",
      evidence_pack: variant.evidencePack,
      entity_backlinks: [{ entity: "Document", id: "DOC-1" }],
      snapshot_limitations: []
    });

    const units: ProblemUnit[] = [
      brokenChainUnit({
        id: "pu-1",
        summary: "m1",
        severity: { score: 0.7, grade: "high" },
        confidence: { score: 0.6, grade: "medium" },
        evidencePack: ["cand-1"]
      }),
      brokenChainUnit({
        id: "pu-2",
        summary: "m2",
        severity: { score: 0.8, grade: "high" },
        confidence: { score: 0.7, grade: "high" },
        evidencePack: ["cand-2"]
      })
    ];

    const { duplicate_collapses: collapseCount, problem_units: survivors } = collapseDuplicates(units);
    expect(collapseCount).toBe(1);
    expect(survivors.length).toBe(1);

    // The surviving unit carries both evidence ids and a severity score of 0.8
    // (the second unit's value, which is also the larger of the two).
    const survivor = survivors[0];
    expect(survivor.evidence_pack).toEqual(["cand-1", "cand-2"]);
    expect(survivor.severity.score).toBe(0.8);
  });

  it("assembles problem units and summary with bounded scaffold fields", () => {
    const failedEdgeEvidence = buildEvidence({
      evidenceId: "ev-1",
      sourceId: "DOC-1",
      payload: {
        failed_expected_edge: "statement_to_document",
        anomaly_patterns: ["period_close_risk"]
      },
      confidence: "high"
    });
    const settlementEvidence = buildEvidence({
      evidenceId: "ev-2",
      sourceId: "DOC-2",
      payload: {
        anomaly_patterns: ["settlement_tail"]
      },
      confidence: "low"
    });

    const assembled = assembleProblemUnits({
      route: "hybrid_store_plus_live",
      evidence: [failedEdgeEvidence, settlementEvidence]
    });

    expect(assembled.candidate_evidence.length).toBe(2);
    expect(assembled.problem_units.length).toBeGreaterThan(0);

    // Shape checks only: the summary must be internally consistent and expose
    // the expected field types, without pinning specific distribution values.
    const summary = assembled.problem_unit_summary;
    expect(summary.schema_version).toBe("problem_unit_summary_v0_1");
    expect(summary.units_total).toBe(assembled.problem_units.length);
    expect(Array.isArray(summary.unit_types)).toBe(true);
    expect(typeof summary.severity_distribution.low).toBe("number");
    expect(typeof summary.confidence_distribution.medium).toBe("number");
    expect(typeof summary.duplicate_collapses).toBe("number");
  });
});