// NODEDC_1C/llm_normalizer/backend/tests/problemUnitAssembler.test.ts

import { describe, expect, it } from "vitest";
import type { EvidenceItem } from "../src/types/stage1Contracts";
import type { ProblemUnit } from "../src/types/stage2ProblemUnits";
import {
  assembleProblemUnits,
  buildCandidateEvidence,
  clusterCandidateEvidence,
  collapseDuplicates,
  detectProblemUnitType
} from "../src/services/problemUnitAssembler";

/**
 * Test fixture factory for `EvidenceItem`.
 *
 * Only four things vary between fixtures; every other field is a fixed literal.
 *
 * @param input.evidenceId - Stored verbatim as `evidence_id`.
 * @param input.sourceId - Written to `source_ref.id` and `pointer.source.id`;
 *   its lower-cased form is embedded in `source_ref.canonical_ref`.
 * @param input.payload - Stored as `payload`; an empty object when omitted.
 * @param input.confidence - Stored as `confidence`; `"medium"` when omitted.
 * @returns A newly built evidence item.
 */
function buildEvidence(input: {
  evidenceId: string;
  sourceId: string;
  payload?: Record<string, unknown>;
  confidence?: "high" | "medium" | "low";
}): EvidenceItem {
  const { evidenceId, sourceId } = input;

  // Called once per consumer so `source_ref` and `pointer.source` are built
  // from separate objects rather than sharing one reference.
  const documentSource = () =>
    ({
      namespace: "snapshot_2020",
      entity: "Document",
      id: sourceId,
      period: "2020-06"
    }) as const;

  return {
    evidence_id: evidenceId,
    claim_ref: "requirement:R1",
    source_type: "retrieval_item",
    source_ref: {
      schema_version: "evidence_source_ref_v1",
      ...documentSource(),
      canonical_ref: `evidence_source_ref_v1|snapshot_2020|document|${sourceId.toLowerCase()}|2020-06`
    },
    pointer: {
      fragment_id: "F1",
      route: "hybrid_store_plus_live",
      source: documentSource(),
      locator: { field_path: "risk_score", item_index: 0 }
    },
    evidence_kind: "mechanism_link",
    mechanism_note: null,
    confidence: input.confidence ?? "medium",
    limitation: null,
    payload: input.payload ?? {}
  };
}

describe("problemUnitAssembler scaffold", () => {
  // Fixtures: ev-1 and ev-2 differ only in their evidence id (same document,
  // same failed edge); ev-3 uses another document and carries an anomaly
  // pattern instead of a failed edge.
  it("groups candidate evidence by route/source/pattern signature", () => {
    const failedEdgeOnDoc1 = (evidenceId: string) =>
      buildEvidence({
        evidenceId,
        sourceId: "DOC-1",
        payload: { failed_expected_edge: "statement_to_document" }
      });

    const candidates = buildCandidateEvidence(
      [
        failedEdgeOnDoc1("ev-1"),
        failedEdgeOnDoc1("ev-2"),
        buildEvidence({
          evidenceId: "ev-3",
          sourceId: "DOC-2",
          payload: { anomaly_patterns: ["lifecycle_gap"] }
        })
      ],
      "hybrid_store_plus_live"
    );

    // The first candidate is checked for its derived id, pattern hit and backlinks.
    const firstCandidate = candidates[0];
    expect(firstCandidate.candidate_id).toBe("cand-ev-1");
    expect(firstCandidate.relation_pattern_hits).toContain("failed_edge:statement_to_document");
    expect(firstCandidate.entity_backlinks.length).toBeGreaterThan(0);

    // Two clusters are expected, at least one of them holding two candidates.
    const clusters = clusterCandidateEvidence(candidates);
    expect(clusters.length).toBe(2);
    const hasPairCluster = clusters.some((cluster) => cluster.candidates.length === 2);
    expect(hasPairCluster).toBe(true);
  });

  // A single evidence item carrying the "lifecycle_gap" anomaly pattern is run
  // through candidate building and clustering; the first cluster is classified.
  it("detects baseline problem unit type from anomaly hints", () => {
    const lifecycleEvidence = buildEvidence({
      evidenceId: "ev-lifecycle",
      sourceId: "DOC-LC",
      payload: { anomaly_patterns: ["lifecycle_gap"] }
    });

    const candidates = buildCandidateEvidence([lifecycleEvidence], "store_feature_risk");
    const clusters = clusterCandidateEvidence(candidates);
    const detectedType = detectProblemUnitType(clusters[0]);

    expect(detectedType).toBe("lifecycle_anomaly_node");
  });

  // Two units that differ only in id, mechanism summary, scores/confidence
  // grade and evidence pack are collapsed; the assertions pin the collapse
  // count, the merged evidence pack and the surviving severity score.
  it("collapses duplicate problem units by signature", () => {
    // Builds a brand-new unit (and brand-new arrays) on every call so the two
    // inputs never share references.
    const brokenChainUnit = (variant: {
      id: string;
      summary: string;
      severityScore: number;
      confidence: { score: number; grade: "medium" | "high" };
      candidateId: string;
    }): ProblemUnit => ({
      schema_version: "problem_unit_v0_1",
      problem_unit_id: variant.id,
      problem_unit_type: "broken_chain_segment",
      title: "broken",
      mechanism_summary: variant.summary,
      business_defect_class: "failed_edge:statement_to_document",
      severity: { score: variant.severityScore, grade: "high" },
      confidence: variant.confidence,
      affected_entities: ["Document:DOC-1"],
      affected_documents: ["Document:DOC-1"],
      affected_postings: [],
      affected_accounts: [],
      affected_counterparties: [],
      affected_contracts: [],
      failed_expected_edge: "statement_to_document",
      evidence_pack: [variant.candidateId],
      entity_backlinks: [{ entity: "Document", id: "DOC-1" }],
      snapshot_limitations: []
    });

    const units: ProblemUnit[] = [
      brokenChainUnit({
        id: "pu-1",
        summary: "m1",
        severityScore: 0.7,
        confidence: { score: 0.6, grade: "medium" },
        candidateId: "cand-1"
      }),
      brokenChainUnit({
        id: "pu-2",
        summary: "m2",
        severityScore: 0.8,
        confidence: { score: 0.7, grade: "high" },
        candidateId: "cand-2"
      })
    ];

    const collapsed = collapseDuplicates(units);
    expect(collapsed.duplicate_collapses).toBe(1);
    expect(collapsed.problem_units.length).toBe(1);

    const survivor = collapsed.problem_units[0];
    expect(survivor.evidence_pack).toEqual(["cand-1", "cand-2"]);
    expect(survivor.severity.score).toBe(0.8);
  });

  // End-to-end pass over two evidence items; checks the shape of the result
  // (counts, schema version, field types) rather than specific unit contents.
  it("assembles problem units and summary with bounded scaffold fields", () => {
    const highConfidenceEvidence = buildEvidence({
      evidenceId: "ev-1",
      sourceId: "DOC-1",
      payload: {
        failed_expected_edge: "statement_to_document",
        anomaly_patterns: ["period_close_risk"]
      },
      confidence: "high"
    });
    const lowConfidenceEvidence = buildEvidence({
      evidenceId: "ev-2",
      sourceId: "DOC-2",
      payload: { anomaly_patterns: ["settlement_tail"] },
      confidence: "low"
    });

    const assembled = assembleProblemUnits({
      route: "hybrid_store_plus_live",
      evidence: [highConfidenceEvidence, lowConfidenceEvidence]
    });

    expect(assembled.candidate_evidence.length).toBe(2);
    expect(assembled.problem_units.length).toBeGreaterThan(0);

    const summary = assembled.problem_unit_summary;
    expect(summary.schema_version).toBe("problem_unit_summary_v0_1");
    expect(summary.units_total).toBe(assembled.problem_units.length);
    expect(Array.isArray(summary.unit_types)).toBe(true);
    expect(typeof summary.severity_distribution.low).toBe("number");
    expect(typeof summary.confidence_distribution.medium).toBe("number");
    expect(typeof summary.duplicate_collapses).toBe("number");
  });
});