/**
 * Regression suite comparing "before" baselines (the `legacy*` helpers in this
 * file) against the current `toRouteHintSummary` routing and
 * `AssistantDataLayer.executeRoute` retrieval for 30 fixed queries across three
 * domains (settlements 60/62, VAT document/register/book, month close 20/44).
 *
 * Fixture records are written to a temporary snapshot directory that is removed
 * after each test.
 */
import fs from "fs";
import os from "os";
import path from "path";
import { afterEach, describe, expect, it, vi } from "vitest";
import { AssistantDataLayer } from "../src/services/assistantDataLayer";
import { toRouteHintSummary } from "../src/services/routeHintAdapter";
import type { NormalizedFragmentV2_0_2, NormalizedQueryV2_0_2 } from "../src/types/normalizer";

type DomainCardId = "settlements_60_62" | "vat_document_register_book" | "month_close_costs_20_44";
type DomainPrefix = "SET" | "VAT" | "CLS";

/** Loosely typed snapshot record as produced by `buildRecord` and written to the fixture files. */
type SnapshotRecord = Record<string, unknown>;

/** One regression query together with the domain prefix its top results are expected to carry. */
interface RegressionCase {
  case_id: string;
  domain: DomainCardId;
  expected_prefix: DomainPrefix;
  query: string;
  account_hint: string;
  candidate_label: "anomaly_probe" | "period_close_risk";
}

/** Record groups; each group is written to its own snapshot file by `createSnapshotRoot`. */
interface SnapshotDataset {
  keyFields: Array<SnapshotRecord>;
  problemCases: Array<SnapshotRecord>;
  journals: Array<SnapshotRecord>;
  ndsRegisters: Array<SnapshotRecord>;
  docs: Array<SnapshotRecord>;
}

/** Temporary snapshot directories created by the current test; drained by `cleanupTempDirs`. */
const TEMP_DIRS: string[] = [];

/** Removes every directory registered in `TEMP_DIRS` and empties the registry. */
function cleanupTempDirs(): void {
  // splice(0) empties the registry while handing back the entries to delete.
  for (const dir of TEMP_DIRS.splice(0)) {
    fs.rmSync(dir, { recursive: true, force: true });
  }
}

/**
 * Builds one fixture record.
 *
 * @param input.unknownLinks value for `unknown_link_count`; defaults to 1 when omitted.
 * @param input.withCounterparty pass `false` to omit the Counterparty link; any other value keeps it.
 * @param input.zeroGuid when truthy, adds a `LinkGuid` attribute holding the all-zero GUID.
 * @returns a record with `source_entity` "Document", a `risk_marker` problem flag,
 *          the generated attributes and links.
 */
function buildRecord(input: {
  id: string;
  account: string;
  period: string;
  description: string;
  unknownLinks?: number;
  withCounterparty?: boolean;
  zeroGuid?: boolean;
}): SnapshotRecord {
  const attributes: SnapshotRecord = {
    Recorder: `${input.id}-REC`,
    Period: input.period,
    Description: input.description,
    Account: input.account,
    "trace@navigationLinkUrl": `/trace/${input.id}`
  };
  if (input.zeroGuid) {
    attributes.LinkGuid = "00000000-0000-0000-0000-000000000000";
  }

  const links: Array<SnapshotRecord> = [
    {
      relation: "document_refers_to_document",
      target_entity: "Document",
      target_id: `${input.id}-DOC-LINK`,
      source_field: "Recorder"
    }
  ];
  if (input.withCounterparty !== false) {
    links.push({
      relation: "document_has_counterparty",
      target_entity: "Counterparty",
      target_id: `${input.id}-CP`,
      source_field: "Counterparty"
    });
  }

  return {
    source_entity: "Document",
    source_id: input.id,
    display_name: input.id,
    unknown_link_count: input.unknownLinks ?? 1,
    problem_flags: ["risk_marker"],
    attributes,
    links
  };
}

/**
 * Creates the fixture dataset: five records per domain (SET / VAT / CLS, all dated
 * June 2020) plus four `MIX-*` records dated late December 2020 that carry a zero GUID.
 * The `MIX-*` records are placed first in several groups.
 */
function createDataset(): SnapshotDataset {
  const settlements = [
    buildRecord({
      id: "SET-PC-1",
      account: "60",
      period: "2020-06-10T00:00:00",
      description: "supplier payment recorded but settlement chain is still open account 60"
    }),
    buildRecord({
      id: "SET-PC-2",
      account: "62",
      period: "2020-06-11T00:00:00",
      description: "customer settlement tail payment to settlement relation broken account 62"
    }),
    buildRecord({
      id: "SET-DOC-1",
      account: "60",
      period: "2020-06-20T00:00:00",
      description: "bank statement linked to settlement document payment chain account 60"
    }),
    buildRecord({
      id: "SET-DOC-2",
      account: "62",
      period: "2020-06-21T00:00:00",
      description: "customer payment linked to settlement closure account 62"
    }),
    buildRecord({
      id: "SET-KF-1",
      account: "60",
      period: "2020-06-22T00:00:00",
      description: "settlement key field record account 60 payment"
    })
  ];

  const vat = [
    buildRecord({
      id: "VAT-PC-1",
      account: "68",
      period: "2020-06-12T00:00:00",
      description: "vat invoice linked to register and purchase book account 68"
    }),
    buildRecord({
      id: "VAT-PC-2",
      account: "19",
      period: "2020-06-13T00:00:00",
      description: "vat source document present but invoice to vat link is broken account 19"
    }),
    buildRecord({
      id: "VAT-NDS-1",
      account: "68",
      period: "2020-06-23T00:00:00",
      description: "vat register entry book generation deduction posted"
    }),
    buildRecord({
      id: "VAT-NDS-2",
      account: "19",
      period: "2020-06-24T00:00:00",
      description: "invoice to vat register chain for deduction account 19"
    }),
    buildRecord({
      id: "VAT-KF-1",
      account: "68",
      period: "2020-06-25T00:00:00",
      description: "vat key field invoice register linkage account 68"
    })
  ];

  const close = [
    buildRecord({
      id: "CLS-PC-1",
      account: "20",
      period: "2020-06-14T00:00:00",
      description: "period close costs accumulated but allocation rules unresolved account 20"
    }),
    buildRecord({
      id: "CLS-PC-2",
      account: "44",
      period: "2020-06-15T00:00:00",
      description: "month close operation runs with residuals not zero account 44"
    }),
    buildRecord({
      id: "CLS-DOC-1",
      account: "20",
      period: "2020-06-26T00:00:00",
      description: "period close costs allocation writeoff account 20"
    }),
    buildRecord({
      id: "CLS-DOC-2",
      account: "44",
      period: "2020-06-27T00:00:00",
      description: "month close residuals explained allocation account 44"
    }),
    buildRecord({
      id: "CLS-KF-1",
      account: "20",
      period: "2020-06-28T00:00:00",
      description: "period close key field account 20 allocation"
    })
  ];

  const mixed = [
    buildRecord({
      id: "MIX-PC-1",
      account: "68",
      period: "2020-12-31T00:00:00",
      description: "bank settlement vat mixed conflict record",
      zeroGuid: true
    }),
    buildRecord({
      id: "MIX-NDS-1",
      account: "60",
      period: "2020-12-30T00:00:00",
      description: "mixed nds and settlement overlap record",
      zeroGuid: true
    }),
    buildRecord({
      id: "MIX-DOC-1",
      account: "68",
      period: "2020-12-29T00:00:00",
      description: "mixed document with vat settlement and bank signals",
      zeroGuid: true
    }),
    buildRecord({
      id: "MIX-KF-1",
      account: "44",
      period: "2020-12-28T00:00:00",
      description: "mixed key field with period close and vat overlap",
      zeroGuid: true
    })
  ];

  return {
    keyFields: [settlements[4], vat[4], close[4], mixed[3]],
    problemCases: [mixed[0], vat[0], settlements[0], close[0], settlements[1], vat[1], close[1]],
    journals: [close[2], close[3], settlements[3]],
    ndsRegisters: [mixed[1], vat[2], vat[3]],
    docs: [mixed[2], settlements[2], settlements[3], vat[2], vat[3], close[2], close[3]]
  };
}

/**
 * Writes the dataset into a fresh temporary directory, one `{ records: [...] }` JSON
 * file per group, and registers the directory for cleanup.
 *
 * @returns the path of the created snapshot directory.
 */
function createSnapshotRoot(dataset: SnapshotDataset): string {
  const root = fs.mkdtempSync(path.join(os.tmpdir(), "assistant-wave5-regression-"));
  TEMP_DIRS.push(root);

  const write = (fileName: string, records: Array<SnapshotRecord>) => {
    fs.writeFileSync(path.resolve(root, fileName), JSON.stringify({ records }, null, 2), "utf-8");
  };

  write("09_samples_key_fields_Recorder_Ref_Supplier_Buyer_Responsible.json", dataset.keyFields);
  write("03_snapshot_fragment_problem_cases.json", dataset.problemCases);
  write("07_samples_DocumentJournals.json", dataset.journals);
  write("08_samples_NDS_registers.json", dataset.ndsRegisters);
  write("04_samples_SpisanieSRaschetnogoScheta.json", dataset.docs);
  // These two files are written with an empty record list.
  write("05_samples_RealizaciyaTovarovUslug.json", []);
  write("06_samples_PostuplenieTovarovUslug.json", []);
  return root;
}

/** Maps a record id to its domain prefix; ids not starting with SET/VAT/CLS yield "OTHER". */
function resolvePrefixFromId(sourceId: string): DomainPrefix | "OTHER" {
  if (sourceId.startsWith("SET")) return "SET";
  if (sourceId.startsWith("VAT")) return "VAT";
  if (sourceId.startsWith("CLS")) return "CLS";
  return "OTHER";
}

/** Returns the stringified `source_id` of each item, dropping items whose id is missing or empty. */
function extractIds(items: Array<SnapshotRecord>): string[] {
  return items.map((item) => String(item.source_id ?? "")).filter(Boolean);
}

/** True when any of the first three ids resolves to a prefix other than `expected`. */
function hasForeignDomainInTop3(ids: string[], expected: DomainPrefix): boolean {
  return ids.slice(0, 3).some((id) => resolvePrefixFromId(id) !== expected);
}

/** True when the list is non-empty and its first id resolves to `expected`. */
function top1IsRelevant(ids: string[], expected: DomainPrefix): boolean {
  // Explicit undefined guard instead of a length check followed by an unchecked ids[0].
  const [first] = ids;
  return first !== undefined && resolvePrefixFromId(first) === expected;
}

/**
 * Baseline risk score used for the "before" metrics.
 *
 * Adds: 3 if `unknown_link_count` > 0; min(3, 1 + zero-GUID attribute count) if any
 * attribute equals the all-zero GUID; 1 if any attribute key contains
 * "@navigationLinkUrl"; 1 if there is no link targeting "Counterparty"; 1 if
 * `problem_flags` is a non-empty array.
 */
function legacyRiskScore(record: SnapshotRecord): number {
  const unknown = Number(record.unknown_link_count ?? 0);
  const attributes = (record.attributes as SnapshotRecord) ?? {};
  const links = Array.isArray(record.links) ? (record.links as Array<SnapshotRecord>) : [];

  let zeroGuid = 0;
  for (const value of Object.values(attributes)) {
    if (String(value) === "00000000-0000-0000-0000-000000000000") {
      zeroGuid += 1;
    }
  }

  let navigationLinks = 0;
  for (const key of Object.keys(attributes)) {
    if (key.includes("@navigationLinkUrl")) {
      navigationLinks += 1;
    }
  }

  const cpLinks = links.filter((link) => String(link.target_entity ?? "") === "Counterparty").length;
  const flags = Array.isArray(record.problem_flags) ? record.problem_flags : [];

  let score = 0;
  if (unknown > 0) score += 3;
  if (zeroGuid > 0) score += Math.min(3, 1 + zeroGuid);
  if (navigationLinks > 0) score += 1;
  if (cpLinks === 0) score += 1;
  if (flags.length > 0) score += 1;
  return score;
}

/**
 * Baseline risk ranking: scores `problemCases` + `ndsRegisters`, keeps scores >= 2,
 * orders by score descending then id ascending, and returns at most 15 ids.
 * The result does not depend on the query.
 */
function legacyRiskTopIds(dataset: SnapshotDataset): string[] {
  return [...dataset.problemCases, ...dataset.ndsRegisters]
    .map((record) => ({ id: String(record.source_id ?? ""), score: legacyRiskScore(record) }))
    .filter((item) => item.score >= 2)
    .sort((left, right) => {
      if (right.score !== left.score) {
        return right.score - left.score;
      }
      return left.id.localeCompare(right.id);
    })
    .slice(0, 15)
    .map((item) => item.id);
}

/**
 * Baseline canonical ranking: picks `ndsRegisters` + `keyFields` when the query mentions
 * vat/nds/19/68 (or "ндс"), otherwise `docs`; orders by `attributes.Period` descending
 * (unparseable or missing dates sort as 0) and returns at most 12 ids.
 */
function legacyCanonicalTopIds(query: string, dataset: SnapshotDataset): string[] {
  const lower = query.toLowerCase();
  const useVatSource = /\bvat\b|\bnds\b|\b19\b|\b68\b|ндс/i.test(lower);
  const source = useVatSource ? [...dataset.ndsRegisters, ...dataset.keyFields] : dataset.docs;
  return source
    .map((record) => ({
      id: String(record.source_id ?? ""),
      sort: Date.parse(String(((record.attributes as SnapshotRecord)?.Period ?? "") || "")) || 0
    }))
    .sort((left, right) => right.sort - left.sort)
    .slice(0, 12)
    .map((item) => item.id);
}

/**
 * Wraps a regression case into a single-fragment normalized query. All fragment flags
 * are false; only the account hint and candidate label vary per case.
 */
function buildNormalizedCase(testCase: RegressionCase): NormalizedQueryV2_0_2 {
  const fragment: NormalizedFragmentV2_0_2 = {
    fragment_id: "F1",
    raw_fragment_text: testCase.query,
    normalized_fragment_text: testCase.query,
    domain_relevance: "in_scope",
    business_scope: "company_specific_accounting",
    entity_hints: ["document"],
    account_hints: [testCase.account_hint],
    document_hints: [],
    register_hints: [],
    time_scope: { type: "missing", value: null, confidence: "low" },
    flags: {
      has_multi_entity_scope: false,
      asks_for_chain_explanation: false,
      asks_for_ranking_or_top: false,
      asks_for_period_summary: false,
      asks_for_rule_check: false,
      asks_for_anomaly_scan: false,
      asks_for_exact_object_trace: false,
      asks_for_evidence: false,
      mentions_period_close_context: false
    },
    candidate_labels: [testCase.candidate_label],
    confidence: "high",
    execution_readiness: "executable",
    clarification_reason: null,
    soft_assumption_used: [],
    route_status: "routed",
    no_route_reason: null
  };

  return {
    schema_version: "normalized_query_v2_0_2",
    user_message_raw: testCase.query,
    message_in_scope: true,
    scope_confidence: "high",
    contains_multiple_tasks: false,
    fragments: [fragment],
    discarded_fragments: [],
    global_notes: { needs_clarification: false, clarification_reason: null }
  };
}

/**
 * Baseline routing used for the "before" route metric. Decides purely from fragment
 * flags (plus account hints / candidate labels for the hybrid branch) and falls back
 * to "store_canonical" when no flag-driven rule matches.
 */
function legacyRouteForFragment(fragment: NormalizedFragmentV2_0_2): string {
  const accountHints = fragment.account_hints.map((item) => String(item));
  const hasLifecycleDomainHint =
    accountHints.some((item) => /^(97|01|02|08|19|68(?:\.\d+)?|51|60|62)$/.test(item)) ||
    fragment.candidate_labels.includes("anomaly_probe") ||
    fragment.candidate_labels.includes("period_close_risk");

  if (fragment.flags.asks_for_exact_object_trace) return "live_mcp_drilldown";
  if (fragment.flags.asks_for_ranking_or_top || fragment.flags.asks_for_period_summary) return "batch_refresh_then_store";
  if (fragment.flags.asks_for_chain_explanation && (fragment.flags.has_multi_entity_scope || hasLifecycleDomainHint)) {
    return "hybrid_store_plus_live";
  }
  if (fragment.flags.asks_for_rule_check && !fragment.flags.asks_for_chain_explanation) return "store_feature_risk";
  if (
    fragment.flags.asks_for_anomaly_scan &&
    !fragment.flags.asks_for_ranking_or_top &&
    !(fragment.flags.has_multi_entity_scope && fragment.flags.asks_for_chain_explanation)
  ) {
    return "store_feature_risk";
  }
  return "store_canonical";
}

const SETTLEMENT_QUERIES = [
  "Show why payment recorded but settlement for account 60 is still open.",
  "Account 62: payment posted, settlement closure is missing.",
  "Find settlement tails for account 60 where payment did not close chain.",
  "Bank and settlements 60/62: where link to settlement is broken.",
  "Why does account 60 keep open settlement after payment record.",
  "Account 62 settlement problem: payment done, closure not reached.",
  "Detect symptom where payment exists but settlement remains open on 60.",
  "Find lifecycle gap in payment to settlement for account 62.",
  "60-62 settlement chain has residual tail after payment.",
  "Locate unresolved settlement after bank payment on account 60."
];

const VAT_QUERIES = [
  "VAT check: source document exists but invoice link is missing on account 68.",
  "Account 19 VAT chain: document to register to book is broken.",
  "Find VAT symptom where invoice linked but book entry was not generated.",
  "Show VAT lifecycle gaps for account 68 in document-register-book flow.",
  "VAT deduction issue on 19: source document present but deduction not posted.",
  "Find broken invoice to VAT register relation for account 68.",
  "VAT problem-first: document exists, register is present, book entry missing.",
  "Locate VAT residual issue where deduction chain is incomplete on 19.",
  "VAT 68: invoice and register mismatch in purchase/sales book.",
  "Detect VAT symptom with broken doc-register-book chain for account 68."
];

const CLOSE_QUERIES = [
  "Month close: costs on accounts 20 and 44 are not allocated, residuals remain.",
  "Period close problem for 20/44: allocation rules unresolved.",
  "Find close lifecycle gap where costs accumulated but close operation fails 20 44.",
  "Account 20 and 44 month close symptom: residuals are not zero.",
  "Show period close issue when costs are accumulated but not distributed 20/44.",
  "Close operation run for 20 and 44 leaves unexplained residuals.",
  "Detect month close break in costs allocation chain on 20/44.",
  "Period close 20-44: allocation exists but residual tail remains.",
  "Find cost close mismatch: costs accumulated, close not completed 20 and 44.",
  "Month close domain check for accounts 20 and 44 with unresolved residuals."
];

/**
 * All regression cases: ten per domain, with case ids `SET-01`…, `VAT-01`…, `CLS-01`….
 * The account hint alternates by index parity within each domain.
 */
const REGRESSION_CASES: RegressionCase[] = [
  ...SETTLEMENT_QUERIES.map((query, index) => ({
    case_id: `SET-${String(index + 1).padStart(2, "0")}`,
    domain: "settlements_60_62" as const,
    expected_prefix: "SET" as const,
    query,
    account_hint: index % 2 === 0 ? "60" : "62",
    candidate_label: "anomaly_probe" as const
  })),
  ...VAT_QUERIES.map((query, index) => ({
    case_id: `VAT-${String(index + 1).padStart(2, "0")}`,
    domain: "vat_document_register_book" as const,
    expected_prefix: "VAT" as const,
    query,
    account_hint: index % 2 === 0 ? "68" : "19",
    candidate_label: "anomaly_probe" as const
  })),
  ...CLOSE_QUERIES.map((query, index) => ({
    case_id: `CLS-${String(index + 1).padStart(2, "0")}`,
    domain: "month_close_costs_20_44" as const,
    expected_prefix: "CLS" as const,
    query,
    account_hint: index % 2 === 0 ? "20" : "44",
    candidate_label: "period_close_risk" as const
  }))
];

describe.sequential("stage4 wave5 P0 domain purity + route discipline regression", () => {
  afterEach(() => {
    cleanupTempDirs();
    vi.resetModules();
  });

  it("keeps top-3 domain-pure and reroutes symptom/lifecycle intents away from canonical path", () => {
    const dataset = createDataset();
    const root = createSnapshotRoot(dataset);
    const dataLayer = new AssistantDataLayer(root);

    const metrics = {
      route: { before_canonical: 0, after_canonical: 0, after_hybrid: 0 },
      risk: { before_foreign_top3: 0, after_foreign_top3: 0, before_top1_relevant: 0, after_top1_relevant: 0 },
      canonical: { before_foreign_top3: 0, after_foreign_top3: 0, before_top1_relevant: 0, after_top1_relevant: 0 }
    };

    // The baseline risk ranking depends only on the dataset, so compute it once
    // rather than once per regression case.
    const beforeRiskIds = legacyRiskTopIds(dataset);

    for (const testCase of REGRESSION_CASES) {
      const normalized = buildNormalizedCase(testCase);
      const summary = toRouteHintSummary(normalized);
      expect(summary.mode).toBe("deterministic_v2");
      // Repeated as a plain check so the compiler narrows `summary` for the accesses below.
      if (summary.mode !== "deterministic_v2") {
        throw new Error("Expected deterministic_v2 route summary");
      }

      const afterRoute = summary.decisions[0]?.route;
      const beforeRoute = legacyRouteForFragment(normalized.fragments[0]);
      if (beforeRoute === "store_canonical") {
        metrics.route.before_canonical += 1;
      }
      if (afterRoute === "store_canonical") {
        metrics.route.after_canonical += 1;
      }
      if (afterRoute === "hybrid_store_plus_live") {
        metrics.route.after_hybrid += 1;
      }
      expect(afterRoute).toBe("hybrid_store_plus_live");

      const afterRisk = dataLayer.executeRoute("store_feature_risk", testCase.query);
      const afterRiskIds = extractIds(afterRisk.items as Array<SnapshotRecord>);
      if (hasForeignDomainInTop3(afterRiskIds, testCase.expected_prefix)) {
        metrics.risk.after_foreign_top3 += 1;
      }
      if (top1IsRelevant(afterRiskIds, testCase.expected_prefix)) {
        metrics.risk.after_top1_relevant += 1;
      }

      const afterCanonical = dataLayer.executeRoute("store_canonical", testCase.query);
      const afterCanonicalIds = extractIds(afterCanonical.items as Array<SnapshotRecord>);
      if (hasForeignDomainInTop3(afterCanonicalIds, testCase.expected_prefix)) {
        metrics.canonical.after_foreign_top3 += 1;
      }
      if (top1IsRelevant(afterCanonicalIds, testCase.expected_prefix)) {
        metrics.canonical.after_top1_relevant += 1;
      }

      if (hasForeignDomainInTop3(beforeRiskIds, testCase.expected_prefix)) {
        metrics.risk.before_foreign_top3 += 1;
      }
      if (top1IsRelevant(beforeRiskIds, testCase.expected_prefix)) {
        metrics.risk.before_top1_relevant += 1;
      }

      const beforeCanonicalIds = legacyCanonicalTopIds(testCase.query, dataset);
      if (hasForeignDomainInTop3(beforeCanonicalIds, testCase.expected_prefix)) {
        metrics.canonical.before_foreign_top3 += 1;
      }
      if (top1IsRelevant(beforeCanonicalIds, testCase.expected_prefix)) {
        metrics.canonical.before_top1_relevant += 1;
      }
    }

    expect(REGRESSION_CASES.length).toBe(30);
    expect(metrics.route.before_canonical).toBeGreaterThan(0);
    expect(metrics.route.after_canonical).toBe(0);
    expect(metrics.route.after_hybrid).toBe(REGRESSION_CASES.length);
    expect(metrics.risk.before_foreign_top3).toBeGreaterThan(metrics.risk.after_foreign_top3);
    expect(metrics.risk.after_foreign_top3).toBe(0);
    expect(metrics.risk.after_top1_relevant).toBe(REGRESSION_CASES.length);
    expect(metrics.canonical.before_foreign_top3).toBeGreaterThan(metrics.canonical.after_foreign_top3);
    expect(metrics.canonical.after_foreign_top3).toBe(0);
    expect(metrics.canonical.after_top1_relevant).toBe(REGRESSION_CASES.length);
  });
});