NODEDC_1C/llm_normalizer/backend/tests/assistantContracts.test.ts

162 lines
5.2 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import { describe, expect, it } from "vitest";
import type { RouteHintSummary } from "../src/types/normalizer";
import type { UnifiedRetrievalResult } from "../src/types/assistant";
import {
ACCOUNTANT_SCORING_RUBRIC_V01,
INVESTIGATION_MAX_EVIDENCE_REFS,
INVESTIGATION_MAX_UNCERTAINTIES
} from "../src/types/stage1Contracts";
import { createEmptyInvestigationState, updateInvestigationState } from "../src/services/investigationState";
/**
 * Builds the route-hint fixture shared by the tests in this file.
 *
 * The summary describes one in-scope fragment (`F1`) labelled as an anomaly
 * probe and routed to `store_feature_risk`, with no fallback. A new object
 * graph is created on every call.
 *
 * @returns A freshly constructed route summary.
 */
function buildRouteSummary(): RouteHintSummary {
  type RouteDecision = NonNullable<RouteHintSummary["decisions"]>[number];

  // The only decision: rule check, anomaly scan and evidence request are on,
  // every other flag is off.
  const anomalyProbeDecision: RouteDecision = {
    fragment_id: "F1",
    domain_relevance: "in_scope",
    business_scope: "company_specific_accounting",
    candidate_labels: ["anomaly_probe"],
    decision_flags: {
      has_multi_entity_scope: false,
      asks_for_chain_explanation: false,
      asks_for_ranking_or_top: false,
      asks_for_period_summary: false,
      asks_for_rule_check: true,
      asks_for_anomaly_scan: true,
      asks_for_exact_object_trace: false,
      asks_for_evidence: true,
      mentions_period_close_context: false
    },
    route: "store_feature_risk",
    reason: "test-route"
  };

  const summary: RouteHintSummary = {
    mode: "deterministic_v2",
    message_in_scope: true,
    scope_confidence: "high",
    // Planner counters agree with the single decision above.
    planner: {
      total_fragments: 1,
      in_scope_fragments: 1,
      out_of_scope_fragments: 0,
      discarded_fragments: 0,
      contains_multiple_tasks: false
    },
    decisions: [anomalyProbeDecision],
    fallback: {
      type: "none",
      message: null
    }
  };

  return summary;
}
/**
 * Builds a retrieval-result fixture for fragment `F1` / requirement `R1` on
 * the `store_feature_risk` route.
 *
 * Evidence entries are numbered from 1 (`ev-1`/`doc-1`, `ev-2`/`doc-2`, ...)
 * and all point at a document in the `snapshot_2020` namespace for period
 * `2020-06`. Apart from the numbering and `item_index`, every entry is
 * identical.
 *
 * @param evidenceCount Used as the `length` passed to `Array.from`, so it
 *   determines how many evidence entries are generated.
 * @returns A status-`ok` list result with empty `items` and one limitation.
 */
function buildRetrievalResult(evidenceCount: number): UnifiedRetrievalResult {
  const result: UnifiedRetrievalResult = {
    fragment_id: "F1",
    requirement_ids: ["R1"],
    route: "store_feature_risk",
    status: "ok",
    result_type: "list",
    items: [],
    summary: {},
    evidence: Array.from({ length: evidenceCount }, (_unused, position) => {
      // Identifiers are 1-based, while the locator keeps the 0-based position.
      const ordinal = position + 1;
      return {
        evidence_id: `ev-${ordinal}`,
        claim_ref: "requirement:R1",
        source_type: "retrieval_item",
        source_ref: {
          schema_version: "evidence_source_ref_v1",
          namespace: "snapshot_2020",
          entity: "document",
          id: `doc-${ordinal}`,
          period: "2020-06",
          canonical_ref: `evidence_source_ref_v1|snapshot_2020|document|doc-${ordinal}|2020-06`
        },
        pointer: {
          fragment_id: "F1",
          route: "store_feature_risk",
          source: {
            namespace: "snapshot_2020",
            entity: "document",
            id: `doc-${ordinal}`,
            period: "2020-06"
          },
          locator: {
            field_path: "risk_score",
            item_index: position
          }
        },
        evidence_kind: "anomaly_signal",
        mechanism_note: "Risk signal",
        confidence: "medium",
        limitation: null,
        payload: { risk_score: 2 }
      };
    }),
    why_included: [],
    selection_reason: [],
    risk_factors: [],
    business_interpretation: [],
    confidence: "high",
    limitations: ["Need period clarification"],
    errors: []
  };

  return result;
}
// Contract checks for the stage-1 scaffolding: the accountant scoring rubric
// and the size-bounded investigation state produced by one update.
describe("stage1 contract scaffolding", () => {
  it("provides rubric v0.1 for accountant-facing metrics", () => {
    const expectedMetricNames = [
      "retrieval_differentiation_rate",
      "generic_explanation_rate",
      "accountant_actionability_score",
      "false_confidence_rate",
      "broad_answer_rate",
      "mechanism_specificity_score",
      "followup_context_retention_score",
      "stage4_contract_compliance_rate"
    ];
    const rubricKeys = Object.keys(ACCOUNTANT_SCORING_RUBRIC_V01);

    // Array equality is order-sensitive, so this also pins the key order.
    expect(rubricKeys).toEqual(expectedMetricNames);

    // Each metric must have a band for score 0 and a band for score 5.
    for (const rubricKey of rubricKeys) {
      const scoreBands = ACCOUNTANT_SCORING_RUBRIC_V01[rubricKey as keyof typeof ACCOUNTANT_SCORING_RUBRIC_V01];
      const hasBandWithScore = (wanted: number): boolean => scoreBands.some((band) => band.score === wanted);
      expect(hasBandWithScore(0)).toBe(true);
      expect(hasBandWithScore(5)).toBe(true);
    }
  });

  it("updates investigation_state with bounded fields", () => {
    // NOTE(review): 40 is presumably above INVESTIGATION_MAX_EVIDENCE_REFS so
    // that the cap is actually exercised; confirm against stage1Contracts.
    const evidenceCount = 40;

    const emptyState = createEmptyInvestigationState("asst-contract-test", "2026-03-25T10:00:00.000Z");
    const nextState = updateInvestigationState({
      previous: emptyState,
      timestamp: "2026-03-25T10:01:00.000Z",
      questionId: "msg-1",
      userMessage: "Prover schet 97 za 2020-06 i podsveti risk.",
      routeSummary: buildRouteSummary(),
      requirements: [
        {
          requirement_id: "R1",
          source_fragment_id: "F1",
          requirement_text: "Проверить счет 97",
          subject_tokens: ["счет_97"],
          status: "covered",
          route: "store_feature_risk"
        }
      ],
      coverageReport: {
        requirements_total: 1,
        requirements_covered: 1,
        requirements_uncovered: [],
        requirements_partially_covered: [],
        clarification_needed_for: [],
        out_of_scope_requirements: []
      },
      retrievalResults: [buildRetrievalResult(evidenceCount)],
      replyType: "factual_with_explanation"
    });

    // First turn on a fresh state.
    expect(nextState.turn_index).toBe(1);
    expect(nextState.status).toBe("active");

    // Focus fields expected for this message: period 2020-06, account 97.
    expect(nextState.focus.period).toBe("2020-06");
    expect(nextState.focus.primary_accounts).toContain("97");

    // Collections must not exceed the contract limits.
    expect(nextState.evidence_refs.length).toBeLessThanOrEqual(INVESTIGATION_MAX_EVIDENCE_REFS);
    expect(nextState.open_uncertainties.length).toBeLessThanOrEqual(INVESTIGATION_MAX_UNCERTAINTIES);

    expect(nextState.query_mode_hint).toBe("direct_answer");
    expect(nextState.followup_context?.referenced_requirement_ids).toEqual(["R1"]);
  });
});