// File viewer metadata: 162 lines, 5.2 KiB, TypeScript.
import { describe, expect, it } from "vitest";
import type { RouteHintSummary } from "../src/types/normalizer";
import type { UnifiedRetrievalResult } from "../src/types/assistant";
import {
  ACCOUNTANT_SCORING_RUBRIC_V01,
  INVESTIGATION_MAX_EVIDENCE_REFS,
  INVESTIGATION_MAX_UNCERTAINTIES
} from "../src/types/stage1Contracts";
import { createEmptyInvestigationState, updateInvestigationState } from "../src/services/investigationState";

/**
 * Builds the route-hint summary fixture used by the investigation-state test.
 *
 * The fixture describes a single in-scope fragment ("F1") that is routed to
 * "store_feature_risk", with no fallback.
 *
 * @returns A freshly constructed summary object on every call, so callers may
 *   mutate the result without affecting other tests.
 */
function buildRouteSummary(): RouteHintSummary {
  return {
    mode: "deterministic_v2",
    message_in_scope: true,
    scope_confidence: "high",
    // Planner counters are consistent with the single decision listed below.
    planner: {
      total_fragments: 1,
      in_scope_fragments: 1,
      out_of_scope_fragments: 0,
      discarded_fragments: 0,
      contains_multiple_tasks: false
    },
    decisions: [
      {
        fragment_id: "F1",
        domain_relevance: "in_scope",
        business_scope: "company_specific_accounting",
        candidate_labels: ["anomaly_probe"],
        // Only the rule-check, anomaly-scan and evidence flags are raised.
        decision_flags: {
          has_multi_entity_scope: false,
          asks_for_chain_explanation: false,
          asks_for_ranking_or_top: false,
          asks_for_period_summary: false,
          asks_for_rule_check: true,
          asks_for_anomaly_scan: true,
          asks_for_exact_object_trace: false,
          asks_for_evidence: true,
          mentions_period_close_context: false
        },
        route: "store_feature_risk",
        reason: "test-route"
      }
    ],
    fallback: {
      type: "none",
      message: null
    }
  };
}

/**
 * Builds a retrieval-result fixture for fragment "F1" / requirement "R1" on
 * the "store_feature_risk" route, carrying a configurable number of evidence
 * entries.
 *
 * Evidence entries are numbered from 1: entry `i` (zero-based) gets
 * `evidence_id` "ev-{i+1}" and points at document "doc-{i+1}", while its
 * locator keeps the zero-based `item_index`.
 *
 * @param evidenceCount Number of evidence entries to generate.
 * @returns A freshly constructed result object with `evidenceCount` evidence
 *   entries and otherwise fixed field values.
 */
function buildRetrievalResult(evidenceCount: number): UnifiedRetrievalResult {
  // Shared literals are declared once so that `source_ref`, `canonical_ref`
  // and `pointer.source` cannot drift apart.
  const fragmentId = "F1";
  const route = "store_feature_risk";
  const schemaVersion = "evidence_source_ref_v1";
  const namespace = "snapshot_2020";
  const entity = "document";
  const period = "2020-06";

  return {
    fragment_id: fragmentId,
    requirement_ids: ["R1"],
    route,
    status: "ok",
    result_type: "list",
    items: [],
    summary: {},
    evidence: Array.from({ length: evidenceCount }, (_, index) => {
      const documentId = `doc-${index + 1}`;
      return {
        evidence_id: `ev-${index + 1}`,
        claim_ref: "requirement:R1",
        source_type: "retrieval_item",
        source_ref: {
          schema_version: schemaVersion,
          namespace,
          entity,
          id: documentId,
          period,
          // Pipe-joined form of the five fields above, in the same order.
          canonical_ref: `${schemaVersion}|${namespace}|${entity}|${documentId}|${period}`
        },
        pointer: {
          fragment_id: fragmentId,
          route,
          source: {
            namespace,
            entity,
            id: documentId,
            period
          },
          locator: {
            field_path: "risk_score",
            item_index: index
          }
        },
        evidence_kind: "anomaly_signal",
        mechanism_note: "Risk signal",
        confidence: "medium",
        limitation: null,
        payload: { risk_score: 2 }
      };
    }),
    why_included: [],
    selection_reason: [],
    risk_factors: [],
    business_interpretation: [],
    confidence: "high",
    limitations: ["Need period clarification"],
    errors: []
  };
}

// Contract tests for the stage-1 scaffolding: the accountant scoring rubric
// and the bounded investigation-state update.
describe("stage1 contract scaffolding", () => {
  it("provides rubric v0.1 for accountant-facing metrics", () => {
    const metricNames = Object.keys(ACCOUNTANT_SCORING_RUBRIC_V01);

    // Array equality is order-sensitive, so this pins both the set of metrics
    // and the order in which the rubric object declares them.
    expect(metricNames).toEqual([
      "retrieval_differentiation_rate",
      "generic_explanation_rate",
      "accountant_actionability_score",
      "false_confidence_rate",
      "broad_answer_rate",
      "mechanism_specificity_score",
      "followup_context_retention_score",
      "stage4_contract_compliance_rate"
    ]);

    // Every metric must define a band for each end of the scale (0 and 5).
    for (const metric of metricNames) {
      const bands = ACCOUNTANT_SCORING_RUBRIC_V01[metric as keyof typeof ACCOUNTANT_SCORING_RUBRIC_V01];
      expect(bands.some((item) => item.score === 0)).toBe(true);
      expect(bands.some((item) => item.score === 5)).toBe(true);
    }
  });

  it("updates investigation_state with bounded fields", () => {
    const initial = createEmptyInvestigationState("asst-contract-test", "2026-03-25T10:00:00.000Z");
    const updated = updateInvestigationState({
      previous: initial,
      timestamp: "2026-03-25T10:01:00.000Z",
      questionId: "msg-1",
      userMessage: "Prover schet 97 za 2020-06 i podsveti risk.",
      routeSummary: buildRouteSummary(),
      requirements: [
        {
          requirement_id: "R1",
          source_fragment_id: "F1",
          requirement_text: "Проверить счет 97",
          subject_tokens: ["счет_97"],
          status: "covered",
          route: "store_feature_risk"
        }
      ],
      coverageReport: {
        requirements_total: 1,
        requirements_covered: 1,
        requirements_uncovered: [],
        requirements_partially_covered: [],
        clarification_needed_for: [],
        out_of_scope_requirements: []
      },
      // 40 evidence entries are supplied to exercise the evidence-ref bound.
      // NOTE(review): presumably 40 exceeds INVESTIGATION_MAX_EVIDENCE_REFS;
      // confirm against the constant's definition.
      retrievalResults: [buildRetrievalResult(40)],
      replyType: "factual_with_explanation"
    });

    expect(updated.turn_index).toBe(1);
    expect(updated.status).toBe("active");

    // Period and account are expected to be picked up from the inputs above.
    expect(updated.focus.period).toBe("2020-06");
    expect(updated.focus.primary_accounts).toContain("97");

    // Bounded collections must never grow past their declared maxima.
    expect(updated.evidence_refs.length).toBeLessThanOrEqual(INVESTIGATION_MAX_EVIDENCE_REFS);
    expect(updated.open_uncertainties.length).toBeLessThanOrEqual(INVESTIGATION_MAX_UNCERTAINTIES);

    expect(updated.query_mode_hint).toBe("direct_answer");
    expect(updated.followup_context?.referenced_requirement_ids).toEqual(["R1"]);
  });
});