// Source listing metadata: 305 lines, 9.5 KiB, TypeScript.
import { describe, expect, it } from "vitest";
|
|
import type { AnswerGroundingCheck, RequirementCoverageReport, UnifiedRetrievalResult } from "../src/types/assistant";
|
|
import type { RouteHintSummary } from "../src/types/normalizer";
|
|
import {
|
|
buildAssistantCoverageContractV1,
|
|
buildAssistantEvidenceBundleContractV1,
|
|
buildAssistantExecutionPlanContractV1,
|
|
buildAssistantQueryFrameContractV1,
|
|
classifyAssistantOutcomeClassV1
|
|
} from "../src/services/assistantOrchestrationContracts";
|
|
|
|
/**
 * Builds a `RequirementCoverageReport` fixture for the tests in this file.
 *
 * The defaults describe one requirement (`R1`) that is not covered at all;
 * every other list is empty. Any field present in `input` replaces the
 * matching default because `input` is spread last.
 *
 * @param input - Optional field overrides applied on top of the defaults.
 * @returns A new report object (fresh arrays on every call).
 */
function buildCoverage(input?: Partial<RequirementCoverageReport>): RequirementCoverageReport {
  return {
    requirements_total: 1,
    requirements_covered: 0,
    requirements_uncovered: ["R1"],
    requirements_partially_covered: [],
    clarification_needed_for: [],
    out_of_scope_requirements: [],
    // Spreading `undefined` is a no-op, so calling without arguments yields the defaults.
    ...input
  };
}
|
|
|
|
/**
 * Builds an `AnswerGroundingCheck` fixture for the tests in this file.
 *
 * The defaults use status `"no_grounded_answer"`, a matching route subject,
 * and `R1` as the only missing requirement; the remaining lists are empty.
 * Any field present in `input` replaces the matching default because `input`
 * is spread last.
 *
 * @param input - Optional field overrides applied on top of the defaults.
 * @returns A new grounding-check object (fresh arrays on every call).
 */
function buildGrounding(input?: Partial<AnswerGroundingCheck>): AnswerGroundingCheck {
  return {
    status: "no_grounded_answer",
    route_subject_match: true,
    missing_requirements: ["R1"],
    reasons: [],
    why_included_summary: [],
    selection_reason_summary: [],
    // Spreading `undefined` is a no-op, so calling without arguments yields the defaults.
    ...input
  };
}
|
|
|
|
/**
 * Builds a `UnifiedRetrievalResult` fixture for the tests in this file.
 *
 * The defaults describe an `"ok"` summary result for fragment `F1` /
 * requirement `R1` on the `"hybrid_store_plus_live"` route, with medium
 * confidence and every collection empty. Any field present in `input`
 * replaces the matching default because `input` is spread last.
 *
 * @param input - Optional field overrides applied on top of the defaults.
 * @returns A new retrieval-result object (fresh arrays/objects on every call).
 */
function buildRetrieval(input?: Partial<UnifiedRetrievalResult>): UnifiedRetrievalResult {
  return {
    fragment_id: "F1",
    requirement_ids: ["R1"],
    route: "hybrid_store_plus_live",
    status: "ok",
    result_type: "summary",
    items: [],
    summary: {},
    evidence: [],
    why_included: [],
    selection_reason: [],
    risk_factors: [],
    business_interpretation: [],
    confidence: "medium",
    limitations: [],
    errors: [],
    // Spreading `undefined` is a no-op, so calling without arguments yields the defaults.
    ...input
  };
}
|
|
|
|
/**
 * Builds the fixed `RouteHintSummary` fixture used by the query-frame test.
 *
 * The summary is in `"deterministic_v2"` mode, marks the message as in scope
 * with high confidence, reports two planner fragments (both in scope, none
 * discarded), carries no decisions, and has a `"none"` fallback.
 *
 * @returns A new route summary object on every call.
 */
function buildRouteSummary(): RouteHintSummary {
  return {
    mode: "deterministic_v2",
    message_in_scope: true,
    scope_confidence: "high",
    planner: {
      total_fragments: 2,
      in_scope_fragments: 2,
      out_of_scope_fragments: 0,
      discarded_fragments: 0,
      contains_multiple_tasks: false
    },
    decisions: [],
    fallback: {
      type: "none",
      message: null
    }
  };
}
|
|
|
|
/**
 * Contract-level tests for the assistant orchestration helpers: the query
 * frame, execution plan, evidence bundle and coverage contract builders, and
 * the outcome classifier.
 */
describe("assistant orchestration contracts v1", () => {
  it("builds query frame and execution plan contracts with normalized analysis context", () => {
    // The same text is used as the raw message, the normalized question and the
    // raw message recorded inside the normalized payload.
    const userMessage = "Покажи хвосты по счету 60";

    const queryFrame = buildAssistantQueryFrameContractV1({
      userMessage,
      normalizedQuestion: userMessage,
      // Deliberately minimal payload (fragments carry only `fragment_id`), so it
      // is cast instead of typed. NOTE(review): confirm against the normalized
      // query type if the builder starts reading more fields.
      normalized: {
        schema_version: "normalized_query_v2_0_2",
        user_message_raw: userMessage,
        message_in_scope: true,
        scope_confidence: "high",
        contains_multiple_tasks: false,
        fragments: [{ fragment_id: "F1" }, { fragment_id: "F2" }],
        discarded_fragments: [],
        global_notes: {
          needs_clarification: false,
          clarification_reason: null
        }
      } as any,
      routeSummary: buildRouteSummary(),
      droppedIntentSegments: ["лишний сегмент"],
      analysisContext: {
        as_of_date: "2020-07-31",
        source: "eval_analysis_date",
        // Intentionally not a recognised mode: the assertion below expects the
        // contract to report "auto" instead.
        snapshot_mode: "unexpected_mode"
      }
    });

    const executionPlan = buildAssistantExecutionPlanContractV1({
      executionPlan: [
        {
          fragment_id: "F1",
          requirement_ids: ["R1"],
          route: "hybrid_store_plus_live",
          should_execute: true,
          no_route_reason: null,
          clarification_reason: null
        },
        {
          fragment_id: "F2",
          requirement_ids: ["R2"],
          route: "no_route",
          should_execute: false,
          no_route_reason: "insufficient_specificity",
          clarification_reason: "need_period"
        }
      ],
      requirements: [
        {
          requirement_id: "R1",
          source_fragment_id: "F1",
          requirement_text: "req1",
          subject_tokens: [],
          status: "covered",
          route: "hybrid_store_plus_live"
        },
        {
          requirement_id: "R2",
          source_fragment_id: "F2",
          requirement_text: "req2",
          subject_tokens: [],
          status: "clarification_needed",
          route: null
        }
      ]
    });

    expect(queryFrame.schema_version).toBe("assistant_query_frame_v1");
    expect(queryFrame.route_summary_mode).toBe("deterministic_v2");
    expect(queryFrame.fragments_total).toBe(2);
    expect(queryFrame.analysis_context?.as_of_date).toBe("2020-07-31");
    expect(queryFrame.analysis_context?.snapshot_mode).toBe("auto");
    expect(executionPlan.schema_version).toBe("assistant_execution_plan_v1");
    expect(executionPlan.steps).toHaveLength(2);
    expect(executionPlan.requirements_total).toBe(2);
  });

  it("classifies fully answered and misrouted outcomes", () => {
    // Everything covered + grounded answer + successful retrieval.
    const fullyAnswered = classifyAssistantOutcomeClassV1({
      replyType: "factual_with_explanation",
      coverageReport: buildCoverage({
        requirements_total: 1,
        requirements_covered: 1,
        requirements_uncovered: [],
        requirements_partially_covered: [],
        clarification_needed_for: [],
        out_of_scope_requirements: []
      }),
      grounding: buildGrounding({
        status: "grounded",
        missing_requirements: []
      }),
      retrievalResults: [buildRetrieval({ status: "ok" })]
    });

    // Reply and grounding both flag a blocked route mismatch.
    const misrouted = classifyAssistantOutcomeClassV1({
      replyType: "route_mismatch_blocked",
      coverageReport: buildCoverage(),
      grounding: buildGrounding({
        status: "route_mismatch_blocked"
      }),
      retrievalResults: [buildRetrieval({ status: "partial" })]
    });

    expect(fullyAnswered).toBe("FULLY_ANSWERED");
    expect(misrouted).toBe("MISROUTED");
  });

  it("classifies tooling and entity-binding failures", () => {
    // Every retrieval result is an error while coverage/grounding stay at defaults.
    const toolingBlocked = classifyAssistantOutcomeClassV1({
      replyType: "factual",
      coverageReport: buildCoverage(),
      grounding: buildGrounding(),
      retrievalResults: [buildRetrieval({ status: "error" }), buildRetrieval({ status: "error" })]
    });

    // Retrieval ran but came back empty, leaving R1 uncovered and ungrounded.
    const entityBindingFailure = classifyAssistantOutcomeClassV1({
      replyType: "no_grounded_answer",
      coverageReport: buildCoverage({
        requirements_total: 1,
        requirements_covered: 0,
        requirements_uncovered: ["R1"]
      }),
      grounding: buildGrounding({
        status: "no_grounded_answer",
        route_subject_match: true,
        missing_requirements: ["R1"]
      }),
      retrievalResults: [buildRetrieval({ status: "empty" })]
    });

    expect(toolingBlocked).toBe("BLOCKED_BY_TOOLING");
    expect(entityBindingFailure).toBe("FAILED_TO_BIND_ENTITIES");
  });

  it("builds evidence bundle and coverage contracts", () => {
    // Three calls and three results: one "ok" with a single evidence item, one
    // "partial" with an error, one "error" with an error.
    const evidenceBundle = buildAssistantEvidenceBundleContractV1({
      retrievalCalls: [{ id: 1 }, { id: 2 }, { id: 3 }],
      retrievalResults: [
        buildRetrieval({
          status: "ok",
          evidence: [
            {
              evidence_id: "ev-1",
              claim_ref: "requirement:R1",
              source_type: "retrieval_item",
              source_ref: {
                schema_version: "evidence_source_ref_v1",
                namespace: "snapshot_2020",
                entity: "document",
                id: "doc-1",
                period: "2020-07",
                canonical_ref: "evidence_source_ref_v1|snapshot_2020|document|doc-1|2020-07"
              },
              pointer: {
                fragment_id: "F1",
                route: "hybrid_store_plus_live",
                source: {
                  namespace: "snapshot_2020",
                  entity: "document",
                  id: "doc-1",
                  period: "2020-07"
                },
                locator: {
                  field_path: "amount",
                  item_index: 0
                }
              },
              evidence_kind: "mechanism_link",
              mechanism_note: "signal",
              confidence: "medium",
              limitation: null,
              payload: {}
            }
          ],
          limitations: ["needs_extra_period"]
        }),
        buildRetrieval({
          status: "partial",
          evidence: [],
          errors: ["timeout"]
        }),
        buildRetrieval({
          status: "error",
          evidence: [],
          errors: ["mcp_unavailable"]
        })
      ]
    });

    // One of two requirements covered; the classifier and the coverage contract
    // are fed the very same report and grounding check.
    const partialCoverage = buildCoverage({
      requirements_total: 2,
      requirements_covered: 1,
      requirements_uncovered: ["R2"]
    });
    const partialGrounding = buildGrounding({
      status: "partial",
      route_subject_match: true,
      missing_requirements: ["R2"]
    });

    const outcomeClass = classifyAssistantOutcomeClassV1({
      replyType: "partial_coverage",
      coverageReport: partialCoverage,
      grounding: partialGrounding,
      retrievalResults: [buildRetrieval({ status: "ok" }), buildRetrieval({ status: "partial" })]
    });

    const coverageContract = buildAssistantCoverageContractV1({
      coverageReport: partialCoverage,
      grounding: partialGrounding,
      outcomeClass
    });

    expect(evidenceBundle.retrieval_calls_total).toBe(3);
    expect(evidenceBundle.retrieval_results_total).toBe(3);
    expect(evidenceBundle.retrieval_status_breakdown.ok).toBe(1);
    expect(evidenceBundle.retrieval_status_breakdown.partial).toBe(1);
    expect(evidenceBundle.retrieval_status_breakdown.error).toBe(1);
    expect(evidenceBundle.evidence_total).toBe(1);
    expect(evidenceBundle.source_refs_total).toBe(1);
    expect(evidenceBundle.error_total).toBe(2);
    expect(coverageContract.outcome_class).toBe("PARTIALLY_ANSWERED");
  });
});
|