NODEDC_1C/llm_normalizer/backend/tests/assistantOrchestrationContr...

305 lines
9.5 KiB
TypeScript

import { describe, expect, it } from "vitest";
import type { AnswerGroundingCheck, RequirementCoverageReport, UnifiedRetrievalResult } from "../src/types/assistant";
import type { RouteHintSummary } from "../src/types/normalizer";
import {
buildAssistantCoverageContractV1,
buildAssistantEvidenceBundleContractV1,
buildAssistantExecutionPlanContractV1,
buildAssistantQueryFrameContractV1,
classifyAssistantOutcomeClassV1
} from "../src/services/assistantOrchestrationContracts";
/**
 * Test fixture factory for a requirement coverage report.
 *
 * The baseline describes a single requirement ("R1") that is not covered;
 * every other list is empty. Any field present in `input` replaces the
 * baseline value of the same name.
 *
 * @param input Optional field overrides applied on top of the baseline.
 * @returns A new report object (fresh arrays on every call).
 */
function buildCoverage(input?: Partial<RequirementCoverageReport>): RequirementCoverageReport {
  const baseline: RequirementCoverageReport = {
    requirements_total: 1,
    requirements_covered: 0,
    requirements_uncovered: ["R1"],
    requirements_partially_covered: [],
    clarification_needed_for: [],
    out_of_scope_requirements: []
  };
  // Overrides are spread last so they win over the baseline.
  return { ...baseline, ...input };
}
/**
 * Test fixture factory for an answer grounding check.
 *
 * The baseline has status "no_grounded_answer", a matching route subject,
 * "R1" listed as missing, and empty reason/summary lists. Any field present
 * in `input` replaces the baseline value of the same name.
 *
 * @param input Optional field overrides applied on top of the baseline.
 * @returns A new grounding check object (fresh arrays on every call).
 */
function buildGrounding(input?: Partial<AnswerGroundingCheck>): AnswerGroundingCheck {
  const baseline: AnswerGroundingCheck = {
    status: "no_grounded_answer",
    route_subject_match: true,
    missing_requirements: ["R1"],
    reasons: [],
    why_included_summary: [],
    selection_reason_summary: []
  };
  // Overrides are spread last so they win over the baseline.
  return { ...baseline, ...input };
}
/**
 * Test fixture factory for a unified retrieval result.
 *
 * The baseline is an "ok" summary result for fragment "F1" / requirement "R1"
 * on the "hybrid_store_plus_live" route, with medium confidence and every
 * collection field empty. Any field present in `input` replaces the baseline
 * value of the same name.
 *
 * @param input Optional field overrides applied on top of the baseline.
 * @returns A new retrieval result object (fresh arrays/objects on every call).
 */
function buildRetrieval(input?: Partial<UnifiedRetrievalResult>): UnifiedRetrievalResult {
  const baseline: UnifiedRetrievalResult = {
    fragment_id: "F1",
    requirement_ids: ["R1"],
    route: "hybrid_store_plus_live",
    status: "ok",
    result_type: "summary",
    items: [],
    summary: {},
    evidence: [],
    why_included: [],
    selection_reason: [],
    risk_factors: [],
    business_interpretation: [],
    confidence: "medium",
    limitations: [],
    errors: []
  };
  // Overrides are spread last so they win over the baseline.
  return { ...baseline, ...input };
}
/**
 * Test fixture factory for a route hint summary.
 *
 * Produces a fixed "deterministic_v2" summary: the message is in scope with
 * high confidence, the planner reports two fragments that are both in scope,
 * there are no decisions, and the fallback is of type "none" with no message.
 *
 * @returns A new summary object on every call.
 */
function buildRouteSummary(): RouteHintSummary {
  const planner: RouteHintSummary["planner"] = {
    total_fragments: 2,
    in_scope_fragments: 2,
    out_of_scope_fragments: 0,
    discarded_fragments: 0,
    contains_multiple_tasks: false
  };
  const fallback: RouteHintSummary["fallback"] = {
    type: "none",
    message: null
  };
  const summary: RouteHintSummary = {
    mode: "deterministic_v2",
    message_in_scope: true,
    scope_confidence: "high",
    planner,
    decisions: [],
    fallback
  };
  return summary;
}
// Contract-level tests for the v1 assistant orchestration builders and the
// outcome classifier. All inputs are synthetic fixtures built in this file.
describe("assistant orchestration contracts v1", () => {
  // Builds a query frame and an execution plan from a two-fragment scenario
  // and checks the headline fields of both contracts.
  it("builds query frame and execution plan contracts with normalized analysis context", () => {
    const question = "Покажи хвосты по счету 60";

    const frame = buildAssistantQueryFrameContractV1({
      userMessage: question,
      normalizedQuestion: question,
      normalized: {
        schema_version: "normalized_query_v2_0_2",
        user_message_raw: question,
        message_in_scope: true,
        scope_confidence: "high",
        contains_multiple_tasks: false,
        fragments: [{ fragment_id: "F1" }, { fragment_id: "F2" }],
        discarded_fragments: [],
        global_notes: {
          needs_clarification: false,
          clarification_reason: null
        }
        // Deliberately partial payload; only the fields listed here are provided.
      } as any,
      routeSummary: buildRouteSummary(),
      droppedIntentSegments: ["лишний сегмент"],
      analysisContext: {
        as_of_date: "2020-07-31",
        source: "eval_analysis_date",
        // Not a recognised mode; the assertion below expects "auto" in the contract.
        snapshot_mode: "unexpected_mode"
      }
    });

    const plan = buildAssistantExecutionPlanContractV1({
      executionPlan: [
        {
          fragment_id: "F1",
          requirement_ids: ["R1"],
          route: "hybrid_store_plus_live",
          should_execute: true,
          no_route_reason: null,
          clarification_reason: null
        },
        {
          fragment_id: "F2",
          requirement_ids: ["R2"],
          route: "no_route",
          should_execute: false,
          no_route_reason: "insufficient_specificity",
          clarification_reason: "need_period"
        }
      ],
      requirements: [
        {
          requirement_id: "R1",
          source_fragment_id: "F1",
          requirement_text: "req1",
          subject_tokens: [],
          status: "covered",
          route: "hybrid_store_plus_live"
        },
        {
          requirement_id: "R2",
          source_fragment_id: "F2",
          requirement_text: "req2",
          subject_tokens: [],
          status: "clarification_needed",
          route: null
        }
      ]
    });

    // Query frame expectations.
    expect(frame.schema_version).toBe("assistant_query_frame_v1");
    expect(frame.route_summary_mode).toBe("deterministic_v2");
    expect(frame.fragments_total).toBe(2);
    const frameContext = frame.analysis_context;
    expect(frameContext?.as_of_date).toBe("2020-07-31");
    expect(frameContext?.snapshot_mode).toBe("auto");

    // Execution plan expectations.
    expect(plan.schema_version).toBe("assistant_execution_plan_v1");
    expect(plan.steps).toHaveLength(2);
    expect(plan.requirements_total).toBe(2);
  });

  // A fully covered, grounded reply vs. a reply blocked for route mismatch.
  it("classifies fully answered and misrouted outcomes", () => {
    const everythingCovered = buildCoverage({
      requirements_total: 1,
      requirements_covered: 1,
      requirements_uncovered: [],
      requirements_partially_covered: [],
      clarification_needed_for: [],
      out_of_scope_requirements: []
    });
    const groundedCheck = buildGrounding({
      status: "grounded",
      missing_requirements: []
    });
    const answeredClass = classifyAssistantOutcomeClassV1({
      replyType: "factual_with_explanation",
      coverageReport: everythingCovered,
      grounding: groundedCheck,
      retrievalResults: [buildRetrieval({ status: "ok" })]
    });

    const defaultCoverage = buildCoverage();
    const mismatchCheck = buildGrounding({
      status: "route_mismatch_blocked"
    });
    const misroutedClass = classifyAssistantOutcomeClassV1({
      replyType: "route_mismatch_blocked",
      coverageReport: defaultCoverage,
      grounding: mismatchCheck,
      retrievalResults: [buildRetrieval({ status: "partial" })]
    });

    expect(answeredClass).toBe("FULLY_ANSWERED");
    expect(misroutedClass).toBe("MISROUTED");
  });

  // All retrievals erroring vs. an empty retrieval with nothing grounded.
  it("classifies tooling and entity-binding failures", () => {
    const toolingCoverage = buildCoverage();
    const toolingGrounding = buildGrounding();
    // Two independent retrieval results, both with status "error".
    const failedRetrievals = Array.from({ length: 2 }, () => buildRetrieval({ status: "error" }));
    const toolingClass = classifyAssistantOutcomeClassV1({
      replyType: "factual",
      coverageReport: toolingCoverage,
      grounding: toolingGrounding,
      retrievalResults: failedRetrievals
    });

    const nothingCovered = buildCoverage({
      requirements_total: 1,
      requirements_covered: 0,
      requirements_uncovered: ["R1"]
    });
    const ungrounded = buildGrounding({
      status: "no_grounded_answer",
      route_subject_match: true,
      missing_requirements: ["R1"]
    });
    const bindingClass = classifyAssistantOutcomeClassV1({
      replyType: "no_grounded_answer",
      coverageReport: nothingCovered,
      grounding: ungrounded,
      retrievalResults: [buildRetrieval({ status: "empty" })]
    });

    expect(toolingClass).toBe("BLOCKED_BY_TOOLING");
    expect(bindingClass).toBe("FAILED_TO_BIND_ENTITIES");
  });

  // Evidence bundle totals over three retrieval results (ok / partial / error),
  // plus a coverage contract carrying a partially-answered outcome class.
  it("builds evidence bundle and coverage contracts", () => {
    // Fresh "one of two requirements covered" report on each call.
    const halfCovered = () =>
      buildCoverage({
        requirements_total: 2,
        requirements_covered: 1,
        requirements_uncovered: ["R2"]
      });

    const bundle = buildAssistantEvidenceBundleContractV1({
      retrievalCalls: [{ id: 1 }, { id: 2 }, { id: 3 }],
      retrievalResults: [
        buildRetrieval({
          status: "ok",
          evidence: [
            {
              evidence_id: "ev-1",
              claim_ref: "requirement:R1",
              source_type: "retrieval_item",
              source_ref: {
                schema_version: "evidence_source_ref_v1",
                namespace: "snapshot_2020",
                entity: "document",
                id: "doc-1",
                period: "2020-07",
                canonical_ref: "evidence_source_ref_v1|snapshot_2020|document|doc-1|2020-07"
              },
              pointer: {
                fragment_id: "F1",
                route: "hybrid_store_plus_live",
                source: {
                  namespace: "snapshot_2020",
                  entity: "document",
                  id: "doc-1",
                  period: "2020-07"
                },
                locator: {
                  field_path: "amount",
                  item_index: 0
                }
              },
              evidence_kind: "mechanism_link",
              mechanism_note: "signal",
              confidence: "medium",
              limitation: null,
              payload: {}
            }
          ],
          limitations: ["needs_extra_period"]
        }),
        buildRetrieval({
          status: "partial",
          evidence: [],
          errors: ["timeout"]
        }),
        buildRetrieval({
          status: "error",
          evidence: [],
          errors: ["mcp_unavailable"]
        })
      ]
    });

    const classifierCoverage = halfCovered();
    const classifierGrounding = buildGrounding({
      status: "partial",
      route_subject_match: true,
      missing_requirements: ["R2"]
    });
    const outcomeClass = classifyAssistantOutcomeClassV1({
      replyType: "partial_coverage",
      coverageReport: classifierCoverage,
      grounding: classifierGrounding,
      retrievalResults: [buildRetrieval({ status: "ok" }), buildRetrieval({ status: "partial" })]
    });

    const contractCoverage = halfCovered();
    const contractGrounding = buildGrounding({
      status: "partial",
      missing_requirements: ["R2"]
    });
    const coverage = buildAssistantCoverageContractV1({
      coverageReport: contractCoverage,
      grounding: contractGrounding,
      outcomeClass
    });

    // Evidence bundle totals.
    expect(bundle.retrieval_calls_total).toBe(3);
    expect(bundle.retrieval_results_total).toBe(3);
    const { ok, partial, error } = bundle.retrieval_status_breakdown;
    expect(ok).toBe(1);
    expect(partial).toBe(1);
    expect(error).toBe(1);
    expect(bundle.evidence_total).toBe(1);
    expect(bundle.source_refs_total).toBe(1);
    expect(bundle.error_total).toBe(2);

    // The coverage contract carries the classified outcome.
    expect(coverage.outcome_class).toBe("PARTIALLY_ANSWERED");
  });
});