// 143 lines · 8.1 KiB · TypeScript
import { describe, expect, it } from "vitest";
|
|
import { AddressQueryService } from "../src/services/addressQueryService";
|
|
import { resolveAssistantOrchestrationDecision, resolveLivingAssistantModeDecision } from "../src/services/assistantService";
|
|
|
|
/**
 * Regression suite titled after the 2026-04-11 real runs.
 *
 * Each case feeds one or more prompts to a routing resolver (or to
 * `AddressQueryService.tryHandle`) and pins the fields of the returned
 * decision / reply that the assertions below check.
 */
describe("wave17 run regressions (2026-04-11 real runs)", () => {
  /**
   * Runs the orchestration resolver for a turn that has no follow-up context
   * and no pre-decompose metadata; the same prompt is used as both the raw
   * and the effective address message.
   */
  const decideForFreshTurn = (prompt: string) =>
    resolveAssistantOrchestrationDecision({
      rawUserMessage: prompt,
      effectiveAddressUserMessage: prompt,
      followupContext: null,
      llmPreDecomposeMeta: null,
      useMock: false
    });

  /**
   * Runs the living-mode resolver with the address lane not triggered and a
   * low-confidence "unsupported" pre-decompose mode.
   */
  const resolveModeWithUnsupportedPredecompose = (userMessage: string) =>
    resolveLivingAssistantModeDecision({
      userMessage,
      addressLaneTriggered: false,
      useMock: false,
      predecomposeMode: "unsupported",
      predecomposeModeConfidence: "low"
    });

  it("keeps real run 17:51 prompts with explicit accounting signals in address lane", () => {
    // Any of these gate reasons is accepted for an address-lane decision.
    const acceptedGateReasons = [
      "address_mode_classifier_detected",
      "address_signal_detected",
      "address_intent_resolver_detected"
    ];
    const promptsFromRun = [
      "\u043a\u0430\u043a\u0438\u0435 \u0443 \u043d\u0430\u0441 \u043d\u0430\u0438\u0431\u043e\u043b\u044c\u0448\u0438\u0435 \u0430\u0432\u0430\u043d\u0441\u044b \u043a \u043f\u043e\u0441\u0442\u0430\u0432\u0449\u0438\u043a\u0430\u043c, \u043a\u043e\u0442\u043e\u0440\u044b\u0435 \u0443\u0436\u0435 \u0434\u0430\u0432\u043d\u043e \u0432\u0438\u0441\u044f\u0442 \u0431\u0435\u0437 \u0437\u0430\u043a\u0440\u044b\u0442\u0438\u044f?",
      "\u0432 \u043a\u0430\u043a\u0438\u0445 \u0441\u0434\u0435\u043b\u043a\u0430\u0445 \u043c\u044b \u0432\u0438\u0434\u0438\u043c \u043e\u0442\u0433\u0440\u0443\u0437\u043a\u0438, \u043d\u043e \u0434\u0435\u043d\u044c\u0433\u0438 \u0442\u0430\u043a \u0438 \u043d\u0435 \u043f\u0440\u0438\u0448\u043b\u0438?"
    ];

    promptsFromRun.forEach((text) => {
      const outcome = decideForFreshTurn(text);

      expect(outcome.runAddressLane).toBe(true);
      expect(acceptedGateReasons).toContain(outcome.toolGateReason);
      expect(outcome.livingMode).toBe("address_data");
      expect(outcome.livingReason).toBe("address_lane_triggered");
    });
  });

  it("keeps vague counterparty risk wording in deep analysis until stronger data anchor appears", () => {
    const vaguePrompt =
      "\u043a\u0430\u043a\u0438\u0435 \u043a\u043e\u043d\u0442\u0440\u0430\u0433\u0435\u043d\u0442\u044b \u0443 \u043d\u0430\u0441 \u0434\u0430\u0432\u043d\u043e \u043d\u0435 \u043f\u043b\u0430\u0442\u044f\u0442 \u0438 \u044d\u0442\u043e \u0443\u0436\u0435 \u043f\u043e\u0445\u043e\u0436\u0435 \u043d\u0430 \u043f\u0440\u043e\u0431\u043b\u0435\u043c\u0443?";
    // The same reason code is expected on both the tool gate and the living mode.
    const deepFallbackReason = "deep_analysis_signal_fallback_to_deep";

    const outcome = decideForFreshTurn(vaguePrompt);

    expect(outcome.runAddressLane).toBe(false);
    expect(outcome.toolGateReason).toBe(deepFallbackReason);
    expect(outcome.livingMode).toBe("deep_analysis");
    expect(outcome.livingReason).toBe(deepFallbackReason);
  });

  it("keeps short follow-up style prompts out of chat drift when predecompose says unsupported", () => {
    const terseFollowups = [
      "\u0430 \u0431\u0435\u0437 \u0441\u0432\u043e\u0434\u043a\u0438?",
      "\u0438 \u043f\u043e \u044d\u0442\u043e\u043c\u0443 \u0442\u043e\u0436\u0435?",
      "\u043f\u0440\u044f\u043c \u0433\u0434\u0435 \u0436\u0435?"
    ];

    terseFollowups.forEach((text) => {
      const outcome = resolveModeWithUnsupportedPredecompose(text);

      expect(outcome.mode).toBe("deep_analysis");
      expect(outcome.reason).toBe("predecompose_unsupported_mode_fallback_to_deep");
    });
  });

  it("routes data-scope slang wording to chat mode", () => {
    const outcome = resolveModeWithUnsupportedPredecompose(
      "\u0430 \u043a\u0430\u043a\u0430\u044f \u0432\u043e\u043e\u0431\u0449\u0435 \u0431\u0430\u0437\u0430 \u0441\u044e\u0434\u0430 \u043f\u043e\u0434\u0440\u0443\u0431\u043b\u0435\u043d\u0430?"
    );

    expect(outcome.mode).toBe("chat");
    expect(outcome.reason).toBe("assistant_data_scope_query_detected");
  });

  it("keeps open-contracts request in address lane even with stale deep followup context", () => {
    const openContractsPrompt =
      "\u041f\u043e\u043a\u0430\u0436\u0438 \u043d\u0435\u0437\u0430\u043a\u0440\u044b\u0442\u044b\u0435 \u0434\u043e\u0433\u043e\u0432\u043e\u0440\u044b \u043d\u0430 2020-12-31";

    // Follow-up context left over from an earlier turn in a different domain.
    // NOTE(review): `as any` bypasses type checking here; presumably the
    // fixture is only a partial shape of the real context type — confirm.
    const staleFollowup = {
      previous_question_id: "msg-prev",
      last_user_message: "\u043f\u043e\u0447\u0435\u043c\u0443 \u0442\u0430\u043a \u043f\u043e \u0437\u0430\u043a\u0440\u044b\u0442\u0438\u044e \u043c\u0435\u0441\u044f\u0446\u0430",
      active_domain: "month_close_costs_20_44",
      active_requirement_ids: ["R1"],
      uncovered_requirement_ids: ["R1"],
      referenced_requirement_ids: ["R1"]
    } as any;

    // Pre-decompose metadata whose contract reports a high-confidence address query.
    const addressQueryMeta = {
      applied: true,
      llmCanonicalCandidateDetected: true,
      reason: "normalized_fragment_applied",
      predecomposeContract: {
        mode: "address_query",
        mode_confidence: "high",
        intent: "open_contracts_confirmed_as_of_date",
        intent_confidence: "medium"
      }
    } as any;

    const outcome = resolveAssistantOrchestrationDecision({
      rawUserMessage: openContractsPrompt,
      effectiveAddressUserMessage: openContractsPrompt,
      followupContext: staleFollowup,
      llmPreDecomposeMeta: addressQueryMeta,
      useMock: false
    });

    expect(outcome.runAddressLane).toBe(true);
    expect(outcome.livingMode).toBe("address_data");
    expect(outcome.livingReason).toBe("address_lane_triggered");
    expect(outcome.orchestrationContract?.deep_analysis_signal_fallback_to_deep).toBe(false);
  });

  it("supports strongest aggregate revenue route while keeping unsupported turnover prompt soft", async () => {
    const addressService = new AddressQueryService();

    // First request: expected to be handled as a factual answer.
    const revenueResult = await addressService.tryHandle(
      "\u043a\u0430\u043a\u043e\u0439 \u0441\u0430\u043c\u044b\u0439 \u0434\u043e\u0445\u043e\u0434\u043d\u044b\u0439 \u0433\u043e\u0434?"
    );

    expect(revenueResult?.handled).toBe(true);
    expect(revenueResult?.reply_type).toBe("factual");
    expect(revenueResult?.debug.detected_intent).toBe("customer_revenue_and_payments");
    expect(revenueResult?.debug.selected_recipe).toBe("address_customer_revenue_and_payments_v1");
    // A missing reply is coerced to "" so the substring check fails cleanly.
    expect(String(revenueResult?.reply_text ?? "")).toContain(
      "\u0422\u043e\u043f-3 \u043b\u0435\u0442 \u043f\u043e \u0441\u0443\u043c\u043c\u0435 \u043f\u043e\u0441\u0442\u0443\u043f\u043b\u0435\u043d\u0438\u0439"
    );

    // Second request on the same service instance: expected to be handled,
    // but only with partial coverage and an "unsupported" limitation category.
    const turnoverResult = await addressService.tryHandle(
      "\u043a\u0430\u043a\u0438\u0435 \u043e\u0431\u043e\u0440\u043e\u0442\u044b \u043f\u043e \u0430\u043b\u044c\u0442\u0435\u0440\u043d\u0430\u0442\u0438\u0432\u0435 \u0437\u0430 2020 \u0433\u043e\u0434"
    );
    const turnoverReplyText = String(turnoverResult?.reply_text ?? "");

    expect(turnoverResult?.handled).toBe(true);
    expect(turnoverResult?.reply_type).toBe("partial_coverage");
    expect(turnoverResult?.debug.limited_reason_category).toBe("unsupported");
    // The reply must carry the first phrase and must not carry the second.
    expect(turnoverReplyText).toContain(
      "\u0427\u0442\u043e \u043c\u043e\u0433\u0443 \u0441\u0434\u0435\u043b\u0430\u0442\u044c \u0441\u0435\u0439\u0447\u0430\u0441:"
    );
    expect(turnoverReplyText).not.toContain(
      "\u0421\u0435\u0439\u0447\u0430\u0441 \u044d\u0442\u043e\u0442 \u0442\u0438\u043f \u0432\u043e\u043f\u0440\u043e\u0441\u0430 \u0432\u043d\u0435 \u043f\u043e\u0434\u0434\u0435\u0440\u0436\u0438\u0432\u0430\u0435\u043c\u043e\u0433\u043e \u043a\u043e\u043d\u0442\u0443\u0440\u0430 \u0430\u0434\u0440\u0435\u0441\u043d\u043e\u0433\u043e \u0440\u0435\u0436\u0438\u043c\u0430"
    );
  });
});
|