ARCH: разрешить net-flow discovery переопределять stale lifecycle carryover

This commit is contained in:
dctouch 2026-04-21 19:10:29 +03:00
parent 429bd3d8ec
commit d323dcd509
4 changed files with 304 additions and 41 deletions

View File

@ -0,0 +1,71 @@
{
"schema_version": "domain_truth_harness_spec_v1",
"scenario_id": "address_truth_harness_phase21_net_followup_after_broad_eval",
"domain": "address_phase21_net_followup_after_broad_eval",
"title": "Phase 21 net-flow follow-up after broad evaluation replay",
"description": "Targeted AGENT replay for the assistant-stage1-LpuYsX0SRP regression where a net cash-flow question about Группа СВК inside an existing dialogue chain was wrongly kept on the counterparty lifecycle contour instead of applying the guarded MCP discovery answer.",
"bindings": {},
"steps": [
{
"step_id": "step_01_company_activity_lifecycle",
"title": "Activity lifecycle answer seeds broad counterparty context",
"question": "а по Альтернативе Плюс сколько лет активности в базе 1С?",
"allowed_reply_types": [
"partial_coverage",
"factual",
"factual_with_explanation"
],
"required_answer_patterns_any": [
"(?i)лет",
"(?i)активност",
"(?i)1с",
"(?i)не получил|не подтвержден|проверил доступный контур"
],
"criticality": "critical",
"semantic_tags": [
"company_activity_lifecycle",
"context_seed"
]
},
{
"step_id": "step_02_broad_company_evaluation",
"title": "Broad evaluation sits between lifecycle and net-flow question",
"question": "Как ты оценишь деятельность компании?",
"required_answer_patterns_any": [
"(?i)активн",
"(?i)заказчик|контрагент|деятельност|оценк"
],
"criticality": "warning",
"semantic_tags": [
"broad_evaluation_bridge"
]
},
{
"step_id": "step_03_net_flow_after_broad_eval",
"title": "Net-flow follow-up overrides stale lifecycle carryover and answers with inflow outflow and net",
"question": "какое нетто по деньгам с Группа СВК за 2020 год: сколько получили и сколько заплатили?",
"allowed_reply_types": [
"partial_coverage",
"factual_with_explanation"
],
"required_answer_patterns_all": [
"(?i)свк",
"(?i)получил|входящ|поступ",
"(?i)заплат|исходящ|списан|плат[её]ж",
"(?i)нетто|сальдо|разниц",
"(?i)2020|период",
"(?i)руб"
],
"forbidden_answer_patterns": [
"(?i)активных заказчиков",
"(?i)лет в базе",
"(?i)последняя активность"
],
"criticality": "critical",
"semantic_tags": [
"counterparty_net_cash_flow",
"stale_lifecycle_override"
]
}
]
}

View File

@ -89,6 +89,51 @@ function isDiscoveryReadyAddressCandidate(input, entryPoint) {
turnInput?.should_run_discovery === true &&
(source === "address_lane" || source === "address_exact" || source === "address_query_runtime_v1"));
}
/**
 * Decides whether the intent detected by the address runtime answers the same
 * question that the discovery turn meaning describes.
 *
 * Both the intent and the asked domain/action families are compared
 * case-insensitively after trimming. Intents that have no rule below are
 * never considered aligned.
 *
 * @param {string | null | undefined} detectedIntent - Intent reported by the address runtime.
 * @param {Record<string, unknown> | null | undefined} turnMeaning - Turn meaning record; only
 *   `asked_domain_family` and `asked_action_family` are read.
 * @returns {boolean} `true` when the intent matches the asked domain/action, otherwise `false`.
 */
function isDetectedIntentAlignedWithTurnMeaning(detectedIntent, turnMeaning) {
    const normalizedIntent = String(detectedIntent ?? "").trim().toLowerCase();
    if (!normalizedIntent) {
        return false;
    }
    const askedDomain = String(toNonEmptyString(turnMeaning?.asked_domain_family) ?? "").trim().toLowerCase();
    const askedAction = String(toNonEmptyString(turnMeaning?.asked_action_family) ?? "").trim().toLowerCase();
    // "confirmed_snapshot" is also the action of the VAT and inventory rules below,
    // so on its own it may only stand in for the domain when no domain was asked.
    // Otherwise a receivables/payables reply would count as aligned with a question
    // about a different domain and the stale reply would never be flagged.
    const isSnapshotWithoutDomain = askedDomain === "" && askedAction === "confirmed_snapshot";
    switch (normalizedIntent) {
        case "counterparty_activity_lifecycle":
            return (askedDomain === "counterparty_lifecycle" ||
                askedAction === "activity_duration" ||
                askedAction === "age_or_activity_duration");
        case "supplier_payouts_profile":
            return askedDomain === "counterparty_value" && askedAction === "payout";
        case "customer_revenue_and_payments":
            return (askedDomain === "counterparty_value" &&
                (askedAction === "turnover" || askedAction === "counterparty_value_or_turnover"));
        case "receivables_confirmed_as_of_date":
            return askedDomain === "receivables" || isSnapshotWithoutDomain;
        case "payables_confirmed_as_of_date":
            return askedDomain === "payables" || isSnapshotWithoutDomain;
        case "vat_liability_confirmed_for_tax_period":
            return askedDomain === "vat" && askedAction === "confirmed_tax_period";
        case "vat_payable_confirmed_as_of_date":
            return askedDomain === "vat" && askedAction === "confirmed_snapshot";
        case "vat_payable_forecast":
            return askedDomain === "vat" && askedAction === "forecast";
        case "list_documents_by_counterparty":
            return (askedAction === "list_documents" ||
                askedDomain === "counterparty_documents" ||
                askedDomain === "counterparty");
        case "inventory_on_hand_as_of_date":
        case "inventory_aging_by_purchase_date":
            return askedDomain === "inventory" && askedAction === "confirmed_snapshot";
        default:
            return false;
    }
}
/**
 * Reads the turn meaning reference attached to the discovery entry point.
 *
 * @param {object | null | undefined} entryPoint - Discovery runtime entry point contract.
 * @returns The result of `toRecordObject` applied to `turn_input.turn_meaning_ref`.
 */
function readDiscoveryTurnMeaning(entryPoint) {
    return toRecordObject(toRecordObject(entryPoint?.turn_input)?.turn_meaning_ref);
}
function hasAlignedFactualAddressReply(input, entryPoint) {
if (!isDiscoveryReadyAddressCandidate(input, entryPoint)) {
return false;
@ -97,27 +142,35 @@ function hasAlignedFactualAddressReply(input, entryPoint) {
return false;
}
const detectedIntent = toNonEmptyString(input.addressRuntimeMeta?.detected_intent);
const turnInput = toRecordObject(entryPoint?.turn_input);
const turnMeaning = toRecordObject(turnInput?.turn_meaning_ref);
const askedDomain = toNonEmptyString(turnMeaning?.asked_domain_family);
const askedAction = toNonEmptyString(turnMeaning?.asked_action_family);
if (detectedIntent === "counterparty_activity_lifecycle") {
return askedDomain === "counterparty_lifecycle" || askedAction === "activity_duration";
}
if (detectedIntent === "supplier_payouts_profile") {
return askedDomain === "counterparty_value" && askedAction === "payout";
}
if (detectedIntent === "customer_revenue_and_payments") {
return askedDomain === "counterparty_value" && askedAction === "turnover";
}
return false;
return isDetectedIntentAlignedWithTurnMeaning(detectedIntent, readDiscoveryTurnMeaning(entryPoint));
}
function hasMatchedFactualAddressContinuationTarget(input) {
/**
 * Reports whether the current factual address reply answers a different
 * question than the one captured in the discovery turn meaning.
 *
 * A conflict is only reported when all of the following hold:
 * - the reply is a discovery-ready address candidate,
 * - the current reply type is "factual",
 * - a detected intent is present,
 * - the turn meaning carries at least one of the asked domain family, the
 *   asked action family, or the unsupported-but-understood family,
 * - the detected intent is not aligned with that turn meaning.
 *
 * @param {object} input - Policy input; reads `currentReplyType` and `addressRuntimeMeta.detected_intent`.
 * @param {object | null | undefined} entryPoint - Discovery runtime entry point contract.
 * @returns {boolean} `true` when the reply conflicts with the turn meaning.
 */
function hasSemanticConflictWithDiscoveryTurnMeaning(input, entryPoint) {
    if (!isDiscoveryReadyAddressCandidate(input, entryPoint)) {
        return false;
    }
    if (toNonEmptyString(input.currentReplyType) !== "factual") {
        return false;
    }
    const detectedIntent = toNonEmptyString(input.addressRuntimeMeta?.detected_intent);
    const turnMeaning = readDiscoveryTurnMeaning(entryPoint);
    const askedDomain = toNonEmptyString(turnMeaning?.asked_domain_family);
    const askedAction = toNonEmptyString(turnMeaning?.asked_action_family);
    const unsupportedFamily = toNonEmptyString(turnMeaning?.unsupported_but_understood_family);
    // Without a detected intent or any signal about what was asked there is
    // nothing to compare, so no conflict can be claimed.
    if (!detectedIntent || (!askedDomain && !askedAction && !unsupportedFamily)) {
        return false;
    }
    return !isDetectedIntentAlignedWithTurnMeaning(detectedIntent, turnMeaning);
}
/**
 * Checks whether a factual address reply matches the target intent of the
 * dialog continuation contract.
 *
 * Returns `false` for non-factual replies and for replies that semantically
 * conflict with the discovery turn meaning. The contract is read from
 * `dialogContinuationContract`, falling back to `dialog_continuation_contract_v2`.
 *
 * @param {object} input - Policy input; reads `currentReplyType` and `addressRuntimeMeta`.
 * @param {object | null | undefined} entryPoint - Discovery runtime entry point contract.
 * @returns {boolean} `true` when both intents are present and equal.
 */
function hasMatchedFactualAddressContinuationTarget(input, entryPoint) {
    const isFactualReply = toNonEmptyString(input.currentReplyType) === "factual";
    if (!isFactualReply || hasSemanticConflictWithDiscoveryTurnMeaning(input, entryPoint)) {
        return false;
    }
    const runtimeMeta = input.addressRuntimeMeta;
    const currentIntent = toNonEmptyString(runtimeMeta?.detected_intent);
    const continuationContract = toRecordObject(runtimeMeta?.dialogContinuationContract) ??
        toRecordObject(runtimeMeta?.dialog_continuation_contract_v2);
    const continuationTargetIntent = toNonEmptyString(continuationContract?.target_intent);
    if (!currentIntent || !continuationTargetIntent) {
        return false;
    }
    return currentIntent === continuationTargetIntent;
}
@ -128,6 +181,9 @@ function hasFullConfirmedFactualAddressReply(input, entryPoint) {
if (toNonEmptyString(input.currentReplyType) !== "factual") {
return false;
}
if (hasSemanticConflictWithDiscoveryTurnMeaning(input, entryPoint)) {
return false;
}
const truthGateStatus = toNonEmptyString(input.addressRuntimeMeta?.truth_gate_contract_status);
if (truthGateStatus === "full_confirmed") {
return true;
@ -150,7 +206,8 @@ function applyAssistantMcpDiscoveryResponsePolicy(input) {
const discoveryReadyDeepCandidate = isDiscoveryReadyDeepCandidate(input, entryPoint);
const discoveryReadyAddressCandidate = isDiscoveryReadyAddressCandidate(input, entryPoint);
const alignedFactualAddressReply = hasAlignedFactualAddressReply(input, entryPoint);
const matchedFactualAddressContinuationTarget = hasMatchedFactualAddressContinuationTarget(input);
const semanticConflictWithDiscoveryTurnMeaning = hasSemanticConflictWithDiscoveryTurnMeaning(input, entryPoint);
const matchedFactualAddressContinuationTarget = hasMatchedFactualAddressContinuationTarget(input, entryPoint);
const fullConfirmedFactualAddressReply = hasFullConfirmedFactualAddressReply(input, entryPoint);
if (!entryPoint) {
pushReason(reasonCodes, "mcp_discovery_response_policy_no_entry_point");
@ -170,6 +227,9 @@ function applyAssistantMcpDiscoveryResponsePolicy(input) {
if (alignedFactualAddressReply) {
pushReason(reasonCodes, "mcp_discovery_response_policy_keep_aligned_factual_address_reply");
}
if (semanticConflictWithDiscoveryTurnMeaning) {
pushReason(reasonCodes, "mcp_discovery_response_policy_semantic_conflict_allows_candidate_override");
}
if (matchedFactualAddressContinuationTarget) {
pushReason(reasonCodes, "mcp_discovery_response_policy_keep_factual_address_continuation_target");
}

View File

@ -152,6 +152,61 @@ function isDiscoveryReadyAddressCandidate(
);
}
/**
 * Decides whether the intent detected by the address runtime answers the same
 * question that the discovery turn meaning describes.
 *
 * Both the intent and the asked domain/action families are compared
 * case-insensitively after trimming. Intents that have no rule below are
 * never considered aligned.
 *
 * @param detectedIntent - Intent reported by the address runtime.
 * @param turnMeaning - Turn meaning record; only `asked_domain_family` and
 *   `asked_action_family` are read.
 * @returns `true` when the intent matches the asked domain/action, otherwise `false`.
 */
function isDetectedIntentAlignedWithTurnMeaning(
  detectedIntent: string | null,
  turnMeaning: Record<string, unknown> | null
): boolean {
  const normalizedIntent = String(detectedIntent ?? "").trim().toLowerCase();
  if (!normalizedIntent) {
    return false;
  }
  const askedDomain = String(toNonEmptyString(turnMeaning?.asked_domain_family) ?? "").trim().toLowerCase();
  const askedAction = String(toNonEmptyString(turnMeaning?.asked_action_family) ?? "").trim().toLowerCase();
  // "confirmed_snapshot" is also the action of the VAT and inventory rules below,
  // so on its own it may only stand in for the domain when no domain was asked.
  // Otherwise a receivables/payables reply would count as aligned with a question
  // about a different domain and the stale reply would never be flagged.
  const isSnapshotWithoutDomain = askedDomain === "" && askedAction === "confirmed_snapshot";
  switch (normalizedIntent) {
    case "counterparty_activity_lifecycle":
      return (
        askedDomain === "counterparty_lifecycle" ||
        askedAction === "activity_duration" ||
        askedAction === "age_or_activity_duration"
      );
    case "supplier_payouts_profile":
      return askedDomain === "counterparty_value" && askedAction === "payout";
    case "customer_revenue_and_payments":
      return (
        askedDomain === "counterparty_value" &&
        (askedAction === "turnover" || askedAction === "counterparty_value_or_turnover")
      );
    case "receivables_confirmed_as_of_date":
      return askedDomain === "receivables" || isSnapshotWithoutDomain;
    case "payables_confirmed_as_of_date":
      return askedDomain === "payables" || isSnapshotWithoutDomain;
    case "vat_liability_confirmed_for_tax_period":
      return askedDomain === "vat" && askedAction === "confirmed_tax_period";
    case "vat_payable_confirmed_as_of_date":
      return askedDomain === "vat" && askedAction === "confirmed_snapshot";
    case "vat_payable_forecast":
      return askedDomain === "vat" && askedAction === "forecast";
    case "list_documents_by_counterparty":
      return (
        askedAction === "list_documents" ||
        askedDomain === "counterparty_documents" ||
        askedDomain === "counterparty"
      );
    case "inventory_on_hand_as_of_date":
    case "inventory_aging_by_purchase_date":
      return askedDomain === "inventory" && askedAction === "confirmed_snapshot";
    default:
      return false;
  }
}
/**
 * Reads the turn meaning reference attached to the discovery entry point.
 *
 * @param entryPoint - Discovery runtime entry point contract, or `null`.
 * @returns The result of `toRecordObject` applied to `turn_input.turn_meaning_ref`.
 */
function readDiscoveryTurnMeaning(
  entryPoint: AssistantMcpDiscoveryRuntimeEntryPointContract | null
): Record<string, unknown> | null {
  return toRecordObject(toRecordObject(entryPoint?.turn_input)?.turn_meaning_ref);
}
function hasAlignedFactualAddressReply(
input: ApplyAssistantMcpDiscoveryResponsePolicyInput,
entryPoint: AssistantMcpDiscoveryRuntimeEntryPointContract | null
@ -162,33 +217,45 @@ function hasAlignedFactualAddressReply(
if (toNonEmptyString(input.currentReplyType) !== "factual") {
return false;
}
const detectedIntent = toNonEmptyString(input.addressRuntimeMeta?.detected_intent);
const turnInput = toRecordObject(entryPoint?.turn_input);
const turnMeaning = toRecordObject(turnInput?.turn_meaning_ref);
const askedDomain = toNonEmptyString(turnMeaning?.asked_domain_family);
const askedAction = toNonEmptyString(turnMeaning?.asked_action_family);
if (detectedIntent === "counterparty_activity_lifecycle") {
return askedDomain === "counterparty_lifecycle" || askedAction === "activity_duration";
}
if (detectedIntent === "supplier_payouts_profile") {
return askedDomain === "counterparty_value" && askedAction === "payout";
}
if (detectedIntent === "customer_revenue_and_payments") {
return askedDomain === "counterparty_value" && askedAction === "turnover";
}
return false;
return isDetectedIntentAlignedWithTurnMeaning(detectedIntent, readDiscoveryTurnMeaning(entryPoint));
}
function hasMatchedFactualAddressContinuationTarget(
input: ApplyAssistantMcpDiscoveryResponsePolicyInput
/**
 * Reports whether the current factual address reply answers a different
 * question than the one captured in the discovery turn meaning.
 *
 * A conflict is only reported when all of the following hold:
 * - the reply is a discovery-ready address candidate,
 * - the current reply type is "factual",
 * - a detected intent is present,
 * - the turn meaning carries at least one of the asked domain family, the
 *   asked action family, or the unsupported-but-understood family,
 * - the detected intent is not aligned with that turn meaning.
 *
 * @param input - Policy input; reads `currentReplyType` and `addressRuntimeMeta.detected_intent`.
 * @param entryPoint - Discovery runtime entry point contract, or `null`.
 * @returns `true` when the reply conflicts with the turn meaning.
 */
function hasSemanticConflictWithDiscoveryTurnMeaning(
  input: ApplyAssistantMcpDiscoveryResponsePolicyInput,
  entryPoint: AssistantMcpDiscoveryRuntimeEntryPointContract | null
): boolean {
  if (!isDiscoveryReadyAddressCandidate(input, entryPoint)) {
    return false;
  }
  if (toNonEmptyString(input.currentReplyType) !== "factual") {
    return false;
  }
  const detectedIntent = toNonEmptyString(input.addressRuntimeMeta?.detected_intent);
  const turnMeaning = readDiscoveryTurnMeaning(entryPoint);
  const askedDomain = toNonEmptyString(turnMeaning?.asked_domain_family);
  const askedAction = toNonEmptyString(turnMeaning?.asked_action_family);
  const unsupportedFamily = toNonEmptyString(turnMeaning?.unsupported_but_understood_family);
  // Without a detected intent or any signal about what was asked there is
  // nothing to compare, so no conflict can be claimed.
  if (!detectedIntent || (!askedDomain && !askedAction && !unsupportedFamily)) {
    return false;
  }
  return !isDetectedIntentAlignedWithTurnMeaning(detectedIntent, turnMeaning);
}
/**
 * Checks whether a factual address reply matches the target intent of the
 * dialog continuation contract.
 *
 * Returns `false` for non-factual replies and for replies that semantically
 * conflict with the discovery turn meaning. The contract is read from
 * `dialogContinuationContract`, falling back to `dialog_continuation_contract_v2`.
 *
 * @param input - Policy input; reads `currentReplyType` and `addressRuntimeMeta`.
 * @param entryPoint - Discovery runtime entry point contract, or `null`.
 * @returns `true` when both intents are present and equal.
 */
function hasMatchedFactualAddressContinuationTarget(
  input: ApplyAssistantMcpDiscoveryResponsePolicyInput,
  entryPoint: AssistantMcpDiscoveryRuntimeEntryPointContract | null
): boolean {
  const isFactualReply = toNonEmptyString(input.currentReplyType) === "factual";
  if (!isFactualReply || hasSemanticConflictWithDiscoveryTurnMeaning(input, entryPoint)) {
    return false;
  }
  const runtimeMeta = input.addressRuntimeMeta;
  const currentIntent = toNonEmptyString(runtimeMeta?.detected_intent);
  const continuationContract =
    toRecordObject(runtimeMeta?.dialogContinuationContract) ??
    toRecordObject(runtimeMeta?.dialog_continuation_contract_v2);
  const continuationTargetIntent = toNonEmptyString(continuationContract?.target_intent);
  if (!currentIntent || !continuationTargetIntent) {
    return false;
  }
  return currentIntent === continuationTargetIntent;
}
@ -203,6 +270,9 @@ function hasFullConfirmedFactualAddressReply(
if (toNonEmptyString(input.currentReplyType) !== "factual") {
return false;
}
if (hasSemanticConflictWithDiscoveryTurnMeaning(input, entryPoint)) {
return false;
}
const truthGateStatus = toNonEmptyString(input.addressRuntimeMeta?.truth_gate_contract_status);
if (truthGateStatus === "full_confirmed") {
return true;
@ -229,7 +299,8 @@ export function applyAssistantMcpDiscoveryResponsePolicy(
const discoveryReadyDeepCandidate = isDiscoveryReadyDeepCandidate(input, entryPoint);
const discoveryReadyAddressCandidate = isDiscoveryReadyAddressCandidate(input, entryPoint);
const alignedFactualAddressReply = hasAlignedFactualAddressReply(input, entryPoint);
const matchedFactualAddressContinuationTarget = hasMatchedFactualAddressContinuationTarget(input);
const semanticConflictWithDiscoveryTurnMeaning = hasSemanticConflictWithDiscoveryTurnMeaning(input, entryPoint);
const matchedFactualAddressContinuationTarget = hasMatchedFactualAddressContinuationTarget(input, entryPoint);
const fullConfirmedFactualAddressReply = hasFullConfirmedFactualAddressReply(input, entryPoint);
if (!entryPoint) {
@ -250,6 +321,9 @@ export function applyAssistantMcpDiscoveryResponsePolicy(
if (alignedFactualAddressReply) {
pushReason(reasonCodes, "mcp_discovery_response_policy_keep_aligned_factual_address_reply");
}
if (semanticConflictWithDiscoveryTurnMeaning) {
pushReason(reasonCodes, "mcp_discovery_response_policy_semantic_conflict_allows_candidate_override");
}
if (matchedFactualAddressContinuationTarget) {
pushReason(reasonCodes, "mcp_discovery_response_policy_keep_factual_address_continuation_target");
}

View File

@ -176,8 +176,8 @@ describe("assistant MCP discovery response policy", () => {
adapter_status: "ready",
should_run_discovery: true,
turn_meaning_ref: {
asked_domain_family: "counterparty_lifecycle",
asked_action_family: "activity_duration"
asked_domain_family: "counterparty_value",
asked_action_family: "turnover"
}
}
})
@ -209,8 +209,8 @@ describe("assistant MCP discovery response policy", () => {
adapter_status: "ready",
should_run_discovery: true,
turn_meaning_ref: {
asked_domain_family: "counterparty_value",
asked_action_family: "turnover"
asked_domain_family: "receivables",
asked_action_family: "confirmed_snapshot"
}
}
})
@ -223,6 +223,64 @@ describe("assistant MCP discovery response policy", () => {
expect(result.reason_codes).toContain("mcp_discovery_response_policy_keep_full_confirmed_factual_address_reply");
});
it("overrides a stale full-confirmed lifecycle reply when discovery proves a different net-flow question", () => {
const result = applyAssistantMcpDiscoveryResponsePolicy({
currentReply: "Коротко: активных заказчиков в 2020 году — 1.",
currentReplySource: "address_query_runtime_v1",
currentReplyType: "factual",
addressRuntimeMeta: {
detected_intent: "counterparty_activity_lifecycle",
truth_gate_contract_status: "full_confirmed",
assistant_truth_answer_policy_v1: {
truth_gate: {
coverage_status: "full",
grounding_status: "grounded",
source_truth_gate_status: "full_confirmed"
}
},
dialog_continuation_contract_v2: {
target_intent: "counterparty_activity_lifecycle"
},
assistant_mcp_discovery_entry_point_v1: entryPoint({
turn_input: {
adapter_status: "ready",
should_run_discovery: true,
turn_meaning_ref: {
asked_domain_family: "counterparty_value",
asked_action_family: "net_value_flow",
explicit_entity_candidates: ["Группа СВК"],
explicit_organization_scope: "ООО Альтернатива Плюс",
explicit_date_scope: "2020",
unsupported_but_understood_family: "counterparty_bidirectional_value_flow_or_netting"
}
},
bridge: {
bridge_status: "answer_draft_ready",
user_facing_response_allowed: true,
business_fact_answer_allowed: true,
requires_user_clarification: false,
answer_draft: {
answer_mode: "confirmed_with_bounded_inference",
headline: "По данным 1С найдены строки входящих и исходящих денежных движений.",
confirmed_lines: ["Получили 47 628 853,03 руб.; заплатили 43 763 351,53 руб.; нетто 3 865 501,50 руб."],
inference_lines: [],
unknown_lines: ["Полное сальдо вне проверенного окна не подтверждено."],
limitation_lines: [],
next_step_line: null
}
}
})
}
});
expect(result.applied).toBe(true);
expect(result.decision).toBe("apply_candidate");
expect(result.reply_source).toBe("mcp_discovery_response_candidate_guarded");
expect(result.reply_text).toContain("47 628 853,03");
expect(result.reason_codes).toContain("mcp_discovery_response_policy_semantic_conflict_allows_candidate_override");
expect(result.reason_codes).not.toContain("mcp_discovery_response_policy_keep_full_confirmed_factual_address_reply");
});
it("keeps address lane answers when discovery was not requested for the current turn", () => {
const result = applyAssistantMcpDiscoveryResponsePolicy({
currentReply: "supported exact route answer",