Закрепить целевой AGENT-прогон hot value-flow handoff

This commit is contained in:
dctouch 2026-05-22 23:00:37 +03:00
parent e7603a9d29
commit 50d938b8f1
14 changed files with 636 additions and 8 deletions

View File

@ -0,0 +1,157 @@
{
"schema_version": "domain_truth_harness_spec_v1",
"scenario_id": "agent_hot_value_flow_handoff_20260522",
"domain": "autonomy_hot_value_flow_handoff",
"title": "AGENT | Hot value-flow discovery handoff",
"description": "Targeted AGENT replay for the current Autonomy Core slice: organization-scoped value-flow questions must be answered through guarded MCP discovery response with hot handoff, not through stale exact fallback or a counterparty-only route.",
"bindings": {},
"steps": [
{
"step_id": "step_01_incoming_total_hot_handoff",
"title": "Organization-scoped incoming total uses hot value-flow discovery candidate",
"question": "Сколько входящих денег за 2020 год по ООО Альтернатива Плюс без разреза по контрагентам?",
"allowed_reply_types": [
"partial_coverage",
"factual_with_explanation",
"factual"
],
"expected_mcp_discovery_response_applied": true,
"expected_mcp_discovery_selected_chain_id": "value_flow",
"expected_mcp_discovery_response_candidate_status": "ready_for_guarded_use",
"expected_mcp_discovery_candidate_hot_runtime_wired": true,
"expected_mcp_discovery_hot_runtime_wired": true,
"expected_mcp_discovery_execution_handoff_status": "ready_for_guarded_response",
"expected_mcp_discovery_execution_handoff_can_use_guarded_response": true,
"expected_catalog_alignment_status": "selected_matches_top",
"expected_catalog_chain_top_match": "value_flow",
"expected_catalog_selected_matches_top": true,
"expected_route_candidate_status": "ready_for_reviewed_execution",
"expected_route_candidate_executable_now": true,
"required_answer_patterns_all": [
"(?i)2020",
"(?i)входящ|получ|поступ",
"(?i)руб"
],
"required_answer_patterns_any": [
"(?i)Альтернатива",
"(?i)проверенн",
"(?i)1С"
],
"forbidden_answer_patterns": [
"(?i)уточните контрагента",
"(?i)по какому контрагенту",
"(?i)не найден контрагент",
"(?i)runtime_",
"(?i)planner_",
"(?i)query_movements",
"(?i)primitive"
],
"criticality": "critical",
"semantic_tags": [
"autonomy_core",
"value_flow",
"hot_handoff",
"guarded_response",
"incoming_total"
]
},
{
"step_id": "step_02_outgoing_total_hot_handoff",
"title": "Organization-scoped outgoing total uses hot value-flow discovery candidate",
"question": "Сколько исходящих денег за 2020 год по ООО Альтернатива Плюс без разреза по контрагентам?",
"allowed_reply_types": [
"partial_coverage",
"factual_with_explanation",
"factual"
],
"expected_mcp_discovery_response_applied": true,
"expected_mcp_discovery_selected_chain_id": "value_flow",
"expected_mcp_discovery_response_candidate_status": "ready_for_guarded_use",
"expected_mcp_discovery_candidate_hot_runtime_wired": true,
"expected_mcp_discovery_hot_runtime_wired": true,
"expected_mcp_discovery_execution_handoff_status": "ready_for_guarded_response",
"expected_mcp_discovery_execution_handoff_can_use_guarded_response": true,
"expected_catalog_alignment_status": "selected_matches_top",
"expected_catalog_chain_top_match": "value_flow",
"expected_catalog_selected_matches_top": true,
"expected_route_candidate_status": "ready_for_reviewed_execution",
"expected_route_candidate_executable_now": true,
"required_answer_patterns_all": [
"(?i)2020",
"(?i)исходящ|списан|заплат",
"(?i)руб"
],
"required_answer_patterns_any": [
"(?i)Альтернатива",
"(?i)проверенн",
"(?i)1С"
],
"forbidden_answer_patterns": [
"(?i)уточните контрагента",
"(?i)по какому контрагенту",
"(?i)не найден контрагент",
"(?i)runtime_",
"(?i)planner_",
"(?i)query_movements",
"(?i)primitive"
],
"criticality": "critical",
"semantic_tags": [
"autonomy_core",
"value_flow",
"hot_handoff",
"guarded_response",
"outgoing_total"
]
},
{
"step_id": "step_03_colloquial_money_total_hot_handoff",
"title": "Colloquial company money wording still uses hot value-flow discovery candidate",
"question": "А всего сколько денег пришло в ООО Альтернатива Плюс за 2020, без топов и без контрагентов?",
"allowed_reply_types": [
"partial_coverage",
"factual_with_explanation",
"factual"
],
"expected_mcp_discovery_response_applied": true,
"expected_mcp_discovery_selected_chain_id": "value_flow",
"expected_mcp_discovery_response_candidate_status": "ready_for_guarded_use",
"expected_mcp_discovery_candidate_hot_runtime_wired": true,
"expected_mcp_discovery_hot_runtime_wired": true,
"expected_mcp_discovery_execution_handoff_status": "ready_for_guarded_response",
"expected_mcp_discovery_execution_handoff_can_use_guarded_response": true,
"expected_catalog_alignment_status": "selected_matches_top",
"expected_catalog_chain_top_match": "value_flow",
"expected_catalog_selected_matches_top": true,
"expected_route_candidate_status": "ready_for_reviewed_execution",
"expected_route_candidate_executable_now": true,
"required_answer_patterns_all": [
"(?i)2020",
"(?i)пришл|получ|поступ|входящ",
"(?i)руб"
],
"required_answer_patterns_any": [
"(?i)Альтернатива",
"(?i)проверенн",
"(?i)1С"
],
"forbidden_answer_patterns": [
"(?i)уточните контрагента",
"(?i)по какому контрагенту",
"(?i)не найден контрагент",
"(?i)runtime_",
"(?i)planner_",
"(?i)query_movements",
"(?i)primitive"
],
"criticality": "critical",
"semantic_tags": [
"autonomy_core",
"value_flow",
"hot_handoff",
"guarded_response",
"colloquial_total"
]
}
]
}

View File

@ -193,6 +193,12 @@ function rankingNeedFromRawUtterance(value) {
if (!text) { if (!text) {
return null; return null;
} }
if (/\u0438\u0441\u043a\u043b\u044e\u0447[\p{L}\p{N}_]*\s+\u0442\u043e\u043f/iu.test(text)) {
return null;
}
if (/(?:\u0431\u0435\u0437\s+\u0442\u043e\u043f(?:\u043e\u0432|\u0430)?\b|\u043d\u0435\s+\u0442\u043e\u043f\b|\u0438\u0441\u043a\u043b\u044e\u0447\w*\s+\u0442\u043e\u043f|\u0431\u0435\u0437\s+\u0440\u0435\u0439\u0442\u0438\u043d\u0433\u0430\b|без\s+топ(?:ов|а)?\b|не\s+топ\b|исключ\S*\s+топ|без\s+рейтинга\b)/iu.test(text)) {
return null;
}
if (/(?:\u0442\u043e\u043f[-\s]?\d*|\u0441\u0430\u043c(?:\u044b\u0439|\u0430\u044f|\u043e\u0435|\u044b\u0435)|\u0431\u043e\u043b\u044c\u0448\u0435\s+\u0432\u0441\u0435\u0433\u043e|\u043d\u0430\u0438\u0431\u043e\u043b[\u0435\u0451]\u0435|\u043c\u0430\u043a\u0441\u0438\u043c\u0430\u043b\u044c\u043d|\u043c\u0430\u043a\u0441\u0438\u043c\u0443\u043c|\u043a\u0440\u0443\u043f\u043d\u0435\u0439\u0448|\u043b\u0443\u0447\u0448\u0438\u0439)/iu.test(text)) { if (/(?:\u0442\u043e\u043f[-\s]?\d*|\u0441\u0430\u043c(?:\u044b\u0439|\u0430\u044f|\u043e\u0435|\u044b\u0435)|\u0431\u043e\u043b\u044c\u0448\u0435\s+\u0432\u0441\u0435\u0433\u043e|\u043d\u0430\u0438\u0431\u043e\u043b[\u0435\u0451]\u0435|\u043c\u0430\u043a\u0441\u0438\u043c\u0430\u043b\u044c\u043d|\u043c\u0430\u043a\u0441\u0438\u043c\u0443\u043c|\u043a\u0440\u0443\u043f\u043d\u0435\u0439\u0448|\u043b\u0443\u0447\u0448\u0438\u0439)/iu.test(text)) {
return "top_desc"; return "top_desc";
} }

View File

@ -68,11 +68,14 @@ function buildAssistantMcpDiscoveryDebugAttachmentFields(input) {
const routeCandidate = isRouteCandidateContract(bridge?.route_candidate) ? bridge.route_candidate : null; const routeCandidate = isRouteCandidateContract(bridge?.route_candidate) ? bridge.route_candidate : null;
const executionHandoff = isExecutionHandoffContract(bridge?.execution_handoff) ? bridge.execution_handoff : null; const executionHandoff = isExecutionHandoffContract(bridge?.execution_handoff) ? bridge.execution_handoff : null;
const answerDraft = toRecordObject(bridge?.answer_draft); const answerDraft = toRecordObject(bridge?.answer_draft);
const hotRuntimeWired = entryPoint?.hot_runtime_wired === true ||
bridge?.hot_runtime_wired === true ||
executionHandoff?.can_use_guarded_response === true;
return { return {
assistant_mcp_discovery_entry_point_v1: entryPoint, assistant_mcp_discovery_entry_point_v1: entryPoint,
mcp_discovery_entry_status: toNonEmptyString(entryPoint?.entry_status), mcp_discovery_entry_status: toNonEmptyString(entryPoint?.entry_status),
mcp_discovery_attempted: Boolean(entryPoint?.discovery_attempted), mcp_discovery_attempted: Boolean(entryPoint?.discovery_attempted),
mcp_discovery_hot_runtime_wired: false, mcp_discovery_hot_runtime_wired: hotRuntimeWired,
mcp_discovery_bridge_status: toNonEmptyString(bridge?.bridge_status), mcp_discovery_bridge_status: toNonEmptyString(bridge?.bridge_status),
mcp_discovery_selected_chain_id: toNonEmptyString(planner?.selected_chain_id), mcp_discovery_selected_chain_id: toNonEmptyString(planner?.selected_chain_id),
mcp_discovery_evidence_plan_v1: evidencePlan, mcp_discovery_evidence_plan_v1: evidencePlan,

View File

@ -204,12 +204,16 @@ function hasMetadataDiscoveryPriority(input, entryPoint) {
} }
function isOpenScopeValueFlowWithoutSubject(entryPoint) { function isOpenScopeValueFlowWithoutSubject(entryPoint) {
const graph = readDiscoveryDataNeedGraph(entryPoint); const graph = readDiscoveryDataNeedGraph(entryPoint);
const turnMeaning = readDiscoveryTurnMeaning(entryPoint);
const businessFactFamily = toNonEmptyString(graph?.business_fact_family); const businessFactFamily = toNonEmptyString(graph?.business_fact_family);
const subjectCandidates = Array.isArray(graph?.subject_candidates) ? graph.subject_candidates : []; const subjectCandidates = Array.isArray(graph?.subject_candidates) ? graph.subject_candidates : [];
const reasonCodes = Array.isArray(graph?.reason_codes) ? graph.reason_codes : []; const reasonCodes = readStringArray(graph?.reason_codes);
const clarificationGaps = readStringArray(graph?.clarification_gaps);
const explicitOrganizationScope = toNonEmptyString(turnMeaning?.explicit_organization_scope);
return (businessFactFamily === "value_flow" && return (businessFactFamily === "value_flow" &&
subjectCandidates.length === 0 && subjectCandidates.length === 0 &&
reasonCodes.some((reason) => toNonEmptyString(reason) === "data_need_graph_open_scope_total_without_subject")); (reasonCodes.includes("data_need_graph_open_scope_total_without_subject") ||
(Boolean(explicitOrganizationScope) && clarificationGaps.includes("subject"))));
} }
function needsOpenScopeValueFlowOrganizationClarification(entryPoint) { function needsOpenScopeValueFlowOrganizationClarification(entryPoint) {
const graph = readDiscoveryDataNeedGraph(entryPoint); const graph = readDiscoveryDataNeedGraph(entryPoint);
@ -440,6 +444,9 @@ function hasRuntimeAdjustedExactReply(input, entryPoint) {
if (hasMetadataDiscoveryPriority(input, entryPoint)) { if (hasMetadataDiscoveryPriority(input, entryPoint)) {
return false; return false;
} }
if (hasOpenScopeValueFlowDiscoveryPriority(input, entryPoint)) {
return false;
}
if (hasEvidenceLaneConflictWithDiscoveryTurnMeaning(input, entryPoint)) { if (hasEvidenceLaneConflictWithDiscoveryTurnMeaning(input, entryPoint)) {
return false; return false;
} }
@ -463,6 +470,9 @@ function hasRuntimeMatchedExactReply(input, entryPoint) {
if (hasMetadataDiscoveryPriority(input, entryPoint)) { if (hasMetadataDiscoveryPriority(input, entryPoint)) {
return false; return false;
} }
if (hasOpenScopeValueFlowDiscoveryPriority(input, entryPoint)) {
return false;
}
if (hasEvidenceLaneConflictWithDiscoveryTurnMeaning(input, entryPoint)) { if (hasEvidenceLaneConflictWithDiscoveryTurnMeaning(input, entryPoint)) {
return false; return false;
} }
@ -483,6 +493,9 @@ function hasAlignedFactualAddressReply(input, entryPoint) {
if (hasMetadataDiscoveryPriority(input, entryPoint)) { if (hasMetadataDiscoveryPriority(input, entryPoint)) {
return false; return false;
} }
if (hasOpenScopeValueFlowDiscoveryPriority(input, entryPoint)) {
return false;
}
if (hasSemanticConflictWithDiscoveryTurnMeaning(input, entryPoint)) { if (hasSemanticConflictWithDiscoveryTurnMeaning(input, entryPoint)) {
return false; return false;
} }
@ -538,6 +551,9 @@ function hasMatchedFactualAddressContinuationTarget(input, entryPoint) {
if (hasMetadataDiscoveryPriority(input, entryPoint)) { if (hasMetadataDiscoveryPriority(input, entryPoint)) {
return false; return false;
} }
if (hasOpenScopeValueFlowDiscoveryPriority(input, entryPoint)) {
return false;
}
const detectedIntent = toNonEmptyString(input.addressRuntimeMeta?.detected_intent); const detectedIntent = toNonEmptyString(input.addressRuntimeMeta?.detected_intent);
const dialogContinuationContract = toRecordObject(input.addressRuntimeMeta?.dialogContinuationContract) ?? const dialogContinuationContract = toRecordObject(input.addressRuntimeMeta?.dialogContinuationContract) ??
toRecordObject(input.addressRuntimeMeta?.dialog_continuation_contract_v2); toRecordObject(input.addressRuntimeMeta?.dialog_continuation_contract_v2);
@ -578,6 +594,9 @@ function hasFullConfirmedFactualAddressReply(input, entryPoint) {
if (hasMetadataDiscoveryPriority(input, entryPoint)) { if (hasMetadataDiscoveryPriority(input, entryPoint)) {
return false; return false;
} }
if (hasOpenScopeValueFlowDiscoveryPriority(input, entryPoint)) {
return false;
}
return hasFullConfirmedTruth(input); return hasFullConfirmedTruth(input);
} }
function applyAssistantMcpDiscoveryResponsePolicy(input) { function applyAssistantMcpDiscoveryResponsePolicy(input) {

View File

@ -293,6 +293,12 @@ function rankingNeedFromRawUtterance(value: string): string | null {
if (!text) { if (!text) {
return null; return null;
} }
if (/\u0438\u0441\u043a\u043b\u044e\u0447[\p{L}\p{N}_]*\s+\u0442\u043e\u043f/iu.test(text)) {
return null;
}
if (/(?:\u0431\u0435\u0437\s+\u0442\u043e\u043f(?:\u043e\u0432|\u0430)?\b|\u043d\u0435\s+\u0442\u043e\u043f\b|\u0438\u0441\u043a\u043b\u044e\u0447\w*\s+\u0442\u043e\u043f|\u0431\u0435\u0437\s+\u0440\u0435\u0439\u0442\u0438\u043d\u0433\u0430\b|без\s+топ(?:ов|а)?\b|не\s+топ\b|исключ\S*\s+топ|без\s+рейтинга\b)/iu.test(text)) {
return null;
}
if (/(?:\u0442\u043e\u043f[-\s]?\d*|\u0441\u0430\u043c(?:\u044b\u0439|\u0430\u044f|\u043e\u0435|\u044b\u0435)|\u0431\u043e\u043b\u044c\u0448\u0435\s+\u0432\u0441\u0435\u0433\u043e|\u043d\u0430\u0438\u0431\u043e\u043b[\u0435\u0451]\u0435|\u043c\u0430\u043a\u0441\u0438\u043c\u0430\u043b\u044c\u043d|\u043c\u0430\u043a\u0441\u0438\u043c\u0443\u043c|\u043a\u0440\u0443\u043f\u043d\u0435\u0439\u0448|\u043b\u0443\u0447\u0448\u0438\u0439)/iu.test(text)) { if (/(?:\u0442\u043e\u043f[-\s]?\d*|\u0441\u0430\u043c(?:\u044b\u0439|\u0430\u044f|\u043e\u0435|\u044b\u0435)|\u0431\u043e\u043b\u044c\u0448\u0435\s+\u0432\u0441\u0435\u0433\u043e|\u043d\u0430\u0438\u0431\u043e\u043b[\u0435\u0451]\u0435|\u043c\u0430\u043a\u0441\u0438\u043c\u0430\u043b\u044c\u043d|\u043c\u0430\u043a\u0441\u0438\u043c\u0443\u043c|\u043a\u0440\u0443\u043f\u043d\u0435\u0439\u0448|\u043b\u0443\u0447\u0448\u0438\u0439)/iu.test(text)) {
return "top_desc"; return "top_desc";
} }

View File

@ -7,7 +7,7 @@ export interface AssistantMcpDiscoveryDebugAttachmentFields {
assistant_mcp_discovery_entry_point_v1: AssistantMcpDiscoveryRuntimeEntryPointContract | null; assistant_mcp_discovery_entry_point_v1: AssistantMcpDiscoveryRuntimeEntryPointContract | null;
mcp_discovery_entry_status: string | null; mcp_discovery_entry_status: string | null;
mcp_discovery_attempted: boolean; mcp_discovery_attempted: boolean;
mcp_discovery_hot_runtime_wired: false; mcp_discovery_hot_runtime_wired: boolean;
mcp_discovery_bridge_status: string | null; mcp_discovery_bridge_status: string | null;
mcp_discovery_selected_chain_id: string | null; mcp_discovery_selected_chain_id: string | null;
mcp_discovery_evidence_plan_v1: AssistantEvidencePlannerContract | null; mcp_discovery_evidence_plan_v1: AssistantEvidencePlannerContract | null;
@ -130,12 +130,16 @@ export function buildAssistantMcpDiscoveryDebugAttachmentFields(
const routeCandidate = isRouteCandidateContract(bridge?.route_candidate) ? bridge.route_candidate : null; const routeCandidate = isRouteCandidateContract(bridge?.route_candidate) ? bridge.route_candidate : null;
const executionHandoff = isExecutionHandoffContract(bridge?.execution_handoff) ? bridge.execution_handoff : null; const executionHandoff = isExecutionHandoffContract(bridge?.execution_handoff) ? bridge.execution_handoff : null;
const answerDraft = toRecordObject(bridge?.answer_draft); const answerDraft = toRecordObject(bridge?.answer_draft);
const hotRuntimeWired =
entryPoint?.hot_runtime_wired === true ||
bridge?.hot_runtime_wired === true ||
executionHandoff?.can_use_guarded_response === true;
return { return {
assistant_mcp_discovery_entry_point_v1: entryPoint, assistant_mcp_discovery_entry_point_v1: entryPoint,
mcp_discovery_entry_status: toNonEmptyString(entryPoint?.entry_status), mcp_discovery_entry_status: toNonEmptyString(entryPoint?.entry_status),
mcp_discovery_attempted: Boolean(entryPoint?.discovery_attempted), mcp_discovery_attempted: Boolean(entryPoint?.discovery_attempted),
mcp_discovery_hot_runtime_wired: false, mcp_discovery_hot_runtime_wired: hotRuntimeWired,
mcp_discovery_bridge_status: toNonEmptyString(bridge?.bridge_status), mcp_discovery_bridge_status: toNonEmptyString(bridge?.bridge_status),
mcp_discovery_selected_chain_id: toNonEmptyString(planner?.selected_chain_id), mcp_discovery_selected_chain_id: toNonEmptyString(planner?.selected_chain_id),
mcp_discovery_evidence_plan_v1: evidencePlan, mcp_discovery_evidence_plan_v1: evidencePlan,

View File

@ -306,13 +306,17 @@ function isOpenScopeValueFlowWithoutSubject(
entryPoint: AssistantMcpDiscoveryRuntimeEntryPointContract | null entryPoint: AssistantMcpDiscoveryRuntimeEntryPointContract | null
): boolean { ): boolean {
const graph = readDiscoveryDataNeedGraph(entryPoint); const graph = readDiscoveryDataNeedGraph(entryPoint);
const turnMeaning = readDiscoveryTurnMeaning(entryPoint);
const businessFactFamily = toNonEmptyString(graph?.business_fact_family); const businessFactFamily = toNonEmptyString(graph?.business_fact_family);
const subjectCandidates = Array.isArray(graph?.subject_candidates) ? graph.subject_candidates : []; const subjectCandidates = Array.isArray(graph?.subject_candidates) ? graph.subject_candidates : [];
const reasonCodes = Array.isArray(graph?.reason_codes) ? graph.reason_codes : []; const reasonCodes = readStringArray(graph?.reason_codes);
const clarificationGaps = readStringArray(graph?.clarification_gaps);
const explicitOrganizationScope = toNonEmptyString(turnMeaning?.explicit_organization_scope);
return ( return (
businessFactFamily === "value_flow" && businessFactFamily === "value_flow" &&
subjectCandidates.length === 0 && subjectCandidates.length === 0 &&
reasonCodes.some((reason) => toNonEmptyString(reason) === "data_need_graph_open_scope_total_without_subject") (reasonCodes.includes("data_need_graph_open_scope_total_without_subject") ||
(Boolean(explicitOrganizationScope) && clarificationGaps.includes("subject")))
); );
} }
@ -609,6 +613,9 @@ function hasRuntimeAdjustedExactReply(
if (hasMetadataDiscoveryPriority(input, entryPoint)) { if (hasMetadataDiscoveryPriority(input, entryPoint)) {
return false; return false;
} }
if (hasOpenScopeValueFlowDiscoveryPriority(input, entryPoint)) {
return false;
}
if (hasEvidenceLaneConflictWithDiscoveryTurnMeaning(input, entryPoint)) { if (hasEvidenceLaneConflictWithDiscoveryTurnMeaning(input, entryPoint)) {
return false; return false;
} }
@ -638,6 +645,9 @@ function hasRuntimeMatchedExactReply(
if (hasMetadataDiscoveryPriority(input, entryPoint)) { if (hasMetadataDiscoveryPriority(input, entryPoint)) {
return false; return false;
} }
if (hasOpenScopeValueFlowDiscoveryPriority(input, entryPoint)) {
return false;
}
if (hasEvidenceLaneConflictWithDiscoveryTurnMeaning(input, entryPoint)) { if (hasEvidenceLaneConflictWithDiscoveryTurnMeaning(input, entryPoint)) {
return false; return false;
} }
@ -664,6 +674,9 @@ function hasAlignedFactualAddressReply(
if (hasMetadataDiscoveryPriority(input, entryPoint)) { if (hasMetadataDiscoveryPriority(input, entryPoint)) {
return false; return false;
} }
if (hasOpenScopeValueFlowDiscoveryPriority(input, entryPoint)) {
return false;
}
if (hasSemanticConflictWithDiscoveryTurnMeaning(input, entryPoint)) { if (hasSemanticConflictWithDiscoveryTurnMeaning(input, entryPoint)) {
return false; return false;
} }
@ -729,6 +742,9 @@ function hasMatchedFactualAddressContinuationTarget(
if (hasMetadataDiscoveryPriority(input, entryPoint)) { if (hasMetadataDiscoveryPriority(input, entryPoint)) {
return false; return false;
} }
if (hasOpenScopeValueFlowDiscoveryPriority(input, entryPoint)) {
return false;
}
const detectedIntent = toNonEmptyString(input.addressRuntimeMeta?.detected_intent); const detectedIntent = toNonEmptyString(input.addressRuntimeMeta?.detected_intent);
const dialogContinuationContract = const dialogContinuationContract =
toRecordObject(input.addressRuntimeMeta?.dialogContinuationContract) ?? toRecordObject(input.addressRuntimeMeta?.dialogContinuationContract) ??
@ -781,6 +797,9 @@ function hasFullConfirmedFactualAddressReply(
if (hasMetadataDiscoveryPriority(input, entryPoint)) { if (hasMetadataDiscoveryPriority(input, entryPoint)) {
return false; return false;
} }
if (hasOpenScopeValueFlowDiscoveryPriority(input, entryPoint)) {
return false;
}
return hasFullConfirmedTruth(input); return hasFullConfirmedTruth(input);
} }

View File

@ -60,6 +60,25 @@ describe("assistant MCP discovery data need graph", () => {
); );
}); });
// Regression case: the raw utterance explicitly excludes "top" counterparties
// and a per-counterparty breakdown, so the graph builder must not read it as a
// ranking request. The assertions check that the result is a value_flow graph
// with no ranking need, and that it offers the plain aggregate decomposition
// candidate but not the ranked-axis one.
it("does not turn explicit no-top wording into a value-flow ranking", () => {
const result = buildAssistantMcpDiscoveryDataNeedGraph({
semanticDataNeed: "counterparty value-flow evidence",
rawUtterance:
"Определить общую сумму поступлений в ООО Альтернатива Плюс за 2020 год, исключая топ-контрагентов и детализацию по контрагентам",
turnMeaning: {
asked_domain_family: "counterparty_value",
asked_action_family: "counterparty_value_or_turnover",
explicit_organization_scope: "ООО Альтернатива Плюс",
explicit_date_scope: "2020"
}
});
expect(result.business_fact_family).toBe("value_flow");
expect(result.ranking_need).toBeNull();
expect(result.decomposition_candidates).toContain("aggregate_checked_amounts");
expect(result.decomposition_candidates).not.toContain("aggregate_ranked_axis_values");
});
it("marks metadata lane choice as a clarification-required graph", () => { it("marks metadata lane choice as a clarification-required graph", () => {
const result = buildAssistantMcpDiscoveryDataNeedGraph({ const result = buildAssistantMcpDiscoveryDataNeedGraph({
semanticDataNeed: "metadata lane clarification", semanticDataNeed: "metadata lane clarification",

View File

@ -100,7 +100,7 @@ describe("assistant MCP discovery debug attachment", () => {
); );
expect(debug.mcp_discovery_entry_status).toBe("bridge_executed"); expect(debug.mcp_discovery_entry_status).toBe("bridge_executed");
expect(debug.mcp_discovery_attempted).toBe(true); expect(debug.mcp_discovery_attempted).toBe(true);
expect(debug.mcp_discovery_hot_runtime_wired).toBe(false); expect(debug.mcp_discovery_hot_runtime_wired).toBe(true);
expect(debug.mcp_discovery_bridge_status).toBe("answer_draft_ready"); expect(debug.mcp_discovery_bridge_status).toBe("answer_draft_ready");
expect(debug.mcp_discovery_selected_chain_id).toBe("value_flow"); expect(debug.mcp_discovery_selected_chain_id).toBe("value_flow");
expect(debug.mcp_discovery_evidence_plan_status).toBe("ready_for_execution"); expect(debug.mcp_discovery_evidence_plan_status).toBe("ready_for_execution");

View File

@ -1,4 +1,48 @@
[ [
{
"generation_id": "gen-ag05221957-713bbd",
"created_at": "2026-05-22T19:57:37+00:00",
"mode": "saved_user_sessions",
"title": "AGENT | Hot value-flow discovery handoff",
"count": 3,
"domain": "autonomy_hot_value_flow_handoff",
"questions": [
"Сколько входящих денег за 2020 год по ООО Альтернатива Плюс без разреза по контрагентам?",
"Сколько исходящих денег за 2020 год по ООО Альтернатива Плюс без разреза по контрагентам?",
"А всего сколько денег пришло в ООО Альтернатива Плюс за 2020, без топов и без контрагентов?"
],
"generated_by": "codex_agent",
"saved_case_set_file": "assistant_autogen_saved_user_sessions_20260522195737_gen-ag05221957-713bbd.json",
"context": {
"llm_provider": null,
"model": null,
"assistant_prompt_version": null,
"decomposition_prompt_version": null,
"prompt_fingerprint": null,
"autogen_personality_id": null,
"autogen_personality_prompt": null,
"source_session_id": null,
"saved_session_file": "assistant_saved_session_20260522195737_gen-ag05221957-713bbd.json",
"saved_case_set_kind": "agent_semantic_scenario",
"agent_run": true,
"agent_focus": "Targeted AGENT replay for the current Autonomy Core slice: organization-scoped value-flow questions must be answered through guarded MCP discovery response with hot handoff, not through stale exact fallback or a counterparty-only route.",
"architecture_phase": "turnaround_11",
"source_spec_file": "X:\\1C\\NDC_1C\\docs\\orchestration\\agent_hot_value_flow_handoff_20260522.json",
"scenario_id": "agent_hot_value_flow_handoff_20260522",
"semantic_tags": [
"autonomy_core",
"colloquial_total",
"guarded_response",
"hot_handoff",
"incoming_total",
"outgoing_total",
"value_flow"
],
"validation_status": "accepted_live_replay",
"validated_run_dir": "artifacts\\domain_runs\\agent_hot_value_flow_handoff_live5",
"saved_after_validated_replay": true
}
},
{ {
"generation_id": "gen-ag05221319-4035f5", "generation_id": "gen-ag05221319-4035f5",
"created_at": "2026-05-22T13:19:31+00:00", "created_at": "2026-05-22T13:19:31+00:00",

View File

@ -0,0 +1,119 @@
{
"saved_at": "2026-05-22T19:57:37+00:00",
"generation_id": "gen-ag05221957-713bbd",
"mode": "saved_user_sessions",
"title": "AGENT | Hot value-flow discovery handoff",
"agent_run": true,
"questions": [
"Сколько входящих денег за 2020 год по ООО Альтернатива Плюс без разреза по контрагентам?",
"Сколько исходящих денег за 2020 год по ООО Альтернатива Плюс без разреза по контрагентам?",
"А всего сколько денег пришло в ООО Альтернатива Плюс за 2020, без топов и без контрагентов?"
],
"metadata": {
"assistant_prompt_version": null,
"decomposition_prompt_version": null,
"prompt_fingerprint": null,
"agent_focus": "Targeted AGENT replay for the current Autonomy Core slice: organization-scoped value-flow questions must be answered through guarded MCP discovery response with hot handoff, not through stale exact fallback or a counterparty-only route.",
"architecture_phase": "turnaround_11",
"source_spec_file": "X:\\1C\\NDC_1C\\docs\\orchestration\\agent_hot_value_flow_handoff_20260522.json",
"scenario_id": "agent_hot_value_flow_handoff_20260522",
"semantic_tags": [
"autonomy_core",
"colloquial_total",
"guarded_response",
"hot_handoff",
"incoming_total",
"outgoing_total",
"value_flow"
],
"validation_status": "accepted_live_replay",
"validated_run_dir": "artifacts\\domain_runs\\agent_hot_value_flow_handoff_live5",
"saved_after_validated_replay": true,
"save_gate": {
"schema_version": "agent_semantic_save_gate_v1",
"validation_status": "accepted_live_replay",
"validated_run_dir": "artifacts\\domain_runs\\agent_hot_value_flow_handoff_live5",
"final_status": "accepted",
"review_overall_status": "pass",
"business_overall_status": "pass",
"steps_total": 3,
"steps_passed": 3,
"steps_failed": 0,
"steps_with_business_failures": 0,
"steps_with_business_warnings": 0,
"acceptance_gate_passed": true,
"saved_after_validated_replay": true
}
},
"source_session_id": null,
"session": {
"session_id": null,
"mode": "agent_semantic_run",
"items": [
{
"message_id": "agent-user-001",
"role": "user",
"text": "Сколько входящих денег за 2020 год по ООО Альтернатива Плюс без разреза по контрагентам?",
"created_at": "2026-05-22T19:57:37+00:00",
"reply_type": null,
"trace_id": null,
"debug": null
},
{
"message_id": "agent-user-002",
"role": "user",
"text": "Сколько исходящих денег за 2020 год по ООО Альтернатива Плюс без разреза по контрагентам?",
"created_at": "2026-05-22T19:57:37+00:00",
"reply_type": null,
"trace_id": null,
"debug": null
},
{
"message_id": "agent-user-003",
"role": "user",
"text": "А всего сколько денег пришло в ООО Альтернатива Плюс за 2020, без топов и без контрагентов?",
"created_at": "2026-05-22T19:57:37+00:00",
"reply_type": null,
"trace_id": null,
"debug": null
}
],
"agent_run": true,
"metadata": {
"assistant_prompt_version": null,
"decomposition_prompt_version": null,
"prompt_fingerprint": null,
"agent_focus": "Targeted AGENT replay for the current Autonomy Core slice: organization-scoped value-flow questions must be answered through guarded MCP discovery response with hot handoff, not through stale exact fallback or a counterparty-only route.",
"architecture_phase": "turnaround_11",
"source_spec_file": "X:\\1C\\NDC_1C\\docs\\orchestration\\agent_hot_value_flow_handoff_20260522.json",
"scenario_id": "agent_hot_value_flow_handoff_20260522",
"semantic_tags": [
"autonomy_core",
"colloquial_total",
"guarded_response",
"hot_handoff",
"incoming_total",
"outgoing_total",
"value_flow"
],
"validation_status": "accepted_live_replay",
"validated_run_dir": "artifacts\\domain_runs\\agent_hot_value_flow_handoff_live5",
"saved_after_validated_replay": true,
"save_gate": {
"schema_version": "agent_semantic_save_gate_v1",
"validation_status": "accepted_live_replay",
"validated_run_dir": "artifacts\\domain_runs\\agent_hot_value_flow_handoff_live5",
"final_status": "accepted",
"review_overall_status": "pass",
"business_overall_status": "pass",
"steps_total": 3,
"steps_passed": 3,
"steps_failed": 0,
"steps_with_business_failures": 0,
"steps_with_business_warnings": 0,
"acceptance_gate_passed": true,
"saved_after_validated_replay": true
}
}
}
}

View File

@ -0,0 +1,34 @@
{
"suite_id": "assistant_saved_session_gen-ag05221957-713bbd",
"suite_version": "0.1.0",
"schema_version": "assistant_saved_session_suite_v0_1",
"generated_at": "2026-05-22T19:57:37+00:00",
"generation_id": "gen-ag05221957-713bbd",
"mode": "saved_user_sessions",
"title": "AGENT | Hot value-flow discovery handoff",
"domain": "autonomy_hot_value_flow_handoff",
"scenario_count": 1,
"case_ids": [
"SAVED-001"
],
"cases": [
{
"case_id": "SAVED-001",
"scenario_tag": "agent_saved_user_sessions",
"title": "AGENT | Hot value-flow discovery handoff",
"question_type": "followup",
"broadness_level": "medium",
"turns": [
{
"user_message": "Сколько входящих денег за 2020 год по ООО Альтернатива Плюс без разреза по контрагентам?"
},
{
"user_message": "Сколько исходящих денег за 2020 год по ООО Альтернатива Плюс без разреза по контрагентам?"
},
{
"user_message": "А всего сколько денег пришло в ООО Альтернатива Плюс за 2020, без топов и без контрагентов?"
}
]
}
]
}

View File

@ -2343,12 +2343,25 @@ def build_scenario_step_state(
), ),
"mcp_discovery_route_candidate_next_action": debug.get("mcp_discovery_route_candidate_next_action"), "mcp_discovery_route_candidate_next_action": debug.get("mcp_discovery_route_candidate_next_action"),
"mcp_discovery_response_applied": debug.get("mcp_discovery_response_applied"), "mcp_discovery_response_applied": debug.get("mcp_discovery_response_applied"),
"mcp_discovery_hot_runtime_wired": debug.get("mcp_discovery_hot_runtime_wired"),
"mcp_discovery_selected_chain_id": debug.get("mcp_discovery_selected_chain_id"), "mcp_discovery_selected_chain_id": debug.get("mcp_discovery_selected_chain_id"),
"mcp_discovery_execution_handoff_status": debug.get("mcp_discovery_execution_handoff_status"),
"mcp_discovery_execution_handoff_allowed_hot_chain": debug.get(
"mcp_discovery_execution_handoff_allowed_hot_chain"
),
"mcp_discovery_execution_handoff_can_use_guarded_response": debug.get(
"mcp_discovery_execution_handoff_can_use_guarded_response"
),
"mcp_discovery_response_candidate_status": ( "mcp_discovery_response_candidate_status": (
debug.get("mcp_discovery_response_candidate_v1", {}).get("candidate_status") debug.get("mcp_discovery_response_candidate_v1", {}).get("candidate_status")
if isinstance(debug.get("mcp_discovery_response_candidate_v1"), dict) if isinstance(debug.get("mcp_discovery_response_candidate_v1"), dict)
else None else None
), ),
"mcp_discovery_response_candidate_hot_runtime_wired": (
debug.get("mcp_discovery_response_candidate_v1", {}).get("hot_runtime_wired")
if isinstance(debug.get("mcp_discovery_response_candidate_v1"), dict)
else None
),
"mcp_discovery_response_reply_type": ( "mcp_discovery_response_reply_type": (
debug.get("mcp_discovery_response_candidate_v1", {}).get("reply_type") debug.get("mcp_discovery_response_candidate_v1", {}).get("reply_type")
if isinstance(debug.get("mcp_discovery_response_candidate_v1"), dict) if isinstance(debug.get("mcp_discovery_response_candidate_v1"), dict)

View File

@ -28,6 +28,13 @@ TECHNICAL_QUESTION_FIELDS = (
"expected_catalog_alignment_status", "expected_catalog_alignment_status",
"expected_catalog_chain_top_match", "expected_catalog_chain_top_match",
"expected_catalog_selected_matches_top", "expected_catalog_selected_matches_top",
"expected_mcp_discovery_response_applied",
"expected_mcp_discovery_selected_chain_id",
"expected_mcp_discovery_response_candidate_status",
"expected_mcp_discovery_candidate_hot_runtime_wired",
"expected_mcp_discovery_hot_runtime_wired",
"expected_mcp_discovery_execution_handoff_status",
"expected_mcp_discovery_execution_handoff_can_use_guarded_response",
"expected_route_candidate_status", "expected_route_candidate_status",
"expected_route_candidate_executable_now", "expected_route_candidate_executable_now",
"expected_route_candidate_missing_axes", "expected_route_candidate_missing_axes",
@ -103,6 +110,27 @@ def normalize_step_spec(index: int, raw_step: Any) -> dict[str, Any]:
str(step.get("expected_catalog_chain_top_match") or "").strip() or None str(step.get("expected_catalog_chain_top_match") or "").strip() or None
) )
normalized_step["expected_catalog_selected_matches_top"] = step.get("expected_catalog_selected_matches_top") normalized_step["expected_catalog_selected_matches_top"] = step.get("expected_catalog_selected_matches_top")
normalized_step["expected_mcp_discovery_response_applied"] = step.get(
"expected_mcp_discovery_response_applied"
)
normalized_step["expected_mcp_discovery_selected_chain_id"] = (
str(step.get("expected_mcp_discovery_selected_chain_id") or "").strip() or None
)
normalized_step["expected_mcp_discovery_response_candidate_status"] = (
str(step.get("expected_mcp_discovery_response_candidate_status") or "").strip() or None
)
normalized_step["expected_mcp_discovery_candidate_hot_runtime_wired"] = step.get(
"expected_mcp_discovery_candidate_hot_runtime_wired"
)
normalized_step["expected_mcp_discovery_hot_runtime_wired"] = step.get(
"expected_mcp_discovery_hot_runtime_wired"
)
normalized_step["expected_mcp_discovery_execution_handoff_status"] = (
str(step.get("expected_mcp_discovery_execution_handoff_status") or "").strip() or None
)
normalized_step["expected_mcp_discovery_execution_handoff_can_use_guarded_response"] = step.get(
"expected_mcp_discovery_execution_handoff_can_use_guarded_response"
)
normalized_step["expected_route_candidate_status"] = ( normalized_step["expected_route_candidate_status"] = (
str(step.get("expected_route_candidate_status") or "").strip() or None str(step.get("expected_route_candidate_status") or "").strip() or None
) )
@ -486,6 +514,13 @@ def evaluate_truth_step(
capability_id = str(step_state.get("capability_id") or "").strip() capability_id = str(step_state.get("capability_id") or "").strip()
catalog_alignment_status = str(step_state.get("mcp_discovery_catalog_chain_alignment_status") or "").strip() catalog_alignment_status = str(step_state.get("mcp_discovery_catalog_chain_alignment_status") or "").strip()
catalog_chain_top_match = str(step_state.get("mcp_discovery_catalog_chain_top_match") or "").strip() catalog_chain_top_match = str(step_state.get("mcp_discovery_catalog_chain_top_match") or "").strip()
mcp_discovery_selected_chain_id = str(step_state.get("mcp_discovery_selected_chain_id") or "").strip()
mcp_discovery_response_candidate_status = str(
step_state.get("mcp_discovery_response_candidate_status") or ""
).strip()
mcp_discovery_execution_handoff_status = str(
step_state.get("mcp_discovery_execution_handoff_status") or ""
).strip()
route_candidate_status = str(step_state.get("mcp_discovery_route_candidate_status") or "").strip() route_candidate_status = str(step_state.get("mcp_discovery_route_candidate_status") or "").strip()
limited_reason_category = str(step_state.get("limited_reason_category") or "").strip() limited_reason_category = str(step_state.get("limited_reason_category") or "").strip()
extracted_filters = ( extracted_filters = (
@ -569,6 +604,156 @@ def evaluate_truth_step(
expected=expected_catalog_selected_matches_top, expected=expected_catalog_selected_matches_top,
) )
expected_mcp_discovery_response_applied = normalize_optional_bool(
resolve_nested_placeholders(
step.get("expected_mcp_discovery_response_applied"),
step_results,
bindings,
runtime_bindings,
)
)
if expected_mcp_discovery_response_applied is not None:
actual_mcp_discovery_response_applied = step_state.get("mcp_discovery_response_applied") is True
if actual_mcp_discovery_response_applied != expected_mcp_discovery_response_applied:
append_finding(
findings,
step,
"wrong_mcp_discovery_response_applied",
"MCP discovery response replacement flag does not match the expected hot handoff behavior.",
actual=actual_mcp_discovery_response_applied,
expected=expected_mcp_discovery_response_applied,
)
expected_mcp_discovery_selected_chain_id = str(
resolve_nested_placeholders(
step.get("expected_mcp_discovery_selected_chain_id"),
step_results,
bindings,
runtime_bindings,
)
or ""
).strip()
if (
expected_mcp_discovery_selected_chain_id
and mcp_discovery_selected_chain_id != expected_mcp_discovery_selected_chain_id
):
append_finding(
findings,
step,
"wrong_mcp_discovery_selected_chain_id",
"MCP discovery selected chain does not match the expected autonomy chain for this step.",
actual=mcp_discovery_selected_chain_id or None,
expected=expected_mcp_discovery_selected_chain_id,
)
expected_mcp_discovery_response_candidate_status = str(
resolve_nested_placeholders(
step.get("expected_mcp_discovery_response_candidate_status"),
step_results,
bindings,
runtime_bindings,
)
or ""
).strip()
if (
expected_mcp_discovery_response_candidate_status
and mcp_discovery_response_candidate_status != expected_mcp_discovery_response_candidate_status
):
append_finding(
findings,
step,
"wrong_mcp_discovery_response_candidate_status",
"MCP discovery response candidate status does not match the expected guarded response readiness.",
actual=mcp_discovery_response_candidate_status or None,
expected=expected_mcp_discovery_response_candidate_status,
)
expected_mcp_discovery_candidate_hot_runtime_wired = normalize_optional_bool(
resolve_nested_placeholders(
step.get("expected_mcp_discovery_candidate_hot_runtime_wired"),
step_results,
bindings,
runtime_bindings,
)
)
if expected_mcp_discovery_candidate_hot_runtime_wired is not None:
actual_candidate_hot_runtime_wired = (
step_state.get("mcp_discovery_response_candidate_hot_runtime_wired") is True
)
if actual_candidate_hot_runtime_wired != expected_mcp_discovery_candidate_hot_runtime_wired:
append_finding(
findings,
step,
"wrong_mcp_discovery_candidate_hot_runtime_wired",
"MCP discovery response candidate hot-runtime flag does not match the expected guarded handoff.",
actual=actual_candidate_hot_runtime_wired,
expected=expected_mcp_discovery_candidate_hot_runtime_wired,
)
expected_mcp_discovery_hot_runtime_wired = normalize_optional_bool(
resolve_nested_placeholders(
step.get("expected_mcp_discovery_hot_runtime_wired"),
step_results,
bindings,
runtime_bindings,
)
)
if expected_mcp_discovery_hot_runtime_wired is not None:
actual_hot_runtime_wired = step_state.get("mcp_discovery_hot_runtime_wired") is True
if actual_hot_runtime_wired != expected_mcp_discovery_hot_runtime_wired:
append_finding(
findings,
step,
"wrong_mcp_discovery_hot_runtime_wired",
"Top-level MCP discovery hot-runtime flag does not match the expected guarded handoff.",
actual=actual_hot_runtime_wired,
expected=expected_mcp_discovery_hot_runtime_wired,
)
expected_mcp_discovery_execution_handoff_status = str(
resolve_nested_placeholders(
step.get("expected_mcp_discovery_execution_handoff_status"),
step_results,
bindings,
runtime_bindings,
)
or ""
).strip()
if (
expected_mcp_discovery_execution_handoff_status
and mcp_discovery_execution_handoff_status != expected_mcp_discovery_execution_handoff_status
):
append_finding(
findings,
step,
"wrong_mcp_discovery_execution_handoff_status",
"MCP discovery execution handoff status does not match the expected guarded response status.",
actual=mcp_discovery_execution_handoff_status or None,
expected=expected_mcp_discovery_execution_handoff_status,
)
expected_mcp_discovery_execution_handoff_can_use_guarded_response = normalize_optional_bool(
resolve_nested_placeholders(
step.get("expected_mcp_discovery_execution_handoff_can_use_guarded_response"),
step_results,
bindings,
runtime_bindings,
)
)
if expected_mcp_discovery_execution_handoff_can_use_guarded_response is not None:
actual_can_use_guarded_response = (
step_state.get("mcp_discovery_execution_handoff_can_use_guarded_response") is True
)
if actual_can_use_guarded_response != expected_mcp_discovery_execution_handoff_can_use_guarded_response:
append_finding(
findings,
step,
"wrong_mcp_discovery_execution_handoff_guarded_response",
"MCP discovery execution handoff guarded-response flag does not match the expected hot path.",
actual=actual_can_use_guarded_response,
expected=expected_mcp_discovery_execution_handoff_can_use_guarded_response,
)
expected_route_candidate_status = str( expected_route_candidate_status = str(
resolve_nested_placeholders( resolve_nested_placeholders(
step.get("expected_route_candidate_status"), step.get("expected_route_candidate_status"),