Закрепить целевой AGENT-прогон hot value-flow handoff
This commit is contained in:
parent
e7603a9d29
commit
50d938b8f1
|
|
@ -0,0 +1,157 @@
|
||||||
|
{
|
||||||
|
"schema_version": "domain_truth_harness_spec_v1",
|
||||||
|
"scenario_id": "agent_hot_value_flow_handoff_20260522",
|
||||||
|
"domain": "autonomy_hot_value_flow_handoff",
|
||||||
|
"title": "AGENT | Hot value-flow discovery handoff",
|
||||||
|
"description": "Targeted AGENT replay for the current Autonomy Core slice: organization-scoped value-flow questions must be answered through guarded MCP discovery response with hot handoff, not through stale exact fallback or a counterparty-only route.",
|
||||||
|
"bindings": {},
|
||||||
|
"steps": [
|
||||||
|
{
|
||||||
|
"step_id": "step_01_incoming_total_hot_handoff",
|
||||||
|
"title": "Organization-scoped incoming total uses hot value-flow discovery candidate",
|
||||||
|
"question": "Сколько входящих денег за 2020 год по ООО Альтернатива Плюс без разреза по контрагентам?",
|
||||||
|
"allowed_reply_types": [
|
||||||
|
"partial_coverage",
|
||||||
|
"factual_with_explanation",
|
||||||
|
"factual"
|
||||||
|
],
|
||||||
|
"expected_mcp_discovery_response_applied": true,
|
||||||
|
"expected_mcp_discovery_selected_chain_id": "value_flow",
|
||||||
|
"expected_mcp_discovery_response_candidate_status": "ready_for_guarded_use",
|
||||||
|
"expected_mcp_discovery_candidate_hot_runtime_wired": true,
|
||||||
|
"expected_mcp_discovery_hot_runtime_wired": true,
|
||||||
|
"expected_mcp_discovery_execution_handoff_status": "ready_for_guarded_response",
|
||||||
|
"expected_mcp_discovery_execution_handoff_can_use_guarded_response": true,
|
||||||
|
"expected_catalog_alignment_status": "selected_matches_top",
|
||||||
|
"expected_catalog_chain_top_match": "value_flow",
|
||||||
|
"expected_catalog_selected_matches_top": true,
|
||||||
|
"expected_route_candidate_status": "ready_for_reviewed_execution",
|
||||||
|
"expected_route_candidate_executable_now": true,
|
||||||
|
"required_answer_patterns_all": [
|
||||||
|
"(?i)2020",
|
||||||
|
"(?i)входящ|получ|поступ",
|
||||||
|
"(?i)руб"
|
||||||
|
],
|
||||||
|
"required_answer_patterns_any": [
|
||||||
|
"(?i)Альтернатива",
|
||||||
|
"(?i)проверенн",
|
||||||
|
"(?i)1С"
|
||||||
|
],
|
||||||
|
"forbidden_answer_patterns": [
|
||||||
|
"(?i)уточните контрагента",
|
||||||
|
"(?i)по какому контрагенту",
|
||||||
|
"(?i)не найден контрагент",
|
||||||
|
"(?i)runtime_",
|
||||||
|
"(?i)planner_",
|
||||||
|
"(?i)query_movements",
|
||||||
|
"(?i)primitive"
|
||||||
|
],
|
||||||
|
"criticality": "critical",
|
||||||
|
"semantic_tags": [
|
||||||
|
"autonomy_core",
|
||||||
|
"value_flow",
|
||||||
|
"hot_handoff",
|
||||||
|
"guarded_response",
|
||||||
|
"incoming_total"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"step_id": "step_02_outgoing_total_hot_handoff",
|
||||||
|
"title": "Organization-scoped outgoing total uses hot value-flow discovery candidate",
|
||||||
|
"question": "Сколько исходящих денег за 2020 год по ООО Альтернатива Плюс без разреза по контрагентам?",
|
||||||
|
"allowed_reply_types": [
|
||||||
|
"partial_coverage",
|
||||||
|
"factual_with_explanation",
|
||||||
|
"factual"
|
||||||
|
],
|
||||||
|
"expected_mcp_discovery_response_applied": true,
|
||||||
|
"expected_mcp_discovery_selected_chain_id": "value_flow",
|
||||||
|
"expected_mcp_discovery_response_candidate_status": "ready_for_guarded_use",
|
||||||
|
"expected_mcp_discovery_candidate_hot_runtime_wired": true,
|
||||||
|
"expected_mcp_discovery_hot_runtime_wired": true,
|
||||||
|
"expected_mcp_discovery_execution_handoff_status": "ready_for_guarded_response",
|
||||||
|
"expected_mcp_discovery_execution_handoff_can_use_guarded_response": true,
|
||||||
|
"expected_catalog_alignment_status": "selected_matches_top",
|
||||||
|
"expected_catalog_chain_top_match": "value_flow",
|
||||||
|
"expected_catalog_selected_matches_top": true,
|
||||||
|
"expected_route_candidate_status": "ready_for_reviewed_execution",
|
||||||
|
"expected_route_candidate_executable_now": true,
|
||||||
|
"required_answer_patterns_all": [
|
||||||
|
"(?i)2020",
|
||||||
|
"(?i)исходящ|списан|заплат",
|
||||||
|
"(?i)руб"
|
||||||
|
],
|
||||||
|
"required_answer_patterns_any": [
|
||||||
|
"(?i)Альтернатива",
|
||||||
|
"(?i)проверенн",
|
||||||
|
"(?i)1С"
|
||||||
|
],
|
||||||
|
"forbidden_answer_patterns": [
|
||||||
|
"(?i)уточните контрагента",
|
||||||
|
"(?i)по какому контрагенту",
|
||||||
|
"(?i)не найден контрагент",
|
||||||
|
"(?i)runtime_",
|
||||||
|
"(?i)planner_",
|
||||||
|
"(?i)query_movements",
|
||||||
|
"(?i)primitive"
|
||||||
|
],
|
||||||
|
"criticality": "critical",
|
||||||
|
"semantic_tags": [
|
||||||
|
"autonomy_core",
|
||||||
|
"value_flow",
|
||||||
|
"hot_handoff",
|
||||||
|
"guarded_response",
|
||||||
|
"outgoing_total"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"step_id": "step_03_colloquial_money_total_hot_handoff",
|
||||||
|
"title": "Colloquial company money wording still uses hot value-flow discovery candidate",
|
||||||
|
"question": "А всего сколько денег пришло в ООО Альтернатива Плюс за 2020, без топов и без контрагентов?",
|
||||||
|
"allowed_reply_types": [
|
||||||
|
"partial_coverage",
|
||||||
|
"factual_with_explanation",
|
||||||
|
"factual"
|
||||||
|
],
|
||||||
|
"expected_mcp_discovery_response_applied": true,
|
||||||
|
"expected_mcp_discovery_selected_chain_id": "value_flow",
|
||||||
|
"expected_mcp_discovery_response_candidate_status": "ready_for_guarded_use",
|
||||||
|
"expected_mcp_discovery_candidate_hot_runtime_wired": true,
|
||||||
|
"expected_mcp_discovery_hot_runtime_wired": true,
|
||||||
|
"expected_mcp_discovery_execution_handoff_status": "ready_for_guarded_response",
|
||||||
|
"expected_mcp_discovery_execution_handoff_can_use_guarded_response": true,
|
||||||
|
"expected_catalog_alignment_status": "selected_matches_top",
|
||||||
|
"expected_catalog_chain_top_match": "value_flow",
|
||||||
|
"expected_catalog_selected_matches_top": true,
|
||||||
|
"expected_route_candidate_status": "ready_for_reviewed_execution",
|
||||||
|
"expected_route_candidate_executable_now": true,
|
||||||
|
"required_answer_patterns_all": [
|
||||||
|
"(?i)2020",
|
||||||
|
"(?i)пришл|получ|поступ|входящ",
|
||||||
|
"(?i)руб"
|
||||||
|
],
|
||||||
|
"required_answer_patterns_any": [
|
||||||
|
"(?i)Альтернатива",
|
||||||
|
"(?i)проверенн",
|
||||||
|
"(?i)1С"
|
||||||
|
],
|
||||||
|
"forbidden_answer_patterns": [
|
||||||
|
"(?i)уточните контрагента",
|
||||||
|
"(?i)по какому контрагенту",
|
||||||
|
"(?i)не найден контрагент",
|
||||||
|
"(?i)runtime_",
|
||||||
|
"(?i)planner_",
|
||||||
|
"(?i)query_movements",
|
||||||
|
"(?i)primitive"
|
||||||
|
],
|
||||||
|
"criticality": "critical",
|
||||||
|
"semantic_tags": [
|
||||||
|
"autonomy_core",
|
||||||
|
"value_flow",
|
||||||
|
"hot_handoff",
|
||||||
|
"guarded_response",
|
||||||
|
"colloquial_total"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
@ -193,6 +193,12 @@ function rankingNeedFromRawUtterance(value) {
|
||||||
if (!text) {
|
if (!text) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
if (/\u0438\u0441\u043a\u043b\u044e\u0447[\p{L}\p{N}_]*\s+\u0442\u043e\u043f/iu.test(text)) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
if (/(?:\u0431\u0435\u0437\s+\u0442\u043e\u043f(?:\u043e\u0432|\u0430)?\b|\u043d\u0435\s+\u0442\u043e\u043f\b|\u0438\u0441\u043a\u043b\u044e\u0447\w*\s+\u0442\u043e\u043f|\u0431\u0435\u0437\s+\u0440\u0435\u0439\u0442\u0438\u043d\u0433\u0430\b|без\s+топ(?:ов|а)?\b|не\s+топ\b|исключ\S*\s+топ|без\s+рейтинга\b)/iu.test(text)) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
if (/(?:\u0442\u043e\u043f[-\s]?\d*|\u0441\u0430\u043c(?:\u044b\u0439|\u0430\u044f|\u043e\u0435|\u044b\u0435)|\u0431\u043e\u043b\u044c\u0448\u0435\s+\u0432\u0441\u0435\u0433\u043e|\u043d\u0430\u0438\u0431\u043e\u043b[\u0435\u0451]\u0435|\u043c\u0430\u043a\u0441\u0438\u043c\u0430\u043b\u044c\u043d|\u043c\u0430\u043a\u0441\u0438\u043c\u0443\u043c|\u043a\u0440\u0443\u043f\u043d\u0435\u0439\u0448|\u043b\u0443\u0447\u0448\u0438\u0439)/iu.test(text)) {
|
if (/(?:\u0442\u043e\u043f[-\s]?\d*|\u0441\u0430\u043c(?:\u044b\u0439|\u0430\u044f|\u043e\u0435|\u044b\u0435)|\u0431\u043e\u043b\u044c\u0448\u0435\s+\u0432\u0441\u0435\u0433\u043e|\u043d\u0430\u0438\u0431\u043e\u043b[\u0435\u0451]\u0435|\u043c\u0430\u043a\u0441\u0438\u043c\u0430\u043b\u044c\u043d|\u043c\u0430\u043a\u0441\u0438\u043c\u0443\u043c|\u043a\u0440\u0443\u043f\u043d\u0435\u0439\u0448|\u043b\u0443\u0447\u0448\u0438\u0439)/iu.test(text)) {
|
||||||
return "top_desc";
|
return "top_desc";
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -68,11 +68,14 @@ function buildAssistantMcpDiscoveryDebugAttachmentFields(input) {
|
||||||
const routeCandidate = isRouteCandidateContract(bridge?.route_candidate) ? bridge.route_candidate : null;
|
const routeCandidate = isRouteCandidateContract(bridge?.route_candidate) ? bridge.route_candidate : null;
|
||||||
const executionHandoff = isExecutionHandoffContract(bridge?.execution_handoff) ? bridge.execution_handoff : null;
|
const executionHandoff = isExecutionHandoffContract(bridge?.execution_handoff) ? bridge.execution_handoff : null;
|
||||||
const answerDraft = toRecordObject(bridge?.answer_draft);
|
const answerDraft = toRecordObject(bridge?.answer_draft);
|
||||||
|
const hotRuntimeWired = entryPoint?.hot_runtime_wired === true ||
|
||||||
|
bridge?.hot_runtime_wired === true ||
|
||||||
|
executionHandoff?.can_use_guarded_response === true;
|
||||||
return {
|
return {
|
||||||
assistant_mcp_discovery_entry_point_v1: entryPoint,
|
assistant_mcp_discovery_entry_point_v1: entryPoint,
|
||||||
mcp_discovery_entry_status: toNonEmptyString(entryPoint?.entry_status),
|
mcp_discovery_entry_status: toNonEmptyString(entryPoint?.entry_status),
|
||||||
mcp_discovery_attempted: Boolean(entryPoint?.discovery_attempted),
|
mcp_discovery_attempted: Boolean(entryPoint?.discovery_attempted),
|
||||||
mcp_discovery_hot_runtime_wired: false,
|
mcp_discovery_hot_runtime_wired: hotRuntimeWired,
|
||||||
mcp_discovery_bridge_status: toNonEmptyString(bridge?.bridge_status),
|
mcp_discovery_bridge_status: toNonEmptyString(bridge?.bridge_status),
|
||||||
mcp_discovery_selected_chain_id: toNonEmptyString(planner?.selected_chain_id),
|
mcp_discovery_selected_chain_id: toNonEmptyString(planner?.selected_chain_id),
|
||||||
mcp_discovery_evidence_plan_v1: evidencePlan,
|
mcp_discovery_evidence_plan_v1: evidencePlan,
|
||||||
|
|
|
||||||
|
|
@ -204,12 +204,16 @@ function hasMetadataDiscoveryPriority(input, entryPoint) {
|
||||||
}
|
}
|
||||||
function isOpenScopeValueFlowWithoutSubject(entryPoint) {
|
function isOpenScopeValueFlowWithoutSubject(entryPoint) {
|
||||||
const graph = readDiscoveryDataNeedGraph(entryPoint);
|
const graph = readDiscoveryDataNeedGraph(entryPoint);
|
||||||
|
const turnMeaning = readDiscoveryTurnMeaning(entryPoint);
|
||||||
const businessFactFamily = toNonEmptyString(graph?.business_fact_family);
|
const businessFactFamily = toNonEmptyString(graph?.business_fact_family);
|
||||||
const subjectCandidates = Array.isArray(graph?.subject_candidates) ? graph.subject_candidates : [];
|
const subjectCandidates = Array.isArray(graph?.subject_candidates) ? graph.subject_candidates : [];
|
||||||
const reasonCodes = Array.isArray(graph?.reason_codes) ? graph.reason_codes : [];
|
const reasonCodes = readStringArray(graph?.reason_codes);
|
||||||
|
const clarificationGaps = readStringArray(graph?.clarification_gaps);
|
||||||
|
const explicitOrganizationScope = toNonEmptyString(turnMeaning?.explicit_organization_scope);
|
||||||
return (businessFactFamily === "value_flow" &&
|
return (businessFactFamily === "value_flow" &&
|
||||||
subjectCandidates.length === 0 &&
|
subjectCandidates.length === 0 &&
|
||||||
reasonCodes.some((reason) => toNonEmptyString(reason) === "data_need_graph_open_scope_total_without_subject"));
|
(reasonCodes.includes("data_need_graph_open_scope_total_without_subject") ||
|
||||||
|
(Boolean(explicitOrganizationScope) && clarificationGaps.includes("subject"))));
|
||||||
}
|
}
|
||||||
function needsOpenScopeValueFlowOrganizationClarification(entryPoint) {
|
function needsOpenScopeValueFlowOrganizationClarification(entryPoint) {
|
||||||
const graph = readDiscoveryDataNeedGraph(entryPoint);
|
const graph = readDiscoveryDataNeedGraph(entryPoint);
|
||||||
|
|
@ -440,6 +444,9 @@ function hasRuntimeAdjustedExactReply(input, entryPoint) {
|
||||||
if (hasMetadataDiscoveryPriority(input, entryPoint)) {
|
if (hasMetadataDiscoveryPriority(input, entryPoint)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
if (hasOpenScopeValueFlowDiscoveryPriority(input, entryPoint)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
if (hasEvidenceLaneConflictWithDiscoveryTurnMeaning(input, entryPoint)) {
|
if (hasEvidenceLaneConflictWithDiscoveryTurnMeaning(input, entryPoint)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
@ -463,6 +470,9 @@ function hasRuntimeMatchedExactReply(input, entryPoint) {
|
||||||
if (hasMetadataDiscoveryPriority(input, entryPoint)) {
|
if (hasMetadataDiscoveryPriority(input, entryPoint)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
if (hasOpenScopeValueFlowDiscoveryPriority(input, entryPoint)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
if (hasEvidenceLaneConflictWithDiscoveryTurnMeaning(input, entryPoint)) {
|
if (hasEvidenceLaneConflictWithDiscoveryTurnMeaning(input, entryPoint)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
@ -483,6 +493,9 @@ function hasAlignedFactualAddressReply(input, entryPoint) {
|
||||||
if (hasMetadataDiscoveryPriority(input, entryPoint)) {
|
if (hasMetadataDiscoveryPriority(input, entryPoint)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
if (hasOpenScopeValueFlowDiscoveryPriority(input, entryPoint)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
if (hasSemanticConflictWithDiscoveryTurnMeaning(input, entryPoint)) {
|
if (hasSemanticConflictWithDiscoveryTurnMeaning(input, entryPoint)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
@ -538,6 +551,9 @@ function hasMatchedFactualAddressContinuationTarget(input, entryPoint) {
|
||||||
if (hasMetadataDiscoveryPriority(input, entryPoint)) {
|
if (hasMetadataDiscoveryPriority(input, entryPoint)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
if (hasOpenScopeValueFlowDiscoveryPriority(input, entryPoint)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
const detectedIntent = toNonEmptyString(input.addressRuntimeMeta?.detected_intent);
|
const detectedIntent = toNonEmptyString(input.addressRuntimeMeta?.detected_intent);
|
||||||
const dialogContinuationContract = toRecordObject(input.addressRuntimeMeta?.dialogContinuationContract) ??
|
const dialogContinuationContract = toRecordObject(input.addressRuntimeMeta?.dialogContinuationContract) ??
|
||||||
toRecordObject(input.addressRuntimeMeta?.dialog_continuation_contract_v2);
|
toRecordObject(input.addressRuntimeMeta?.dialog_continuation_contract_v2);
|
||||||
|
|
@ -578,6 +594,9 @@ function hasFullConfirmedFactualAddressReply(input, entryPoint) {
|
||||||
if (hasMetadataDiscoveryPriority(input, entryPoint)) {
|
if (hasMetadataDiscoveryPriority(input, entryPoint)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
if (hasOpenScopeValueFlowDiscoveryPriority(input, entryPoint)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
return hasFullConfirmedTruth(input);
|
return hasFullConfirmedTruth(input);
|
||||||
}
|
}
|
||||||
function applyAssistantMcpDiscoveryResponsePolicy(input) {
|
function applyAssistantMcpDiscoveryResponsePolicy(input) {
|
||||||
|
|
|
||||||
|
|
@ -293,6 +293,12 @@ function rankingNeedFromRawUtterance(value: string): string | null {
|
||||||
if (!text) {
|
if (!text) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
if (/\u0438\u0441\u043a\u043b\u044e\u0447[\p{L}\p{N}_]*\s+\u0442\u043e\u043f/iu.test(text)) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
if (/(?:\u0431\u0435\u0437\s+\u0442\u043e\u043f(?:\u043e\u0432|\u0430)?\b|\u043d\u0435\s+\u0442\u043e\u043f\b|\u0438\u0441\u043a\u043b\u044e\u0447\w*\s+\u0442\u043e\u043f|\u0431\u0435\u0437\s+\u0440\u0435\u0439\u0442\u0438\u043d\u0433\u0430\b|без\s+топ(?:ов|а)?\b|не\s+топ\b|исключ\S*\s+топ|без\s+рейтинга\b)/iu.test(text)) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
if (/(?:\u0442\u043e\u043f[-\s]?\d*|\u0441\u0430\u043c(?:\u044b\u0439|\u0430\u044f|\u043e\u0435|\u044b\u0435)|\u0431\u043e\u043b\u044c\u0448\u0435\s+\u0432\u0441\u0435\u0433\u043e|\u043d\u0430\u0438\u0431\u043e\u043b[\u0435\u0451]\u0435|\u043c\u0430\u043a\u0441\u0438\u043c\u0430\u043b\u044c\u043d|\u043c\u0430\u043a\u0441\u0438\u043c\u0443\u043c|\u043a\u0440\u0443\u043f\u043d\u0435\u0439\u0448|\u043b\u0443\u0447\u0448\u0438\u0439)/iu.test(text)) {
|
if (/(?:\u0442\u043e\u043f[-\s]?\d*|\u0441\u0430\u043c(?:\u044b\u0439|\u0430\u044f|\u043e\u0435|\u044b\u0435)|\u0431\u043e\u043b\u044c\u0448\u0435\s+\u0432\u0441\u0435\u0433\u043e|\u043d\u0430\u0438\u0431\u043e\u043b[\u0435\u0451]\u0435|\u043c\u0430\u043a\u0441\u0438\u043c\u0430\u043b\u044c\u043d|\u043c\u0430\u043a\u0441\u0438\u043c\u0443\u043c|\u043a\u0440\u0443\u043f\u043d\u0435\u0439\u0448|\u043b\u0443\u0447\u0448\u0438\u0439)/iu.test(text)) {
|
||||||
return "top_desc";
|
return "top_desc";
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -7,7 +7,7 @@ export interface AssistantMcpDiscoveryDebugAttachmentFields {
|
||||||
assistant_mcp_discovery_entry_point_v1: AssistantMcpDiscoveryRuntimeEntryPointContract | null;
|
assistant_mcp_discovery_entry_point_v1: AssistantMcpDiscoveryRuntimeEntryPointContract | null;
|
||||||
mcp_discovery_entry_status: string | null;
|
mcp_discovery_entry_status: string | null;
|
||||||
mcp_discovery_attempted: boolean;
|
mcp_discovery_attempted: boolean;
|
||||||
mcp_discovery_hot_runtime_wired: false;
|
mcp_discovery_hot_runtime_wired: boolean;
|
||||||
mcp_discovery_bridge_status: string | null;
|
mcp_discovery_bridge_status: string | null;
|
||||||
mcp_discovery_selected_chain_id: string | null;
|
mcp_discovery_selected_chain_id: string | null;
|
||||||
mcp_discovery_evidence_plan_v1: AssistantEvidencePlannerContract | null;
|
mcp_discovery_evidence_plan_v1: AssistantEvidencePlannerContract | null;
|
||||||
|
|
@ -130,12 +130,16 @@ export function buildAssistantMcpDiscoveryDebugAttachmentFields(
|
||||||
const routeCandidate = isRouteCandidateContract(bridge?.route_candidate) ? bridge.route_candidate : null;
|
const routeCandidate = isRouteCandidateContract(bridge?.route_candidate) ? bridge.route_candidate : null;
|
||||||
const executionHandoff = isExecutionHandoffContract(bridge?.execution_handoff) ? bridge.execution_handoff : null;
|
const executionHandoff = isExecutionHandoffContract(bridge?.execution_handoff) ? bridge.execution_handoff : null;
|
||||||
const answerDraft = toRecordObject(bridge?.answer_draft);
|
const answerDraft = toRecordObject(bridge?.answer_draft);
|
||||||
|
const hotRuntimeWired =
|
||||||
|
entryPoint?.hot_runtime_wired === true ||
|
||||||
|
bridge?.hot_runtime_wired === true ||
|
||||||
|
executionHandoff?.can_use_guarded_response === true;
|
||||||
|
|
||||||
return {
|
return {
|
||||||
assistant_mcp_discovery_entry_point_v1: entryPoint,
|
assistant_mcp_discovery_entry_point_v1: entryPoint,
|
||||||
mcp_discovery_entry_status: toNonEmptyString(entryPoint?.entry_status),
|
mcp_discovery_entry_status: toNonEmptyString(entryPoint?.entry_status),
|
||||||
mcp_discovery_attempted: Boolean(entryPoint?.discovery_attempted),
|
mcp_discovery_attempted: Boolean(entryPoint?.discovery_attempted),
|
||||||
mcp_discovery_hot_runtime_wired: false,
|
mcp_discovery_hot_runtime_wired: hotRuntimeWired,
|
||||||
mcp_discovery_bridge_status: toNonEmptyString(bridge?.bridge_status),
|
mcp_discovery_bridge_status: toNonEmptyString(bridge?.bridge_status),
|
||||||
mcp_discovery_selected_chain_id: toNonEmptyString(planner?.selected_chain_id),
|
mcp_discovery_selected_chain_id: toNonEmptyString(planner?.selected_chain_id),
|
||||||
mcp_discovery_evidence_plan_v1: evidencePlan,
|
mcp_discovery_evidence_plan_v1: evidencePlan,
|
||||||
|
|
|
||||||
|
|
@ -306,13 +306,17 @@ function isOpenScopeValueFlowWithoutSubject(
|
||||||
entryPoint: AssistantMcpDiscoveryRuntimeEntryPointContract | null
|
entryPoint: AssistantMcpDiscoveryRuntimeEntryPointContract | null
|
||||||
): boolean {
|
): boolean {
|
||||||
const graph = readDiscoveryDataNeedGraph(entryPoint);
|
const graph = readDiscoveryDataNeedGraph(entryPoint);
|
||||||
|
const turnMeaning = readDiscoveryTurnMeaning(entryPoint);
|
||||||
const businessFactFamily = toNonEmptyString(graph?.business_fact_family);
|
const businessFactFamily = toNonEmptyString(graph?.business_fact_family);
|
||||||
const subjectCandidates = Array.isArray(graph?.subject_candidates) ? graph.subject_candidates : [];
|
const subjectCandidates = Array.isArray(graph?.subject_candidates) ? graph.subject_candidates : [];
|
||||||
const reasonCodes = Array.isArray(graph?.reason_codes) ? graph.reason_codes : [];
|
const reasonCodes = readStringArray(graph?.reason_codes);
|
||||||
|
const clarificationGaps = readStringArray(graph?.clarification_gaps);
|
||||||
|
const explicitOrganizationScope = toNonEmptyString(turnMeaning?.explicit_organization_scope);
|
||||||
return (
|
return (
|
||||||
businessFactFamily === "value_flow" &&
|
businessFactFamily === "value_flow" &&
|
||||||
subjectCandidates.length === 0 &&
|
subjectCandidates.length === 0 &&
|
||||||
reasonCodes.some((reason) => toNonEmptyString(reason) === "data_need_graph_open_scope_total_without_subject")
|
(reasonCodes.includes("data_need_graph_open_scope_total_without_subject") ||
|
||||||
|
(Boolean(explicitOrganizationScope) && clarificationGaps.includes("subject")))
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -609,6 +613,9 @@ function hasRuntimeAdjustedExactReply(
|
||||||
if (hasMetadataDiscoveryPriority(input, entryPoint)) {
|
if (hasMetadataDiscoveryPriority(input, entryPoint)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
if (hasOpenScopeValueFlowDiscoveryPriority(input, entryPoint)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
if (hasEvidenceLaneConflictWithDiscoveryTurnMeaning(input, entryPoint)) {
|
if (hasEvidenceLaneConflictWithDiscoveryTurnMeaning(input, entryPoint)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
@ -638,6 +645,9 @@ function hasRuntimeMatchedExactReply(
|
||||||
if (hasMetadataDiscoveryPriority(input, entryPoint)) {
|
if (hasMetadataDiscoveryPriority(input, entryPoint)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
if (hasOpenScopeValueFlowDiscoveryPriority(input, entryPoint)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
if (hasEvidenceLaneConflictWithDiscoveryTurnMeaning(input, entryPoint)) {
|
if (hasEvidenceLaneConflictWithDiscoveryTurnMeaning(input, entryPoint)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
@ -664,6 +674,9 @@ function hasAlignedFactualAddressReply(
|
||||||
if (hasMetadataDiscoveryPriority(input, entryPoint)) {
|
if (hasMetadataDiscoveryPriority(input, entryPoint)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
if (hasOpenScopeValueFlowDiscoveryPriority(input, entryPoint)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
if (hasSemanticConflictWithDiscoveryTurnMeaning(input, entryPoint)) {
|
if (hasSemanticConflictWithDiscoveryTurnMeaning(input, entryPoint)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
@ -729,6 +742,9 @@ function hasMatchedFactualAddressContinuationTarget(
|
||||||
if (hasMetadataDiscoveryPriority(input, entryPoint)) {
|
if (hasMetadataDiscoveryPriority(input, entryPoint)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
if (hasOpenScopeValueFlowDiscoveryPriority(input, entryPoint)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
const detectedIntent = toNonEmptyString(input.addressRuntimeMeta?.detected_intent);
|
const detectedIntent = toNonEmptyString(input.addressRuntimeMeta?.detected_intent);
|
||||||
const dialogContinuationContract =
|
const dialogContinuationContract =
|
||||||
toRecordObject(input.addressRuntimeMeta?.dialogContinuationContract) ??
|
toRecordObject(input.addressRuntimeMeta?.dialogContinuationContract) ??
|
||||||
|
|
@ -781,6 +797,9 @@ function hasFullConfirmedFactualAddressReply(
|
||||||
if (hasMetadataDiscoveryPriority(input, entryPoint)) {
|
if (hasMetadataDiscoveryPriority(input, entryPoint)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
if (hasOpenScopeValueFlowDiscoveryPriority(input, entryPoint)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
return hasFullConfirmedTruth(input);
|
return hasFullConfirmedTruth(input);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -60,6 +60,25 @@ describe("assistant MCP discovery data need graph", () => {
|
||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("does not turn explicit no-top wording into a value-flow ranking", () => {
|
||||||
|
const result = buildAssistantMcpDiscoveryDataNeedGraph({
|
||||||
|
semanticDataNeed: "counterparty value-flow evidence",
|
||||||
|
rawUtterance:
|
||||||
|
"Определить общую сумму поступлений в ООО Альтернатива Плюс за 2020 год, исключая топ-контрагентов и детализацию по контрагентам",
|
||||||
|
turnMeaning: {
|
||||||
|
asked_domain_family: "counterparty_value",
|
||||||
|
asked_action_family: "counterparty_value_or_turnover",
|
||||||
|
explicit_organization_scope: "ООО Альтернатива Плюс",
|
||||||
|
explicit_date_scope: "2020"
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(result.business_fact_family).toBe("value_flow");
|
||||||
|
expect(result.ranking_need).toBeNull();
|
||||||
|
expect(result.decomposition_candidates).toContain("aggregate_checked_amounts");
|
||||||
|
expect(result.decomposition_candidates).not.toContain("aggregate_ranked_axis_values");
|
||||||
|
});
|
||||||
|
|
||||||
it("marks metadata lane choice as a clarification-required graph", () => {
|
it("marks metadata lane choice as a clarification-required graph", () => {
|
||||||
const result = buildAssistantMcpDiscoveryDataNeedGraph({
|
const result = buildAssistantMcpDiscoveryDataNeedGraph({
|
||||||
semanticDataNeed: "metadata lane clarification",
|
semanticDataNeed: "metadata lane clarification",
|
||||||
|
|
|
||||||
|
|
@ -100,7 +100,7 @@ describe("assistant MCP discovery debug attachment", () => {
|
||||||
);
|
);
|
||||||
expect(debug.mcp_discovery_entry_status).toBe("bridge_executed");
|
expect(debug.mcp_discovery_entry_status).toBe("bridge_executed");
|
||||||
expect(debug.mcp_discovery_attempted).toBe(true);
|
expect(debug.mcp_discovery_attempted).toBe(true);
|
||||||
expect(debug.mcp_discovery_hot_runtime_wired).toBe(false);
|
expect(debug.mcp_discovery_hot_runtime_wired).toBe(true);
|
||||||
expect(debug.mcp_discovery_bridge_status).toBe("answer_draft_ready");
|
expect(debug.mcp_discovery_bridge_status).toBe("answer_draft_ready");
|
||||||
expect(debug.mcp_discovery_selected_chain_id).toBe("value_flow");
|
expect(debug.mcp_discovery_selected_chain_id).toBe("value_flow");
|
||||||
expect(debug.mcp_discovery_evidence_plan_status).toBe("ready_for_execution");
|
expect(debug.mcp_discovery_evidence_plan_status).toBe("ready_for_execution");
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,48 @@
|
||||||
[
|
[
|
||||||
|
{
|
||||||
|
"generation_id": "gen-ag05221957-713bbd",
|
||||||
|
"created_at": "2026-05-22T19:57:37+00:00",
|
||||||
|
"mode": "saved_user_sessions",
|
||||||
|
"title": "AGENT | Hot value-flow discovery handoff",
|
||||||
|
"count": 3,
|
||||||
|
"domain": "autonomy_hot_value_flow_handoff",
|
||||||
|
"questions": [
|
||||||
|
"Сколько входящих денег за 2020 год по ООО Альтернатива Плюс без разреза по контрагентам?",
|
||||||
|
"Сколько исходящих денег за 2020 год по ООО Альтернатива Плюс без разреза по контрагентам?",
|
||||||
|
"А всего сколько денег пришло в ООО Альтернатива Плюс за 2020, без топов и без контрагентов?"
|
||||||
|
],
|
||||||
|
"generated_by": "codex_agent",
|
||||||
|
"saved_case_set_file": "assistant_autogen_saved_user_sessions_20260522195737_gen-ag05221957-713bbd.json",
|
||||||
|
"context": {
|
||||||
|
"llm_provider": null,
|
||||||
|
"model": null,
|
||||||
|
"assistant_prompt_version": null,
|
||||||
|
"decomposition_prompt_version": null,
|
||||||
|
"prompt_fingerprint": null,
|
||||||
|
"autogen_personality_id": null,
|
||||||
|
"autogen_personality_prompt": null,
|
||||||
|
"source_session_id": null,
|
||||||
|
"saved_session_file": "assistant_saved_session_20260522195737_gen-ag05221957-713bbd.json",
|
||||||
|
"saved_case_set_kind": "agent_semantic_scenario",
|
||||||
|
"agent_run": true,
|
||||||
|
"agent_focus": "Targeted AGENT replay for the current Autonomy Core slice: organization-scoped value-flow questions must be answered through guarded MCP discovery response with hot handoff, not through stale exact fallback or a counterparty-only route.",
|
||||||
|
"architecture_phase": "turnaround_11",
|
||||||
|
"source_spec_file": "X:\\1C\\NDC_1C\\docs\\orchestration\\agent_hot_value_flow_handoff_20260522.json",
|
||||||
|
"scenario_id": "agent_hot_value_flow_handoff_20260522",
|
||||||
|
"semantic_tags": [
|
||||||
|
"autonomy_core",
|
||||||
|
"colloquial_total",
|
||||||
|
"guarded_response",
|
||||||
|
"hot_handoff",
|
||||||
|
"incoming_total",
|
||||||
|
"outgoing_total",
|
||||||
|
"value_flow"
|
||||||
|
],
|
||||||
|
"validation_status": "accepted_live_replay",
|
||||||
|
"validated_run_dir": "artifacts\\domain_runs\\agent_hot_value_flow_handoff_live5",
|
||||||
|
"saved_after_validated_replay": true
|
||||||
|
}
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"generation_id": "gen-ag05221319-4035f5",
|
"generation_id": "gen-ag05221319-4035f5",
|
||||||
"created_at": "2026-05-22T13:19:31+00:00",
|
"created_at": "2026-05-22T13:19:31+00:00",
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,119 @@
|
||||||
|
{
|
||||||
|
"saved_at": "2026-05-22T19:57:37+00:00",
|
||||||
|
"generation_id": "gen-ag05221957-713bbd",
|
||||||
|
"mode": "saved_user_sessions",
|
||||||
|
"title": "AGENT | Hot value-flow discovery handoff",
|
||||||
|
"agent_run": true,
|
||||||
|
"questions": [
|
||||||
|
"Сколько входящих денег за 2020 год по ООО Альтернатива Плюс без разреза по контрагентам?",
|
||||||
|
"Сколько исходящих денег за 2020 год по ООО Альтернатива Плюс без разреза по контрагентам?",
|
||||||
|
"А всего сколько денег пришло в ООО Альтернатива Плюс за 2020, без топов и без контрагентов?"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"assistant_prompt_version": null,
|
||||||
|
"decomposition_prompt_version": null,
|
||||||
|
"prompt_fingerprint": null,
|
||||||
|
"agent_focus": "Targeted AGENT replay for the current Autonomy Core slice: organization-scoped value-flow questions must be answered through guarded MCP discovery response with hot handoff, not through stale exact fallback or a counterparty-only route.",
|
||||||
|
"architecture_phase": "turnaround_11",
|
||||||
|
"source_spec_file": "X:\\1C\\NDC_1C\\docs\\orchestration\\agent_hot_value_flow_handoff_20260522.json",
|
||||||
|
"scenario_id": "agent_hot_value_flow_handoff_20260522",
|
||||||
|
"semantic_tags": [
|
||||||
|
"autonomy_core",
|
||||||
|
"colloquial_total",
|
||||||
|
"guarded_response",
|
||||||
|
"hot_handoff",
|
||||||
|
"incoming_total",
|
||||||
|
"outgoing_total",
|
||||||
|
"value_flow"
|
||||||
|
],
|
||||||
|
"validation_status": "accepted_live_replay",
|
||||||
|
"validated_run_dir": "artifacts\\domain_runs\\agent_hot_value_flow_handoff_live5",
|
||||||
|
"saved_after_validated_replay": true,
|
||||||
|
"save_gate": {
|
||||||
|
"schema_version": "agent_semantic_save_gate_v1",
|
||||||
|
"validation_status": "accepted_live_replay",
|
||||||
|
"validated_run_dir": "artifacts\\domain_runs\\agent_hot_value_flow_handoff_live5",
|
||||||
|
"final_status": "accepted",
|
||||||
|
"review_overall_status": "pass",
|
||||||
|
"business_overall_status": "pass",
|
||||||
|
"steps_total": 3,
|
||||||
|
"steps_passed": 3,
|
||||||
|
"steps_failed": 0,
|
||||||
|
"steps_with_business_failures": 0,
|
||||||
|
"steps_with_business_warnings": 0,
|
||||||
|
"acceptance_gate_passed": true,
|
||||||
|
"saved_after_validated_replay": true
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"source_session_id": null,
|
||||||
|
"session": {
|
||||||
|
"session_id": null,
|
||||||
|
"mode": "agent_semantic_run",
|
||||||
|
"items": [
|
||||||
|
{
|
||||||
|
"message_id": "agent-user-001",
|
||||||
|
"role": "user",
|
||||||
|
"text": "Сколько входящих денег за 2020 год по ООО Альтернатива Плюс без разреза по контрагентам?",
|
||||||
|
"created_at": "2026-05-22T19:57:37+00:00",
|
||||||
|
"reply_type": null,
|
||||||
|
"trace_id": null,
|
||||||
|
"debug": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"message_id": "agent-user-002",
|
||||||
|
"role": "user",
|
||||||
|
"text": "Сколько исходящих денег за 2020 год по ООО Альтернатива Плюс без разреза по контрагентам?",
|
||||||
|
"created_at": "2026-05-22T19:57:37+00:00",
|
||||||
|
"reply_type": null,
|
||||||
|
"trace_id": null,
|
||||||
|
"debug": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"message_id": "agent-user-003",
|
||||||
|
"role": "user",
|
||||||
|
"text": "А всего сколько денег пришло в ООО Альтернатива Плюс за 2020, без топов и без контрагентов?",
|
||||||
|
"created_at": "2026-05-22T19:57:37+00:00",
|
||||||
|
"reply_type": null,
|
||||||
|
"trace_id": null,
|
||||||
|
"debug": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"agent_run": true,
|
||||||
|
"metadata": {
|
||||||
|
"assistant_prompt_version": null,
|
||||||
|
"decomposition_prompt_version": null,
|
||||||
|
"prompt_fingerprint": null,
|
||||||
|
"agent_focus": "Targeted AGENT replay for the current Autonomy Core slice: organization-scoped value-flow questions must be answered through guarded MCP discovery response with hot handoff, not through stale exact fallback or a counterparty-only route.",
|
||||||
|
"architecture_phase": "turnaround_11",
|
||||||
|
"source_spec_file": "X:\\1C\\NDC_1C\\docs\\orchestration\\agent_hot_value_flow_handoff_20260522.json",
|
||||||
|
"scenario_id": "agent_hot_value_flow_handoff_20260522",
|
||||||
|
"semantic_tags": [
|
||||||
|
"autonomy_core",
|
||||||
|
"colloquial_total",
|
||||||
|
"guarded_response",
|
||||||
|
"hot_handoff",
|
||||||
|
"incoming_total",
|
||||||
|
"outgoing_total",
|
||||||
|
"value_flow"
|
||||||
|
],
|
||||||
|
"validation_status": "accepted_live_replay",
|
||||||
|
"validated_run_dir": "artifacts\\domain_runs\\agent_hot_value_flow_handoff_live5",
|
||||||
|
"saved_after_validated_replay": true,
|
||||||
|
"save_gate": {
|
||||||
|
"schema_version": "agent_semantic_save_gate_v1",
|
||||||
|
"validation_status": "accepted_live_replay",
|
||||||
|
"validated_run_dir": "artifacts\\domain_runs\\agent_hot_value_flow_handoff_live5",
|
||||||
|
"final_status": "accepted",
|
||||||
|
"review_overall_status": "pass",
|
||||||
|
"business_overall_status": "pass",
|
||||||
|
"steps_total": 3,
|
||||||
|
"steps_passed": 3,
|
||||||
|
"steps_failed": 0,
|
||||||
|
"steps_with_business_failures": 0,
|
||||||
|
"steps_with_business_warnings": 0,
|
||||||
|
"acceptance_gate_passed": true,
|
||||||
|
"saved_after_validated_replay": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,34 @@
|
||||||
|
{
|
||||||
|
"suite_id": "assistant_saved_session_gen-ag05221957-713bbd",
|
||||||
|
"suite_version": "0.1.0",
|
||||||
|
"schema_version": "assistant_saved_session_suite_v0_1",
|
||||||
|
"generated_at": "2026-05-22T19:57:37+00:00",
|
||||||
|
"generation_id": "gen-ag05221957-713bbd",
|
||||||
|
"mode": "saved_user_sessions",
|
||||||
|
"title": "AGENT | Hot value-flow discovery handoff",
|
||||||
|
"domain": "autonomy_hot_value_flow_handoff",
|
||||||
|
"scenario_count": 1,
|
||||||
|
"case_ids": [
|
||||||
|
"SAVED-001"
|
||||||
|
],
|
||||||
|
"cases": [
|
||||||
|
{
|
||||||
|
"case_id": "SAVED-001",
|
||||||
|
"scenario_tag": "agent_saved_user_sessions",
|
||||||
|
"title": "AGENT | Hot value-flow discovery handoff",
|
||||||
|
"question_type": "followup",
|
||||||
|
"broadness_level": "medium",
|
||||||
|
"turns": [
|
||||||
|
{
|
||||||
|
"user_message": "Сколько входящих денег за 2020 год по ООО Альтернатива Плюс без разреза по контрагентам?"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"user_message": "Сколько исходящих денег за 2020 год по ООО Альтернатива Плюс без разреза по контрагентам?"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"user_message": "А всего сколько денег пришло в ООО Альтернатива Плюс за 2020, без топов и без контрагентов?"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
@ -2343,12 +2343,25 @@ def build_scenario_step_state(
|
||||||
),
|
),
|
||||||
"mcp_discovery_route_candidate_next_action": debug.get("mcp_discovery_route_candidate_next_action"),
|
"mcp_discovery_route_candidate_next_action": debug.get("mcp_discovery_route_candidate_next_action"),
|
||||||
"mcp_discovery_response_applied": debug.get("mcp_discovery_response_applied"),
|
"mcp_discovery_response_applied": debug.get("mcp_discovery_response_applied"),
|
||||||
|
"mcp_discovery_hot_runtime_wired": debug.get("mcp_discovery_hot_runtime_wired"),
|
||||||
"mcp_discovery_selected_chain_id": debug.get("mcp_discovery_selected_chain_id"),
|
"mcp_discovery_selected_chain_id": debug.get("mcp_discovery_selected_chain_id"),
|
||||||
|
"mcp_discovery_execution_handoff_status": debug.get("mcp_discovery_execution_handoff_status"),
|
||||||
|
"mcp_discovery_execution_handoff_allowed_hot_chain": debug.get(
|
||||||
|
"mcp_discovery_execution_handoff_allowed_hot_chain"
|
||||||
|
),
|
||||||
|
"mcp_discovery_execution_handoff_can_use_guarded_response": debug.get(
|
||||||
|
"mcp_discovery_execution_handoff_can_use_guarded_response"
|
||||||
|
),
|
||||||
"mcp_discovery_response_candidate_status": (
|
"mcp_discovery_response_candidate_status": (
|
||||||
debug.get("mcp_discovery_response_candidate_v1", {}).get("candidate_status")
|
debug.get("mcp_discovery_response_candidate_v1", {}).get("candidate_status")
|
||||||
if isinstance(debug.get("mcp_discovery_response_candidate_v1"), dict)
|
if isinstance(debug.get("mcp_discovery_response_candidate_v1"), dict)
|
||||||
else None
|
else None
|
||||||
),
|
),
|
||||||
|
"mcp_discovery_response_candidate_hot_runtime_wired": (
|
||||||
|
debug.get("mcp_discovery_response_candidate_v1", {}).get("hot_runtime_wired")
|
||||||
|
if isinstance(debug.get("mcp_discovery_response_candidate_v1"), dict)
|
||||||
|
else None
|
||||||
|
),
|
||||||
"mcp_discovery_response_reply_type": (
|
"mcp_discovery_response_reply_type": (
|
||||||
debug.get("mcp_discovery_response_candidate_v1", {}).get("reply_type")
|
debug.get("mcp_discovery_response_candidate_v1", {}).get("reply_type")
|
||||||
if isinstance(debug.get("mcp_discovery_response_candidate_v1"), dict)
|
if isinstance(debug.get("mcp_discovery_response_candidate_v1"), dict)
|
||||||
|
|
|
||||||
|
|
@ -28,6 +28,13 @@ TECHNICAL_QUESTION_FIELDS = (
|
||||||
"expected_catalog_alignment_status",
|
"expected_catalog_alignment_status",
|
||||||
"expected_catalog_chain_top_match",
|
"expected_catalog_chain_top_match",
|
||||||
"expected_catalog_selected_matches_top",
|
"expected_catalog_selected_matches_top",
|
||||||
|
"expected_mcp_discovery_response_applied",
|
||||||
|
"expected_mcp_discovery_selected_chain_id",
|
||||||
|
"expected_mcp_discovery_response_candidate_status",
|
||||||
|
"expected_mcp_discovery_candidate_hot_runtime_wired",
|
||||||
|
"expected_mcp_discovery_hot_runtime_wired",
|
||||||
|
"expected_mcp_discovery_execution_handoff_status",
|
||||||
|
"expected_mcp_discovery_execution_handoff_can_use_guarded_response",
|
||||||
"expected_route_candidate_status",
|
"expected_route_candidate_status",
|
||||||
"expected_route_candidate_executable_now",
|
"expected_route_candidate_executable_now",
|
||||||
"expected_route_candidate_missing_axes",
|
"expected_route_candidate_missing_axes",
|
||||||
|
|
@ -103,6 +110,27 @@ def normalize_step_spec(index: int, raw_step: Any) -> dict[str, Any]:
|
||||||
str(step.get("expected_catalog_chain_top_match") or "").strip() or None
|
str(step.get("expected_catalog_chain_top_match") or "").strip() or None
|
||||||
)
|
)
|
||||||
normalized_step["expected_catalog_selected_matches_top"] = step.get("expected_catalog_selected_matches_top")
|
normalized_step["expected_catalog_selected_matches_top"] = step.get("expected_catalog_selected_matches_top")
|
||||||
|
normalized_step["expected_mcp_discovery_response_applied"] = step.get(
|
||||||
|
"expected_mcp_discovery_response_applied"
|
||||||
|
)
|
||||||
|
normalized_step["expected_mcp_discovery_selected_chain_id"] = (
|
||||||
|
str(step.get("expected_mcp_discovery_selected_chain_id") or "").strip() or None
|
||||||
|
)
|
||||||
|
normalized_step["expected_mcp_discovery_response_candidate_status"] = (
|
||||||
|
str(step.get("expected_mcp_discovery_response_candidate_status") or "").strip() or None
|
||||||
|
)
|
||||||
|
normalized_step["expected_mcp_discovery_candidate_hot_runtime_wired"] = step.get(
|
||||||
|
"expected_mcp_discovery_candidate_hot_runtime_wired"
|
||||||
|
)
|
||||||
|
normalized_step["expected_mcp_discovery_hot_runtime_wired"] = step.get(
|
||||||
|
"expected_mcp_discovery_hot_runtime_wired"
|
||||||
|
)
|
||||||
|
normalized_step["expected_mcp_discovery_execution_handoff_status"] = (
|
||||||
|
str(step.get("expected_mcp_discovery_execution_handoff_status") or "").strip() or None
|
||||||
|
)
|
||||||
|
normalized_step["expected_mcp_discovery_execution_handoff_can_use_guarded_response"] = step.get(
|
||||||
|
"expected_mcp_discovery_execution_handoff_can_use_guarded_response"
|
||||||
|
)
|
||||||
normalized_step["expected_route_candidate_status"] = (
|
normalized_step["expected_route_candidate_status"] = (
|
||||||
str(step.get("expected_route_candidate_status") or "").strip() or None
|
str(step.get("expected_route_candidate_status") or "").strip() or None
|
||||||
)
|
)
|
||||||
|
|
@ -486,6 +514,13 @@ def evaluate_truth_step(
|
||||||
capability_id = str(step_state.get("capability_id") or "").strip()
|
capability_id = str(step_state.get("capability_id") or "").strip()
|
||||||
catalog_alignment_status = str(step_state.get("mcp_discovery_catalog_chain_alignment_status") or "").strip()
|
catalog_alignment_status = str(step_state.get("mcp_discovery_catalog_chain_alignment_status") or "").strip()
|
||||||
catalog_chain_top_match = str(step_state.get("mcp_discovery_catalog_chain_top_match") or "").strip()
|
catalog_chain_top_match = str(step_state.get("mcp_discovery_catalog_chain_top_match") or "").strip()
|
||||||
|
mcp_discovery_selected_chain_id = str(step_state.get("mcp_discovery_selected_chain_id") or "").strip()
|
||||||
|
mcp_discovery_response_candidate_status = str(
|
||||||
|
step_state.get("mcp_discovery_response_candidate_status") or ""
|
||||||
|
).strip()
|
||||||
|
mcp_discovery_execution_handoff_status = str(
|
||||||
|
step_state.get("mcp_discovery_execution_handoff_status") or ""
|
||||||
|
).strip()
|
||||||
route_candidate_status = str(step_state.get("mcp_discovery_route_candidate_status") or "").strip()
|
route_candidate_status = str(step_state.get("mcp_discovery_route_candidate_status") or "").strip()
|
||||||
limited_reason_category = str(step_state.get("limited_reason_category") or "").strip()
|
limited_reason_category = str(step_state.get("limited_reason_category") or "").strip()
|
||||||
extracted_filters = (
|
extracted_filters = (
|
||||||
|
|
@ -569,6 +604,156 @@ def evaluate_truth_step(
|
||||||
expected=expected_catalog_selected_matches_top,
|
expected=expected_catalog_selected_matches_top,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
expected_mcp_discovery_response_applied = normalize_optional_bool(
|
||||||
|
resolve_nested_placeholders(
|
||||||
|
step.get("expected_mcp_discovery_response_applied"),
|
||||||
|
step_results,
|
||||||
|
bindings,
|
||||||
|
runtime_bindings,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
if expected_mcp_discovery_response_applied is not None:
|
||||||
|
actual_mcp_discovery_response_applied = step_state.get("mcp_discovery_response_applied") is True
|
||||||
|
if actual_mcp_discovery_response_applied != expected_mcp_discovery_response_applied:
|
||||||
|
append_finding(
|
||||||
|
findings,
|
||||||
|
step,
|
||||||
|
"wrong_mcp_discovery_response_applied",
|
||||||
|
"MCP discovery response replacement flag does not match the expected hot handoff behavior.",
|
||||||
|
actual=actual_mcp_discovery_response_applied,
|
||||||
|
expected=expected_mcp_discovery_response_applied,
|
||||||
|
)
|
||||||
|
|
||||||
|
expected_mcp_discovery_selected_chain_id = str(
|
||||||
|
resolve_nested_placeholders(
|
||||||
|
step.get("expected_mcp_discovery_selected_chain_id"),
|
||||||
|
step_results,
|
||||||
|
bindings,
|
||||||
|
runtime_bindings,
|
||||||
|
)
|
||||||
|
or ""
|
||||||
|
).strip()
|
||||||
|
if (
|
||||||
|
expected_mcp_discovery_selected_chain_id
|
||||||
|
and mcp_discovery_selected_chain_id != expected_mcp_discovery_selected_chain_id
|
||||||
|
):
|
||||||
|
append_finding(
|
||||||
|
findings,
|
||||||
|
step,
|
||||||
|
"wrong_mcp_discovery_selected_chain_id",
|
||||||
|
"MCP discovery selected chain does not match the expected autonomy chain for this step.",
|
||||||
|
actual=mcp_discovery_selected_chain_id or None,
|
||||||
|
expected=expected_mcp_discovery_selected_chain_id,
|
||||||
|
)
|
||||||
|
|
||||||
|
expected_mcp_discovery_response_candidate_status = str(
|
||||||
|
resolve_nested_placeholders(
|
||||||
|
step.get("expected_mcp_discovery_response_candidate_status"),
|
||||||
|
step_results,
|
||||||
|
bindings,
|
||||||
|
runtime_bindings,
|
||||||
|
)
|
||||||
|
or ""
|
||||||
|
).strip()
|
||||||
|
if (
|
||||||
|
expected_mcp_discovery_response_candidate_status
|
||||||
|
and mcp_discovery_response_candidate_status != expected_mcp_discovery_response_candidate_status
|
||||||
|
):
|
||||||
|
append_finding(
|
||||||
|
findings,
|
||||||
|
step,
|
||||||
|
"wrong_mcp_discovery_response_candidate_status",
|
||||||
|
"MCP discovery response candidate status does not match the expected guarded response readiness.",
|
||||||
|
actual=mcp_discovery_response_candidate_status or None,
|
||||||
|
expected=expected_mcp_discovery_response_candidate_status,
|
||||||
|
)
|
||||||
|
|
||||||
|
expected_mcp_discovery_candidate_hot_runtime_wired = normalize_optional_bool(
|
||||||
|
resolve_nested_placeholders(
|
||||||
|
step.get("expected_mcp_discovery_candidate_hot_runtime_wired"),
|
||||||
|
step_results,
|
||||||
|
bindings,
|
||||||
|
runtime_bindings,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
if expected_mcp_discovery_candidate_hot_runtime_wired is not None:
|
||||||
|
actual_candidate_hot_runtime_wired = (
|
||||||
|
step_state.get("mcp_discovery_response_candidate_hot_runtime_wired") is True
|
||||||
|
)
|
||||||
|
if actual_candidate_hot_runtime_wired != expected_mcp_discovery_candidate_hot_runtime_wired:
|
||||||
|
append_finding(
|
||||||
|
findings,
|
||||||
|
step,
|
||||||
|
"wrong_mcp_discovery_candidate_hot_runtime_wired",
|
||||||
|
"MCP discovery response candidate hot-runtime flag does not match the expected guarded handoff.",
|
||||||
|
actual=actual_candidate_hot_runtime_wired,
|
||||||
|
expected=expected_mcp_discovery_candidate_hot_runtime_wired,
|
||||||
|
)
|
||||||
|
|
||||||
|
expected_mcp_discovery_hot_runtime_wired = normalize_optional_bool(
|
||||||
|
resolve_nested_placeholders(
|
||||||
|
step.get("expected_mcp_discovery_hot_runtime_wired"),
|
||||||
|
step_results,
|
||||||
|
bindings,
|
||||||
|
runtime_bindings,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
if expected_mcp_discovery_hot_runtime_wired is not None:
|
||||||
|
actual_hot_runtime_wired = step_state.get("mcp_discovery_hot_runtime_wired") is True
|
||||||
|
if actual_hot_runtime_wired != expected_mcp_discovery_hot_runtime_wired:
|
||||||
|
append_finding(
|
||||||
|
findings,
|
||||||
|
step,
|
||||||
|
"wrong_mcp_discovery_hot_runtime_wired",
|
||||||
|
"Top-level MCP discovery hot-runtime flag does not match the expected guarded handoff.",
|
||||||
|
actual=actual_hot_runtime_wired,
|
||||||
|
expected=expected_mcp_discovery_hot_runtime_wired,
|
||||||
|
)
|
||||||
|
|
||||||
|
expected_mcp_discovery_execution_handoff_status = str(
|
||||||
|
resolve_nested_placeholders(
|
||||||
|
step.get("expected_mcp_discovery_execution_handoff_status"),
|
||||||
|
step_results,
|
||||||
|
bindings,
|
||||||
|
runtime_bindings,
|
||||||
|
)
|
||||||
|
or ""
|
||||||
|
).strip()
|
||||||
|
if (
|
||||||
|
expected_mcp_discovery_execution_handoff_status
|
||||||
|
and mcp_discovery_execution_handoff_status != expected_mcp_discovery_execution_handoff_status
|
||||||
|
):
|
||||||
|
append_finding(
|
||||||
|
findings,
|
||||||
|
step,
|
||||||
|
"wrong_mcp_discovery_execution_handoff_status",
|
||||||
|
"MCP discovery execution handoff status does not match the expected guarded response status.",
|
||||||
|
actual=mcp_discovery_execution_handoff_status or None,
|
||||||
|
expected=expected_mcp_discovery_execution_handoff_status,
|
||||||
|
)
|
||||||
|
|
||||||
|
expected_mcp_discovery_execution_handoff_can_use_guarded_response = normalize_optional_bool(
|
||||||
|
resolve_nested_placeholders(
|
||||||
|
step.get("expected_mcp_discovery_execution_handoff_can_use_guarded_response"),
|
||||||
|
step_results,
|
||||||
|
bindings,
|
||||||
|
runtime_bindings,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
if expected_mcp_discovery_execution_handoff_can_use_guarded_response is not None:
|
||||||
|
actual_can_use_guarded_response = (
|
||||||
|
step_state.get("mcp_discovery_execution_handoff_can_use_guarded_response") is True
|
||||||
|
)
|
||||||
|
if actual_can_use_guarded_response != expected_mcp_discovery_execution_handoff_can_use_guarded_response:
|
||||||
|
append_finding(
|
||||||
|
findings,
|
||||||
|
step,
|
||||||
|
"wrong_mcp_discovery_execution_handoff_guarded_response",
|
||||||
|
"MCP discovery execution handoff guarded-response flag does not match the expected hot path.",
|
||||||
|
actual=actual_can_use_guarded_response,
|
||||||
|
expected=expected_mcp_discovery_execution_handoff_can_use_guarded_response,
|
||||||
|
)
|
||||||
|
|
||||||
expected_route_candidate_status = str(
|
expected_route_candidate_status = str(
|
||||||
resolve_nested_placeholders(
|
resolve_nested_placeholders(
|
||||||
step.get("expected_route_candidate_status"),
|
step.get("expected_route_candidate_status"),
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue