Закрепить целевой AGENT-прогон hot value-flow handoff
This commit is contained in:
parent
e7603a9d29
commit
50d938b8f1
|
|
@ -0,0 +1,157 @@
|
|||
{
|
||||
"schema_version": "domain_truth_harness_spec_v1",
|
||||
"scenario_id": "agent_hot_value_flow_handoff_20260522",
|
||||
"domain": "autonomy_hot_value_flow_handoff",
|
||||
"title": "AGENT | Hot value-flow discovery handoff",
|
||||
"description": "Targeted AGENT replay for the current Autonomy Core slice: organization-scoped value-flow questions must be answered through guarded MCP discovery response with hot handoff, not through stale exact fallback or a counterparty-only route.",
|
||||
"bindings": {},
|
||||
"steps": [
|
||||
{
|
||||
"step_id": "step_01_incoming_total_hot_handoff",
|
||||
"title": "Organization-scoped incoming total uses hot value-flow discovery candidate",
|
||||
"question": "Сколько входящих денег за 2020 год по ООО Альтернатива Плюс без разреза по контрагентам?",
|
||||
"allowed_reply_types": [
|
||||
"partial_coverage",
|
||||
"factual_with_explanation",
|
||||
"factual"
|
||||
],
|
||||
"expected_mcp_discovery_response_applied": true,
|
||||
"expected_mcp_discovery_selected_chain_id": "value_flow",
|
||||
"expected_mcp_discovery_response_candidate_status": "ready_for_guarded_use",
|
||||
"expected_mcp_discovery_candidate_hot_runtime_wired": true,
|
||||
"expected_mcp_discovery_hot_runtime_wired": true,
|
||||
"expected_mcp_discovery_execution_handoff_status": "ready_for_guarded_response",
|
||||
"expected_mcp_discovery_execution_handoff_can_use_guarded_response": true,
|
||||
"expected_catalog_alignment_status": "selected_matches_top",
|
||||
"expected_catalog_chain_top_match": "value_flow",
|
||||
"expected_catalog_selected_matches_top": true,
|
||||
"expected_route_candidate_status": "ready_for_reviewed_execution",
|
||||
"expected_route_candidate_executable_now": true,
|
||||
"required_answer_patterns_all": [
|
||||
"(?i)2020",
|
||||
"(?i)входящ|получ|поступ",
|
||||
"(?i)руб"
|
||||
],
|
||||
"required_answer_patterns_any": [
|
||||
"(?i)Альтернатива",
|
||||
"(?i)проверенн",
|
||||
"(?i)1С"
|
||||
],
|
||||
"forbidden_answer_patterns": [
|
||||
"(?i)уточните контрагента",
|
||||
"(?i)по какому контрагенту",
|
||||
"(?i)не найден контрагент",
|
||||
"(?i)runtime_",
|
||||
"(?i)planner_",
|
||||
"(?i)query_movements",
|
||||
"(?i)primitive"
|
||||
],
|
||||
"criticality": "critical",
|
||||
"semantic_tags": [
|
||||
"autonomy_core",
|
||||
"value_flow",
|
||||
"hot_handoff",
|
||||
"guarded_response",
|
||||
"incoming_total"
|
||||
]
|
||||
},
|
||||
{
|
||||
"step_id": "step_02_outgoing_total_hot_handoff",
|
||||
"title": "Organization-scoped outgoing total uses hot value-flow discovery candidate",
|
||||
"question": "Сколько исходящих денег за 2020 год по ООО Альтернатива Плюс без разреза по контрагентам?",
|
||||
"allowed_reply_types": [
|
||||
"partial_coverage",
|
||||
"factual_with_explanation",
|
||||
"factual"
|
||||
],
|
||||
"expected_mcp_discovery_response_applied": true,
|
||||
"expected_mcp_discovery_selected_chain_id": "value_flow",
|
||||
"expected_mcp_discovery_response_candidate_status": "ready_for_guarded_use",
|
||||
"expected_mcp_discovery_candidate_hot_runtime_wired": true,
|
||||
"expected_mcp_discovery_hot_runtime_wired": true,
|
||||
"expected_mcp_discovery_execution_handoff_status": "ready_for_guarded_response",
|
||||
"expected_mcp_discovery_execution_handoff_can_use_guarded_response": true,
|
||||
"expected_catalog_alignment_status": "selected_matches_top",
|
||||
"expected_catalog_chain_top_match": "value_flow",
|
||||
"expected_catalog_selected_matches_top": true,
|
||||
"expected_route_candidate_status": "ready_for_reviewed_execution",
|
||||
"expected_route_candidate_executable_now": true,
|
||||
"required_answer_patterns_all": [
|
||||
"(?i)2020",
|
||||
"(?i)исходящ|списан|заплат",
|
||||
"(?i)руб"
|
||||
],
|
||||
"required_answer_patterns_any": [
|
||||
"(?i)Альтернатива",
|
||||
"(?i)проверенн",
|
||||
"(?i)1С"
|
||||
],
|
||||
"forbidden_answer_patterns": [
|
||||
"(?i)уточните контрагента",
|
||||
"(?i)по какому контрагенту",
|
||||
"(?i)не найден контрагент",
|
||||
"(?i)runtime_",
|
||||
"(?i)planner_",
|
||||
"(?i)query_movements",
|
||||
"(?i)primitive"
|
||||
],
|
||||
"criticality": "critical",
|
||||
"semantic_tags": [
|
||||
"autonomy_core",
|
||||
"value_flow",
|
||||
"hot_handoff",
|
||||
"guarded_response",
|
||||
"outgoing_total"
|
||||
]
|
||||
},
|
||||
{
|
||||
"step_id": "step_03_colloquial_money_total_hot_handoff",
|
||||
"title": "Colloquial company money wording still uses hot value-flow discovery candidate",
|
||||
"question": "А всего сколько денег пришло в ООО Альтернатива Плюс за 2020, без топов и без контрагентов?",
|
||||
"allowed_reply_types": [
|
||||
"partial_coverage",
|
||||
"factual_with_explanation",
|
||||
"factual"
|
||||
],
|
||||
"expected_mcp_discovery_response_applied": true,
|
||||
"expected_mcp_discovery_selected_chain_id": "value_flow",
|
||||
"expected_mcp_discovery_response_candidate_status": "ready_for_guarded_use",
|
||||
"expected_mcp_discovery_candidate_hot_runtime_wired": true,
|
||||
"expected_mcp_discovery_hot_runtime_wired": true,
|
||||
"expected_mcp_discovery_execution_handoff_status": "ready_for_guarded_response",
|
||||
"expected_mcp_discovery_execution_handoff_can_use_guarded_response": true,
|
||||
"expected_catalog_alignment_status": "selected_matches_top",
|
||||
"expected_catalog_chain_top_match": "value_flow",
|
||||
"expected_catalog_selected_matches_top": true,
|
||||
"expected_route_candidate_status": "ready_for_reviewed_execution",
|
||||
"expected_route_candidate_executable_now": true,
|
||||
"required_answer_patterns_all": [
|
||||
"(?i)2020",
|
||||
"(?i)пришл|получ|поступ|входящ",
|
||||
"(?i)руб"
|
||||
],
|
||||
"required_answer_patterns_any": [
|
||||
"(?i)Альтернатива",
|
||||
"(?i)проверенн",
|
||||
"(?i)1С"
|
||||
],
|
||||
"forbidden_answer_patterns": [
|
||||
"(?i)уточните контрагента",
|
||||
"(?i)по какому контрагенту",
|
||||
"(?i)не найден контрагент",
|
||||
"(?i)runtime_",
|
||||
"(?i)planner_",
|
||||
"(?i)query_movements",
|
||||
"(?i)primitive"
|
||||
],
|
||||
"criticality": "critical",
|
||||
"semantic_tags": [
|
||||
"autonomy_core",
|
||||
"value_flow",
|
||||
"hot_handoff",
|
||||
"guarded_response",
|
||||
"colloquial_total"
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -193,6 +193,12 @@ function rankingNeedFromRawUtterance(value) {
|
|||
if (!text) {
|
||||
return null;
|
||||
}
|
||||
if (/\u0438\u0441\u043a\u043b\u044e\u0447[\p{L}\p{N}_]*\s+\u0442\u043e\u043f/iu.test(text)) {
|
||||
return null;
|
||||
}
|
||||
if (/(?:\u0431\u0435\u0437\s+\u0442\u043e\u043f(?:\u043e\u0432|\u0430)?\b|\u043d\u0435\s+\u0442\u043e\u043f\b|\u0438\u0441\u043a\u043b\u044e\u0447\w*\s+\u0442\u043e\u043f|\u0431\u0435\u0437\s+\u0440\u0435\u0439\u0442\u0438\u043d\u0433\u0430\b|без\s+топ(?:ов|а)?\b|не\s+топ\b|исключ\S*\s+топ|без\s+рейтинга\b)/iu.test(text)) {
|
||||
return null;
|
||||
}
|
||||
if (/(?:\u0442\u043e\u043f[-\s]?\d*|\u0441\u0430\u043c(?:\u044b\u0439|\u0430\u044f|\u043e\u0435|\u044b\u0435)|\u0431\u043e\u043b\u044c\u0448\u0435\s+\u0432\u0441\u0435\u0433\u043e|\u043d\u0430\u0438\u0431\u043e\u043b[\u0435\u0451]\u0435|\u043c\u0430\u043a\u0441\u0438\u043c\u0430\u043b\u044c\u043d|\u043c\u0430\u043a\u0441\u0438\u043c\u0443\u043c|\u043a\u0440\u0443\u043f\u043d\u0435\u0439\u0448|\u043b\u0443\u0447\u0448\u0438\u0439)/iu.test(text)) {
|
||||
return "top_desc";
|
||||
}
|
||||
|
|
|
|||
|
|
@ -68,11 +68,14 @@ function buildAssistantMcpDiscoveryDebugAttachmentFields(input) {
|
|||
const routeCandidate = isRouteCandidateContract(bridge?.route_candidate) ? bridge.route_candidate : null;
|
||||
const executionHandoff = isExecutionHandoffContract(bridge?.execution_handoff) ? bridge.execution_handoff : null;
|
||||
const answerDraft = toRecordObject(bridge?.answer_draft);
|
||||
const hotRuntimeWired = entryPoint?.hot_runtime_wired === true ||
|
||||
bridge?.hot_runtime_wired === true ||
|
||||
executionHandoff?.can_use_guarded_response === true;
|
||||
return {
|
||||
assistant_mcp_discovery_entry_point_v1: entryPoint,
|
||||
mcp_discovery_entry_status: toNonEmptyString(entryPoint?.entry_status),
|
||||
mcp_discovery_attempted: Boolean(entryPoint?.discovery_attempted),
|
||||
mcp_discovery_hot_runtime_wired: false,
|
||||
mcp_discovery_hot_runtime_wired: hotRuntimeWired,
|
||||
mcp_discovery_bridge_status: toNonEmptyString(bridge?.bridge_status),
|
||||
mcp_discovery_selected_chain_id: toNonEmptyString(planner?.selected_chain_id),
|
||||
mcp_discovery_evidence_plan_v1: evidencePlan,
|
||||
|
|
|
|||
|
|
@ -204,12 +204,16 @@ function hasMetadataDiscoveryPriority(input, entryPoint) {
|
|||
}
|
||||
function isOpenScopeValueFlowWithoutSubject(entryPoint) {
|
||||
const graph = readDiscoveryDataNeedGraph(entryPoint);
|
||||
const turnMeaning = readDiscoveryTurnMeaning(entryPoint);
|
||||
const businessFactFamily = toNonEmptyString(graph?.business_fact_family);
|
||||
const subjectCandidates = Array.isArray(graph?.subject_candidates) ? graph.subject_candidates : [];
|
||||
const reasonCodes = Array.isArray(graph?.reason_codes) ? graph.reason_codes : [];
|
||||
const reasonCodes = readStringArray(graph?.reason_codes);
|
||||
const clarificationGaps = readStringArray(graph?.clarification_gaps);
|
||||
const explicitOrganizationScope = toNonEmptyString(turnMeaning?.explicit_organization_scope);
|
||||
return (businessFactFamily === "value_flow" &&
|
||||
subjectCandidates.length === 0 &&
|
||||
reasonCodes.some((reason) => toNonEmptyString(reason) === "data_need_graph_open_scope_total_without_subject"));
|
||||
(reasonCodes.includes("data_need_graph_open_scope_total_without_subject") ||
|
||||
(Boolean(explicitOrganizationScope) && clarificationGaps.includes("subject"))));
|
||||
}
|
||||
function needsOpenScopeValueFlowOrganizationClarification(entryPoint) {
|
||||
const graph = readDiscoveryDataNeedGraph(entryPoint);
|
||||
|
|
@ -440,6 +444,9 @@ function hasRuntimeAdjustedExactReply(input, entryPoint) {
|
|||
if (hasMetadataDiscoveryPriority(input, entryPoint)) {
|
||||
return false;
|
||||
}
|
||||
if (hasOpenScopeValueFlowDiscoveryPriority(input, entryPoint)) {
|
||||
return false;
|
||||
}
|
||||
if (hasEvidenceLaneConflictWithDiscoveryTurnMeaning(input, entryPoint)) {
|
||||
return false;
|
||||
}
|
||||
|
|
@ -463,6 +470,9 @@ function hasRuntimeMatchedExactReply(input, entryPoint) {
|
|||
if (hasMetadataDiscoveryPriority(input, entryPoint)) {
|
||||
return false;
|
||||
}
|
||||
if (hasOpenScopeValueFlowDiscoveryPriority(input, entryPoint)) {
|
||||
return false;
|
||||
}
|
||||
if (hasEvidenceLaneConflictWithDiscoveryTurnMeaning(input, entryPoint)) {
|
||||
return false;
|
||||
}
|
||||
|
|
@ -483,6 +493,9 @@ function hasAlignedFactualAddressReply(input, entryPoint) {
|
|||
if (hasMetadataDiscoveryPriority(input, entryPoint)) {
|
||||
return false;
|
||||
}
|
||||
if (hasOpenScopeValueFlowDiscoveryPriority(input, entryPoint)) {
|
||||
return false;
|
||||
}
|
||||
if (hasSemanticConflictWithDiscoveryTurnMeaning(input, entryPoint)) {
|
||||
return false;
|
||||
}
|
||||
|
|
@ -538,6 +551,9 @@ function hasMatchedFactualAddressContinuationTarget(input, entryPoint) {
|
|||
if (hasMetadataDiscoveryPriority(input, entryPoint)) {
|
||||
return false;
|
||||
}
|
||||
if (hasOpenScopeValueFlowDiscoveryPriority(input, entryPoint)) {
|
||||
return false;
|
||||
}
|
||||
const detectedIntent = toNonEmptyString(input.addressRuntimeMeta?.detected_intent);
|
||||
const dialogContinuationContract = toRecordObject(input.addressRuntimeMeta?.dialogContinuationContract) ??
|
||||
toRecordObject(input.addressRuntimeMeta?.dialog_continuation_contract_v2);
|
||||
|
|
@ -578,6 +594,9 @@ function hasFullConfirmedFactualAddressReply(input, entryPoint) {
|
|||
if (hasMetadataDiscoveryPriority(input, entryPoint)) {
|
||||
return false;
|
||||
}
|
||||
if (hasOpenScopeValueFlowDiscoveryPriority(input, entryPoint)) {
|
||||
return false;
|
||||
}
|
||||
return hasFullConfirmedTruth(input);
|
||||
}
|
||||
function applyAssistantMcpDiscoveryResponsePolicy(input) {
|
||||
|
|
|
|||
|
|
@ -293,6 +293,12 @@ function rankingNeedFromRawUtterance(value: string): string | null {
|
|||
if (!text) {
|
||||
return null;
|
||||
}
|
||||
if (/\u0438\u0441\u043a\u043b\u044e\u0447[\p{L}\p{N}_]*\s+\u0442\u043e\u043f/iu.test(text)) {
|
||||
return null;
|
||||
}
|
||||
if (/(?:\u0431\u0435\u0437\s+\u0442\u043e\u043f(?:\u043e\u0432|\u0430)?\b|\u043d\u0435\s+\u0442\u043e\u043f\b|\u0438\u0441\u043a\u043b\u044e\u0447\w*\s+\u0442\u043e\u043f|\u0431\u0435\u0437\s+\u0440\u0435\u0439\u0442\u0438\u043d\u0433\u0430\b|без\s+топ(?:ов|а)?\b|не\s+топ\b|исключ\S*\s+топ|без\s+рейтинга\b)/iu.test(text)) {
|
||||
return null;
|
||||
}
|
||||
if (/(?:\u0442\u043e\u043f[-\s]?\d*|\u0441\u0430\u043c(?:\u044b\u0439|\u0430\u044f|\u043e\u0435|\u044b\u0435)|\u0431\u043e\u043b\u044c\u0448\u0435\s+\u0432\u0441\u0435\u0433\u043e|\u043d\u0430\u0438\u0431\u043e\u043b[\u0435\u0451]\u0435|\u043c\u0430\u043a\u0441\u0438\u043c\u0430\u043b\u044c\u043d|\u043c\u0430\u043a\u0441\u0438\u043c\u0443\u043c|\u043a\u0440\u0443\u043f\u043d\u0435\u0439\u0448|\u043b\u0443\u0447\u0448\u0438\u0439)/iu.test(text)) {
|
||||
return "top_desc";
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ export interface AssistantMcpDiscoveryDebugAttachmentFields {
|
|||
assistant_mcp_discovery_entry_point_v1: AssistantMcpDiscoveryRuntimeEntryPointContract | null;
|
||||
mcp_discovery_entry_status: string | null;
|
||||
mcp_discovery_attempted: boolean;
|
||||
mcp_discovery_hot_runtime_wired: false;
|
||||
mcp_discovery_hot_runtime_wired: boolean;
|
||||
mcp_discovery_bridge_status: string | null;
|
||||
mcp_discovery_selected_chain_id: string | null;
|
||||
mcp_discovery_evidence_plan_v1: AssistantEvidencePlannerContract | null;
|
||||
|
|
@ -130,12 +130,16 @@ export function buildAssistantMcpDiscoveryDebugAttachmentFields(
|
|||
const routeCandidate = isRouteCandidateContract(bridge?.route_candidate) ? bridge.route_candidate : null;
|
||||
const executionHandoff = isExecutionHandoffContract(bridge?.execution_handoff) ? bridge.execution_handoff : null;
|
||||
const answerDraft = toRecordObject(bridge?.answer_draft);
|
||||
const hotRuntimeWired =
|
||||
entryPoint?.hot_runtime_wired === true ||
|
||||
bridge?.hot_runtime_wired === true ||
|
||||
executionHandoff?.can_use_guarded_response === true;
|
||||
|
||||
return {
|
||||
assistant_mcp_discovery_entry_point_v1: entryPoint,
|
||||
mcp_discovery_entry_status: toNonEmptyString(entryPoint?.entry_status),
|
||||
mcp_discovery_attempted: Boolean(entryPoint?.discovery_attempted),
|
||||
mcp_discovery_hot_runtime_wired: false,
|
||||
mcp_discovery_hot_runtime_wired: hotRuntimeWired,
|
||||
mcp_discovery_bridge_status: toNonEmptyString(bridge?.bridge_status),
|
||||
mcp_discovery_selected_chain_id: toNonEmptyString(planner?.selected_chain_id),
|
||||
mcp_discovery_evidence_plan_v1: evidencePlan,
|
||||
|
|
|
|||
|
|
@ -306,13 +306,17 @@ function isOpenScopeValueFlowWithoutSubject(
|
|||
entryPoint: AssistantMcpDiscoveryRuntimeEntryPointContract | null
|
||||
): boolean {
|
||||
const graph = readDiscoveryDataNeedGraph(entryPoint);
|
||||
const turnMeaning = readDiscoveryTurnMeaning(entryPoint);
|
||||
const businessFactFamily = toNonEmptyString(graph?.business_fact_family);
|
||||
const subjectCandidates = Array.isArray(graph?.subject_candidates) ? graph.subject_candidates : [];
|
||||
const reasonCodes = Array.isArray(graph?.reason_codes) ? graph.reason_codes : [];
|
||||
const reasonCodes = readStringArray(graph?.reason_codes);
|
||||
const clarificationGaps = readStringArray(graph?.clarification_gaps);
|
||||
const explicitOrganizationScope = toNonEmptyString(turnMeaning?.explicit_organization_scope);
|
||||
return (
|
||||
businessFactFamily === "value_flow" &&
|
||||
subjectCandidates.length === 0 &&
|
||||
reasonCodes.some((reason) => toNonEmptyString(reason) === "data_need_graph_open_scope_total_without_subject")
|
||||
(reasonCodes.includes("data_need_graph_open_scope_total_without_subject") ||
|
||||
(Boolean(explicitOrganizationScope) && clarificationGaps.includes("subject")))
|
||||
);
|
||||
}
|
||||
|
||||
|
|
@ -609,6 +613,9 @@ function hasRuntimeAdjustedExactReply(
|
|||
if (hasMetadataDiscoveryPriority(input, entryPoint)) {
|
||||
return false;
|
||||
}
|
||||
if (hasOpenScopeValueFlowDiscoveryPriority(input, entryPoint)) {
|
||||
return false;
|
||||
}
|
||||
if (hasEvidenceLaneConflictWithDiscoveryTurnMeaning(input, entryPoint)) {
|
||||
return false;
|
||||
}
|
||||
|
|
@ -638,6 +645,9 @@ function hasRuntimeMatchedExactReply(
|
|||
if (hasMetadataDiscoveryPriority(input, entryPoint)) {
|
||||
return false;
|
||||
}
|
||||
if (hasOpenScopeValueFlowDiscoveryPriority(input, entryPoint)) {
|
||||
return false;
|
||||
}
|
||||
if (hasEvidenceLaneConflictWithDiscoveryTurnMeaning(input, entryPoint)) {
|
||||
return false;
|
||||
}
|
||||
|
|
@ -664,6 +674,9 @@ function hasAlignedFactualAddressReply(
|
|||
if (hasMetadataDiscoveryPriority(input, entryPoint)) {
|
||||
return false;
|
||||
}
|
||||
if (hasOpenScopeValueFlowDiscoveryPriority(input, entryPoint)) {
|
||||
return false;
|
||||
}
|
||||
if (hasSemanticConflictWithDiscoveryTurnMeaning(input, entryPoint)) {
|
||||
return false;
|
||||
}
|
||||
|
|
@ -729,6 +742,9 @@ function hasMatchedFactualAddressContinuationTarget(
|
|||
if (hasMetadataDiscoveryPriority(input, entryPoint)) {
|
||||
return false;
|
||||
}
|
||||
if (hasOpenScopeValueFlowDiscoveryPriority(input, entryPoint)) {
|
||||
return false;
|
||||
}
|
||||
const detectedIntent = toNonEmptyString(input.addressRuntimeMeta?.detected_intent);
|
||||
const dialogContinuationContract =
|
||||
toRecordObject(input.addressRuntimeMeta?.dialogContinuationContract) ??
|
||||
|
|
@ -781,6 +797,9 @@ function hasFullConfirmedFactualAddressReply(
|
|||
if (hasMetadataDiscoveryPriority(input, entryPoint)) {
|
||||
return false;
|
||||
}
|
||||
if (hasOpenScopeValueFlowDiscoveryPriority(input, entryPoint)) {
|
||||
return false;
|
||||
}
|
||||
return hasFullConfirmedTruth(input);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -60,6 +60,25 @@ describe("assistant MCP discovery data need graph", () => {
|
|||
);
|
||||
});
|
||||
|
||||
it("does not turn explicit no-top wording into a value-flow ranking", () => {
|
||||
const result = buildAssistantMcpDiscoveryDataNeedGraph({
|
||||
semanticDataNeed: "counterparty value-flow evidence",
|
||||
rawUtterance:
|
||||
"Определить общую сумму поступлений в ООО Альтернатива Плюс за 2020 год, исключая топ-контрагентов и детализацию по контрагентам",
|
||||
turnMeaning: {
|
||||
asked_domain_family: "counterparty_value",
|
||||
asked_action_family: "counterparty_value_or_turnover",
|
||||
explicit_organization_scope: "ООО Альтернатива Плюс",
|
||||
explicit_date_scope: "2020"
|
||||
}
|
||||
});
|
||||
|
||||
expect(result.business_fact_family).toBe("value_flow");
|
||||
expect(result.ranking_need).toBeNull();
|
||||
expect(result.decomposition_candidates).toContain("aggregate_checked_amounts");
|
||||
expect(result.decomposition_candidates).not.toContain("aggregate_ranked_axis_values");
|
||||
});
|
||||
|
||||
it("marks metadata lane choice as a clarification-required graph", () => {
|
||||
const result = buildAssistantMcpDiscoveryDataNeedGraph({
|
||||
semanticDataNeed: "metadata lane clarification",
|
||||
|
|
|
|||
|
|
@ -100,7 +100,7 @@ describe("assistant MCP discovery debug attachment", () => {
|
|||
);
|
||||
expect(debug.mcp_discovery_entry_status).toBe("bridge_executed");
|
||||
expect(debug.mcp_discovery_attempted).toBe(true);
|
||||
expect(debug.mcp_discovery_hot_runtime_wired).toBe(false);
|
||||
expect(debug.mcp_discovery_hot_runtime_wired).toBe(true);
|
||||
expect(debug.mcp_discovery_bridge_status).toBe("answer_draft_ready");
|
||||
expect(debug.mcp_discovery_selected_chain_id).toBe("value_flow");
|
||||
expect(debug.mcp_discovery_evidence_plan_status).toBe("ready_for_execution");
|
||||
|
|
|
|||
|
|
@ -1,4 +1,48 @@
|
|||
[
|
||||
{
|
||||
"generation_id": "gen-ag05221957-713bbd",
|
||||
"created_at": "2026-05-22T19:57:37+00:00",
|
||||
"mode": "saved_user_sessions",
|
||||
"title": "AGENT | Hot value-flow discovery handoff",
|
||||
"count": 3,
|
||||
"domain": "autonomy_hot_value_flow_handoff",
|
||||
"questions": [
|
||||
"Сколько входящих денег за 2020 год по ООО Альтернатива Плюс без разреза по контрагентам?",
|
||||
"Сколько исходящих денег за 2020 год по ООО Альтернатива Плюс без разреза по контрагентам?",
|
||||
"А всего сколько денег пришло в ООО Альтернатива Плюс за 2020, без топов и без контрагентов?"
|
||||
],
|
||||
"generated_by": "codex_agent",
|
||||
"saved_case_set_file": "assistant_autogen_saved_user_sessions_20260522195737_gen-ag05221957-713bbd.json",
|
||||
"context": {
|
||||
"llm_provider": null,
|
||||
"model": null,
|
||||
"assistant_prompt_version": null,
|
||||
"decomposition_prompt_version": null,
|
||||
"prompt_fingerprint": null,
|
||||
"autogen_personality_id": null,
|
||||
"autogen_personality_prompt": null,
|
||||
"source_session_id": null,
|
||||
"saved_session_file": "assistant_saved_session_20260522195737_gen-ag05221957-713bbd.json",
|
||||
"saved_case_set_kind": "agent_semantic_scenario",
|
||||
"agent_run": true,
|
||||
"agent_focus": "Targeted AGENT replay for the current Autonomy Core slice: organization-scoped value-flow questions must be answered through guarded MCP discovery response with hot handoff, not through stale exact fallback or a counterparty-only route.",
|
||||
"architecture_phase": "turnaround_11",
|
||||
"source_spec_file": "X:\\1C\\NDC_1C\\docs\\orchestration\\agent_hot_value_flow_handoff_20260522.json",
|
||||
"scenario_id": "agent_hot_value_flow_handoff_20260522",
|
||||
"semantic_tags": [
|
||||
"autonomy_core",
|
||||
"colloquial_total",
|
||||
"guarded_response",
|
||||
"hot_handoff",
|
||||
"incoming_total",
|
||||
"outgoing_total",
|
||||
"value_flow"
|
||||
],
|
||||
"validation_status": "accepted_live_replay",
|
||||
"validated_run_dir": "artifacts\\domain_runs\\agent_hot_value_flow_handoff_live5",
|
||||
"saved_after_validated_replay": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"generation_id": "gen-ag05221319-4035f5",
|
||||
"created_at": "2026-05-22T13:19:31+00:00",
|
||||
|
|
|
|||
|
|
@ -0,0 +1,119 @@
|
|||
{
|
||||
"saved_at": "2026-05-22T19:57:37+00:00",
|
||||
"generation_id": "gen-ag05221957-713bbd",
|
||||
"mode": "saved_user_sessions",
|
||||
"title": "AGENT | Hot value-flow discovery handoff",
|
||||
"agent_run": true,
|
||||
"questions": [
|
||||
"Сколько входящих денег за 2020 год по ООО Альтернатива Плюс без разреза по контрагентам?",
|
||||
"Сколько исходящих денег за 2020 год по ООО Альтернатива Плюс без разреза по контрагентам?",
|
||||
"А всего сколько денег пришло в ООО Альтернатива Плюс за 2020, без топов и без контрагентов?"
|
||||
],
|
||||
"metadata": {
|
||||
"assistant_prompt_version": null,
|
||||
"decomposition_prompt_version": null,
|
||||
"prompt_fingerprint": null,
|
||||
"agent_focus": "Targeted AGENT replay for the current Autonomy Core slice: organization-scoped value-flow questions must be answered through guarded MCP discovery response with hot handoff, not through stale exact fallback or a counterparty-only route.",
|
||||
"architecture_phase": "turnaround_11",
|
||||
"source_spec_file": "X:\\1C\\NDC_1C\\docs\\orchestration\\agent_hot_value_flow_handoff_20260522.json",
|
||||
"scenario_id": "agent_hot_value_flow_handoff_20260522",
|
||||
"semantic_tags": [
|
||||
"autonomy_core",
|
||||
"colloquial_total",
|
||||
"guarded_response",
|
||||
"hot_handoff",
|
||||
"incoming_total",
|
||||
"outgoing_total",
|
||||
"value_flow"
|
||||
],
|
||||
"validation_status": "accepted_live_replay",
|
||||
"validated_run_dir": "artifacts\\domain_runs\\agent_hot_value_flow_handoff_live5",
|
||||
"saved_after_validated_replay": true,
|
||||
"save_gate": {
|
||||
"schema_version": "agent_semantic_save_gate_v1",
|
||||
"validation_status": "accepted_live_replay",
|
||||
"validated_run_dir": "artifacts\\domain_runs\\agent_hot_value_flow_handoff_live5",
|
||||
"final_status": "accepted",
|
||||
"review_overall_status": "pass",
|
||||
"business_overall_status": "pass",
|
||||
"steps_total": 3,
|
||||
"steps_passed": 3,
|
||||
"steps_failed": 0,
|
||||
"steps_with_business_failures": 0,
|
||||
"steps_with_business_warnings": 0,
|
||||
"acceptance_gate_passed": true,
|
||||
"saved_after_validated_replay": true
|
||||
}
|
||||
},
|
||||
"source_session_id": null,
|
||||
"session": {
|
||||
"session_id": null,
|
||||
"mode": "agent_semantic_run",
|
||||
"items": [
|
||||
{
|
||||
"message_id": "agent-user-001",
|
||||
"role": "user",
|
||||
"text": "Сколько входящих денег за 2020 год по ООО Альтернатива Плюс без разреза по контрагентам?",
|
||||
"created_at": "2026-05-22T19:57:37+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-002",
|
||||
"role": "user",
|
||||
"text": "Сколько исходящих денег за 2020 год по ООО Альтернатива Плюс без разреза по контрагентам?",
|
||||
"created_at": "2026-05-22T19:57:37+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-003",
|
||||
"role": "user",
|
||||
"text": "А всего сколько денег пришло в ООО Альтернатива Плюс за 2020, без топов и без контрагентов?",
|
||||
"created_at": "2026-05-22T19:57:37+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
}
|
||||
],
|
||||
"agent_run": true,
|
||||
"metadata": {
|
||||
"assistant_prompt_version": null,
|
||||
"decomposition_prompt_version": null,
|
||||
"prompt_fingerprint": null,
|
||||
"agent_focus": "Targeted AGENT replay for the current Autonomy Core slice: organization-scoped value-flow questions must be answered through guarded MCP discovery response with hot handoff, not through stale exact fallback or a counterparty-only route.",
|
||||
"architecture_phase": "turnaround_11",
|
||||
"source_spec_file": "X:\\1C\\NDC_1C\\docs\\orchestration\\agent_hot_value_flow_handoff_20260522.json",
|
||||
"scenario_id": "agent_hot_value_flow_handoff_20260522",
|
||||
"semantic_tags": [
|
||||
"autonomy_core",
|
||||
"colloquial_total",
|
||||
"guarded_response",
|
||||
"hot_handoff",
|
||||
"incoming_total",
|
||||
"outgoing_total",
|
||||
"value_flow"
|
||||
],
|
||||
"validation_status": "accepted_live_replay",
|
||||
"validated_run_dir": "artifacts\\domain_runs\\agent_hot_value_flow_handoff_live5",
|
||||
"saved_after_validated_replay": true,
|
||||
"save_gate": {
|
||||
"schema_version": "agent_semantic_save_gate_v1",
|
||||
"validation_status": "accepted_live_replay",
|
||||
"validated_run_dir": "artifacts\\domain_runs\\agent_hot_value_flow_handoff_live5",
|
||||
"final_status": "accepted",
|
||||
"review_overall_status": "pass",
|
||||
"business_overall_status": "pass",
|
||||
"steps_total": 3,
|
||||
"steps_passed": 3,
|
||||
"steps_failed": 0,
|
||||
"steps_with_business_failures": 0,
|
||||
"steps_with_business_warnings": 0,
|
||||
"acceptance_gate_passed": true,
|
||||
"saved_after_validated_replay": true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,34 @@
|
|||
{
|
||||
"suite_id": "assistant_saved_session_gen-ag05221957-713bbd",
|
||||
"suite_version": "0.1.0",
|
||||
"schema_version": "assistant_saved_session_suite_v0_1",
|
||||
"generated_at": "2026-05-22T19:57:37+00:00",
|
||||
"generation_id": "gen-ag05221957-713bbd",
|
||||
"mode": "saved_user_sessions",
|
||||
"title": "AGENT | Hot value-flow discovery handoff",
|
||||
"domain": "autonomy_hot_value_flow_handoff",
|
||||
"scenario_count": 1,
|
||||
"case_ids": [
|
||||
"SAVED-001"
|
||||
],
|
||||
"cases": [
|
||||
{
|
||||
"case_id": "SAVED-001",
|
||||
"scenario_tag": "agent_saved_user_sessions",
|
||||
"title": "AGENT | Hot value-flow discovery handoff",
|
||||
"question_type": "followup",
|
||||
"broadness_level": "medium",
|
||||
"turns": [
|
||||
{
|
||||
"user_message": "Сколько входящих денег за 2020 год по ООО Альтернатива Плюс без разреза по контрагентам?"
|
||||
},
|
||||
{
|
||||
"user_message": "Сколько исходящих денег за 2020 год по ООО Альтернатива Плюс без разреза по контрагентам?"
|
||||
},
|
||||
{
|
||||
"user_message": "А всего сколько денег пришло в ООО Альтернатива Плюс за 2020, без топов и без контрагентов?"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -2343,12 +2343,25 @@ def build_scenario_step_state(
|
|||
),
|
||||
"mcp_discovery_route_candidate_next_action": debug.get("mcp_discovery_route_candidate_next_action"),
|
||||
"mcp_discovery_response_applied": debug.get("mcp_discovery_response_applied"),
|
||||
"mcp_discovery_hot_runtime_wired": debug.get("mcp_discovery_hot_runtime_wired"),
|
||||
"mcp_discovery_selected_chain_id": debug.get("mcp_discovery_selected_chain_id"),
|
||||
"mcp_discovery_execution_handoff_status": debug.get("mcp_discovery_execution_handoff_status"),
|
||||
"mcp_discovery_execution_handoff_allowed_hot_chain": debug.get(
|
||||
"mcp_discovery_execution_handoff_allowed_hot_chain"
|
||||
),
|
||||
"mcp_discovery_execution_handoff_can_use_guarded_response": debug.get(
|
||||
"mcp_discovery_execution_handoff_can_use_guarded_response"
|
||||
),
|
||||
"mcp_discovery_response_candidate_status": (
|
||||
debug.get("mcp_discovery_response_candidate_v1", {}).get("candidate_status")
|
||||
if isinstance(debug.get("mcp_discovery_response_candidate_v1"), dict)
|
||||
else None
|
||||
),
|
||||
"mcp_discovery_response_candidate_hot_runtime_wired": (
|
||||
debug.get("mcp_discovery_response_candidate_v1", {}).get("hot_runtime_wired")
|
||||
if isinstance(debug.get("mcp_discovery_response_candidate_v1"), dict)
|
||||
else None
|
||||
),
|
||||
"mcp_discovery_response_reply_type": (
|
||||
debug.get("mcp_discovery_response_candidate_v1", {}).get("reply_type")
|
||||
if isinstance(debug.get("mcp_discovery_response_candidate_v1"), dict)
|
||||
|
|
|
|||
|
|
@ -28,6 +28,13 @@ TECHNICAL_QUESTION_FIELDS = (
|
|||
"expected_catalog_alignment_status",
|
||||
"expected_catalog_chain_top_match",
|
||||
"expected_catalog_selected_matches_top",
|
||||
"expected_mcp_discovery_response_applied",
|
||||
"expected_mcp_discovery_selected_chain_id",
|
||||
"expected_mcp_discovery_response_candidate_status",
|
||||
"expected_mcp_discovery_candidate_hot_runtime_wired",
|
||||
"expected_mcp_discovery_hot_runtime_wired",
|
||||
"expected_mcp_discovery_execution_handoff_status",
|
||||
"expected_mcp_discovery_execution_handoff_can_use_guarded_response",
|
||||
"expected_route_candidate_status",
|
||||
"expected_route_candidate_executable_now",
|
||||
"expected_route_candidate_missing_axes",
|
||||
|
|
@ -103,6 +110,27 @@ def normalize_step_spec(index: int, raw_step: Any) -> dict[str, Any]:
|
|||
str(step.get("expected_catalog_chain_top_match") or "").strip() or None
|
||||
)
|
||||
normalized_step["expected_catalog_selected_matches_top"] = step.get("expected_catalog_selected_matches_top")
|
||||
normalized_step["expected_mcp_discovery_response_applied"] = step.get(
|
||||
"expected_mcp_discovery_response_applied"
|
||||
)
|
||||
normalized_step["expected_mcp_discovery_selected_chain_id"] = (
|
||||
str(step.get("expected_mcp_discovery_selected_chain_id") or "").strip() or None
|
||||
)
|
||||
normalized_step["expected_mcp_discovery_response_candidate_status"] = (
|
||||
str(step.get("expected_mcp_discovery_response_candidate_status") or "").strip() or None
|
||||
)
|
||||
normalized_step["expected_mcp_discovery_candidate_hot_runtime_wired"] = step.get(
|
||||
"expected_mcp_discovery_candidate_hot_runtime_wired"
|
||||
)
|
||||
normalized_step["expected_mcp_discovery_hot_runtime_wired"] = step.get(
|
||||
"expected_mcp_discovery_hot_runtime_wired"
|
||||
)
|
||||
normalized_step["expected_mcp_discovery_execution_handoff_status"] = (
|
||||
str(step.get("expected_mcp_discovery_execution_handoff_status") or "").strip() or None
|
||||
)
|
||||
normalized_step["expected_mcp_discovery_execution_handoff_can_use_guarded_response"] = step.get(
|
||||
"expected_mcp_discovery_execution_handoff_can_use_guarded_response"
|
||||
)
|
||||
normalized_step["expected_route_candidate_status"] = (
|
||||
str(step.get("expected_route_candidate_status") or "").strip() or None
|
||||
)
|
||||
|
|
@ -486,6 +514,13 @@ def evaluate_truth_step(
|
|||
capability_id = str(step_state.get("capability_id") or "").strip()
|
||||
catalog_alignment_status = str(step_state.get("mcp_discovery_catalog_chain_alignment_status") or "").strip()
|
||||
catalog_chain_top_match = str(step_state.get("mcp_discovery_catalog_chain_top_match") or "").strip()
|
||||
mcp_discovery_selected_chain_id = str(step_state.get("mcp_discovery_selected_chain_id") or "").strip()
|
||||
mcp_discovery_response_candidate_status = str(
|
||||
step_state.get("mcp_discovery_response_candidate_status") or ""
|
||||
).strip()
|
||||
mcp_discovery_execution_handoff_status = str(
|
||||
step_state.get("mcp_discovery_execution_handoff_status") or ""
|
||||
).strip()
|
||||
route_candidate_status = str(step_state.get("mcp_discovery_route_candidate_status") or "").strip()
|
||||
limited_reason_category = str(step_state.get("limited_reason_category") or "").strip()
|
||||
extracted_filters = (
|
||||
|
|
@ -569,6 +604,156 @@ def evaluate_truth_step(
|
|||
expected=expected_catalog_selected_matches_top,
|
||||
)
|
||||
|
||||
expected_mcp_discovery_response_applied = normalize_optional_bool(
|
||||
resolve_nested_placeholders(
|
||||
step.get("expected_mcp_discovery_response_applied"),
|
||||
step_results,
|
||||
bindings,
|
||||
runtime_bindings,
|
||||
)
|
||||
)
|
||||
if expected_mcp_discovery_response_applied is not None:
|
||||
actual_mcp_discovery_response_applied = step_state.get("mcp_discovery_response_applied") is True
|
||||
if actual_mcp_discovery_response_applied != expected_mcp_discovery_response_applied:
|
||||
append_finding(
|
||||
findings,
|
||||
step,
|
||||
"wrong_mcp_discovery_response_applied",
|
||||
"MCP discovery response replacement flag does not match the expected hot handoff behavior.",
|
||||
actual=actual_mcp_discovery_response_applied,
|
||||
expected=expected_mcp_discovery_response_applied,
|
||||
)
|
||||
|
||||
expected_mcp_discovery_selected_chain_id = str(
|
||||
resolve_nested_placeholders(
|
||||
step.get("expected_mcp_discovery_selected_chain_id"),
|
||||
step_results,
|
||||
bindings,
|
||||
runtime_bindings,
|
||||
)
|
||||
or ""
|
||||
).strip()
|
||||
if (
|
||||
expected_mcp_discovery_selected_chain_id
|
||||
and mcp_discovery_selected_chain_id != expected_mcp_discovery_selected_chain_id
|
||||
):
|
||||
append_finding(
|
||||
findings,
|
||||
step,
|
||||
"wrong_mcp_discovery_selected_chain_id",
|
||||
"MCP discovery selected chain does not match the expected autonomy chain for this step.",
|
||||
actual=mcp_discovery_selected_chain_id or None,
|
||||
expected=expected_mcp_discovery_selected_chain_id,
|
||||
)
|
||||
|
||||
expected_mcp_discovery_response_candidate_status = str(
|
||||
resolve_nested_placeholders(
|
||||
step.get("expected_mcp_discovery_response_candidate_status"),
|
||||
step_results,
|
||||
bindings,
|
||||
runtime_bindings,
|
||||
)
|
||||
or ""
|
||||
).strip()
|
||||
if (
|
||||
expected_mcp_discovery_response_candidate_status
|
||||
and mcp_discovery_response_candidate_status != expected_mcp_discovery_response_candidate_status
|
||||
):
|
||||
append_finding(
|
||||
findings,
|
||||
step,
|
||||
"wrong_mcp_discovery_response_candidate_status",
|
||||
"MCP discovery response candidate status does not match the expected guarded response readiness.",
|
||||
actual=mcp_discovery_response_candidate_status or None,
|
||||
expected=expected_mcp_discovery_response_candidate_status,
|
||||
)
|
||||
|
||||
expected_mcp_discovery_candidate_hot_runtime_wired = normalize_optional_bool(
|
||||
resolve_nested_placeholders(
|
||||
step.get("expected_mcp_discovery_candidate_hot_runtime_wired"),
|
||||
step_results,
|
||||
bindings,
|
||||
runtime_bindings,
|
||||
)
|
||||
)
|
||||
if expected_mcp_discovery_candidate_hot_runtime_wired is not None:
|
||||
actual_candidate_hot_runtime_wired = (
|
||||
step_state.get("mcp_discovery_response_candidate_hot_runtime_wired") is True
|
||||
)
|
||||
if actual_candidate_hot_runtime_wired != expected_mcp_discovery_candidate_hot_runtime_wired:
|
||||
append_finding(
|
||||
findings,
|
||||
step,
|
||||
"wrong_mcp_discovery_candidate_hot_runtime_wired",
|
||||
"MCP discovery response candidate hot-runtime flag does not match the expected guarded handoff.",
|
||||
actual=actual_candidate_hot_runtime_wired,
|
||||
expected=expected_mcp_discovery_candidate_hot_runtime_wired,
|
||||
)
|
||||
|
||||
expected_mcp_discovery_hot_runtime_wired = normalize_optional_bool(
|
||||
resolve_nested_placeholders(
|
||||
step.get("expected_mcp_discovery_hot_runtime_wired"),
|
||||
step_results,
|
||||
bindings,
|
||||
runtime_bindings,
|
||||
)
|
||||
)
|
||||
if expected_mcp_discovery_hot_runtime_wired is not None:
|
||||
actual_hot_runtime_wired = step_state.get("mcp_discovery_hot_runtime_wired") is True
|
||||
if actual_hot_runtime_wired != expected_mcp_discovery_hot_runtime_wired:
|
||||
append_finding(
|
||||
findings,
|
||||
step,
|
||||
"wrong_mcp_discovery_hot_runtime_wired",
|
||||
"Top-level MCP discovery hot-runtime flag does not match the expected guarded handoff.",
|
||||
actual=actual_hot_runtime_wired,
|
||||
expected=expected_mcp_discovery_hot_runtime_wired,
|
||||
)
|
||||
|
||||
expected_mcp_discovery_execution_handoff_status = str(
|
||||
resolve_nested_placeholders(
|
||||
step.get("expected_mcp_discovery_execution_handoff_status"),
|
||||
step_results,
|
||||
bindings,
|
||||
runtime_bindings,
|
||||
)
|
||||
or ""
|
||||
).strip()
|
||||
if (
|
||||
expected_mcp_discovery_execution_handoff_status
|
||||
and mcp_discovery_execution_handoff_status != expected_mcp_discovery_execution_handoff_status
|
||||
):
|
||||
append_finding(
|
||||
findings,
|
||||
step,
|
||||
"wrong_mcp_discovery_execution_handoff_status",
|
||||
"MCP discovery execution handoff status does not match the expected guarded response status.",
|
||||
actual=mcp_discovery_execution_handoff_status or None,
|
||||
expected=expected_mcp_discovery_execution_handoff_status,
|
||||
)
|
||||
|
||||
expected_mcp_discovery_execution_handoff_can_use_guarded_response = normalize_optional_bool(
|
||||
resolve_nested_placeholders(
|
||||
step.get("expected_mcp_discovery_execution_handoff_can_use_guarded_response"),
|
||||
step_results,
|
||||
bindings,
|
||||
runtime_bindings,
|
||||
)
|
||||
)
|
||||
if expected_mcp_discovery_execution_handoff_can_use_guarded_response is not None:
|
||||
actual_can_use_guarded_response = (
|
||||
step_state.get("mcp_discovery_execution_handoff_can_use_guarded_response") is True
|
||||
)
|
||||
if actual_can_use_guarded_response != expected_mcp_discovery_execution_handoff_can_use_guarded_response:
|
||||
append_finding(
|
||||
findings,
|
||||
step,
|
||||
"wrong_mcp_discovery_execution_handoff_guarded_response",
|
||||
"MCP discovery execution handoff guarded-response flag does not match the expected hot path.",
|
||||
actual=actual_can_use_guarded_response,
|
||||
expected=expected_mcp_discovery_execution_handoff_can_use_guarded_response,
|
||||
)
|
||||
|
||||
expected_route_candidate_status = str(
|
||||
resolve_nested_placeholders(
|
||||
step.get("expected_route_candidate_status"),
|
||||
|
|
|
|||
Loading…
Reference in New Issue