From 72804557aae8974758e18266dd75801a48c90159 Mon Sep 17 00:00:00 2001 From: dctouch Date: Mon, 20 Apr 2026 12:23:53 +0300 Subject: [PATCH] =?UTF-8?q?ARCH:=20=D0=BF=D1=80=D0=BE=D0=B2=D0=B5=D1=80?= =?UTF-8?q?=D0=B8=D1=82=D1=8C=20=D0=B8=20=D1=80=D0=B0=D1=81=D1=88=D0=B8?= =?UTF-8?q?=D1=80=D0=B8=D1=82=D1=8C=20gate=20=D0=BE=D1=82=D0=B2=D0=B5?= =?UTF-8?q?=D1=82=D0=B0=20MCP=20discovery?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ...alog_authority_recovery_plan_2026-04-19.md | 42 +++++++ ...s_phase19_mcp_discovery_response_gate.json | 106 ++++++++++++++++++ .../assistantMcpDiscoveryResponsePolicy.js | 16 ++- .../assistantMcpDiscoveryResponsePolicy.ts | 22 +++- .../assistantLivingChatRuntimeAdapter.test.ts | 48 ++++++++ ...ssistantMcpDiscoveryResponsePolicy.test.ts | 24 ++++ 6 files changed, 254 insertions(+), 4 deletions(-) create mode 100644 docs/orchestration/address_truth_harness_phase19_mcp_discovery_response_gate.json diff --git a/docs/ARCH/11 - architecture_turnaround/14 - semantic_dialog_authority_recovery_plan_2026-04-19.md b/docs/ARCH/11 - architecture_turnaround/14 - semantic_dialog_authority_recovery_plan_2026-04-19.md index 410c079..2e660ce 100644 --- a/docs/ARCH/11 - architecture_turnaround/14 - semantic_dialog_authority_recovery_plan_2026-04-19.md +++ b/docs/ARCH/11 - architecture_turnaround/14 - semantic_dialog_authority_recovery_plan_2026-04-19.md @@ -1003,6 +1003,48 @@ Validation: - `npm test -- assistantMcpDiscoveryResponsePolicy.test.ts assistantLivingChatRuntimeAdapter.test.ts assistantMcpDiscoveryResponseCandidate.test.ts assistantMcpDiscoveryDebugAttachment.test.ts` passed 19/19; - `npm run build` passed. 
+## Progress Update - 2026-04-20 MCP Discovery Response Gate Replay + +The fourteenth implementation slice of Big Block 5 added and executed a targeted AGENT semantic replay for the first guarded MCP discovery answer-replacement gate: + +- `docs/orchestration/address_truth_harness_phase19_mcp_discovery_response_gate.json` +- `assistantMcpDiscoveryResponsePolicy.ts` +- `assistantMcpDiscoveryResponsePolicy.test.ts` +- `assistantLivingChatRuntimeAdapter.test.ts` + +The first live replay exposed a real semantic/runtime gap: + +- the MCP discovery entry point executed successfully; +- the response candidate was `ready_for_guarded_use`; +- the final visible answer still came from `llm_chat` because the policy only applied to deterministic `unsupported_current_turn_meaning_boundary` replies; +- the LLM answer was more fluent but less contract-safe because it used a stale runtime date and did not clearly distinguish inferred 1C activity duration from legal registration age. + +The policy gate was expanded safely. + +It can now apply a guarded candidate when either: + +- the current reply is the deterministic unsupported-current-turn boundary; or +- the current reply source is `llm_chat`, the MCP discovery entry point is `bridge_executed`, `discovery_attempted=true`, `turn_input.should_run_discovery=true`, and the candidate passes all guarded response checks. + +The gate still does not apply to ordinary chat or exact supported routes because those turns do not have a valid discovery-ready entry point and eligible candidate. 
+ +Replay result: + +- first run: `address_truth_harness_phase19_mcp_discovery_response_gate_live` failed with 3/4 steps passing; the failing step was the lifecycle step; +- rerun after the policy fix: `address_truth_harness_phase19_mcp_discovery_response_gate_live_rerun1` passed 4/4 with final status `accepted`; +- the lifecycle answer source became `mcp_discovery_response_candidate_guarded`; +- debug confirmed `mcp_discovery_response_applied=true`, `mcp_discovery_entry_status=bridge_executed`, and `mcp_discovery_answer_mode=confirmed_with_bounded_inference`. + +Validation: + +- `npm test -- assistantMcpDiscoveryResponsePolicy.test.ts assistantLivingChatRuntimeAdapter.test.ts assistantMcpDiscoveryResponseCandidate.test.ts assistantMcpDiscoveryDebugAttachment.test.ts` passed 21/21; +- `npm run build` passed; +- `python scripts/domain_truth_harness.py run-live --spec docs/orchestration/address_truth_harness_phase19_mcp_discovery_response_gate.json --output-dir artifacts/domain_runs/address_truth_harness_phase19_mcp_discovery_response_gate_live_rerun1 --timeout-seconds 180` passed 4/4, final status `accepted`. + +Known next quality gap: + +- the guarded candidate is now honest and safe, but it still does not compute and verbalize the exact activity duration such as "5 years N months" from first/latest confirmed rows. That belongs to the next evidence-derivation slice, not to the response gate itself. 
+ ## Execution Rule Do not implement this plan as: diff --git a/docs/orchestration/address_truth_harness_phase19_mcp_discovery_response_gate.json b/docs/orchestration/address_truth_harness_phase19_mcp_discovery_response_gate.json new file mode 100644 index 0000000..9c05626 --- /dev/null +++ b/docs/orchestration/address_truth_harness_phase19_mcp_discovery_response_gate.json @@ -0,0 +1,106 @@ +{ + "schema_version": "domain_truth_harness_spec_v1", + "scenario_id": "address_truth_harness_phase19_mcp_discovery_response_gate", + "domain": "address_phase19_mcp_discovery_response_gate", + "title": "Phase 19 MCP discovery response gate replay", + "description": "Targeted AGENT replay for the first guarded MCP discovery answer-replacement gate. The scenario validates that exact supported routes remain authoritative, unsupported-but-understood counterparty lifecycle questions can use guarded discovered evidence, off-domain living chat is not hijacked, and internal MCP/runtime mechanics do not leak into the user-facing answer.", + "bindings": {}, + "steps": [ + { + "step_id": "step_01_human_smalltalk_sanity", + "title": "Human smalltalk remains living chat and does not expose discovery internals", + "question": "привет, ты на связи?", + "required_answer_patterns_any": [ + "(?i)привет|на связи|готов|помочь" + ], + "forbidden_answer_patterns": [ + "(?i)mcp", + "(?i)runtime_", + "(?i)query_documents", + "(?i)primitive" + ], + "criticality": "important", + "semantic_tags": [ + "human_answer", + "mcp_discovery_gate_sanity" + ] + }, + { + "step_id": "step_02_supported_counterparty_documents_stays_exact", + "title": "Supported counterparty documents route is not replaced by MCP discovery", + "question": "покажи документы по свк за 2020", + "allowed_reply_types": [ + "factual", + "factual_with_explanation" + ], + "expected_intents": [ + "list_documents_by_counterparty" + ], + "required_direct_answer_patterns_any": [ + "(?i)свк|группа свк", + "(?i)документ|поступление|счет|счёт" + ], + 
"forbidden_direct_answer_patterns": [ + "(?i)mcp", + "(?i)runtime_", + "(?i)query_documents", + "(?i)primitive", + "(?i)точный маршрут.*не подключ" + ], + "criticality": "critical", + "semantic_tags": [ + "counterparty_documents", + "supported_route_not_hijacked_by_mcp_discovery" + ] + }, + { + "step_id": "step_03_counterparty_lifecycle_uses_guarded_discovery", + "title": "Unsupported-but-understood counterparty lifecycle question uses guarded discovery answer", + "question": "сколько лет мы работаем с Группа СВК?", + "required_answer_patterns_all": [ + "(?i)свк", + "(?i)1с|активност|подтвержд", + "(?i)вывод|оцен|инфер|можно оцен", + "(?i)юридическ|регистрац|не подтвержд|не доказ" + ], + "forbidden_answer_patterns": [ + "(?i)точный маршрут.*не подключ", + "(?i)не буду подставлять", + "(?i)query_documents", + "(?i)query_movements", + "(?i)runtime_", + "(?i)planner_", + "(?i)catalog_", + "(?i)primitive" + ], + "criticality": "critical", + "semantic_tags": [ + "mcp_discovery_response_gate", + "counterparty_lifecycle", + "unsupported_current_turn_meaning_boundary" + ] + }, + { + "step_id": "step_04_off_domain_living_chat_not_hijacked", + "title": "Off-domain living chat remains human and is not hijacked by discovery carryover", + "question": "а чем капибара отличается от утки?", + "required_answer_patterns_any": [ + "(?i)капибар.*утк|утк.*капибар", + "(?i)млекопита|птиц|грызун" + ], + "forbidden_answer_patterns": [ + "(?i)свк", + "(?i)контрагент", + "(?i)mcp", + "(?i)query_documents", + "(?i)runtime_", + "(?i)primitive" + ], + "criticality": "critical", + "semantic_tags": [ + "off_domain_living_chat", + "stale_replay_forbidden" + ] + } + ] +} diff --git a/llm_normalizer/backend/dist/services/assistantMcpDiscoveryResponsePolicy.js b/llm_normalizer/backend/dist/services/assistantMcpDiscoveryResponsePolicy.js index 3a64e1d..a99f1f9 100644 --- a/llm_normalizer/backend/dist/services/assistantMcpDiscoveryResponsePolicy.js +++ 
b/llm_normalizer/backend/dist/services/assistantMcpDiscoveryResponsePolicy.js @@ -65,18 +65,30 @@ function isUnsupportedCurrentTurnBoundary(input) { input.livingChatSource === "deterministic_unsupported_current_turn_boundary" || input.currentReplySource === "deterministic_unsupported_current_turn_boundary"); } +function isDiscoveryReadyChatCandidate(input, entryPoint) { + const turnInput = toRecordObject(entryPoint?.turn_input); + return (entryPoint?.entry_status === "bridge_executed" && + entryPoint.discovery_attempted === true && + turnInput?.should_run_discovery === true && + (input.livingChatSource === "llm_chat" || input.currentReplySource === "llm_chat")); +} function applyAssistantMcpDiscoveryResponsePolicy(input) { const currentReply = String(input.currentReply ?? ""); const currentReplySource = toNonEmptyString(input.currentReplySource) ?? toNonEmptyString(input.livingChatSource) ?? "unknown"; const entryPoint = resolveEntryPoint(input); const candidate = (0, assistantMcpDiscoveryResponseCandidate_1.buildAssistantMcpDiscoveryResponseCandidate)(entryPoint); const reasonCodes = [...candidate.reason_codes]; + const unsupportedBoundary = isUnsupportedCurrentTurnBoundary(input); + const discoveryReadyChatCandidate = isDiscoveryReadyChatCandidate(input, entryPoint); if (!entryPoint) { pushReason(reasonCodes, "mcp_discovery_response_policy_no_entry_point"); } - if (!isUnsupportedCurrentTurnBoundary(input)) { + if (!unsupportedBoundary) { pushReason(reasonCodes, "mcp_discovery_response_policy_not_unsupported_boundary"); } + if (!discoveryReadyChatCandidate) { + pushReason(reasonCodes, "mcp_discovery_response_policy_not_discovery_ready_chat_candidate"); + } if (!ALLOWED_CANDIDATE_STATUSES.has(candidate.candidate_status)) { pushReason(reasonCodes, "mcp_discovery_response_policy_candidate_status_not_allowed"); } @@ -90,7 +102,7 @@ function applyAssistantMcpDiscoveryResponsePolicy(input) { pushReason(reasonCodes, 
"mcp_discovery_response_policy_candidate_contains_internal_mechanics"); } const canApply = Boolean(entryPoint) && - isUnsupportedCurrentTurnBoundary(input) && + (unsupportedBoundary || discoveryReadyChatCandidate) && ALLOWED_CANDIDATE_STATUSES.has(candidate.candidate_status) && candidate.eligible_for_future_hot_runtime && Boolean(toNonEmptyString(candidate.reply_text)) && diff --git a/llm_normalizer/backend/src/services/assistantMcpDiscoveryResponsePolicy.ts b/llm_normalizer/backend/src/services/assistantMcpDiscoveryResponsePolicy.ts index b24473a..768e90b 100644 --- a/llm_normalizer/backend/src/services/assistantMcpDiscoveryResponsePolicy.ts +++ b/llm_normalizer/backend/src/services/assistantMcpDiscoveryResponsePolicy.ts @@ -109,6 +109,19 @@ function isUnsupportedCurrentTurnBoundary(input: ApplyAssistantMcpDiscoveryRespo ); } +function isDiscoveryReadyChatCandidate( + input: ApplyAssistantMcpDiscoveryResponsePolicyInput, + entryPoint: AssistantMcpDiscoveryRuntimeEntryPointContract | null +): boolean { + const turnInput = toRecordObject(entryPoint?.turn_input); + return ( + entryPoint?.entry_status === "bridge_executed" && + entryPoint.discovery_attempted === true && + turnInput?.should_run_discovery === true && + (input.livingChatSource === "llm_chat" || input.currentReplySource === "llm_chat") + ); +} + export function applyAssistantMcpDiscoveryResponsePolicy( input: ApplyAssistantMcpDiscoveryResponsePolicyInput ): AssistantMcpDiscoveryResponsePolicyResult { @@ -118,13 +131,18 @@ export function applyAssistantMcpDiscoveryResponsePolicy( const entryPoint = resolveEntryPoint(input); const candidate = buildAssistantMcpDiscoveryResponseCandidate(entryPoint); const reasonCodes = [...candidate.reason_codes]; + const unsupportedBoundary = isUnsupportedCurrentTurnBoundary(input); + const discoveryReadyChatCandidate = isDiscoveryReadyChatCandidate(input, entryPoint); if (!entryPoint) { pushReason(reasonCodes, "mcp_discovery_response_policy_no_entry_point"); } - if 
(!isUnsupportedCurrentTurnBoundary(input)) { + if (!unsupportedBoundary) { pushReason(reasonCodes, "mcp_discovery_response_policy_not_unsupported_boundary"); } + if (!discoveryReadyChatCandidate) { + pushReason(reasonCodes, "mcp_discovery_response_policy_not_discovery_ready_chat_candidate"); + } if (!ALLOWED_CANDIDATE_STATUSES.has(candidate.candidate_status)) { pushReason(reasonCodes, "mcp_discovery_response_policy_candidate_status_not_allowed"); } @@ -140,7 +158,7 @@ export function applyAssistantMcpDiscoveryResponsePolicy( const canApply = Boolean(entryPoint) && - isUnsupportedCurrentTurnBoundary(input) && + (unsupportedBoundary || discoveryReadyChatCandidate) && ALLOWED_CANDIDATE_STATUSES.has(candidate.candidate_status) && candidate.eligible_for_future_hot_runtime && Boolean(toNonEmptyString(candidate.reply_text)) && diff --git a/llm_normalizer/backend/tests/assistantLivingChatRuntimeAdapter.test.ts b/llm_normalizer/backend/tests/assistantLivingChatRuntimeAdapter.test.ts index cdce02e..775f127 100644 --- a/llm_normalizer/backend/tests/assistantLivingChatRuntimeAdapter.test.ts +++ b/llm_normalizer/backend/tests/assistantLivingChatRuntimeAdapter.test.ts @@ -229,6 +229,54 @@ describe("assistant living chat runtime adapter", () => { expect(executeLlmChat).not.toHaveBeenCalled(); }); + it("replaces discovery-ready llm chat business answer with guarded MCP discovery response", async () => { + const executeLlmChat = vi.fn(async () => "stale llm answer with old date"); + const input = buildRuntimeInput({ + userMessage: "how long has svk been active", + modeDecision: { mode: "chat", reason: "non_domain_query_indexed" }, + addressRuntimeMeta: { + mcpDiscoveryRuntimeEntryPoint: { + schema_version: "assistant_mcp_discovery_runtime_entry_point_v1", + policy_owner: "assistantMcpDiscoveryRuntimeEntryPoint", + entry_status: "bridge_executed", + hot_runtime_wired: false, + discovery_attempted: true, + turn_input: { + adapter_status: "ready", + should_run_discovery: true + }, + 
bridge: { + bridge_status: "answer_draft_ready", + user_facing_response_allowed: true, + business_fact_answer_allowed: true, + requires_user_clarification: false, + answer_draft: { + answer_mode: "confirmed_with_bounded_inference", + headline: "Confirmed scoped answer.", + confirmed_lines: ["Confirmed fact"], + inference_lines: ["Bounded inference"], + unknown_lines: ["Unconfirmed legal fact"], + limitation_lines: [], + next_step_line: null + } + }, + reason_codes: ["runtime_entry_point_bridge_executed"] + } + }, + executeLlmChat + }); + + const output = await runAssistantLivingChatRuntime(input); + + expect(output.handled).toBe(true); + expect(output.chatText).toContain("Confirmed fact"); + expect(output.chatText).not.toContain("old date"); + expect(output.debug?.living_chat_response_source).toBe("mcp_discovery_response_candidate_guarded"); + expect(output.debug?.mcp_discovery_response_applied).toBe(true); + expect(output.debug?.mcp_discovery_entry_status).toBe("bridge_executed"); + expect(executeLlmChat).toHaveBeenCalledTimes(1); + }); + it("adds proactive organization offer on first smalltalk turn when multiple organizations are available", async () => { const resolveDataScopeProbe = vi.fn(async () => ({ status: "resolved", diff --git a/llm_normalizer/backend/tests/assistantMcpDiscoveryResponsePolicy.test.ts b/llm_normalizer/backend/tests/assistantMcpDiscoveryResponsePolicy.test.ts index a0aa298..aef1b93 100644 --- a/llm_normalizer/backend/tests/assistantMcpDiscoveryResponsePolicy.test.ts +++ b/llm_normalizer/backend/tests/assistantMcpDiscoveryResponsePolicy.test.ts @@ -63,6 +63,30 @@ describe("assistant MCP discovery response policy", () => { expect(result.reply_text).toBe("regular chat"); expect(result.reply_source).toBe("llm_chat"); expect(result.reason_codes).toContain("mcp_discovery_response_policy_not_unsupported_boundary"); + expect(result.reason_codes).toContain("mcp_discovery_response_policy_not_discovery_ready_chat_candidate"); + }); + + it("applies a 
guarded candidate for discovery-ready llm chat business answers", () => { + const result = applyAssistantMcpDiscoveryResponsePolicy({ + currentReply: "stale llm business answer", + currentReplySource: "llm_chat", + modeDecisionReason: "non_domain_query_indexed", + addressRuntimeMeta: { + mcpDiscoveryRuntimeEntryPoint: entryPoint({ + turn_input: { + adapter_status: "ready", + should_run_discovery: true + } + }) + } + }); + + expect(result.applied).toBe(true); + expect(result.decision).toBe("apply_candidate"); + expect(result.reply_source).toBe("mcp_discovery_response_candidate_guarded"); + expect(result.reply_text).toContain("Confirmed fact"); + expect(result.reason_codes).toContain("mcp_discovery_response_policy_not_unsupported_boundary"); + expect(result.reason_codes).not.toContain("mcp_discovery_response_policy_not_discovery_ready_chat_candidate"); }); it("keeps the current reply when the candidate has no grounded text", () => {