From a63742f0d6ebe1519a7ee6d75cb9dc051d1d0915 Mon Sep 17 00:00:00 2001
From: dctouch <support@dctouch.ru>
Date: Fri, 1 May 2026 15:38:06 +0300
Subject: [PATCH] =?UTF-8?q?Planner=20Autonomy:=20=D0=B2=D1=8B=D0=B2=D0=B5?=
 =?UTF-8?q?=D1=81=D1=82=D0=B8=20catalog-alignment=20=D0=B2=20replay=20arti?=
 =?UTF-8?q?facts?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 ...anner_autonomy_consolidation_2026-05-01.md |  8 ++-
 .../11 - architecture_turnaround/README.md    |  6 ++-
 scripts/domain_case_loop.py                   |  3 ++
 scripts/domain_truth_harness.py               |  3 ++
 scripts/scenario_acceptance_policy.py         |  6 +++
 scripts/test_domain_case_loop_step_state.py   | 54 +++++++++++++++++++
 scripts/test_scenario_acceptance_policy.py    | 12 +++++
 7 files changed, 89 insertions(+), 3 deletions(-)
 create mode 100644 scripts/test_domain_case_loop_step_state.py

diff --git a/docs/ARCH/11 - architecture_turnaround/20 - planner_autonomy_consolidation_2026-05-01.md b/docs/ARCH/11 - architecture_turnaround/20 - planner_autonomy_consolidation_2026-05-01.md
index 0c2c3de..d957983 100644
--- a/docs/ARCH/11 - architecture_turnaround/20 - planner_autonomy_consolidation_2026-05-01.md	
+++ b/docs/ARCH/11 - architecture_turnaround/20 - planner_autonomy_consolidation_2026-05-01.md	
@@ -121,6 +121,7 @@ The following consolidation step added catalog-level chain-template scoring:
 - `catalog_chain_template_alignment` now records whether the selected chain is the top catalog match, its rank, and whether it appeared in the catalog search results; runtime loop state and debug summary expose the same verdict.
 - planner reason codes now emit stable catalog-alignment telemetry for evaluated top-match, selected-equals-top, selected-lower-rank, selected-outside-match-set, and unscored selected-chain states.
 - `catalog_chain_template_alignment.alignment_status` now carries the same verdict as one enum-like field, and debug summary exposes it as `mcp_discovery_catalog_chain_alignment_status`.
+- `domain_truth_harness` and `scenario_acceptance_policy` now carry the alignment status, top catalog match, and selected-matches-top flag into replay artifacts instead of leaving them buried in raw debug JSON.
 
 ## Why This Matters
 
@@ -251,9 +252,14 @@ Latest validation after explicit catalog-alignment status propagation:
 - `npm.cmd run build`: passed
 - graphify rebuild: `5943 nodes`, `12915 edges`, `136 communities`
 
+Latest validation after truth-harness catalog-alignment artifact surfacing:
+
+- Python replay-tooling tests: passed (`4 passed`)
+- graphify rebuild: `5946 nodes`, `12918 edges`, `136 communities`
+
 ## Next Step
 
-The next safe step is still to re-run live replay once the 1C side is actively polling the proxy. In parallel, local-only consolidation can continue by using `alignment_status`, alignment reason-code telemetry, and the representative guard to find remaining manual branches where selected chains diverge from reviewed catalog-fabric intent.
+The next safe step is still to re-run live replay once the 1C side is actively polling the proxy. In parallel, local-only consolidation can continue by using `alignment_status`, alignment reason-code telemetry, truth-harness artifact surfacing, and the representative guard to find remaining manual branches where selected chains diverge from reviewed catalog-fabric intent.
 
 Recommended order:
 
diff --git a/docs/ARCH/11 - architecture_turnaround/README.md b/docs/ARCH/11 - architecture_turnaround/README.md
index 9cb1055..47dbf0b 100644
--- a/docs/ARCH/11 - architecture_turnaround/README.md	
+++ b/docs/ARCH/11 - architecture_turnaround/README.md	
@@ -84,6 +84,7 @@ It now documents a turnaround that is already operational in code, already mater
   - planner/runtime/debug surfaces now expose `catalog_chain_template_alignment`, so semantic replay can see whether selected chains match the catalog top match, fall back to a lower-ranked template, or bypass catalog search;
   - planner reason codes now also emit stable catalog-alignment telemetry, so automated replay review can filter top-match, lower-rank, outside-match, and unscored selected-chain states without hand-parsing debug JSON;
   - catalog-alignment now carries a single `alignment_status` verdict through planner/runtime/debug, making replay divergence detection explicit instead of reconstructing it from booleans;
+  - truth-harness and scenario acceptance artifacts now preserve catalog-alignment status/top-match fields, so AGENT replay review can spot planner-vs-catalog divergence directly in `truth_review.md` and `scenario_acceptance_matrix.json`;
   - explicit-counterparty incoming-vs-outgoing data-need graphs now select the reviewed `value_flow_comparison` chain instead of falling back to generic `value_flow`;
   - live map sync: [20 - planner_autonomy_consolidation_2026-05-01.md](./20%20-%20planner_autonomy_consolidation_2026-05-01.md)
 
@@ -96,8 +97,8 @@ Current honest status:
 - open-world bounded-autonomy readiness: `~85%`
 - Post-F semantic integrity module progress: `~99%` operationally closed, with remaining risk now treated as next-slice discovery rather than an open blocker inside the closed slice
 - active inventory-stock breadth slice progress: `100%` for the declared scenario pack, not for arbitrary inventory questions
-- Planner Autonomy Consolidation progress: `~87%` for the declared module, with catalog-fabric, value-flow arbitration, lifecycle bounded inference, broad-evaluation bridge, inventory catalog templates, inventory runtime-boundary honesty, exact inventory recipe bridging, unambiguous metadata-surface lane inference, catalog chain-template scoring, structured chain-match contract exposure, runtime/debug propagation, subject-aware bidirectional comparison arbitration, structured catalog-alignment verdicts, representative alignment regression guard, catalog-alignment reason-code telemetry, and explicit `alignment_status` propagation validated locally, but live replay for the new bridge is currently blocked by missing active 1C polling and broader unfamiliar 1C asks still need replay-backed growth
-- graph snapshot after latest rebuild: `5943 nodes`, `12915 edges`, `136 communities`
+- Planner Autonomy Consolidation progress: `~88%` for the declared module, with catalog-fabric, value-flow arbitration, lifecycle bounded inference, broad-evaluation bridge, inventory catalog templates, inventory runtime-boundary honesty, exact inventory recipe bridging, unambiguous metadata-surface lane inference, catalog chain-template scoring, structured chain-match contract exposure, runtime/debug propagation, subject-aware bidirectional comparison arbitration, structured catalog-alignment verdicts, representative alignment regression guard, catalog-alignment reason-code telemetry, explicit `alignment_status` propagation, and truth-harness/acceptance-matrix surfacing validated locally, but live replay for the new bridge is currently blocked by missing active 1C polling and broader unfamiliar 1C asks still need replay-backed growth
+- graph snapshot after latest rebuild: `5946 nodes`, `12918 edges`, `136 communities`
 - current breakpoint:
   - the validated hot paths are no longer structurally broken;
   - flagship continuity collapse is no longer the primary risk;
@@ -150,6 +151,7 @@ Latest live proof now includes:
 - representative catalog-alignment regression guard accepted locally: planner slice passed `37/37`; full MCP-discovery slice passed `283/283` with `9` skipped; build passed; graphify rebuilt to `5942 nodes`, `12912 edges`, `140 communities`
 - catalog-alignment reason-code telemetry accepted locally: planner/runtime slice passed `53/53`; full MCP-discovery suite passed `283/283` with `9` skipped; build passed; graphify rebuilt to `5943 nodes`, `12915 edges`, `136 communities`
 - catalog-alignment status verdict accepted locally: planner/runtime/debug slice passed `55/55`; full MCP-discovery suite passed `283/283` with `9` skipped; build passed; graphify rebuilt to `5943 nodes`, `12915 edges`, `136 communities`
+- catalog-alignment replay artifact surfacing accepted locally: Python truth-harness/acceptance tests passed `4/4`; graphify rebuilt to `5946 nodes`, `12918 edges`, `136 communities`
 
 Current architectural reading:
 
diff --git a/scripts/domain_case_loop.py b/scripts/domain_case_loop.py
index 774fb1f..cc082a8 100644
--- a/scripts/domain_case_loop.py
+++ b/scripts/domain_case_loop.py
@@ -1727,6 +1727,9 @@ def build_scenario_step_state(
         "selected_recipe": debug.get("selected_recipe"),
         "capability_id": debug.get("capability_id"),
         "capability_route_mode": debug.get("capability_route_mode"),
+        "mcp_discovery_catalog_chain_alignment_status": debug.get("mcp_discovery_catalog_chain_alignment_status"),
+        "mcp_discovery_catalog_chain_top_match": debug.get("mcp_discovery_catalog_chain_top_match"),
+        "mcp_discovery_catalog_chain_selected_matches_top": debug.get("mcp_discovery_catalog_chain_selected_matches_top"),
         "route_expectation_status": debug.get("route_expectation_status"),
         "result_mode": debug.get("result_mode"),
         "response_type": debug.get("response_type"),
diff --git a/scripts/domain_truth_harness.py b/scripts/domain_truth_harness.py
index 2825b5d..2315f0b 100644
--- a/scripts/domain_truth_harness.py
+++ b/scripts/domain_truth_harness.py
@@ -679,6 +679,9 @@ def build_truth_review_markdown(spec: dict[str, Any], scenario_state: dict[str,
                 f"intent: `{step_state.get('detected_intent') or 'n/a'}`",
                 f"recipe: `{step_state.get('selected_recipe') or 'n/a'}`",
                 f"capability: `{step_state.get('capability_id') or 'n/a'}`",
+                f"catalog_alignment_status: `{step_state.get('mcp_discovery_catalog_chain_alignment_status') or 'n/a'}`",
+                f"catalog_top_match: `{step_state.get('mcp_discovery_catalog_chain_top_match') or 'n/a'}`",
+                f"catalog_selected_matches_top: `{step_state.get('mcp_discovery_catalog_chain_selected_matches_top')}`",
                 f"limited_reason_category: `{step_state.get('limited_reason_category') or 'n/a'}`",
                 f"filters: `{dump_json(step_state.get('extracted_filters') or {})}`",
                 f"direct_answer: {step_state.get('actual_direct_answer') or 'n/a'}",
diff --git a/scripts/scenario_acceptance_policy.py b/scripts/scenario_acceptance_policy.py
index 69c519d..6cbf1da 100644
--- a/scripts/scenario_acceptance_policy.py
+++ b/scripts/scenario_acceptance_policy.py
@@ -198,6 +198,9 @@ def build_scenario_acceptance_matrix(
                 "reply_type": step_state.get("reply_type"),
                 "detected_intent": step_state.get("detected_intent"),
                 "capability_id": step_state.get("capability_id"),
+                "mcp_discovery_catalog_chain_alignment_status": step_state.get("mcp_discovery_catalog_chain_alignment_status"),
+                "mcp_discovery_catalog_chain_top_match": step_state.get("mcp_discovery_catalog_chain_top_match"),
+                "mcp_discovery_catalog_chain_selected_matches_top": step_state.get("mcp_discovery_catalog_chain_selected_matches_top"),
                 "selected_object_step": _has_selected_object_signal(step),
                 "meta_context_step": _has_meta_context_signal(step),
                 "highest_unresolved_priority": highest_priority,
@@ -330,6 +333,9 @@ def build_scenario_acceptance_matrix_markdown(acceptance_matrix: dict[str, Any])
                 f"  review_status: `{row.get('review_status')}`",
                 f"  criticality: `{row.get('criticality')}`",
                 f"  semantic_tags: {', '.join(row.get('semantic_tags') or []) or 'none'}",
+                f"  catalog_alignment_status: `{row.get('mcp_discovery_catalog_chain_alignment_status') or 'n/a'}`",
+                f"  catalog_top_match: `{row.get('mcp_discovery_catalog_chain_top_match') or 'n/a'}`",
+                f"  catalog_selected_matches_top: `{row.get('mcp_discovery_catalog_chain_selected_matches_top')}`",
                 f"  highest_unresolved_priority: `{row.get('highest_unresolved_priority')}`",
                 f"  selected_object_step: `{row.get('selected_object_step')}`",
                 f"  meta_context_step: `{row.get('meta_context_step')}`",
diff --git a/scripts/test_domain_case_loop_step_state.py b/scripts/test_domain_case_loop_step_state.py
new file mode 100644
index 0000000..d2ed7a2
--- /dev/null
+++ b/scripts/test_domain_case_loop_step_state.py
@@ -0,0 +1,54 @@
+from __future__ import annotations
+
+import sys
+import unittest
+from pathlib import Path
+
+
+sys.path.insert(0, str(Path(__file__).resolve().parent))
+
+import domain_case_loop as dcl
+
+
+class DomainCaseLoopStepStateTests(unittest.TestCase):  # unit tests for dcl.build_scenario_step_state output
+    def test_preserves_mcp_catalog_alignment_debug_fields(self) -> None:  # catalog-alignment debug keys must surface in the step state
+        step_state = dcl.build_scenario_step_state(
+            scenario_id="planner_alignment_demo",
+            domain="planner_autonomy",
+            step={
+                "step_id": "step_01",
+                "title": "Alignment visibility",
+                "depends_on": [],
+                "question_template": "show planner alignment",
+            },
+            step_index=1,
+            question_resolved="show planner alignment",
+            analysis_context={},
+            turn_artifact={
+                "assistant_message": {
+                    "reply_type": "factual",
+                    "text": "Confirmed answer",
+                    "message_id": "msg-1",
+                    "trace_id": "trace-1",
+                },
+                "technical_debug_payload": {  # presumably the dict the builder reads its debug fields from - confirm in domain_case_loop
+                    "detected_mode": "address_query",
+                    "detected_intent": "counterparty_turnover",
+                    "selected_recipe": "counterparty_turnover_by_period",
+                    "capability_id": "confirmed_counterparty_turnover",
+                    "mcp_discovery_catalog_chain_alignment_status": "selected_matches_top",  # field under test
+                    "mcp_discovery_catalog_chain_top_match": "value_flow",  # field under test
+                    "mcp_discovery_catalog_chain_selected_matches_top": True,  # field under test
+                },
+                "session_summary": {},
+            },
+            entries=[],
+        )
+
+        self.assertEqual(step_state["mcp_discovery_catalog_chain_alignment_status"], "selected_matches_top")  # exact string must match the payload value
+        self.assertEqual(step_state["mcp_discovery_catalog_chain_top_match"], "value_flow")  # exact string must match the payload value
+        self.assertTrue(step_state["mcp_discovery_catalog_chain_selected_matches_top"])  # NOTE(review): truthiness only; any truthy value passes, not just the bool True
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/scripts/test_scenario_acceptance_policy.py b/scripts/test_scenario_acceptance_policy.py
index 63a9d52..0cb5cb4 100644
--- a/scripts/test_scenario_acceptance_policy.py
+++ b/scripts/test_scenario_acceptance_policy.py
@@ -84,6 +84,9 @@ class ScenarioAcceptancePolicyTests(unittest.TestCase):
                     "reply_type": "factual",
                     "detected_intent": "inventory_on_hand_as_of_date",
                     "capability_id": "confirmed_inventory_on_hand_as_of_date",
+                    "mcp_discovery_catalog_chain_alignment_status": "selected_matches_top",
+                    "mcp_discovery_catalog_chain_top_match": "inventory_stock_snapshot",
+                    "mcp_discovery_catalog_chain_selected_matches_top": True,
                     "review_findings": [],
                 }
             },
@@ -104,6 +107,15 @@ class ScenarioAcceptancePolicyTests(unittest.TestCase):
         self.assertTrue(pack_state["acceptance_gate_passed"])
         self.assertTrue(pack_state["critical_path_green"])
         self.assertTrue(all(pack_state["invariants"].values()))
+        self.assertEqual(
+            acceptance_matrix["rows"][0]["mcp_discovery_catalog_chain_alignment_status"],
+            "selected_matches_top",
+        )
+        self.assertEqual(
+            acceptance_matrix["rows"][0]["mcp_discovery_catalog_chain_top_match"],
+            "inventory_stock_snapshot",
+        )
+        self.assertTrue(acceptance_matrix["rows"][0]["mcp_discovery_catalog_chain_selected_matches_top"])
 
     def test_flags_meta_context_integrity_when_meta_step_leaks_technical_answer_shape(self) -> None:
         spec = {