diff --git a/docs/ARCH/11 - architecture_turnaround/20 - planner_autonomy_consolidation_2026-05-01.md b/docs/ARCH/11 - architecture_turnaround/20 - planner_autonomy_consolidation_2026-05-01.md index 9c8a36e..3b2a99d 100644 --- a/docs/ARCH/11 - architecture_turnaround/20 - planner_autonomy_consolidation_2026-05-01.md +++ b/docs/ARCH/11 - architecture_turnaround/20 - planner_autonomy_consolidation_2026-05-01.md @@ -119,6 +119,7 @@ The following consolidation step added catalog-level chain-template scoring: - `assistantMcpDiscoveryPlanner` records the top catalog chain-template match in reason codes and exposes the ranked matches as `catalog_chain_template_matches` in the planner contract while preserving existing guarded execution behavior. - the ranked chain-template matches are now propagated into runtime loop state and debug attachment fields, so replay analysis can inspect catalog-fabric intent without parsing reason-code strings. - `catalog_chain_template_alignment` now records whether the selected chain is the top catalog match, its rank, and whether it appeared in the catalog search results; runtime loop state and debug summary expose the same verdict. +- planner reason codes now emit stable catalog-alignment telemetry for evaluated top-match, selected-equals-top, selected-lower-rank, selected-outside-match-set, and unscored selected-chain states. ## Why This Matters @@ -235,9 +236,16 @@ Latest validation after representative catalog-alignment regression guard: - `npm.cmd run build`: passed - graphify rebuild: `5942 nodes`, `12912 edges`, `140 communities` +Latest validation after catalog-alignment reason-code telemetry: + +- targeted planner/runtime tests: passed, `53 passed` +- full MCP-discovery suite: passed, `283 passed`, `9 skipped` +- `npm.cmd run build`: passed +- graphify rebuild: `5943 nodes`, `12915 edges`, `136 communities` + ## Next Step -The next safe step is still to re-run live replay once the 1C side is actively polling the proxy. In parallel, local-only consolidation can continue by using the alignment verdict to find remaining manual branches where selected chains diverge from reviewed catalog-fabric intent. +The next safe step is still to re-run live replay once the 1C side is actively polling the proxy. In parallel, local-only consolidation can continue by using the alignment verdict and reason-code telemetry to find remaining manual branches where selected chains diverge from reviewed catalog-fabric intent. Recommended order: diff --git a/docs/ARCH/11 - architecture_turnaround/README.md b/docs/ARCH/11 - architecture_turnaround/README.md index 6774169..4230504 100644 --- a/docs/ARCH/11 - architecture_turnaround/README.md +++ b/docs/ARCH/11 - architecture_turnaround/README.md @@ -82,6 +82,7 @@ It now documents a turnaround that is already operational in code, already mater - unambiguous metadata surfaces can now infer the next reviewed lane from `Document.*`, `Register.*`, or `Catalog.*` objects even before upstream labels `downstream_route_family`, while mixed surfaces still do not guess; - catalog index now scores reviewed chain templates directly from fact/action/axis/comparison/ranking needs, and planner/runtime/debug surfaces expose ranked catalog chain matches through the structured `catalog_chain_template_matches` contract path instead of relying only on reason-code strings; - planner/runtime/debug surfaces now expose `catalog_chain_template_alignment`, so semantic replay can see whether selected chains match the catalog top match, fall back to a lower-ranked template, or bypass catalog search; + - planner reason codes now also emit stable catalog-alignment telemetry, so automated replay review can filter top-match, lower-rank, outside-match, and unscored selected-chain states without hand-parsing debug JSON; - explicit-counterparty incoming-vs-outgoing data-need graphs now select the reviewed `value_flow_comparison` chain instead of falling back to generic `value_flow`; - live map sync: [20 - planner_autonomy_consolidation_2026-05-01.md](./20%20-%20planner_autonomy_consolidation_2026-05-01.md) @@ -94,8 +95,8 @@ Current honest status: - open-world bounded-autonomy readiness: `~85%` - Post-F semantic integrity module progress: `~99%` operationally closed, with remaining risk now treated as next-slice discovery rather than an open blocker inside the closed slice - active inventory-stock breadth slice progress: `100%` for the declared scenario pack, not for arbitrary inventory questions -- Planner Autonomy Consolidation progress: `~85%` for the declared module, with catalog-fabric, value-flow arbitration, lifecycle bounded inference, broad-evaluation bridge, inventory catalog templates, inventory runtime-boundary honesty, exact inventory recipe bridging, unambiguous metadata-surface lane inference, catalog chain-template scoring, structured chain-match contract exposure, runtime/debug propagation, subject-aware bidirectional comparison arbitration, structured catalog-alignment verdicts, and representative alignment regression guard validated locally, but live replay for the new bridge is currently blocked by missing active 1C polling and broader unfamiliar 1C asks still need replay-backed growth -- graph snapshot after latest rebuild: `5942 nodes`, `12912 edges`, `140 communities` +- Planner Autonomy Consolidation progress: `~86%` for the declared module, with catalog-fabric, value-flow arbitration, lifecycle bounded inference, broad-evaluation bridge, inventory catalog templates, inventory runtime-boundary honesty, exact inventory recipe bridging, unambiguous metadata-surface lane inference, catalog chain-template scoring, structured chain-match contract exposure, runtime/debug propagation, subject-aware bidirectional comparison arbitration, structured catalog-alignment verdicts, representative alignment regression guard, and catalog-alignment reason-code telemetry validated locally, but live replay for the new bridge is currently blocked by missing active 1C polling and broader unfamiliar 1C asks still need replay-backed growth +- graph snapshot after latest rebuild: `5943 nodes`, `12915 edges`, `136 communities` - current breakpoint: - the validated hot paths are no longer structurally broken; - flagship continuity collapse is no longer the primary risk; @@ -146,6 +147,7 @@ Latest live proof now includes: - subject-aware bidirectional comparison arbitration accepted locally: planner slice passed `36/36`; full MCP-discovery slice passed `282/282` with `9` skipped; build passed; graphify rebuilt to `5940 nodes`, `12909 edges`, `137 communities` - structured catalog-alignment verdict accepted locally: planner/runtime/debug slice passed `54/54`; full MCP-discovery slice passed `282/282` with `9` skipped; build passed; graphify rebuilt to `5941 nodes`, `12911 edges`, `136 communities` - representative catalog-alignment regression guard accepted locally: planner slice passed `37/37`; full MCP-discovery slice passed `283/283` with `9` skipped; build passed; graphify rebuilt to `5942 nodes`, `12912 edges`, `140 communities` +- catalog-alignment reason-code telemetry accepted locally: planner/runtime slice passed `53/53`; full MCP-discovery suite passed `283/283` with `9` skipped; build passed; graphify rebuilt to `5943 nodes`, `12915 edges`, `136 communities` Current architectural reading: diff --git a/llm_normalizer/backend/dist/services/assistantMcpDiscoveryPlanner.js b/llm_normalizer/backend/dist/services/assistantMcpDiscoveryPlanner.js index ad4583d..2fd39eb 100644 --- a/llm_normalizer/backend/dist/services/assistantMcpDiscoveryPlanner.js +++ b/llm_normalizer/backend/dist/services/assistantMcpDiscoveryPlanner.js @@ -40,6 +40,24 @@ function pushAllUnique(target, values) { pushUnique(target, value); } } +function pushCatalogChainTemplateAlignmentReasons(target, alignment) { + if (alignment.top_chain_template_match) { + pushReason(target, "planner_catalog_chain_template_alignment_evaluated"); + if (alignment.selected_chain_matches_top) { + pushReason(target, "planner_selected_chain_matches_catalog_top"); + } + else if (alignment.selected_chain_in_catalog_matches) { + pushReason(target, "planner_selected_chain_uses_lower_rank_catalog_match"); + } + else { + pushReason(target, "planner_selected_chain_outside_catalog_match_set"); + } + return; + } + if (alignment.selected_chain_is_catalog_template) { + pushReason(target, "planner_catalog_chain_template_alignment_unscored"); + } +} const LIFECYCLE_BOUNDED_INFERENCE_REASON_CODES = [ "planner_lifecycle_bounded_activity_window_template", "planner_lifecycle_legal_fact_boundary_required" @@ -1000,6 +1018,7 @@ function planAssistantMcpDiscovery(input) { if (budgetOverride.maxProbeCount) { pushReason(reasonCodes, "planner_enabled_chunked_coverage_probe_budget"); } + pushCatalogChainTemplateAlignmentReasons(reasonCodes, catalogChainTemplateAlignment); const plan = (0, assistantMcpDiscoveryPolicy_1.buildAssistantMcpDiscoveryPlan)({ semanticDataNeed, turnMeaning: input.turnMeaning, diff --git a/llm_normalizer/backend/src/services/assistantMcpDiscoveryPlanner.ts b/llm_normalizer/backend/src/services/assistantMcpDiscoveryPlanner.ts index 57a84bf..2f955a2 100644 --- a/llm_normalizer/backend/src/services/assistantMcpDiscoveryPlanner.ts +++ b/llm_normalizer/backend/src/services/assistantMcpDiscoveryPlanner.ts @@ -142,6 +142,27 @@ function pushAllUnique(target: string[], values: string[]): void { } } +function pushCatalogChainTemplateAlignmentReasons( + target: string[], + alignment: AssistantMcpDiscoveryCatalogChainTemplateAlignment +): void { + if (alignment.top_chain_template_match) { + pushReason(target, "planner_catalog_chain_template_alignment_evaluated"); + if (alignment.selected_chain_matches_top) { + pushReason(target, "planner_selected_chain_matches_catalog_top"); + } else if (alignment.selected_chain_in_catalog_matches) { + pushReason(target, "planner_selected_chain_uses_lower_rank_catalog_match"); + } else { + pushReason(target, "planner_selected_chain_outside_catalog_match_set"); + } + return; + } + + if (alignment.selected_chain_is_catalog_template) { + pushReason(target, "planner_catalog_chain_template_alignment_unscored"); + } +} + const LIFECYCLE_BOUNDED_INFERENCE_REASON_CODES = [ "planner_lifecycle_bounded_activity_window_template", "planner_lifecycle_legal_fact_boundary_required" @@ -1255,6 +1276,7 @@ export function planAssistantMcpDiscovery( if (budgetOverride.maxProbeCount) { pushReason(reasonCodes, "planner_enabled_chunked_coverage_probe_budget"); } + pushCatalogChainTemplateAlignmentReasons(reasonCodes, catalogChainTemplateAlignment); const plan = buildAssistantMcpDiscoveryPlan({ semanticDataNeed, diff --git a/llm_normalizer/backend/tests/assistantMcpDiscoveryPlanner.test.ts b/llm_normalizer/backend/tests/assistantMcpDiscoveryPlanner.test.ts index 5241ceb..35a0040 100644 --- a/llm_normalizer/backend/tests/assistantMcpDiscoveryPlanner.test.ts +++ b/llm_normalizer/backend/tests/assistantMcpDiscoveryPlanner.test.ts @@ -61,6 +61,8 @@ describe("assistant MCP discovery planner", () => { expect(result.reason_codes).toContain("planner_selected_catalog_primitives_from_decomposition_candidates"); expect(result.reason_codes).toContain("planner_scored_catalog_chain_templates_from_fact_axis"); expect(result.reason_codes).toContain("planner_catalog_chain_template_search_top_value_flow"); + expect(result.reason_codes).toContain("planner_catalog_chain_template_alignment_evaluated"); + expect(result.reason_codes).toContain("planner_selected_chain_matches_catalog_top"); }); it("keeps representative graph-selected chains aligned with top catalog template matches", () => { @@ -201,6 +203,7 @@ describe("assistant MCP discovery planner", () => { expect(result.catalog_chain_template_alignment.top_chain_template_match, item.name).toBe(item.expected); expect(result.catalog_chain_template_alignment.selected_chain_template_rank, item.name).toBe(1); expect(result.catalog_chain_template_alignment.selected_chain_matches_top, item.name).toBe(true); + expect(result.reason_codes, item.name).toContain("planner_selected_chain_matches_catalog_top"); } }); @@ -217,6 +220,7 @@ describe("assistant MCP discovery planner", () => { expect(result.catalog_review.review_status).toBe("needs_more_axes"); expect(result.catalog_review.missing_axes_by_primitive.query_movements).toContainEqual(["period", "counterparty"]); expect(result.reason_codes).toContain("planner_needs_more_user_or_scope_context"); + expect(result.reason_codes).toContain("planner_catalog_chain_template_alignment_unscored"); }); it("keeps requested monthly aggregation as an explicit planning axis for value-flow discovery", () => { diff --git a/llm_normalizer/backend/tests/assistantMcpDiscoveryRuntimeBridge.test.ts b/llm_normalizer/backend/tests/assistantMcpDiscoveryRuntimeBridge.test.ts index 7c205e3..6e6a6c8 100644 --- a/llm_normalizer/backend/tests/assistantMcpDiscoveryRuntimeBridge.test.ts +++ b/llm_normalizer/backend/tests/assistantMcpDiscoveryRuntimeBridge.test.ts @@ -149,6 +149,7 @@ describe("assistant MCP discovery runtime bridge", () => { expect(result.loop_state.provided_axes).toContain("aggregate_axis"); expect(result.loop_state.catalog_chain_template_matches[0]).toBe("value_flow_ranking"); expect(result.loop_state.catalog_chain_template_alignment.selected_chain_matches_top).toBe(true); + expect(result.reason_codes).toContain("planner_selected_chain_matches_catalog_top"); expect(result.reason_codes).toContain("runtime_bridge_loop_state_awaiting_clarification"); });