Planner Autonomy: закрепить alignment guard для catalog chains

This commit is contained in:
dctouch 2026-05-01 14:47:19 +03:00
parent 417b51096e
commit 67f5e908c9
3 changed files with 151 additions and 2 deletions

View File

@ -228,6 +228,13 @@ Latest validation after structured catalog chain-template alignment verdict:
- `npm.cmd run build`: passed
- graphify rebuild: `5941 nodes`, `12911 edges`, `136 communities`
Latest validation after representative catalog-alignment regression guard:
- targeted planner tests: passed, `37 passed`
- full MCP-discovery suite: passed, `283 passed`, `9 skipped`
- `npm.cmd run build`: passed
- graphify rebuild: `5942 nodes`, `12912 edges`, `140 communities`
## Next Step
The next safe step is still to re-run live replay once the 1C side is actively polling the proxy. In parallel, local-only consolidation can continue by using the alignment verdict to find remaining manual branches where selected chains diverge from reviewed catalog-fabric intent.

View File

@ -94,8 +94,8 @@ Current honest status:
- open-world bounded-autonomy readiness: `~85%`
- Post-F semantic integrity module progress: `~99%` operationally closed, with remaining risk now treated as next-slice discovery rather than an open blocker inside the closed slice
- active inventory-stock breadth slice progress: `100%` for the declared scenario pack, not for arbitrary inventory questions
- Planner Autonomy Consolidation progress: `~84%` for the declared module, with catalog-fabric, value-flow arbitration, lifecycle bounded inference, broad-evaluation bridge, inventory catalog templates, inventory runtime-boundary honesty, exact inventory recipe bridging, unambiguous metadata-surface lane inference, catalog chain-template scoring, structured chain-match contract exposure, runtime/debug propagation, subject-aware bidirectional comparison arbitration, and structured catalog-alignment verdicts validated locally, but live replay for the new bridge is currently blocked by missing active 1C polling and broader unfamiliar 1C asks still need replay-backed growth
- graph snapshot after latest rebuild: `5941 nodes`, `12911 edges`, `136 communities`
- Planner Autonomy Consolidation progress: `~85%` for the declared module, with catalog-fabric, value-flow arbitration, lifecycle bounded inference, broad-evaluation bridge, inventory catalog templates, inventory runtime-boundary honesty, exact inventory recipe bridging, unambiguous metadata-surface lane inference, catalog chain-template scoring, structured chain-match contract exposure, runtime/debug propagation, subject-aware bidirectional comparison arbitration, structured catalog-alignment verdicts, and the representative catalog-alignment regression guard all validated locally; live replay for the new bridge is currently blocked by missing active 1C polling, and broader unfamiliar 1C asks still need replay-backed growth
- graph snapshot after latest rebuild: `5942 nodes`, `12912 edges`, `140 communities`
- current breakpoint:
- the validated hot paths are no longer structurally broken;
- flagship continuity collapse is no longer the primary risk;
@ -145,6 +145,7 @@ Latest live proof now includes:
- structured chain-template runtime/debug propagation accepted locally: runtime/debug slice passed `18/18`; full MCP-discovery slice passed `282/282` with `9` skipped; build passed; graphify rebuilt to `5940 nodes`, `12909 edges`, `137 communities`
- subject-aware bidirectional comparison arbitration accepted locally: planner slice passed `36/36`; full MCP-discovery slice passed `282/282` with `9` skipped; build passed; graphify rebuilt to `5940 nodes`, `12909 edges`, `137 communities`
- structured catalog-alignment verdict accepted locally: planner/runtime/debug slice passed `54/54`; full MCP-discovery slice passed `282/282` with `9` skipped; build passed; graphify rebuilt to `5941 nodes`, `12911 edges`, `136 communities`
- representative catalog-alignment regression guard accepted locally: planner slice passed `37/37`; full MCP-discovery slice passed `283/283` with `9` skipped; build passed; graphify rebuilt to `5942 nodes`, `12912 edges`, `140 communities`
Current architectural reading:

View File

@ -63,6 +63,147 @@ describe("assistant MCP discovery planner", () => {
expect(result.reason_codes).toContain("planner_catalog_chain_template_search_top_value_flow");
});
it("keeps representative graph-selected chains aligned with top catalog template matches", () => {
  // Regression guard: one representative planner input per chain family. For every scenario the
  // planner's selected chain must equal the expected chain id AND be reported as the rank-1
  // catalog chain-template match.

  // Optional per-scenario overrides for the data-need graph fixture.
  type GraphOverrides = {
    subject_candidates?: string[];
    comparison_need?: string | null;
    ranking_need?: string | null;
  };

  // Builds a data-need graph fixture. Only the fact family, the action family and the three
  // overridable fields differ between scenarios; every other field is a fixed literal.
  const buildGraph = (businessFactFamily: string, actionFamily: string, extra: GraphOverrides = {}) => {
    const subjectCandidates = extra.subject_candidates ?? ["SVK"];
    const comparisonNeed = extra.comparison_need ?? null;
    const rankingNeed = extra.ranking_need ?? null;

    return {
      schema_version: "assistant_data_need_graph_v1" as const,
      policy_owner: "assistantMcpDiscoveryDataNeedGraph" as const,
      subject_candidates: subjectCandidates,
      business_fact_family: businessFactFamily,
      action_family: actionFamily,
      aggregation_need: null,
      time_scope_need: "explicit_period",
      comparison_need: comparisonNeed,
      ranking_need: rankingNeed,
      proof_expectation: "coverage_checked_fact",
      clarification_gaps: [],
      decomposition_candidates: [],
      forbidden_overclaim_flags: ["no_raw_model_claims", "no_unchecked_fact_totals"],
      reason_codes: ["data_need_graph_built"]
    };
  };

  // `label` is only used as the assertion message so a failure names the offending scenario.
  const scenarios = [
    {
      label: "value_flow",
      expectedChain: "value_flow",
      plannerInput: {
        dataNeedGraph: buildGraph("value_flow", "turnover"),
        turnMeaning: {
          asked_action_family: "turnover",
          explicit_entity_candidates: ["SVK"],
          explicit_date_scope: "2020"
        }
      }
    },
    {
      label: "value_flow_comparison",
      expectedChain: "value_flow_comparison",
      plannerInput: {
        dataNeedGraph: buildGraph("value_flow", "net_value_flow", { comparison_need: "incoming_vs_outgoing" }),
        turnMeaning: {
          asked_action_family: "net_value_flow",
          explicit_entity_candidates: ["SVK"],
          explicit_date_scope: "2020"
        }
      }
    },
    {
      // Ranking scenario: no subject candidates, scoped by organization instead of an entity.
      label: "value_flow_ranking",
      expectedChain: "value_flow_ranking",
      plannerInput: {
        dataNeedGraph: buildGraph("value_flow", "turnover", { subject_candidates: [], ranking_need: "top_desc" }),
        turnMeaning: {
          asked_action_family: "turnover",
          explicit_date_scope: "2020",
          explicit_organization_scope: "Org"
        }
      }
    },
    {
      label: "document_evidence",
      expectedChain: "document_evidence",
      plannerInput: {
        dataNeedGraph: buildGraph("document_evidence", "list_documents"),
        turnMeaning: {
          asked_action_family: "list_documents",
          explicit_entity_candidates: ["SVK"],
          explicit_date_scope: "2020"
        }
      }
    },
    {
      label: "movement_evidence",
      expectedChain: "movement_evidence",
      plannerInput: {
        dataNeedGraph: buildGraph("movement_evidence", "list_movements"),
        turnMeaning: {
          asked_action_family: "list_movements",
          explicit_entity_candidates: ["SVK"],
          explicit_date_scope: "2020"
        }
      }
    },
    {
      // The only scenario whose expected chain id differs from its label / fact family.
      label: "schema_surface",
      expectedChain: "metadata_inspection",
      plannerInput: {
        dataNeedGraph: buildGraph("schema_surface", "inspect_catalog", { subject_candidates: [] }),
        turnMeaning: {
          asked_action_family: "inspect_catalog"
        }
      }
    },
    {
      label: "entity_resolution",
      expectedChain: "entity_resolution",
      plannerInput: {
        dataNeedGraph: buildGraph("entity_grounding", "search_business_entity"),
        turnMeaning: {
          asked_action_family: "search_business_entity",
          explicit_entity_candidates: ["SVK"]
        }
      }
    },
    {
      label: "lifecycle",
      expectedChain: "lifecycle",
      plannerInput: {
        dataNeedGraph: buildGraph("activity_lifecycle", "activity_duration"),
        turnMeaning: {
          asked_action_family: "activity_duration",
          explicit_entity_candidates: ["SVK"]
        }
      }
    },
    {
      label: "inventory_stock_snapshot",
      expectedChain: "inventory_stock_snapshot",
      plannerInput: {
        dataNeedGraph: buildGraph("inventory_stock_snapshot", "stock_snapshot", { subject_candidates: [] }),
        turnMeaning: {
          asked_action_family: "stock_snapshot",
          explicit_date_scope: "2020",
          explicit_organization_scope: "Org"
        }
      }
    }
  ] as const;

  scenarios.forEach(({ label, expectedChain, plannerInput }) => {
    const plan = planAssistantMcpDiscovery(plannerInput);

    // The selected chain is checked first, then the alignment verdict for that same chain.
    expect(plan.selected_chain_id, label).toBe(expectedChain);

    const alignment = plan.catalog_chain_template_alignment;
    expect(alignment.top_chain_template_match, label).toBe(expectedChain);
    expect(alignment.selected_chain_template_rank, label).toBe(1);
    expect(alignment.selected_chain_matches_top, label).toBe(true);
  });
});
it("keeps a value-flow plan in clarification state when period axis is missing", () => {
const result = planAssistantMcpDiscovery({
turnMeaning: {