Planner Autonomy: скрыть внутренние MCP-ошибки в checked-source ответах
This commit is contained in:
parent
f6846206ad
commit
472d982486
|
|
@ -129,6 +129,8 @@ The following consolidation step added catalog-level chain-template scoring:
|
|||
- `address_truth_harness_phase32_planner_selected_chain_end_to_end.json` now uses the same assertions across selected-counterparty entity grounding, incoming/outgoing/net value-flow, document evidence, and movement evidence follow-ups.
|
||||
- `agent_semantic_pack_builder` now preserves these expected catalog-alignment fields in the reusable source catalog and adds the `planner_catalog_alignment` tag, so future mixed AGENT packs can deliberately select planner-brain regression probes instead of relying on hand-picked replay filenames.
|
||||
- The new `turnaround_11_planner_brain_alignment_mix` builder recipe generates `address_truth_harness_phase83_planner_brain_alignment_mix.json`, a 20-step mixed canary that crosses selected-counterparty value-flow, open-organization totals/comparison/ranking, broad-evaluation continuity, metadata drilldown, and off-domain living-chat safety.
|
||||
- The phase83 live replay now confirms that selected chains match the reviewed catalog top match across the mixed planner-brain pack. The remaining failure is not catalog arbitration: 1C/MCP fetches abort before confirmed business evidence is available.
|
||||
- Checked-source failure replies now sanitize raw MCP transport/internal continuation strings from the user-facing answer while keeping the raw diagnostics in technical debug payloads.
|
||||
|
||||
## Why This Matters
|
||||
|
||||
|
|
@ -297,9 +299,19 @@ Latest validation after phase83 mixed planner-brain spec generation:
|
|||
- regenerated `agent_semantic_source_catalog.*`: `planner_catalog_alignment` is visible with `26` reusable entries, including phase32, phase66, and phase83 probes
|
||||
- graphify rebuild: `5952 nodes`, `12927 edges`, `138 communities`
|
||||
|
||||
Latest validation after phase83 live replay and checked-source error sanitization:
|
||||
|
||||
- live phase83 after backend restart: `phase83_planner_brain_alignment_live_20260501_rerun2` and `phase83_planner_brain_alignment_live_20260501_rerun3` both ended `partial`, with `8/20` pass, `2` warning, `10` fail
|
||||
- phase83 invariant result: `catalog_alignment_ok=true`, `direct_answer_ok=true`, `temporal_honesty_ok=true`, `truth_gate_ok=true`
|
||||
- phase83 remaining blocker: confirmed business facts fail because 1C/MCP fetches abort before evidence is returned; this is not a planner/catalog top-match regression
|
||||
- targeted sanitization tests: `assistantMcpDiscoveryAnswerAdapter.test.ts`, `assistantMcpDiscoveryResponseCandidate.test.ts`, and `assistantMcpDiscoveryResponsePolicy.test.ts` passed `61/61` with `1` skipped
|
||||
- `npm.cmd run build`: passed
|
||||
- short live sanity `phase83_first2_sanity_live_20260501_errorfilter`: user-facing assistant answer no longer exposes raw `MCP fetch failed`, `This operation was aborted`, or `Entity-resolution could not continue`; those remain only in technical debug
|
||||
- graphify rebuild: `5953 nodes`, `12930 edges`, `137 communities`
|
||||
|
||||
## Next Step
|
||||
|
||||
The next safe step is still to re-run live replay once the 1C side is actively polling the proxy. The first live replay candidate should be `address_truth_harness_phase83_planner_brain_alignment_mix.json`; only after it is executed, reviewed semantically, fixed/rerun if needed, and accepted should it be saved into autoruns as a legacy AGENT pack. In parallel, local-only consolidation can continue by hardening additional planner-autonomy specs with expected catalog-chain assertions and using `alignment_status`, alignment reason-code telemetry, truth-harness artifact surfacing, the soft divergence warning, `catalog_alignment_ok`, and the representative guard to find remaining manual branches where selected chains diverge from reviewed catalog-fabric intent.
|
||||
The next safe step is to restore/verify the 1C polling side and re-run `address_truth_harness_phase83_planner_brain_alignment_mix.json` as the main live acceptance candidate. The planner-brain invariant is now proven in replay (`catalog_alignment_ok=true`); the remaining acceptance gap is confirmed business evidence. Only after phase83 is semantically accepted should it be saved into autoruns as a legacy AGENT pack. In parallel, local-only consolidation can continue by hardening additional planner-autonomy specs with expected catalog-chain assertions and using `alignment_status`, alignment reason-code telemetry, truth-harness artifact surfacing, the soft divergence warning, `catalog_alignment_ok`, and the representative guard to find remaining manual branches where selected chains diverge from reviewed catalog-fabric intent.
|
||||
|
||||
Recommended order:
|
||||
|
||||
|
|
|
|||
|
|
@ -92,6 +92,8 @@ It now documents a turnaround that is already operational in code, already mater
|
|||
- the phase32 selected-counterparty chain spec now asserts expected catalog-chain top matches across entity grounding, incoming/outgoing/net value-flow, document evidence, and movement evidence follow-ups;
|
||||
- AGENT semantic source catalog generation now preserves expected catalog-alignment fields and tags reusable steps as `planner_catalog_alignment`, so mixed pack construction can find planner-brain regression probes explicitly;
|
||||
- phase83 planner-brain mixed replay spec is now generated from the AGENT source catalog and interleaves selected-counterparty catalog alignment, open-organization money flow/ranking, broad-evaluation continuity, metadata drilldown, and off-domain living-chat safety;
|
||||
- phase83 live replay now proves the catalog-alignment invariant across the mixed pack (`catalog_alignment_ok=true`) even while business answers remain partial because 1C/MCP fetches abort before confirmed evidence is returned;
|
||||
- checked-source failure answers now keep raw MCP transport/internal continuation errors out of the user-facing layer while preserving those details in technical debug artifacts;
|
||||
- explicit-counterparty incoming-vs-outgoing data-need graphs now select the reviewed `value_flow_comparison` chain instead of falling back to generic `value_flow`;
|
||||
- live map sync: [20 - planner_autonomy_consolidation_2026-05-01.md](./20%20-%20planner_autonomy_consolidation_2026-05-01.md)
|
||||
|
||||
|
|
@ -104,8 +106,8 @@ Current honest status:
|
|||
- open-world bounded-autonomy readiness: `~85%`
|
||||
- Post-F semantic integrity module progress: `~99%` operationally closed, with remaining risk now treated as next-slice discovery rather than an open blocker inside the closed slice
|
||||
- active inventory-stock breadth slice progress: `100%` for the declared scenario pack, not for arbitrary inventory questions
|
||||
- Planner Autonomy Consolidation progress: `~94%` for the declared module, with catalog-fabric, value-flow arbitration, lifecycle bounded inference, broad-evaluation bridge, inventory catalog templates, inventory runtime-boundary honesty, exact inventory recipe bridging, unambiguous metadata-surface lane inference, catalog chain-template scoring, structured chain-match contract exposure, runtime/debug propagation, subject-aware bidirectional comparison arbitration, structured catalog-alignment verdicts, representative alignment regression guard, catalog-alignment reason-code telemetry, explicit `alignment_status` propagation, truth-harness/acceptance-matrix surfacing, soft divergence warning, `catalog_alignment_ok` acceptance invariant, step-level expected catalog-alignment assertions, phase66 and phase32 spec alignment expectations, AGENT source-catalog surfacing, and generated phase83 mixed planner-brain replay spec validated locally, but live replay for the new bridge is currently blocked by missing active 1C polling and broader unfamiliar 1C asks still need replay-backed growth
|
||||
- graph snapshot after latest rebuild: `5952 nodes`, `12927 edges`, `138 communities`
|
||||
- Planner Autonomy Consolidation progress: `~95%` for the declared module, with catalog-fabric, value-flow arbitration, lifecycle bounded inference, broad-evaluation bridge, inventory catalog templates, inventory runtime-boundary honesty, exact inventory recipe bridging, unambiguous metadata-surface lane inference, catalog chain-template scoring, structured chain-match contract exposure, runtime/debug propagation, subject-aware bidirectional comparison arbitration, structured catalog-alignment verdicts, representative alignment regression guard, catalog-alignment reason-code telemetry, explicit `alignment_status` propagation, truth-harness/acceptance-matrix surfacing, soft divergence warning, `catalog_alignment_ok` acceptance invariant, step-level expected catalog-alignment assertions, phase66 and phase32 spec alignment expectations, AGENT source-catalog surfacing, generated phase83 mixed planner-brain replay spec, live phase83 catalog-alignment proof, and checked-source user-facing error sanitization validated, but accepted business replay is still blocked by 1C/MCP fetch aborts and broader unfamiliar 1C asks still need replay-backed growth
|
||||
- graph snapshot after latest rebuild: `5953 nodes`, `12930 edges`, `137 communities`
|
||||
- current breakpoint:
|
||||
- the validated hot paths are no longer structurally broken;
|
||||
- flagship continuity collapse is no longer the primary risk;
|
||||
|
|
@ -165,6 +167,8 @@ Latest live proof now includes:
|
|||
- phase66 planner-alignment spec hardening accepted locally: Python truth-harness/acceptance tests passed `7/7`; `load_truth_harness_spec` confirmed expected top matches `[value_flow, value_flow, value_flow, value_flow_comparison, value_flow_comparison, value_flow_ranking, value_flow_ranking]`
|
||||
- phase32 selected-counterparty planner-alignment spec hardening and AGENT source-catalog surfacing accepted locally: Python replay-tooling tests passed `9/9`; `load_truth_harness_spec` confirmed expected top matches `[entity_resolution, value_flow, value_flow, value_flow_comparison, document_evidence, movement_evidence]`; regenerated source catalog exposes `planner_catalog_alignment` as a reusable tag
|
||||
- phase83 mixed planner-brain spec generation accepted locally: Python replay-tooling tests passed `10/10`; generated spec has `20` steps and `13` expected catalog top-match checks; regenerated source catalog exposes `planner_catalog_alignment` with `26` reusable entries; graphify rebuilt to `5952 nodes`, `12927 edges`, `138 communities`
|
||||
- phase83 live replay after backend restart: `phase83_planner_brain_alignment_live_20260501_rerun2` and `rerun3` reached `8/20` pass, `2` warning, `10` fail, final `partial`; key result is `catalog_alignment_ok=true` across all expected catalog checks, while business-answer failures are caused by `MCP fetch failed: This operation was aborted` before confirmed 1C evidence is available
|
||||
- checked-source error sanitization accepted: targeted answer/candidate/policy tests passed `61/61` with `1` skipped; build passed; `phase83_first2_sanity_live_20260501_errorfilter` confirms the user-facing assistant section no longer exposes raw `MCP fetch failed` or `Entity-resolution could not continue` strings; graphify rebuilt to `5953 nodes`, `12930 edges`, `137 communities`
|
||||
|
||||
Current architectural reading:
|
||||
|
||||
|
|
|
|||
|
|
@ -35,7 +35,12 @@ function formatNamedChoiceList(values) {
|
|||
}
|
||||
function isInternalMechanicsLine(value) {
|
||||
const text = value.toLowerCase();
|
||||
return (text.includes("primitive") ||
|
||||
return (text.includes("mcp fetch failed") ||
|
||||
text.includes("this operation was aborted") ||
|
||||
text.includes("entity-resolution") ||
|
||||
text.includes("could not continue") ||
|
||||
text.includes("checked catalog search step") ||
|
||||
text.includes("primitive") ||
|
||||
text.includes("query_documents") ||
|
||||
text.includes("query_movements") ||
|
||||
text.includes("resolve_entity_reference") ||
|
||||
|
|
@ -50,6 +55,12 @@ function isInternalMechanicsLine(value) {
|
|||
text.includes("needs more scope before execution") ||
|
||||
text.includes("mcp_execution_performed"));
|
||||
}
|
||||
/**
 * Reports whether a limitation line describes an MCP transport-level failure
 * (a failed MCP fetch or an aborted operation).
 *
 * Matching is case-insensitive and substring-based, so diagnostics with extra
 * detail around the marker (for example "MCP fetch failed: timeout") match too.
 *
 * @param {string} value Raw limitation text.
 * @returns {boolean} True when the text contains a known transport-failure marker.
 */
function isMcpTransportFailureLine(value) {
    const text = value.toLowerCase();
    // "operation was aborted" already covers "this operation was aborted",
    // so the longer phrase needs no separate check.
    return text.includes("mcp fetch failed") || text.includes("operation was aborted");
}
|
||||
/**
 * Builds the list of "unknown" lines that may be shown to the user.
 *
 * The input goes through `uniqueStrings` first; every entry that
 * `isInternalMechanicsLine` flags is then dropped. The order produced by
 * `uniqueStrings` is kept.
 *
 * @param {string[]} values Candidate unknown lines.
 * @returns {string[]} The lines that are not flagged as internal mechanics.
 */
function userFacingUnknowns(values) {
    const candidates = uniqueStrings(values);
    const isUserFacing = (line) => !isInternalMechanicsLine(line);
    return candidates.filter(isUserFacing);
}
|
||||
|
|
@ -62,7 +73,20 @@ function rankedValueFlowUnknownLines(pilot) {
|
|||
return [`Полный рейтинг контрагентов вне ${period} этим поиском не подтвержден.`];
|
||||
}
|
||||
/**
 * Builds the limitation lines that may be shown to the user.
 *
 * Every line recognised by `isMcpTransportFailureLine` is replaced by one
 * neutral sentence, added at most once, at the position of the first such line.
 * Of the remaining lines, those flagged by `isInternalMechanicsLine` are dropped
 * and the rest are kept in the order returned by `uniqueStrings`.
 *
 * @param {string[]} values Raw limitation lines.
 * @returns {string[]} Limitation lines without raw transport/internal diagnostics.
 */
function userFacingLimitations(values) {
    const transportFailureLine = "Доступ к 1С во время проверки оборвался; подтвержденные строки не получены.";
    const result = [];
    for (const value of uniqueStrings(values)) {
        // Check transport failures first: the internal-mechanics filter also matches
        // these strings and would otherwise drop them without any user-facing notice.
        if (isMcpTransportFailureLine(value)) {
            if (!result.includes(transportFailureLine)) {
                result.push(transportFailureLine);
            }
            continue;
        }
        if (!isInternalMechanicsLine(value)) {
            result.push(value);
        }
    }
    return result;
}
|
||||
function modeFor(pilot) {
|
||||
if (pilot.pilot_status === "blocked") {
|
||||
|
|
@ -452,7 +476,7 @@ function nextStepFor(mode, pilot) {
|
|||
}
|
||||
}
|
||||
if (mode === "checked_sources_only" && pilot.query_limitations.length > 0) {
|
||||
return "Можно повторить проверку после восстановления MCP-доступа или сузить вопрос до конкретного контрагента/периода.";
|
||||
return "Можно повторить проверку после восстановления доступа к 1С или сузить вопрос до конкретного контрагента/периода.";
|
||||
}
|
||||
if (mode === "blocked") {
|
||||
return "Нужно сначала снять policy/blocking причину, иначе данные 1С использовать нельзя.";
|
||||
|
|
|
|||
|
|
@ -48,7 +48,12 @@ function uniqueStrings(values) {
|
|||
}
|
||||
function hasInternalMechanics(value) {
|
||||
const text = value.toLowerCase();
|
||||
return (text.includes("query_documents") ||
|
||||
return (text.includes("mcp fetch failed") ||
|
||||
text.includes("this operation was aborted") ||
|
||||
text.includes("entity-resolution") ||
|
||||
text.includes("could not continue") ||
|
||||
text.includes("checked catalog search step") ||
|
||||
text.includes("query_documents") ||
|
||||
text.includes("query_movements") ||
|
||||
text.includes("primitive") ||
|
||||
text.includes("pilot_") ||
|
||||
|
|
|
|||
|
|
@ -38,7 +38,12 @@ function pushReason(target, value) {
|
|||
}
|
||||
function hasInternalMechanics(value) {
|
||||
const text = value.toLowerCase();
|
||||
return (text.includes("query_documents") ||
|
||||
return (text.includes("mcp fetch failed") ||
|
||||
text.includes("this operation was aborted") ||
|
||||
text.includes("entity-resolution") ||
|
||||
text.includes("could not continue") ||
|
||||
text.includes("checked catalog search step") ||
|
||||
text.includes("query_documents") ||
|
||||
text.includes("query_movements") ||
|
||||
text.includes("primitive") ||
|
||||
text.includes("pilot_") ||
|
||||
|
|
|
|||
|
|
@ -62,6 +62,11 @@ function formatNamedChoiceList(values: string[]): string {
|
|||
function isInternalMechanicsLine(value: string): boolean {
|
||||
const text = value.toLowerCase();
|
||||
return (
|
||||
text.includes("mcp fetch failed") ||
|
||||
text.includes("this operation was aborted") ||
|
||||
text.includes("entity-resolution") ||
|
||||
text.includes("could not continue") ||
|
||||
text.includes("checked catalog search step") ||
|
||||
text.includes("primitive") ||
|
||||
text.includes("query_documents") ||
|
||||
text.includes("query_movements") ||
|
||||
|
|
@ -79,6 +84,15 @@ function isInternalMechanicsLine(value: string): boolean {
|
|||
);
|
||||
}
|
||||
|
||||
/**
 * Reports whether a limitation line describes an MCP transport-level failure
 * (a failed MCP fetch or an aborted operation).
 *
 * Matching is case-insensitive and substring-based, so diagnostics with extra
 * detail around the marker (for example "MCP fetch failed: timeout") match too.
 *
 * @param value Raw limitation text.
 * @returns True when the text contains a known transport-failure marker.
 */
function isMcpTransportFailureLine(value: string): boolean {
  const text = value.toLowerCase();
  // "operation was aborted" already covers "this operation was aborted",
  // so the longer phrase needs no separate check.
  return text.includes("mcp fetch failed") || text.includes("operation was aborted");
}
|
||||
|
||||
/**
 * Builds the list of "unknown" lines that may be shown to the user.
 *
 * The input goes through `uniqueStrings` first; every entry that
 * `isInternalMechanicsLine` flags is then dropped. The order produced by
 * `uniqueStrings` is kept.
 *
 * @param values Candidate unknown lines.
 * @returns The lines that are not flagged as internal mechanics.
 */
function userFacingUnknowns(values: string[]): string[] {
  const candidates = uniqueStrings(values);
  const isUserFacing = (line: string): boolean => !isInternalMechanicsLine(line);
  return candidates.filter(isUserFacing);
}
|
||||
|
|
@ -93,7 +107,20 @@ function rankedValueFlowUnknownLines(pilot: AssistantMcpDiscoveryPilotExecutionC
|
|||
}
|
||||
|
||||
/**
 * Builds the limitation lines that may be shown to the user.
 *
 * Every line recognised by `isMcpTransportFailureLine` is replaced by one
 * neutral sentence, added at most once, at the position of the first such line.
 * Of the remaining lines, those flagged by `isInternalMechanicsLine` are dropped
 * and the rest are kept in the order returned by `uniqueStrings`.
 *
 * @param values Raw limitation lines.
 * @returns Limitation lines without raw transport/internal diagnostics.
 */
function userFacingLimitations(values: string[]): string[] {
  const transportFailureLine = "Доступ к 1С во время проверки оборвался; подтвержденные строки не получены.";
  const result: string[] = [];
  for (const value of uniqueStrings(values)) {
    // Check transport failures first: the internal-mechanics filter also matches
    // these strings and would otherwise drop them without any user-facing notice.
    if (isMcpTransportFailureLine(value)) {
      if (!result.includes(transportFailureLine)) {
        result.push(transportFailureLine);
      }
      continue;
    }
    if (!isInternalMechanicsLine(value)) {
      result.push(value);
    }
  }
  return result;
}
|
||||
|
||||
function modeFor(pilot: AssistantMcpDiscoveryPilotExecutionContract): AssistantMcpDiscoveryAnswerMode {
|
||||
|
|
@ -554,7 +581,7 @@ function nextStepFor(mode: AssistantMcpDiscoveryAnswerMode, pilot: AssistantMcpD
|
|||
}
|
||||
}
|
||||
if (mode === "checked_sources_only" && pilot.query_limitations.length > 0) {
|
||||
return "Можно повторить проверку после восстановления MCP-доступа или сузить вопрос до конкретного контрагента/периода.";
|
||||
return "Можно повторить проверку после восстановления доступа к 1С или сузить вопрос до конкретного контрагента/периода.";
|
||||
}
|
||||
if (mode === "blocked") {
|
||||
return "Нужно сначала снять policy/blocking причину, иначе данные 1С использовать нельзя.";
|
||||
|
|
|
|||
|
|
@ -75,6 +75,11 @@ function uniqueStrings(values: string[]): string[] {
|
|||
function hasInternalMechanics(value: string): boolean {
|
||||
const text = value.toLowerCase();
|
||||
return (
|
||||
text.includes("mcp fetch failed") ||
|
||||
text.includes("this operation was aborted") ||
|
||||
text.includes("entity-resolution") ||
|
||||
text.includes("could not continue") ||
|
||||
text.includes("checked catalog search step") ||
|
||||
text.includes("query_documents") ||
|
||||
text.includes("query_movements") ||
|
||||
text.includes("primitive") ||
|
||||
|
|
|
|||
|
|
@ -71,6 +71,11 @@ function pushReason(target: string[], value: string): void {
|
|||
function hasInternalMechanics(value: string): boolean {
|
||||
const text = value.toLowerCase();
|
||||
return (
|
||||
text.includes("mcp fetch failed") ||
|
||||
text.includes("this operation was aborted") ||
|
||||
text.includes("entity-resolution") ||
|
||||
text.includes("could not continue") ||
|
||||
text.includes("checked catalog search step") ||
|
||||
text.includes("query_documents") ||
|
||||
text.includes("query_movements") ||
|
||||
text.includes("primitive") ||
|
||||
|
|
|
|||
|
|
@ -110,8 +110,10 @@ describe("assistant MCP discovery answer adapter", () => {
|
|||
|
||||
expect(draft.answer_mode).toBe("checked_sources_only");
|
||||
expect(draft.confirmed_lines).toEqual([]);
|
||||
expect(draft.limitation_lines).toContain("MCP fetch failed: timeout");
|
||||
expect(draft.next_step_line).toContain("MCP");
|
||||
expect(draft.limitation_lines).toContain("Доступ к 1С во время проверки оборвался; подтвержденные строки не получены.");
|
||||
expect(draft.limitation_lines).not.toContain("MCP fetch failed: timeout");
|
||||
expect(draft.next_step_line).toContain("доступа к 1С");
|
||||
expect(draft.next_step_line).not.toContain("MCP");
|
||||
expect(draft.must_not_claim).toContain("Do not claim a confirmed business fact when confirmed_facts is empty.");
|
||||
});
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue