Запретить закрытие stage-loop без repair validation

This commit is contained in:
dctouch 2026-05-09 13:04:13 +03:00
parent 37d33bd6e6
commit f628266e44
3 changed files with 157 additions and 6 deletions

View File

@ -161,6 +161,8 @@ python scripts/stage_agent_loop.py run-repair --manifest docs/orchestration/<sta
When the coder result is `patched`, the next `ingest-gui-run` is treated as post-repair validation for that repair iteration. `stage_loop_summary.json` records `latest_repair_validation` and `repair_validation_history`, including the validation run id, remaining P0/P1 findings, and whether the repair was actually accepted after replay. A patch without this rerun/ingest evidence does not close the stage.
The stage closing gate enforces that rule even when the inner pack loop reports `accepted`: `loop_accepted_gate` preserves the raw loop verdict, but stage-level `accepted_gate` stays `false` with `stage_closing_gate.status = blocked_pending_repair_validation` until the latest patched repair has a matching successful validation run.
## Placeholder contract
Scenario questions can reference earlier step outputs with placeholders such as:

View File

@ -193,17 +193,61 @@ def run_command(command: list[str], cwd: Path, stdout_path: Path, stderr_path: P
raise RuntimeError(f"Command failed with exit code {result.returncode}: {' '.join(command)}")
def build_stage_summary(stage_manifest: dict[str, Any], loop_dir: Path) -> dict[str, Any]:
def latest_repair_needs_validation(previous_summary: dict[str, Any] | None) -> bool:
if not isinstance(previous_summary, dict):
return False
latest_repair = (
previous_summary.get("latest_repair_execution")
if isinstance(previous_summary.get("latest_repair_execution"), dict)
else {}
)
if not latest_repair:
return False
if bool(latest_repair.get("dry_run")) or latest_repair.get("coder_status") != "patched":
return False
latest_validation = (
previous_summary.get("latest_repair_validation")
if isinstance(previous_summary.get("latest_repair_validation"), dict)
else {}
)
if not latest_validation:
return True
return not (
bool(latest_validation.get("accepted_after_repair"))
and latest_validation.get("validation_status") == "passed"
and latest_validation.get("validated_repair_iteration") == latest_repair.get("iteration_dir")
)
def build_stage_closing_gate(previous_summary: dict[str, Any] | None) -> dict[str, Any]:
    """Build the ``stage_closing_gate_v1`` record for a stage summary.

    Args:
        previous_summary: Previously stored stage summary, or ``None``.

    Returns:
        A dict with ``schema_version``, ``status``, ``passed`` and
        ``blockers``. The gate is blocked exactly when
        ``latest_repair_needs_validation(previous_summary)`` is true.
    """
    if latest_repair_needs_validation(previous_summary):
        gate_status = "blocked_pending_repair_validation"
        gate_passed = False
        gate_blockers = ["latest_patched_repair_requires_successful_rerun_ingest"]
    else:
        gate_status = "pass"
        gate_passed = True
        gate_blockers = []
    return {
        "schema_version": "stage_closing_gate_v1",
        "status": gate_status,
        "passed": gate_passed,
        "blockers": gate_blockers,
    }
def build_stage_summary(
stage_manifest: dict[str, Any],
loop_dir: Path,
previous_summary: dict[str, Any] | None = None,
) -> dict[str, Any]:
loop_state = load_json_object(loop_dir / "loop_state.json", "Stage domain loop_state.json")
iterations = loop_state.get("iterations") if isinstance(loop_state.get("iterations"), list) else []
last_iteration = iterations[-1] if iterations and isinstance(iterations[-1], dict) else {}
final_status = str(loop_state.get("final_status") or "unknown").strip()
accepted = final_status == "accepted" and bool(last_iteration.get("accepted_gate"))
raw_loop_accepted = final_status == "accepted" and bool(last_iteration.get("accepted_gate"))
closing_gate = build_stage_closing_gate(previous_summary)
accepted = raw_loop_accepted and bool(closing_gate.get("passed"))
manual_confirmation_required = bool(stage_manifest.get("manual_confirmation_required_after_accept", True)) and accepted
if accepted and manual_confirmation_required:
next_action = "manual_gui_confirmation"
elif accepted:
next_action = "stage_closed_without_manual_confirmation"
elif raw_loop_accepted and not bool(closing_gate.get("passed")):
next_action = "rerun_same_stage_or_gui_and_ingest_result"
elif bool(loop_state.get("last_user_decision_prompt")):
next_action = "user_decision_required"
else:
@ -224,12 +268,24 @@ def build_stage_summary(stage_manifest: dict[str, Any], loop_dir: Path) -> dict[
"last_analyst_decision": last_iteration.get("loop_decision") or loop_state.get("last_analyst_decision"),
"last_deterministic_gate_ok": last_iteration.get("deterministic_gate_ok"),
"last_deterministic_gate_reason": last_iteration.get("deterministic_gate_reason"),
"accepted_gate": bool(last_iteration.get("accepted_gate")),
"loop_accepted_gate": bool(last_iteration.get("accepted_gate")),
"accepted_gate": accepted,
"stage_closing_gate": closing_gate,
"manual_confirmation_required": manual_confirmation_required,
"next_action": next_action,
"save_autorun_on_accept": bool(stage_manifest.get("save_autorun_on_accept", True)),
"updated_at": now_iso(),
}
if isinstance(previous_summary, dict):
for key in [
"latest_gui_review",
"gui_review_history",
"latest_repair_execution",
"latest_repair_validation",
"repair_validation_history",
]:
if key in previous_summary:
summary[key] = previous_summary[key]
summary["next_step_guidance"] = build_next_step_guidance(next_action)
return summary
@ -298,6 +354,8 @@ def build_stage_handoff_markdown(summary: dict[str, Any]) -> str:
f"- iterations_ran: `{summary.get('iterations_ran')}`",
f"- last_quality_score: `{summary.get('last_quality_score')}`",
f"- accepted_gate: `{summary.get('accepted_gate')}`",
f"- loop_accepted_gate: `{summary.get('loop_accepted_gate')}`",
f"- stage_closing_gate: `{(summary.get('stage_closing_gate') or {}).get('status') if isinstance(summary.get('stage_closing_gate'), dict) else 'n/a'}`",
f"- deterministic_gate_ok: `{summary.get('last_deterministic_gate_ok')}`",
f"- deterministic_gate_reason: `{summary.get('last_deterministic_gate_reason') or 'n/a'}`",
f"- manual_confirmation_required: `{summary.get('manual_confirmation_required')}`",
@ -1045,7 +1103,9 @@ def handle_summarize(args: argparse.Namespace) -> int:
stage_manifest = load_stage_manifest(stage_manifest_path)
stage_dir = stage_dir_for(repo_path(args.output_root), stage_manifest["stage_id"])
loop_dir = repo_path(args.loop_dir) if args.loop_dir else stage_loop_dir(stage_dir, stage_manifest)
summary = build_stage_summary(stage_manifest, loop_dir)
summary_path = stage_dir / "stage_loop_summary.json"
previous_summary = load_json_object(summary_path, "Existing stage summary") if summary_path.exists() else None
summary = build_stage_summary(stage_manifest, loop_dir, previous_summary=previous_summary)
save_stage_summary(stage_dir, summary)
print(json.dumps(summary, ensure_ascii=False, indent=2))
return 0
@ -1073,11 +1133,13 @@ def handle_run(args: argparse.Namespace) -> int:
timeout_seconds=max(3600, int(args.codex_timeout_seconds) * max(1, int(stage_manifest["max_iterations"]))),
)
loop_dir = stage_loop_dir(stage_dir, stage_manifest)
summary = build_stage_summary(stage_manifest, loop_dir)
summary_path = stage_dir / "stage_loop_summary.json"
previous_summary = load_json_object(summary_path, "Existing stage summary") if summary_path.exists() else None
summary = build_stage_summary(stage_manifest, loop_dir, previous_summary=previous_summary)
save_stage_summary(stage_dir, summary)
if (
summary["loop_final_status"] == "accepted"
bool(summary.get("accepted_gate"))
and bool(stage_manifest.get("save_autorun_on_accept", True))
and not args.no_save_autorun
):

View File

@ -169,6 +169,93 @@ class StageAgentLoopTests(unittest.TestCase):
self.assertFalse(summary["manual_confirmation_required"])
self.assertEqual(summary["next_action"], "continue_autonomous_or_fix_blocker")
def test_build_stage_summary_blocks_close_when_repair_lacks_validation(self) -> None:
    # Inner loop reports acceptance on its single iteration.
    accepted_loop_state = {
        "final_status": "accepted",
        "iterations": [
            {
                "quality_score": 94,
                "loop_decision": "accepted",
                "accepted_gate": True,
                "deterministic_gate_ok": True,
            }
        ],
    }
    manifest = {
        "stage_id": "agent_loop",
        "module_name": "Agent Loop",
        "title": "Agent Loop",
        "target_score": 88,
    }
    # Patched repair with no latest_repair_validation entry alongside it.
    stored_summary = {
        "latest_repair_execution": {
            "iteration_dir": "artifacts/domain_runs/stage_agent_loops/agent_loop/repair_iterations/repair_001",
            "dry_run": False,
            "coder_status": "patched",
        }
    }
    with tempfile.TemporaryDirectory() as tmp:
        loop_dir = Path(tmp)
        write_json(loop_dir / "loop_state.json", accepted_loop_state)
        summary = stage_loop.build_stage_summary(
            manifest,
            loop_dir,
            previous_summary=stored_summary,
        )
        # Stage-level gate is closed while the raw loop verdict is preserved.
        self.assertFalse(summary["accepted_gate"])
        self.assertTrue(summary["loop_accepted_gate"])
        self.assertEqual(summary["stage_closing_gate"]["status"], "blocked_pending_repair_validation")
        self.assertEqual(summary["next_action"], "rerun_same_stage_or_gui_and_ingest_result")
        # Repair metadata from the previous summary is carried over.
        self.assertEqual(summary["latest_repair_execution"]["coder_status"], "patched")
def test_build_stage_summary_allows_close_after_repair_validation(self) -> None:
    repair_iteration = "artifacts/domain_runs/stage_agent_loops/agent_loop/repair_iterations/repair_001"
    # Inner loop reports acceptance on its single iteration.
    accepted_loop_state = {
        "final_status": "accepted",
        "iterations": [
            {
                "quality_score": 94,
                "loop_decision": "accepted",
                "accepted_gate": True,
                "deterministic_gate_ok": True,
            }
        ],
    }
    manifest = {
        "stage_id": "agent_loop",
        "module_name": "Agent Loop",
        "title": "Agent Loop",
        "target_score": 88,
        "manual_confirmation_required_after_accept": True,
    }
    # Patched repair paired with a passed validation of the same iteration.
    stored_summary = {
        "latest_repair_execution": {
            "iteration_dir": repair_iteration,
            "dry_run": False,
            "coder_status": "patched",
        },
        "latest_repair_validation": {
            "validated_repair_iteration": repair_iteration,
            "validation_status": "passed",
            "accepted_after_repair": True,
        },
    }
    with tempfile.TemporaryDirectory() as tmp:
        loop_dir = Path(tmp)
        write_json(loop_dir / "loop_state.json", accepted_loop_state)
        summary = stage_loop.build_stage_summary(
            manifest,
            loop_dir,
            previous_summary=stored_summary,
        )
        self.assertTrue(summary["accepted_gate"])
        self.assertEqual(summary["stage_closing_gate"]["status"], "pass")
        self.assertEqual(summary["next_action"], "manual_gui_confirmation")
def test_gui_review_stage_summary_routes_p0_to_repair(self) -> None:
with tempfile.TemporaryDirectory() as tmp:
stage_dir = Path(tmp) / "stage"