From f628266e440c5a6e430e02287f4e5af1f55ebdae Mon Sep 17 00:00:00 2001 From: dctouch Date: Sat, 9 May 2026 13:04:13 +0300 Subject: [PATCH] =?UTF-8?q?=D0=97=D0=B0=D0=BF=D1=80=D0=B5=D1=82=D0=B8?= =?UTF-8?q?=D1=82=D1=8C=20=D0=B7=D0=B0=D0=BA=D1=80=D1=8B=D1=82=D0=B8=D0=B5?= =?UTF-8?q?=20stage-loop=20=D0=B1=D0=B5=D0=B7=20repair=20validation?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../domain_scenario_loop_repo_adapter.md | 2 + scripts/stage_agent_loop.py | 74 ++++++++++++++-- scripts/test_stage_agent_loop.py | 87 +++++++++++++++++++ 3 files changed, 157 insertions(+), 6 deletions(-) diff --git a/docs/orchestration/domain_scenario_loop_repo_adapter.md b/docs/orchestration/domain_scenario_loop_repo_adapter.md index 10f9138..aa3f91d 100644 --- a/docs/orchestration/domain_scenario_loop_repo_adapter.md +++ b/docs/orchestration/domain_scenario_loop_repo_adapter.md @@ -161,6 +161,8 @@ python scripts/stage_agent_loop.py run-repair --manifest docs/orchestration/ dict[str, Any]: +def latest_repair_needs_validation(previous_summary: dict[str, Any] | None) -> bool: + if not isinstance(previous_summary, dict): + return False + latest_repair = ( + previous_summary.get("latest_repair_execution") + if isinstance(previous_summary.get("latest_repair_execution"), dict) + else {} + ) + if not latest_repair: + return False + if bool(latest_repair.get("dry_run")) or latest_repair.get("coder_status") != "patched": + return False + latest_validation = ( + previous_summary.get("latest_repair_validation") + if isinstance(previous_summary.get("latest_repair_validation"), dict) + else {} + ) + if not latest_validation: + return True + return not ( + bool(latest_validation.get("accepted_after_repair")) + and latest_validation.get("validation_status") == "passed" + and latest_validation.get("validated_repair_iteration") == latest_repair.get("iteration_dir") + ) + + +def build_stage_closing_gate(previous_summary: dict[str, Any] | None) -> dict[str, Any]: + blocked = latest_repair_needs_validation(previous_summary) + return { + "schema_version": "stage_closing_gate_v1", + "status": "blocked_pending_repair_validation" if blocked else "pass", + "passed": not blocked, + "blockers": ["latest_patched_repair_requires_successful_rerun_ingest"] if blocked else [], + } + + +def build_stage_summary( + stage_manifest: dict[str, Any], + loop_dir: Path, + previous_summary: dict[str, Any] | None = None, +) -> dict[str, Any]: loop_state = load_json_object(loop_dir / "loop_state.json", "Stage domain loop_state.json") iterations = loop_state.get("iterations") if isinstance(loop_state.get("iterations"), list) else [] last_iteration = iterations[-1] if iterations and isinstance(iterations[-1], dict) else {} final_status = str(loop_state.get("final_status") or "unknown").strip() - accepted = final_status == "accepted" and bool(last_iteration.get("accepted_gate")) + raw_loop_accepted = final_status == "accepted" and bool(last_iteration.get("accepted_gate")) + closing_gate = build_stage_closing_gate(previous_summary) + accepted = raw_loop_accepted and bool(closing_gate.get("passed")) manual_confirmation_required = bool(stage_manifest.get("manual_confirmation_required_after_accept", True)) and accepted if accepted and manual_confirmation_required: next_action = "manual_gui_confirmation" elif accepted: next_action = "stage_closed_without_manual_confirmation" + elif raw_loop_accepted and not bool(closing_gate.get("passed")): + next_action = "rerun_same_stage_or_gui_and_ingest_result" elif bool(loop_state.get("last_user_decision_prompt")): next_action = "user_decision_required" else: @@ -224,12 +268,24 @@ def build_stage_summary(stage_manifest: dict[str, Any], loop_dir: Path) -> dict[ "last_analyst_decision": last_iteration.get("loop_decision") or loop_state.get("last_analyst_decision"), "last_deterministic_gate_ok": last_iteration.get("deterministic_gate_ok"), "last_deterministic_gate_reason": last_iteration.get("deterministic_gate_reason"), - "accepted_gate": bool(last_iteration.get("accepted_gate")), + "loop_accepted_gate": bool(last_iteration.get("accepted_gate")), + "accepted_gate": accepted, + "stage_closing_gate": closing_gate, "manual_confirmation_required": manual_confirmation_required, "next_action": next_action, "save_autorun_on_accept": bool(stage_manifest.get("save_autorun_on_accept", True)), "updated_at": now_iso(), } + if isinstance(previous_summary, dict): + for key in [ + "latest_gui_review", + "gui_review_history", + "latest_repair_execution", + "latest_repair_validation", + "repair_validation_history", + ]: + if key in previous_summary: + summary[key] = previous_summary[key] summary["next_step_guidance"] = build_next_step_guidance(next_action) return summary @@ -298,6 +354,8 @@ def build_stage_handoff_markdown(summary: dict[str, Any]) -> str: f"- iterations_ran: `{summary.get('iterations_ran')}`", f"- last_quality_score: `{summary.get('last_quality_score')}`", f"- accepted_gate: `{summary.get('accepted_gate')}`", + f"- loop_accepted_gate: `{summary.get('loop_accepted_gate')}`", + f"- stage_closing_gate: `{(summary.get('stage_closing_gate') or {}).get('status') if isinstance(summary.get('stage_closing_gate'), dict) else 'n/a'}`", f"- deterministic_gate_ok: `{summary.get('last_deterministic_gate_ok')}`", f"- deterministic_gate_reason: `{summary.get('last_deterministic_gate_reason') or 'n/a'}`", f"- manual_confirmation_required: `{summary.get('manual_confirmation_required')}`", @@ -1045,7 +1103,9 @@ def handle_summarize(args: argparse.Namespace) -> int: stage_manifest = load_stage_manifest(stage_manifest_path) stage_dir = stage_dir_for(repo_path(args.output_root), stage_manifest["stage_id"]) loop_dir = repo_path(args.loop_dir) if args.loop_dir else stage_loop_dir(stage_dir, stage_manifest) - summary = build_stage_summary(stage_manifest, loop_dir) + summary_path = stage_dir / "stage_loop_summary.json" + previous_summary = load_json_object(summary_path, "Existing stage summary") if summary_path.exists() else None + summary = build_stage_summary(stage_manifest, loop_dir, previous_summary=previous_summary) save_stage_summary(stage_dir, summary) print(json.dumps(summary, ensure_ascii=False, indent=2)) return 0 @@ -1073,11 +1133,13 @@ def handle_run(args: argparse.Namespace) -> int: timeout_seconds=max(3600, int(args.codex_timeout_seconds) * max(1, int(stage_manifest["max_iterations"]))), ) loop_dir = stage_loop_dir(stage_dir, stage_manifest) - summary = build_stage_summary(stage_manifest, loop_dir) + summary_path = stage_dir / "stage_loop_summary.json" + previous_summary = load_json_object(summary_path, "Existing stage summary") if summary_path.exists() else None + summary = build_stage_summary(stage_manifest, loop_dir, previous_summary=previous_summary) save_stage_summary(stage_dir, summary) if ( - summary["loop_final_status"] == "accepted" + bool(summary.get("accepted_gate")) and bool(stage_manifest.get("save_autorun_on_accept", True)) and not args.no_save_autorun ): diff --git a/scripts/test_stage_agent_loop.py b/scripts/test_stage_agent_loop.py index 7957b82..24371c5 100644 --- a/scripts/test_stage_agent_loop.py +++ b/scripts/test_stage_agent_loop.py @@ -169,6 +169,93 @@ class StageAgentLoopTests(unittest.TestCase): self.assertFalse(summary["manual_confirmation_required"]) self.assertEqual(summary["next_action"], "continue_autonomous_or_fix_blocker") + def test_build_stage_summary_blocks_close_when_repair_lacks_validation(self) -> None: + with tempfile.TemporaryDirectory() as tmp: + loop_dir = Path(tmp) + write_json( + loop_dir / "loop_state.json", + { + "final_status": "accepted", + "iterations": [ + { + "quality_score": 94, + "loop_decision": "accepted", + "accepted_gate": True, + "deterministic_gate_ok": True, + } + ], + }, + ) + + summary = stage_loop.build_stage_summary( + { + "stage_id": "agent_loop", + "module_name": "Agent Loop", + "title": "Agent Loop", + "target_score": 88, + }, + loop_dir, + previous_summary={ + "latest_repair_execution": { + "iteration_dir": "artifacts/domain_runs/stage_agent_loops/agent_loop/repair_iterations/repair_001", + "dry_run": False, + "coder_status": "patched", + } + }, + ) + + self.assertFalse(summary["accepted_gate"]) + self.assertTrue(summary["loop_accepted_gate"]) + self.assertEqual(summary["stage_closing_gate"]["status"], "blocked_pending_repair_validation") + self.assertEqual(summary["next_action"], "rerun_same_stage_or_gui_and_ingest_result") + self.assertEqual(summary["latest_repair_execution"]["coder_status"], "patched") + + def test_build_stage_summary_allows_close_after_repair_validation(self) -> None: + with tempfile.TemporaryDirectory() as tmp: + loop_dir = Path(tmp) + repair_iteration = "artifacts/domain_runs/stage_agent_loops/agent_loop/repair_iterations/repair_001" + write_json( + loop_dir / "loop_state.json", + { + "final_status": "accepted", + "iterations": [ + { + "quality_score": 94, + "loop_decision": "accepted", + "accepted_gate": True, + "deterministic_gate_ok": True, + } + ], + }, + ) + + summary = stage_loop.build_stage_summary( + { + "stage_id": "agent_loop", + "module_name": "Agent Loop", + "title": "Agent Loop", + "target_score": 88, + "manual_confirmation_required_after_accept": True, + }, + loop_dir, + previous_summary={ + "latest_repair_execution": { + "iteration_dir": repair_iteration, + "dry_run": False, + "coder_status": "patched", + }, + "latest_repair_validation": { + "validated_repair_iteration": repair_iteration, + "validation_status": "passed", + "accepted_after_repair": True, + }, + }, + ) + + self.assertTrue(summary["accepted_gate"]) + self.assertEqual(summary["stage_closing_gate"]["status"], "pass") + self.assertEqual(summary["next_action"], "manual_gui_confirmation") + def test_gui_review_stage_summary_routes_p0_to_repair(self) -> None: with tempfile.TemporaryDirectory() as tmp: stage_dir = Path(tmp) / "stage"