From a3378a3d52bd8617575a674660a856c6fcde6de1 Mon Sep 17 00:00:00 2001 From: dctouch Date: Sat, 9 May 2026 12:47:04 +0300 Subject: [PATCH] =?UTF-8?q?=D0=94=D0=BE=D0=B1=D0=B0=D0=B2=D0=B8=D1=82?= =?UTF-8?q?=D1=8C=20post-repair=20validation=20=D0=B2=20stage-loop?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../domain_scenario_loop_repo_adapter.md | 2 + scripts/stage_agent_loop.py | 89 +++++++++++++++++++ scripts/test_stage_agent_loop.py | 48 ++++++++++ 3 files changed, 139 insertions(+) diff --git a/docs/orchestration/domain_scenario_loop_repo_adapter.md b/docs/orchestration/domain_scenario_loop_repo_adapter.md index 08d8fec..dace5af 100644 --- a/docs/orchestration/domain_scenario_loop_repo_adapter.md +++ b/docs/orchestration/domain_scenario_loop_repo_adapter.md @@ -157,6 +157,8 @@ python scripts/stage_agent_loop.py run-repair --manifest docs/orchestration/`. +When the coder result is `patched`, the next `ingest-gui-run` is treated as post-repair validation for that repair iteration. `stage_loop_summary.json` records `latest_repair_validation` and `repair_validation_history`, including the validation run id, remaining P0/P1 findings, and whether the repair was actually accepted after replay. A patch without this rerun/ingest evidence is not a closed stage. + ## Placeholder contract Scenario questions can reference earlier step outputs with placeholders such as: diff --git a/scripts/stage_agent_loop.py b/scripts/stage_agent_loop.py index 3a80372..64e4e6e 100644 --- a/scripts/stage_agent_loop.py +++ b/scripts/stage_agent_loop.py @@ -291,6 +291,24 @@ def build_stage_handoff_markdown(summary: dict[str, Any]) -> str: f"- execution_summary: `{latest_repair_execution.get('repair_execution_summary')}`", ] ) + latest_repair_validation = ( + summary.get("latest_repair_validation") + if isinstance(summary.get("latest_repair_validation"), dict) + else {} + ) + if latest_repair_validation: + lines.extend( + [ + "", + "## Latest Repair Validation", + f"- validation_run_id: `{latest_repair_validation.get('validation_run_id')}`", + f"- validation_status: `{latest_repair_validation.get('validation_status')}`", + f"- accepted_after_repair: `{latest_repair_validation.get('accepted_after_repair')}`", + f"- validated_repair_iteration: `{latest_repair_validation.get('validated_repair_iteration')}`", + f"- remaining_p0_findings: `{latest_repair_validation.get('remaining_p0_findings')}`", + f"- remaining_p1_findings: `{latest_repair_validation.get('remaining_p1_findings')}`", + ] + ) return "\n".join(lines).strip() + "\n" @@ -354,6 +372,56 @@ def build_repair_execution_stage_summary( return base +def repair_validation_status(*, business_status: str, p0_count: int, p1_count: int) -> str: + if p0_count > 0: + return "failed_p0" + if p1_count > 0 or business_status == "warning": + return "warning_p1" + if business_status == "pass": + return "passed" + if business_status == "fail": + return "failed" + return "unknown" + + +def build_latest_repair_validation( + *, + previous_summary: dict[str, Any] | None, + review: dict[str, Any], + business_status: str, + p0_count: int, + p1_count: int, + next_action: str, +) -> dict[str, Any] | None: + previous_repair = ( + previous_summary.get("latest_repair_execution") + if isinstance(previous_summary, dict) and isinstance(previous_summary.get("latest_repair_execution"), dict) + else {} + ) + if not previous_repair: + return None + if bool(previous_repair.get("dry_run")) or previous_repair.get("coder_status") != "patched": + return None + validation_status = repair_validation_status( + business_status=business_status, + p0_count=p0_count, + p1_count=p1_count, + ) + return { + "schema_version": "stage_repair_validation_v1", + "validation_run_id": review.get("run_id"), + "validated_repair_iteration": previous_repair.get("iteration_dir"), + "validated_repair_result": previous_repair.get("repair_coder_result"), + "validation_status": validation_status, + "accepted_after_repair": bool(validation_status == "passed"), + "remaining_p0_findings": p0_count, + "remaining_p1_findings": p1_count, + "overall_business_status": business_status, + "next_action": next_action, + "validated_at": now_iso(), + } + + def build_save_autorun_command(args: argparse.Namespace, stage_manifest: dict[str, Any], loop_dir: Path) -> list[str]: return [ sys.executable, @@ -427,6 +495,27 @@ def build_gui_review_stage_summary( }, } ) + latest_repair_validation = build_latest_repair_validation( + previous_summary=previous_summary, + review=review, + business_status=status, + p0_count=p0_count, + p1_count=p1_count, + next_action=next_action, + ) + if latest_repair_validation is not None: + base["latest_repair_validation"] = latest_repair_validation + validations = base.get("repair_validation_history") if isinstance(base.get("repair_validation_history"), list) else [] + validations = [ + item + for item in validations + if not ( + isinstance(item, dict) + and item.get("validation_run_id") == latest_repair_validation.get("validation_run_id") + ) + ] + validations.append(latest_repair_validation) + base["repair_validation_history"] = validations history = base.get("gui_review_history") if isinstance(base.get("gui_review_history"), list) else [] history = [ item diff --git a/scripts/test_stage_agent_loop.py b/scripts/test_stage_agent_loop.py index 8262a65..db37c7e 100644 --- a/scripts/test_stage_agent_loop.py +++ b/scripts/test_stage_agent_loop.py @@ -204,6 +204,54 @@ class StageAgentLoopTests(unittest.TestCase): self.assertFalse(summary["accepted_gate"]) self.assertEqual(summary["latest_gui_review"]["repair_targets_count"], 1) + def test_gui_review_stage_summary_links_post_repair_validation(self) -> None: + with tempfile.TemporaryDirectory() as tmp: + stage_dir = Path(tmp) / "stage" + review_dir = stage_dir / "gui_run_reviews" / "assistant-stage1-rerun" + review = { + "run_id": "assistant-stage1-rerun", + "summary": { + "overall_business_status": "pass", + "turn_pairs_total": 5, + "business_issue_turns": 0, + "p0_findings": 0, + "p1_findings": 0, + "question_quality_status": "strong", + "question_quality_score": 96, + }, + "repair_targets": [], + } + previous_summary = { + "latest_repair_execution": { + "iteration_dir": "artifacts/domain_runs/stage_agent_loops/agent_loop/repair_iterations/repair_001", + "repair_coder_result": "artifacts/domain_runs/stage_agent_loops/agent_loop/repair_iterations/repair_001/repair_coder_result.json", + "dry_run": False, + "coder_status": "patched", + "changed_files": ["scripts/stage_agent_loop.py"], + } + } + + summary = stage_loop.build_gui_review_stage_summary( + stage_manifest={ + "stage_id": "agent_loop", + "module_name": "Agent Loop", + "title": "Agent Loop", + "target_score": 88, + "acceptance_invariants": [], + "global_plan_refs": [], + }, + stage_dir=stage_dir, + review=review, + review_dir=review_dir, + previous_summary=previous_summary, + ) + + self.assertEqual(summary["next_action"], "manual_gui_confirmation_or_stage_close") + self.assertTrue(summary["latest_repair_validation"]["accepted_after_repair"]) + self.assertEqual(summary["latest_repair_validation"]["validation_status"], "passed") + self.assertEqual(summary["latest_repair_validation"]["validation_run_id"], "assistant-stage1-rerun") + self.assertEqual(len(summary["repair_validation_history"]), 1) + def test_stage_repair_handoff_keeps_primary_targets_and_samples(self) -> None: summary = { "stage_id": "agent_loop",