Добавить post-repair validation в stage-loop

2026-05-09 12:47:04 +03:00 · 2026-05-09 12:47:04 +03:00 · a3378a3d52
parent b4f50346cc
commit a3378a3d52
3 changed files with 139 additions and 0 deletions
--- a/docs/orchestration/domain_scenario_loop_repo_adapter.md
+++ b/docs/orchestration/domain_scenario_loop_repo_adapter.md
@ -157,6 +157,8 @@ python scripts/stage_agent_loop.py run-repair --manifest docs/orchestration/<sta

 `--dry-run` writes `repair_coder.command.txt`, records `repair_execution_summary.json`, updates `stage_loop_summary.json`, and prints the exact non-interactive Codex command without changing code. Without `--dry-run`, it executes the coder command with the prepared `repair_prompt.md`, writes `repair_coder_result.json`, captures stdout/stderr, records `repair_execution_summary.json`, and updates the stage next action to rerun/ingest, inspect, or stop for a decision depending on the coder status. After a real coder patch, rerun the same semantic pack or GUI session and ingest the new `assistant-stage1-<id>`.

+When the coder result is `patched`, the next `ingest-gui-run` is treated as post-repair validation for that repair iteration. `stage_loop_summary.json` records `latest_repair_validation` and `repair_validation_history`, including the validation run id, remaining P0/P1 findings, and whether the repair was actually accepted after replay. A patch without this rerun/ingest evidence is not a closed stage.
+
 ## Placeholder contract

 Scenario questions can reference earlier step outputs with placeholders such as:
--- a/scripts/stage_agent_loop.py
+++ b/scripts/stage_agent_loop.py
@ -291,6 +291,24 @@ def build_stage_handoff_markdown(summary: dict[str, Any]) -> str:
                f"- execution_summary: `{latest_repair_execution.get('repair_execution_summary')}`",
            ]
        )
+    latest_repair_validation = (
+        summary.get("latest_repair_validation")
+        if isinstance(summary.get("latest_repair_validation"), dict)
+        else {}
+    )
+    if latest_repair_validation:
+        lines.extend(
+            [
+                "",
+                "## Latest Repair Validation",
+                f"- validation_run_id: `{latest_repair_validation.get('validation_run_id')}`",
+                f"- validation_status: `{latest_repair_validation.get('validation_status')}`",
+                f"- accepted_after_repair: `{latest_repair_validation.get('accepted_after_repair')}`",
+                f"- validated_repair_iteration: `{latest_repair_validation.get('validated_repair_iteration')}`",
+                f"- remaining_p0_findings: `{latest_repair_validation.get('remaining_p0_findings')}`",
+                f"- remaining_p1_findings: `{latest_repair_validation.get('remaining_p1_findings')}`",
+            ]
+        )
    return "\n".join(lines).strip() + "\n"


@ -354,6 +372,56 @@ def build_repair_execution_stage_summary(
    return base


+def repair_validation_status(*, business_status: str, p0_count: int, p1_count: int) -> str:
+    if p0_count > 0:
+        return "failed_p0"
+    if p1_count > 0 or business_status == "warning":
+        return "warning_p1"
+    if business_status == "pass":
+        return "passed"
+    if business_status == "fail":
+        return "failed"
+    return "unknown"
+
+
+def build_latest_repair_validation(
+    *,
+    previous_summary: dict[str, Any] | None,
+    review: dict[str, Any],
+    business_status: str,
+    p0_count: int,
+    p1_count: int,
+    next_action: str,
+) -> dict[str, Any] | None:
+    previous_repair = (
+        previous_summary.get("latest_repair_execution")
+        if isinstance(previous_summary, dict) and isinstance(previous_summary.get("latest_repair_execution"), dict)
+        else {}
+    )
+    if not previous_repair:
+        return None
+    if bool(previous_repair.get("dry_run")) or previous_repair.get("coder_status") != "patched":
+        return None
+    validation_status = repair_validation_status(
+        business_status=business_status,
+        p0_count=p0_count,
+        p1_count=p1_count,
+    )
+    return {
+        "schema_version": "stage_repair_validation_v1",
+        "validation_run_id": review.get("run_id"),
+        "validated_repair_iteration": previous_repair.get("iteration_dir"),
+        "validated_repair_result": previous_repair.get("repair_coder_result"),
+        "validation_status": validation_status,
+        "accepted_after_repair": bool(validation_status == "passed"),
+        "remaining_p0_findings": p0_count,
+        "remaining_p1_findings": p1_count,
+        "overall_business_status": business_status,
+        "next_action": next_action,
+        "validated_at": now_iso(),
+    }
+
+
 def build_save_autorun_command(args: argparse.Namespace, stage_manifest: dict[str, Any], loop_dir: Path) -> list[str]:
    return [
        sys.executable,
@ -427,6 +495,27 @@ def build_gui_review_stage_summary(
            },
        }
    )
+    latest_repair_validation = build_latest_repair_validation(
+        previous_summary=previous_summary,
+        review=review,
+        business_status=status,
+        p0_count=p0_count,
+        p1_count=p1_count,
+        next_action=next_action,
+    )
+    if latest_repair_validation is not None:
+        base["latest_repair_validation"] = latest_repair_validation
+        validations = base.get("repair_validation_history") if isinstance(base.get("repair_validation_history"), list) else []
+        validations = [
+            item
+            for item in validations
+            if not (
+                isinstance(item, dict)
+                and item.get("validation_run_id") == latest_repair_validation.get("validation_run_id")
+            )
+        ]
+        validations.append(latest_repair_validation)
+        base["repair_validation_history"] = validations
    history = base.get("gui_review_history") if isinstance(base.get("gui_review_history"), list) else []
    history = [
        item
--- a/scripts/test_stage_agent_loop.py
+++ b/scripts/test_stage_agent_loop.py
@ -204,6 +204,54 @@ class StageAgentLoopTests(unittest.TestCase):
        self.assertFalse(summary["accepted_gate"])
        self.assertEqual(summary["latest_gui_review"]["repair_targets_count"], 1)

+    def test_gui_review_stage_summary_links_post_repair_validation(self) -> None:
+        with tempfile.TemporaryDirectory() as tmp:
+            stage_dir = Path(tmp) / "stage"
+            review_dir = stage_dir / "gui_run_reviews" / "assistant-stage1-rerun"
+            review = {
+                "run_id": "assistant-stage1-rerun",
+                "summary": {
+                    "overall_business_status": "pass",
+                    "turn_pairs_total": 5,
+                    "business_issue_turns": 0,
+                    "p0_findings": 0,
+                    "p1_findings": 0,
+                    "question_quality_status": "strong",
+                    "question_quality_score": 96,
+                },
+                "repair_targets": [],
+            }
+            previous_summary = {
+                "latest_repair_execution": {
+                    "iteration_dir": "artifacts/domain_runs/stage_agent_loops/agent_loop/repair_iterations/repair_001",
+                    "repair_coder_result": "artifacts/domain_runs/stage_agent_loops/agent_loop/repair_iterations/repair_001/repair_coder_result.json",
+                    "dry_run": False,
+                    "coder_status": "patched",
+                    "changed_files": ["scripts/stage_agent_loop.py"],
+                }
+            }
+
+            summary = stage_loop.build_gui_review_stage_summary(
+                stage_manifest={
+                    "stage_id": "agent_loop",
+                    "module_name": "Agent Loop",
+                    "title": "Agent Loop",
+                    "target_score": 88,
+                    "acceptance_invariants": [],
+                    "global_plan_refs": [],
+                },
+                stage_dir=stage_dir,
+                review=review,
+                review_dir=review_dir,
+                previous_summary=previous_summary,
+            )
+
+        self.assertEqual(summary["next_action"], "manual_gui_confirmation_or_stage_close")
+        self.assertTrue(summary["latest_repair_validation"]["accepted_after_repair"])
+        self.assertEqual(summary["latest_repair_validation"]["validation_status"], "passed")
+        self.assertEqual(summary["latest_repair_validation"]["validation_run_id"], "assistant-stage1-rerun")
+        self.assertEqual(len(summary["repair_validation_history"]), 1)
+
    def test_stage_repair_handoff_keeps_primary_targets_and_samples(self) -> None:
        summary = {
            "stage_id": "agent_loop",