Запретить закрытие stage-loop без repair validation
This commit is contained in:
parent
37d33bd6e6
commit
f628266e44
|
|
@ -161,6 +161,8 @@ python scripts/stage_agent_loop.py run-repair --manifest docs/orchestration/<sta
|
||||||
|
|
||||||
When the coder result is `patched`, the next `ingest-gui-run` is treated as post-repair validation for that repair iteration. `stage_loop_summary.json` records `latest_repair_validation` and `repair_validation_history`, including the validation run id, remaining P0/P1 findings, and whether the repair was actually accepted after replay. A patch without this rerun/ingest evidence is not a closed stage.
|
When the coder result is `patched`, the next `ingest-gui-run` is treated as post-repair validation for that repair iteration. `stage_loop_summary.json` records `latest_repair_validation` and `repair_validation_history`, including the validation run id, remaining P0/P1 findings, and whether the repair was actually accepted after replay. A patch without this rerun/ingest evidence is not a closed stage.
|
||||||
|
|
||||||
|
The stage closing gate enforces that rule even when the inner pack loop reports `accepted`: `loop_accepted_gate` preserves the raw loop verdict, but stage-level `accepted_gate` stays `false` with `stage_closing_gate.status = blocked_pending_repair_validation` until the latest patched repair has a matching successful validation run.
|
||||||
|
|
||||||
## Placeholder contract
|
## Placeholder contract
|
||||||
|
|
||||||
Scenario questions can reference earlier step outputs with placeholders such as:
|
Scenario questions can reference earlier step outputs with placeholders such as:
|
||||||
|
|
|
||||||
|
|
@ -193,17 +193,61 @@ def run_command(command: list[str], cwd: Path, stdout_path: Path, stderr_path: P
|
||||||
raise RuntimeError(f"Command failed with exit code {result.returncode}: {' '.join(command)}")
|
raise RuntimeError(f"Command failed with exit code {result.returncode}: {' '.join(command)}")
|
||||||
|
|
||||||
|
|
||||||
def build_stage_summary(stage_manifest: dict[str, Any], loop_dir: Path) -> dict[str, Any]:
|
def latest_repair_needs_validation(previous_summary: dict[str, Any] | None) -> bool:
|
||||||
|
if not isinstance(previous_summary, dict):
|
||||||
|
return False
|
||||||
|
latest_repair = (
|
||||||
|
previous_summary.get("latest_repair_execution")
|
||||||
|
if isinstance(previous_summary.get("latest_repair_execution"), dict)
|
||||||
|
else {}
|
||||||
|
)
|
||||||
|
if not latest_repair:
|
||||||
|
return False
|
||||||
|
if bool(latest_repair.get("dry_run")) or latest_repair.get("coder_status") != "patched":
|
||||||
|
return False
|
||||||
|
latest_validation = (
|
||||||
|
previous_summary.get("latest_repair_validation")
|
||||||
|
if isinstance(previous_summary.get("latest_repair_validation"), dict)
|
||||||
|
else {}
|
||||||
|
)
|
||||||
|
if not latest_validation:
|
||||||
|
return True
|
||||||
|
return not (
|
||||||
|
bool(latest_validation.get("accepted_after_repair"))
|
||||||
|
and latest_validation.get("validation_status") == "passed"
|
||||||
|
and latest_validation.get("validated_repair_iteration") == latest_repair.get("iteration_dir")
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def build_stage_closing_gate(previous_summary: dict[str, Any] | None) -> dict[str, Any]:
    """Build the ``stage_closing_gate_v1`` record for a stage summary.

    The gate is blocked exactly when ``latest_repair_needs_validation`` reports
    that the previous summary still has an unvalidated patched repair.

    Args:
        previous_summary: The previously stored stage summary, or ``None``.

    Returns:
        A dict with ``schema_version``, ``status``, ``passed`` and ``blockers``.
    """
    if latest_repair_needs_validation(previous_summary):
        return {
            "schema_version": "stage_closing_gate_v1",
            "status": "blocked_pending_repair_validation",
            "passed": False,
            "blockers": ["latest_patched_repair_requires_successful_rerun_ingest"],
        }
    return {
        "schema_version": "stage_closing_gate_v1",
        "status": "pass",
        "passed": True,
        "blockers": [],
    }
|
||||||
|
|
||||||
|
|
||||||
|
def build_stage_summary(
|
||||||
|
stage_manifest: dict[str, Any],
|
||||||
|
loop_dir: Path,
|
||||||
|
previous_summary: dict[str, Any] | None = None,
|
||||||
|
) -> dict[str, Any]:
|
||||||
loop_state = load_json_object(loop_dir / "loop_state.json", "Stage domain loop_state.json")
|
loop_state = load_json_object(loop_dir / "loop_state.json", "Stage domain loop_state.json")
|
||||||
iterations = loop_state.get("iterations") if isinstance(loop_state.get("iterations"), list) else []
|
iterations = loop_state.get("iterations") if isinstance(loop_state.get("iterations"), list) else []
|
||||||
last_iteration = iterations[-1] if iterations and isinstance(iterations[-1], dict) else {}
|
last_iteration = iterations[-1] if iterations and isinstance(iterations[-1], dict) else {}
|
||||||
final_status = str(loop_state.get("final_status") or "unknown").strip()
|
final_status = str(loop_state.get("final_status") or "unknown").strip()
|
||||||
accepted = final_status == "accepted" and bool(last_iteration.get("accepted_gate"))
|
raw_loop_accepted = final_status == "accepted" and bool(last_iteration.get("accepted_gate"))
|
||||||
|
closing_gate = build_stage_closing_gate(previous_summary)
|
||||||
|
accepted = raw_loop_accepted and bool(closing_gate.get("passed"))
|
||||||
manual_confirmation_required = bool(stage_manifest.get("manual_confirmation_required_after_accept", True)) and accepted
|
manual_confirmation_required = bool(stage_manifest.get("manual_confirmation_required_after_accept", True)) and accepted
|
||||||
if accepted and manual_confirmation_required:
|
if accepted and manual_confirmation_required:
|
||||||
next_action = "manual_gui_confirmation"
|
next_action = "manual_gui_confirmation"
|
||||||
elif accepted:
|
elif accepted:
|
||||||
next_action = "stage_closed_without_manual_confirmation"
|
next_action = "stage_closed_without_manual_confirmation"
|
||||||
|
elif raw_loop_accepted and not bool(closing_gate.get("passed")):
|
||||||
|
next_action = "rerun_same_stage_or_gui_and_ingest_result"
|
||||||
elif bool(loop_state.get("last_user_decision_prompt")):
|
elif bool(loop_state.get("last_user_decision_prompt")):
|
||||||
next_action = "user_decision_required"
|
next_action = "user_decision_required"
|
||||||
else:
|
else:
|
||||||
|
|
@ -224,12 +268,24 @@ def build_stage_summary(stage_manifest: dict[str, Any], loop_dir: Path) -> dict[
|
||||||
"last_analyst_decision": last_iteration.get("loop_decision") or loop_state.get("last_analyst_decision"),
|
"last_analyst_decision": last_iteration.get("loop_decision") or loop_state.get("last_analyst_decision"),
|
||||||
"last_deterministic_gate_ok": last_iteration.get("deterministic_gate_ok"),
|
"last_deterministic_gate_ok": last_iteration.get("deterministic_gate_ok"),
|
||||||
"last_deterministic_gate_reason": last_iteration.get("deterministic_gate_reason"),
|
"last_deterministic_gate_reason": last_iteration.get("deterministic_gate_reason"),
|
||||||
"accepted_gate": bool(last_iteration.get("accepted_gate")),
|
"loop_accepted_gate": bool(last_iteration.get("accepted_gate")),
|
||||||
|
"accepted_gate": accepted,
|
||||||
|
"stage_closing_gate": closing_gate,
|
||||||
"manual_confirmation_required": manual_confirmation_required,
|
"manual_confirmation_required": manual_confirmation_required,
|
||||||
"next_action": next_action,
|
"next_action": next_action,
|
||||||
"save_autorun_on_accept": bool(stage_manifest.get("save_autorun_on_accept", True)),
|
"save_autorun_on_accept": bool(stage_manifest.get("save_autorun_on_accept", True)),
|
||||||
"updated_at": now_iso(),
|
"updated_at": now_iso(),
|
||||||
}
|
}
|
||||||
|
if isinstance(previous_summary, dict):
|
||||||
|
for key in [
|
||||||
|
"latest_gui_review",
|
||||||
|
"gui_review_history",
|
||||||
|
"latest_repair_execution",
|
||||||
|
"latest_repair_validation",
|
||||||
|
"repair_validation_history",
|
||||||
|
]:
|
||||||
|
if key in previous_summary:
|
||||||
|
summary[key] = previous_summary[key]
|
||||||
summary["next_step_guidance"] = build_next_step_guidance(next_action)
|
summary["next_step_guidance"] = build_next_step_guidance(next_action)
|
||||||
return summary
|
return summary
|
||||||
|
|
||||||
|
|
@ -298,6 +354,8 @@ def build_stage_handoff_markdown(summary: dict[str, Any]) -> str:
|
||||||
f"- iterations_ran: `{summary.get('iterations_ran')}`",
|
f"- iterations_ran: `{summary.get('iterations_ran')}`",
|
||||||
f"- last_quality_score: `{summary.get('last_quality_score')}`",
|
f"- last_quality_score: `{summary.get('last_quality_score')}`",
|
||||||
f"- accepted_gate: `{summary.get('accepted_gate')}`",
|
f"- accepted_gate: `{summary.get('accepted_gate')}`",
|
||||||
|
f"- loop_accepted_gate: `{summary.get('loop_accepted_gate')}`",
|
||||||
|
f"- stage_closing_gate: `{(summary.get('stage_closing_gate') or {}).get('status') if isinstance(summary.get('stage_closing_gate'), dict) else 'n/a'}`",
|
||||||
f"- deterministic_gate_ok: `{summary.get('last_deterministic_gate_ok')}`",
|
f"- deterministic_gate_ok: `{summary.get('last_deterministic_gate_ok')}`",
|
||||||
f"- deterministic_gate_reason: `{summary.get('last_deterministic_gate_reason') or 'n/a'}`",
|
f"- deterministic_gate_reason: `{summary.get('last_deterministic_gate_reason') or 'n/a'}`",
|
||||||
f"- manual_confirmation_required: `{summary.get('manual_confirmation_required')}`",
|
f"- manual_confirmation_required: `{summary.get('manual_confirmation_required')}`",
|
||||||
|
|
@ -1045,7 +1103,9 @@ def handle_summarize(args: argparse.Namespace) -> int:
|
||||||
stage_manifest = load_stage_manifest(stage_manifest_path)
|
stage_manifest = load_stage_manifest(stage_manifest_path)
|
||||||
stage_dir = stage_dir_for(repo_path(args.output_root), stage_manifest["stage_id"])
|
stage_dir = stage_dir_for(repo_path(args.output_root), stage_manifest["stage_id"])
|
||||||
loop_dir = repo_path(args.loop_dir) if args.loop_dir else stage_loop_dir(stage_dir, stage_manifest)
|
loop_dir = repo_path(args.loop_dir) if args.loop_dir else stage_loop_dir(stage_dir, stage_manifest)
|
||||||
summary = build_stage_summary(stage_manifest, loop_dir)
|
summary_path = stage_dir / "stage_loop_summary.json"
|
||||||
|
previous_summary = load_json_object(summary_path, "Existing stage summary") if summary_path.exists() else None
|
||||||
|
summary = build_stage_summary(stage_manifest, loop_dir, previous_summary=previous_summary)
|
||||||
save_stage_summary(stage_dir, summary)
|
save_stage_summary(stage_dir, summary)
|
||||||
print(json.dumps(summary, ensure_ascii=False, indent=2))
|
print(json.dumps(summary, ensure_ascii=False, indent=2))
|
||||||
return 0
|
return 0
|
||||||
|
|
@ -1073,11 +1133,13 @@ def handle_run(args: argparse.Namespace) -> int:
|
||||||
timeout_seconds=max(3600, int(args.codex_timeout_seconds) * max(1, int(stage_manifest["max_iterations"]))),
|
timeout_seconds=max(3600, int(args.codex_timeout_seconds) * max(1, int(stage_manifest["max_iterations"]))),
|
||||||
)
|
)
|
||||||
loop_dir = stage_loop_dir(stage_dir, stage_manifest)
|
loop_dir = stage_loop_dir(stage_dir, stage_manifest)
|
||||||
summary = build_stage_summary(stage_manifest, loop_dir)
|
summary_path = stage_dir / "stage_loop_summary.json"
|
||||||
|
previous_summary = load_json_object(summary_path, "Existing stage summary") if summary_path.exists() else None
|
||||||
|
summary = build_stage_summary(stage_manifest, loop_dir, previous_summary=previous_summary)
|
||||||
save_stage_summary(stage_dir, summary)
|
save_stage_summary(stage_dir, summary)
|
||||||
|
|
||||||
if (
|
if (
|
||||||
summary["loop_final_status"] == "accepted"
|
bool(summary.get("accepted_gate"))
|
||||||
and bool(stage_manifest.get("save_autorun_on_accept", True))
|
and bool(stage_manifest.get("save_autorun_on_accept", True))
|
||||||
and not args.no_save_autorun
|
and not args.no_save_autorun
|
||||||
):
|
):
|
||||||
|
|
|
||||||
|
|
@ -169,6 +169,93 @@ class StageAgentLoopTests(unittest.TestCase):
|
||||||
self.assertFalse(summary["manual_confirmation_required"])
|
self.assertFalse(summary["manual_confirmation_required"])
|
||||||
self.assertEqual(summary["next_action"], "continue_autonomous_or_fix_blocker")
|
self.assertEqual(summary["next_action"], "continue_autonomous_or_fix_blocker")
|
||||||
|
|
||||||
|
def test_build_stage_summary_blocks_close_when_repair_lacks_validation(self) -> None:
    """An accepted loop must not close the stage while a patched repair has no validation."""
    accepted_loop_state = {
        "final_status": "accepted",
        "iterations": [
            {
                "quality_score": 94,
                "loop_decision": "accepted",
                "accepted_gate": True,
                "deterministic_gate_ok": True,
            }
        ],
    }
    manifest = {
        "stage_id": "agent_loop",
        "module_name": "Agent Loop",
        "title": "Agent Loop",
        "target_score": 88,
    }
    # A real patch was applied, but no "latest_repair_validation" entry exists.
    summary_before = {
        "latest_repair_execution": {
            "iteration_dir": "artifacts/domain_runs/stage_agent_loops/agent_loop/repair_iterations/repair_001",
            "dry_run": False,
            "coder_status": "patched",
        }
    }

    with tempfile.TemporaryDirectory() as tmp:
        loop_dir = Path(tmp)
        write_json(loop_dir / "loop_state.json", accepted_loop_state)

        summary = stage_loop.build_stage_summary(
            manifest,
            loop_dir,
            previous_summary=summary_before,
        )

        self.assertFalse(summary["accepted_gate"])
        self.assertTrue(summary["loop_accepted_gate"])
        self.assertEqual(summary["stage_closing_gate"]["status"], "blocked_pending_repair_validation")
        self.assertEqual(summary["next_action"], "rerun_same_stage_or_gui_and_ingest_result")
        self.assertEqual(summary["latest_repair_execution"]["coder_status"], "patched")
|
||||||
|
|
||||||
|
def test_build_stage_summary_allows_close_after_repair_validation(self) -> None:
    """A patched repair with a matching passed validation lets the accepted loop close."""
    repair_iteration = "artifacts/domain_runs/stage_agent_loops/agent_loop/repair_iterations/repair_001"
    accepted_loop_state = {
        "final_status": "accepted",
        "iterations": [
            {
                "quality_score": 94,
                "loop_decision": "accepted",
                "accepted_gate": True,
                "deterministic_gate_ok": True,
            }
        ],
    }
    manifest = {
        "stage_id": "agent_loop",
        "module_name": "Agent Loop",
        "title": "Agent Loop",
        "target_score": 88,
        "manual_confirmation_required_after_accept": True,
    }
    # The validation record points at the same repair iteration and passed.
    summary_before = {
        "latest_repair_execution": {
            "iteration_dir": repair_iteration,
            "dry_run": False,
            "coder_status": "patched",
        },
        "latest_repair_validation": {
            "validated_repair_iteration": repair_iteration,
            "validation_status": "passed",
            "accepted_after_repair": True,
        },
    }

    with tempfile.TemporaryDirectory() as tmp:
        loop_dir = Path(tmp)
        write_json(loop_dir / "loop_state.json", accepted_loop_state)

        summary = stage_loop.build_stage_summary(
            manifest,
            loop_dir,
            previous_summary=summary_before,
        )

        self.assertTrue(summary["accepted_gate"])
        self.assertEqual(summary["stage_closing_gate"]["status"], "pass")
        self.assertEqual(summary["next_action"], "manual_gui_confirmation")
|
||||||
|
|
||||||
def test_gui_review_stage_summary_routes_p0_to_repair(self) -> None:
|
def test_gui_review_stage_summary_routes_p0_to_repair(self) -> None:
|
||||||
with tempfile.TemporaryDirectory() as tmp:
|
with tempfile.TemporaryDirectory() as tmp:
|
||||||
stage_dir = Path(tmp) / "stage"
|
stage_dir = Path(tmp) / "stage"
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue