Добавить post-repair validation в stage-loop

This commit is contained in:
dctouch 2026-05-09 12:47:04 +03:00
parent b4f50346cc
commit a3378a3d52
3 changed files with 139 additions and 0 deletions

View File

@ -157,6 +157,8 @@ python scripts/stage_agent_loop.py run-repair --manifest docs/orchestration/<sta
`--dry-run` writes `repair_coder.command.txt`, records `repair_execution_summary.json`, updates `stage_loop_summary.json`, and prints the exact non-interactive Codex command without changing code. Without `--dry-run`, it executes the coder command with the prepared `repair_prompt.md`, writes `repair_coder_result.json`, captures stdout/stderr, records `repair_execution_summary.json`, and updates the stage next action to rerun/ingest, inspect, or stop for a decision depending on the coder status. After a real coder patch, rerun the same semantic pack or GUI session and ingest the new `assistant-stage1-<id>`.
When a real (non-dry-run) repair ends with the coder result `patched`, the next `ingest-gui-run` is treated as post-repair validation for that repair iteration. `stage_loop_summary.json` records `latest_repair_validation` and `repair_validation_history`, including the validation run id, the remaining P0/P1 findings, and whether the repair was actually accepted after replay. A patch without this rerun/ingest evidence does not close the stage.
## Placeholder contract
Scenario questions can reference earlier step outputs with placeholders such as:

View File

@ -291,6 +291,24 @@ def build_stage_handoff_markdown(summary: dict[str, Any]) -> str:
f"- execution_summary: `{latest_repair_execution.get('repair_execution_summary')}`",
]
)
latest_repair_validation = (
summary.get("latest_repair_validation")
if isinstance(summary.get("latest_repair_validation"), dict)
else {}
)
if latest_repair_validation:
lines.extend(
[
"",
"## Latest Repair Validation",
f"- validation_run_id: `{latest_repair_validation.get('validation_run_id')}`",
f"- validation_status: `{latest_repair_validation.get('validation_status')}`",
f"- accepted_after_repair: `{latest_repair_validation.get('accepted_after_repair')}`",
f"- validated_repair_iteration: `{latest_repair_validation.get('validated_repair_iteration')}`",
f"- remaining_p0_findings: `{latest_repair_validation.get('remaining_p0_findings')}`",
f"- remaining_p1_findings: `{latest_repair_validation.get('remaining_p1_findings')}`",
]
)
return "\n".join(lines).strip() + "\n"
@ -354,6 +372,56 @@ def build_repair_execution_stage_summary(
return base
def repair_validation_status(*, business_status: str, p0_count: int, p1_count: int) -> str:
    """Classify the outcome of a post-repair validation run.

    The checks are ordered by severity of the finding counts first, then by
    the reported business status:

    * any P0 finding                       -> ``"failed_p0"``
    * any P1 finding or a "warning" status -> ``"warning_p1"``
    * "pass" status                        -> ``"passed"``
    * "fail" status                        -> ``"failed"``
    * anything else                        -> ``"unknown"``

    Note that finding counts win over the business status: a "fail" status
    with P1 findings (and no P0 findings) is reported as ``"warning_p1"``.
    """
    if p0_count > 0:
        verdict = "failed_p0"
    elif p1_count > 0 or business_status == "warning":
        verdict = "warning_p1"
    elif business_status == "pass":
        verdict = "passed"
    elif business_status == "fail":
        verdict = "failed"
    else:
        # Unrecognised status with no findings: do not guess.
        verdict = "unknown"
    return verdict
def build_latest_repair_validation(
*,
previous_summary: dict[str, Any] | None,
review: dict[str, Any],
business_status: str,
p0_count: int,
p1_count: int,
next_action: str,
) -> dict[str, Any] | None:
previous_repair = (
previous_summary.get("latest_repair_execution")
if isinstance(previous_summary, dict) and isinstance(previous_summary.get("latest_repair_execution"), dict)
else {}
)
if not previous_repair:
return None
if bool(previous_repair.get("dry_run")) or previous_repair.get("coder_status") != "patched":
return None
validation_status = repair_validation_status(
business_status=business_status,
p0_count=p0_count,
p1_count=p1_count,
)
return {
"schema_version": "stage_repair_validation_v1",
"validation_run_id": review.get("run_id"),
"validated_repair_iteration": previous_repair.get("iteration_dir"),
"validated_repair_result": previous_repair.get("repair_coder_result"),
"validation_status": validation_status,
"accepted_after_repair": bool(validation_status == "passed"),
"remaining_p0_findings": p0_count,
"remaining_p1_findings": p1_count,
"overall_business_status": business_status,
"next_action": next_action,
"validated_at": now_iso(),
}
def build_save_autorun_command(args: argparse.Namespace, stage_manifest: dict[str, Any], loop_dir: Path) -> list[str]:
return [
sys.executable,
@ -427,6 +495,27 @@ def build_gui_review_stage_summary(
},
}
)
latest_repair_validation = build_latest_repair_validation(
previous_summary=previous_summary,
review=review,
business_status=status,
p0_count=p0_count,
p1_count=p1_count,
next_action=next_action,
)
if latest_repair_validation is not None:
base["latest_repair_validation"] = latest_repair_validation
validations = base.get("repair_validation_history") if isinstance(base.get("repair_validation_history"), list) else []
validations = [
item
for item in validations
if not (
isinstance(item, dict)
and item.get("validation_run_id") == latest_repair_validation.get("validation_run_id")
)
]
validations.append(latest_repair_validation)
base["repair_validation_history"] = validations
history = base.get("gui_review_history") if isinstance(base.get("gui_review_history"), list) else []
history = [
item

View File

@ -204,6 +204,54 @@ class StageAgentLoopTests(unittest.TestCase):
self.assertFalse(summary["accepted_gate"])
self.assertEqual(summary["latest_gui_review"]["repair_targets_count"], 1)
def test_gui_review_stage_summary_links_post_repair_validation(self) -> None:
    """A clean rerun after a patched repair is recorded as accepted validation."""
    rerun_id = "assistant-stage1-rerun"

    # A passing review with no P0/P1 findings and no repair targets.
    clean_review = {
        "run_id": rerun_id,
        "summary": {
            "overall_business_status": "pass",
            "turn_pairs_total": 5,
            "business_issue_turns": 0,
            "p0_findings": 0,
            "p1_findings": 0,
            "question_quality_status": "strong",
            "question_quality_score": 96,
        },
        "repair_targets": [],
    }
    # The previous summary carries a real (non-dry-run) patched repair.
    patched_repair = {
        "iteration_dir": "artifacts/domain_runs/stage_agent_loops/agent_loop/repair_iterations/repair_001",
        "repair_coder_result": "artifacts/domain_runs/stage_agent_loops/agent_loop/repair_iterations/repair_001/repair_coder_result.json",
        "dry_run": False,
        "coder_status": "patched",
        "changed_files": ["scripts/stage_agent_loop.py"],
    }
    manifest = {
        "stage_id": "agent_loop",
        "module_name": "Agent Loop",
        "title": "Agent Loop",
        "target_score": 88,
        "acceptance_invariants": [],
        "global_plan_refs": [],
    }

    with tempfile.TemporaryDirectory() as tmp:
        stage_dir = Path(tmp) / "stage"
        summary = stage_loop.build_gui_review_stage_summary(
            stage_manifest=manifest,
            stage_dir=stage_dir,
            review=clean_review,
            review_dir=stage_dir / "gui_run_reviews" / rerun_id,
            previous_summary={"latest_repair_execution": patched_repair},
        )

        self.assertEqual(summary["next_action"], "manual_gui_confirmation_or_stage_close")
        validation = summary["latest_repair_validation"]
        self.assertTrue(validation["accepted_after_repair"])
        self.assertEqual(validation["validation_status"], "passed")
        self.assertEqual(validation["validation_run_id"], rerun_id)
        self.assertEqual(len(summary["repair_validation_history"]), 1)
def test_stage_repair_handoff_keeps_primary_targets_and_samples(self) -> None:
summary = {
"stage_id": "agent_loop",