# Listing metadata: 242 lines, 9.4 KiB, Python.
from __future__ import annotations
|
|
|
|
import json
|
|
import sys
|
|
import tempfile
|
|
import unittest
|
|
from pathlib import Path
|
|
from types import SimpleNamespace
|
|
|
|
|
|
sys.path.insert(0, str(Path(__file__).resolve().parent))
|
|
|
|
import save_agent_semantic_run as saver
|
|
|
|
|
|
def write_json(path: Path, payload: object) -> None:
    """Write *payload* to *path* as indented UTF-8 JSON.

    Missing parent directories are created first. The JSON text keeps
    non-ASCII characters as-is, uses a two-space indent, and ends with a
    single trailing newline.
    """
    destination_dir = path.parent
    destination_dir.mkdir(parents=True, exist_ok=True)
    rendered = json.dumps(payload, ensure_ascii=False, indent=2)
    path.write_text(f"{rendered}\n", encoding="utf-8")
|
|
|
|
|
|
class SaveAgentSemanticRunTests(unittest.TestCase):
    """Tests for ``saver``: question extraction, run-dir validation, save gate."""

    # ------------------------------------------------------------------
    # Fixture helpers
    # ------------------------------------------------------------------

    def _scratch_dir(self) -> Path:
        """Return a fresh temporary directory that is removed after the test."""
        holder = tempfile.TemporaryDirectory()
        self.addCleanup(holder.cleanup)
        return Path(holder.name)

    @staticmethod
    def _accepted_pack_state(**extra: object) -> dict[str, object]:
        """Build an accepted ``pack_state.json`` payload, appending *extra* keys."""
        state: dict[str, object] = {
            "final_status": "accepted",
            "review_overall_status": "pass",
            "acceptance_gate_passed": True,
            "no_unresolved_p0": True,
            "unresolved_p0_count": 0,
        }
        state.update(extra)
        return state

    @staticmethod
    def _seed_run_dir(
        run_dir: Path,
        pack_state: dict[str, object],
        business_review: dict[str, object],
    ) -> None:
        """Write the pack state, a passing truth review and the business review."""
        write_json(run_dir / "pack_state.json", pack_state)
        write_json(run_dir / "truth_review.json", {"summary": {"overall_status": "pass"}})
        write_json(run_dir / "business_review.json", business_review)

    @staticmethod
    def _seed_domain_pack_loop(
        loop_dir: Path,
        iteration_fields: dict[str, object],
        severity_counts: dict[str, int],
    ) -> None:
        """Write a one-iteration loop state plus its analyst verdict and repair targets.

        The iteration record is *iteration_fields* followed by the two path
        entries pointing at the verdict and repair-target files created here.
        """
        iteration_dir = loop_dir / "iterations" / "iteration_00"
        verdict_file = iteration_dir / "analyst_verdict.json"
        targets_file = iteration_dir / "pack_output" / "pack_run" / "repair_targets.json"

        iteration_record = dict(iteration_fields)
        iteration_record["analyst_verdict_path"] = str(verdict_file)
        iteration_record["repair_targets_path"] = str(targets_file)

        write_json(
            loop_dir / "loop_state.json",
            {
                "loop_id": "stage_demo",
                "target_score": 88,
                "final_status": "accepted",
                "iterations": [iteration_record],
            },
        )
        write_json(
            verdict_file,
            {
                "loop_decision": "accepted",
                "unresolved_p0_count": 0,
                "regression_detected": False,
                "direct_answer_ok": True,
                "business_usefulness_ok": True,
                "temporal_honesty_ok": True,
                "field_truth_ok": True,
                "answer_layering_ok": True,
            },
        )
        write_json(targets_file, {"severity_counts": severity_counts})

    @staticmethod
    def _gate_args(**overrides: object) -> SimpleNamespace:
        """Build save-gate arguments for a real, unvalidated write, with overrides."""
        fields: dict[str, object] = {
            "validated_run_dir": None,
            "dry_run": False,
            "allow_unvalidated": False,
            "unvalidated_reason": None,
        }
        fields.update(overrides)
        return SimpleNamespace(**fields)

    # ------------------------------------------------------------------
    # extract_questions_from_spec
    # ------------------------------------------------------------------

    def test_extract_questions_accepts_truth_harness_question_template(self) -> None:
        spec = {
            "steps": [
                {"step_id": "step_01", "question_template": "first question"},
                {"step_id": "step_02", "question": "second question"},
            ]
        }

        extracted = saver.extract_questions_from_spec(spec)

        self.assertEqual(extracted, ["first question", "second question"])

    def test_extract_questions_accepts_domain_pack_scenarios(self) -> None:
        first_scenario = {
            "scenario_id": "scenario_01",
            "steps": [
                {"step_id": "step_01", "question_template": "first question"},
                {"step_id": "step_02", "question": "second question"},
            ],
        }
        # The second scenario repeats "first question"; the expected list below
        # contains it only once.
        second_scenario = {
            "scenario_id": "scenario_02",
            "steps": [
                {"step_id": "step_01", "question": "first question"},
                {"step_id": "step_02", "question": "third question"},
            ],
        }
        spec = {"pack_id": "demo_pack", "scenarios": [first_scenario, second_scenario]}

        extracted = saver.extract_questions_from_spec(spec)

        self.assertEqual(extracted, ["first question", "second question", "third question"])

    # ------------------------------------------------------------------
    # validate_accepted_run_dir: single run directory
    # ------------------------------------------------------------------

    def test_validate_accepted_run_dir_accepts_clean_business_review(self) -> None:
        run_dir = self._scratch_dir()
        self._seed_run_dir(
            run_dir,
            self._accepted_pack_state(steps_total=1, steps_passed=1, steps_failed=0),
            {
                "overall_business_status": "pass",
                "steps_with_business_failures": 0,
                "steps_with_business_warnings": 0,
            },
        )

        metadata = saver.validate_accepted_run_dir(run_dir)

        self.assertEqual(metadata["validation_status"], "accepted_live_replay")
        self.assertTrue(metadata["saved_after_validated_replay"])

    def test_validate_accepted_run_dir_rejects_business_review_failures(self) -> None:
        run_dir = self._scratch_dir()
        self._seed_run_dir(
            run_dir,
            self._accepted_pack_state(),
            {
                "overall_business_status": "fail",
                "steps_with_business_failures": 1,
            },
        )

        self.assertRaisesRegex(
            RuntimeError,
            "business_review",
            saver.validate_accepted_run_dir,
            run_dir,
        )

    # ------------------------------------------------------------------
    # validate_accepted_run_dir: domain-pack loop directory
    # ------------------------------------------------------------------

    def test_validate_accepted_run_dir_accepts_clean_domain_pack_loop(self) -> None:
        loop_dir = self._scratch_dir()
        self._seed_domain_pack_loop(
            loop_dir,
            {
                "iteration_id": "iteration_00",
                "quality_score": 91,
                "accepted_gate": True,
                "analyst_accepted_gate": True,
                "deterministic_gate_ok": True,
                "repair_target_count": 0,
                "repair_target_severity_counts": {"P0": 0, "P1": 0, "P2": 0},
            },
            {"P0": 0, "P1": 0, "P2": 0},
        )

        metadata = saver.validate_accepted_run_dir(loop_dir)

        self.assertEqual(metadata["validation_status"], "accepted_domain_pack_loop")
        self.assertEqual(metadata["quality_score"], 91)

    def test_validate_accepted_run_dir_rejects_domain_pack_loop_with_p1_targets(self) -> None:
        loop_dir = self._scratch_dir()
        # Only the repair-targets file reports a P1 target here; the loop
        # state's iteration record carries no repair-target summary fields.
        self._seed_domain_pack_loop(
            loop_dir,
            {
                "quality_score": 91,
                "accepted_gate": True,
                "analyst_accepted_gate": True,
                "deterministic_gate_ok": True,
            },
            {"P0": 0, "P1": 1, "P2": 0},
        )

        self.assertRaisesRegex(
            RuntimeError,
            "repair_targets",
            saver.validate_accepted_run_dir,
            loop_dir,
        )

    # ------------------------------------------------------------------
    # build_save_gate_metadata
    # ------------------------------------------------------------------

    def test_save_gate_refuses_real_write_without_validation(self) -> None:
        gate_args = self._gate_args()

        self.assertRaisesRegex(
            RuntimeError,
            "Refusing to save AGENT autorun",
            saver.build_save_gate_metadata,
            gate_args,
            {},
            Path("demo.json"),
        )

    def test_save_gate_requires_reason_for_unvalidated_draft(self) -> None:
        gate_args = self._gate_args(allow_unvalidated=True, unvalidated_reason="")

        self.assertRaisesRegex(
            RuntimeError,
            "--unvalidated-reason",
            saver.build_save_gate_metadata,
            gate_args,
            {},
            Path("demo.json"),
        )

    def test_save_gate_marks_explicit_unvalidated_draft(self) -> None:
        gate_args = self._gate_args(
            allow_unvalidated=True,
            unvalidated_reason="manual GUI canary before live replay",
        )

        metadata = saver.build_save_gate_metadata(gate_args, {}, Path("demo.json"))

        self.assertEqual(metadata["validation_status"], "explicitly_unvalidated")
        self.assertFalse(metadata["saved_after_validated_replay"])
|
|
|
|
|
# Run this module's test cases when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|