# Tests for the lead-coder handoff, business-audit contract, auto-coder gate
# and analyst-priority merge helpers exposed by ``domain_case_loop``.
from __future__ import annotations

import json
import sys
import tempfile
import unittest
from pathlib import Path

# Put this file's directory first on sys.path before importing the module
# under test, so the import below resolves regardless of the launch directory.
sys.path.insert(0, str(Path(__file__).resolve().parent))

import domain_case_loop as dcl

# Patch-target strings that recur across the fixtures below.
_COMPOSE_STAGE_TS = "llm_normalizer/backend/src/services/address_runtime/composeStage.ts"
_INTENT_RESOLVER_TS = "llm_normalizer/backend/src/services/addressIntentResolver.ts"
_SERVICES_DIR = "llm_normalizer/backend/src/services/"


def _blank_repair_targets(step_validation_index: dict | None = None) -> dict:
    """Return a fresh repair-targets payload with no targets and zeroed counters.

    ``step_validation_index`` is appended as the last key only when provided.
    """
    payload = {
        "pack_id": "demo_pack",
        "domain": "demo",
        "target_count": 0,
        "severity_counts": {"P0": 0, "P1": 0, "P2": 0},
        "priority_foci": [],
        "targets": [],
    }
    if step_validation_index is not None:
        payload["step_validation_index"] = step_validation_index
    return payload


def _validated_step(**extra_fields) -> dict:
    """Return a step-validation entry marked ``validated`` plus ``extra_fields``."""
    return {
        "acceptance_status": "validated",
        "violated_invariants": [],
        "warnings": [],
        **extra_fields,
    }


def _priority_target(
    scenario_id: str,
    step_id: str,
    severity: str,
    problem_type: str,
    fix_goal: str,
) -> dict:
    """Build one entry for an analyst verdict's ``priority_targets`` list."""
    return {
        "scenario_id": scenario_id,
        "step_id": step_id,
        "severity": severity,
        "problem_type": problem_type,
        "fix_goal": fix_goal,
    }


def _merge_priority_targets(repair_targets: dict, *priority_targets: dict) -> dict:
    """Merge ``priority_targets`` into ``repair_targets`` via the module under test."""
    verdict = {"priority_targets": list(priority_targets)}
    return dcl.merge_analyst_priority_repair_targets(repair_targets, verdict)


def _evaluate_gate(target: dict, focus: dict) -> dict:
    """Run the auto-coder gate for a single repair target and its assigned focus."""
    return dcl.evaluate_auto_coder_gate({"targets": [target]}, focus)


class DomainCaseLoopLeadHandoffTests(unittest.TestCase):
    """Checks on handoff building, audit contracts, gate decisions and merges."""

    def test_normalize_repair_mode_defaults_to_lead_handoff(self) -> None:
        """``None`` and ``lead_codex`` map to lead-handoff; ``auto_coder`` to auto-coder."""
        expectations = (
            (None, "lead-handoff"),
            ("lead_codex", "lead-handoff"),
            ("auto_coder", "auto-coder"),
        )
        for raw_mode, normalized in expectations:
            self.assertEqual(dcl.normalize_repair_mode(raw_mode), normalized)

    def test_lead_handoff_captures_business_audit_and_primary_focus(self) -> None:
        """A saved handoff carries audit refs, the primary focus and issue codes."""
        targets_payload = {
            "target_count": 1,
            "severity_counts": {"P0": 1},
            "priority_foci": [
                {
                    "focus_id": "answer_shape",
                    "severity": "P0",
                    "issue_code": "business_direct_answer_missing",
                    "summary": "Direct answer is buried below service scaffolding.",
                    "candidate_files": [_COMPOSE_STAGE_TS],
                }
            ],
            "targets": [
                {
                    "severity": "P0",
                    "issue_code": "business_direct_answer_missing",
                    "step_id": "q01",
                }
            ],
        }
        verdict = {
            "quality_score": 42,
            "loop_decision": "partial",
            "user_intent_summary": "User asked for a direct business answer.",
            "expected_direct_answer": "Direct first-line answer.",
            "actual_direct_answer": "Scaffolded long answer.",
            "root_cause_layers": ["answer_shape_mismatch"],
        }

        with tempfile.TemporaryDirectory() as scratch:
            base = Path(scratch)
            pack_dir = base / "pack"
            loop_dir = base / "loop"
            iteration_dir = loop_dir / "iterations" / "iteration_00"

            handoff = dcl.build_lead_coder_handoff(
                loop_state={"loop_id": "demo"},
                iteration_id="iteration_00",
                pack_dir=pack_dir,
                analyst_verdict_path=iteration_dir / "analyst_verdict.json",
                repair_targets_path=pack_dir / "repair_targets.json",
                business_audit_path=iteration_dir / "business_audit.md",
                business_audit_json_path=iteration_dir / "business_audit.json",
                issue_catalog_snapshot_path=iteration_dir / "issue_catalog_snapshot.json",
                rerun_matrix_path=iteration_dir / "rerun_matrix.json",
                detector_candidates_path=iteration_dir / "detector_candidates.json",
                analyst_verdict=verdict,
                repair_targets=targets_payload,
                target_score=88,
                loop_decision="partial",
                analyst_accepted_gate=False,
                accepted_gate=False,
                deterministic_gate_ok=False,
                deterministic_gate_reason="repair_targets_remaining=P0:1",
                requires_user_decision=False,
                user_decision_type="none",
                user_decision_prompt=None,
            )
            written = dcl.save_lead_coder_handoff(
                loop_dir=loop_dir,
                iteration_dir=iteration_dir,
                handoff=handoff,
            )

            # Read everything back while the temporary directory still exists.
            handoff_file = iteration_dir / "lead_coder_handoff.json"
            saved = json.loads(handoff_file.read_text(encoding="utf-8"))
            latest_handoff_exists = Path(written["latest_lead_coder_handoff_path"]).exists()

        self.assertEqual(saved["repair_mode"], "lead-handoff")
        self.assertEqual(saved["status"], "lead_coder_repair_required")
        self.assertEqual(saved["assigned_primary_focus"]["focus_id"], "answer_shape")
        for ref_key in ("business_audit", "business_audit_json", "issue_catalog_snapshot"):
            self.assertIn(ref_key, saved["artifact_refs"])
        self.assertIn("business_direct_answer_missing", saved["issue_codes"])
        self.assertIn("failed_scenario", saved["rerun_matrix"])
        self.assertTrue(latest_handoff_exists)

    def test_business_audit_contract_exposes_repair_issue_contract(self) -> None:
        """The audit contract reports the blocking issue and its answer contract."""
        margin_target = {
            "target_id": "margin_pack:s01",
            "scenario_id": "margin_pack",
            "step_id": "s01",
            "severity": "P0",
            "issue_code": "margin_domain_leak_accounting_route",
            "question_resolved": "Which item had the best margin?",
            "fix_goal": "Route the question to margin profitability instead of accounting noise.",
            "evidence_paths": ["artifacts/domain_runs/margin_pack/steps/s01/output.md"],
        }
        targets_payload = {
            "target_count": 1,
            "severity_counts": {"P0": 1},
            "priority_foci": [],
            "targets": [margin_target],
        }
        verdict = {
            "quality_score": 31,
            "loop_decision": "partial",
            "user_intent_summary": "User needs item margin ranking.",
            "expected_direct_answer": "Best item by gross margin.",
            "actual_direct_answer": "Accounting route answer.",
        }

        contract = dcl.build_business_audit_contract(
            analyst_verdict=verdict,
            repair_targets=targets_payload,
            target_score=88,
            loop_decision="partial",
            analyst_accepted_gate=False,
            accepted_gate=False,
            deterministic_gate_ok=False,
            deterministic_gate_reason="P0 repair target remains",
            business_audit_markdown_path=Path("business_audit.md"),
            analyst_verdict_path=Path("analyst_verdict.json"),
            repair_targets_path=Path("repair_targets.json"),
            business_audit_json_path=Path("business_audit.json"),
            issue_catalog_snapshot_path=Path("issue_catalog_snapshot.json"),
            rerun_matrix_path=Path("rerun_matrix.json"),
            detector_candidates_path=Path("detector_candidates.json"),
        )

        self.assertEqual(contract["overall_status"], "partial")
        first_blocker = contract["blocking_issues"][0]
        self.assertEqual(first_blocker["issue_code"], "margin_domain_leak_accounting_route")
        self.assertEqual(
            first_blocker["expected_business_answer_contract"],
            "margin_profitability_v1",
        )
        self.assertIn("failed_margin_scenario", contract["rerun_matrix"])
        self.assertIn("detector_candidates_json", contract["artifact_refs"])

    def test_auto_coder_gate_blocks_non_allowlisted_issue_codes(self) -> None:
        """The gate rejects the margin-leak issue code as not allowlisted."""
        target = {
            "target_id": "margin_pack:s01",
            "issue_code": "margin_domain_leak_accounting_route",
            "allowed_patch_targets": [_INTENT_RESOLVER_TS],
            "forbidden_patch_targets": ["global orchestration rewrite"],
            "rerun_matrix": ["failed_margin_scenario", "accepted_smoke_pack"],
        }
        focus = {
            "focus_id": "route|addressIntentResolver",
            "issue_codes": ["margin_domain_leak_accounting_route"],
            "root_cause_layers": ["intent", "route"],
            "allowed_patch_targets": [_INTENT_RESOLVER_TS],
            "forbidden_patch_targets": ["global orchestration rewrite"],
            "rerun_matrix": ["failed_margin_scenario", "accepted_smoke_pack"],
            "target_ids": ["margin_pack:s01"],
        }

        verdict = _evaluate_gate(target, focus)

        self.assertFalse(verdict["allowed"])
        self.assertIn(
            "issue_code_not_allowlisted:margin_domain_leak_accounting_route",
            verdict["blocking_reasons"],
        )

    def test_auto_coder_gate_allows_complete_answer_surface_contract(self) -> None:
        """A fully specified answer-surface target passes the gate."""
        forbidden = ("routing rewrites", "fake evidence", "global runtime rewrite")
        reruns = ("failed_scenario", "direct_answer_surface_pack", "accepted_smoke_pack")
        target = {
            "target_id": "pack:s01",
            "issue_code": "business_direct_answer_missing",
            "root_cause_layers": ["answer_surface"],
            "expected_business_answer_contract": "direct_answer_surface_v1",
            "evidence_paths": ["artifacts/domain_runs/pack/steps/s01/output.md"],
            "allowed_patch_targets": [_COMPOSE_STAGE_TS],
            "forbidden_patch_targets": list(forbidden),
            "rerun_matrix": list(reruns),
        }
        focus = {
            "focus_id": "answer_shape|composeStage",
            "issue_codes": ["business_direct_answer_missing"],
            "root_cause_layers": ["answer_surface"],
            "allowed_patch_targets": [_COMPOSE_STAGE_TS],
            "forbidden_patch_targets": list(forbidden),
            "rerun_matrix": list(reruns),
            "target_ids": ["pack:s01"],
        }

        verdict = _evaluate_gate(target, focus)

        self.assertTrue(verdict["allowed"])
        self.assertEqual(verdict["reason"], "auto_coder_gate_passed")
        self.assertEqual(
            verdict["issue_catalog_contracts"]["business_direct_answer_missing"][
                "expected_answer_contract"
            ],
            "direct_answer_surface_v1",
        )

    def test_auto_coder_gate_blocks_broad_or_blind_patch_scope(self) -> None:
        """A directory-wide patch target without evidence paths is blocked."""
        forbidden = ("routing rewrites", "fake evidence", "global runtime rewrite")
        reruns = ("failed_scenario", "direct_answer_surface_pack", "accepted_smoke_pack")
        # This target deliberately carries no "evidence_paths" entry.
        target = {
            "target_id": "pack:s01",
            "issue_code": "business_direct_answer_missing",
            "root_cause_layers": ["answer_surface"],
            "expected_business_answer_contract": "direct_answer_surface_v1",
            "allowed_patch_targets": [_SERVICES_DIR],
            "forbidden_patch_targets": list(forbidden),
            "rerun_matrix": list(reruns),
        }
        focus = {
            "focus_id": "answer_shape|services",
            "issue_codes": ["business_direct_answer_missing"],
            "root_cause_layers": ["answer_surface"],
            "allowed_patch_targets": [_SERVICES_DIR],
            "forbidden_patch_targets": list(forbidden),
            "rerun_matrix": list(reruns),
            "target_ids": ["pack:s01"],
        }

        verdict = _evaluate_gate(target, focus)

        self.assertFalse(verdict["allowed"])
        expected_blockers = (
            "broad_allowed_patch_target:llm_normalizer/backend/src/services/",
            "target_missing_evidence_paths:pack:s01",
        )
        for blocker in expected_blockers:
            self.assertIn(blocker, verdict["blocking_reasons"])

    def test_auto_coder_gate_blocks_catalog_issue_without_answer_contract(self) -> None:
        """The gate blocks when the catalog entry lacks an expected answer contract."""

        def catalog_without_answer_contract() -> dict:
            # Stand-in catalog: the issue entry has no expected-answer-contract field.
            return {
                "schema_version": "agent_issue_catalog_v1",
                "issues": {
                    "business_direct_answer_missing": {
                        "severity": "P0",
                        "root_layers": ["answer_surface"],
                        "allowed_patch_targets": [_COMPOSE_STAGE_TS],
                        "forbidden_patch_targets": ["routing rewrites"],
                        "rerun_matrix": ["failed_scenario", "accepted_smoke_pack"],
                    }
                },
            }

        target = {
            "target_id": "pack:s01",
            "issue_code": "business_direct_answer_missing",
            "root_cause_layers": ["answer_surface"],
            "expected_business_answer_contract": "direct_answer_surface_v1",
            "evidence_paths": ["artifacts/domain_runs/pack/steps/s01/output.md"],
            "allowed_patch_targets": [_COMPOSE_STAGE_TS],
            "forbidden_patch_targets": ["routing rewrites"],
            "rerun_matrix": ["failed_scenario", "accepted_smoke_pack"],
        }
        focus = {
            "focus_id": "answer_shape|composeStage",
            "issue_codes": ["business_direct_answer_missing"],
            "root_cause_layers": ["answer_surface"],
            "allowed_patch_targets": [_COMPOSE_STAGE_TS],
            "forbidden_patch_targets": ["routing rewrites"],
            "rerun_matrix": ["failed_scenario", "accepted_smoke_pack"],
            "target_ids": ["pack:s01"],
        }

        # Swap the loader on the module and always restore it afterwards.
        real_loader = dcl.load_issue_catalog
        dcl.load_issue_catalog = catalog_without_answer_contract
        try:
            verdict = _evaluate_gate(target, focus)
        finally:
            dcl.load_issue_catalog = real_loader

        self.assertFalse(verdict["allowed"])
        self.assertIn(
            "catalog_missing_expected_answer_contract:business_direct_answer_missing",
            verdict["blocking_reasons"],
        )

    def test_lead_handoff_markdown_surfaces_auto_coder_gate_blockers(self) -> None:
        """Rendered handoff markdown includes the gate and catalog-contract sections."""
        artifact_refs = {
            "business_audit": "artifacts/domain_runs/demo/business_audit.md",
            "analyst_verdict": "artifacts/domain_runs/demo/analyst_verdict.json",
            "repair_targets": "artifacts/domain_runs/demo/repair_targets.json",
            "auto_coder_gate": "artifacts/domain_runs/demo/auto_coder_gate.json",
            "pack_dir": "artifacts/domain_runs/demo/pack",
        }
        blocked_gate = {
            "allowed": False,
            "reason": "target_missing_evidence_paths:pack:s01",
            "focus_id": "answer_shape|composeStage",
            "blocking_reasons": ["target_missing_evidence_paths:pack:s01"],
            "issue_catalog_contracts": {
                "business_direct_answer_missing": {
                    "expected_answer_contract": "direct_answer_surface_v1",
                    "root_layers": ["answer_surface"],
                    "allowed_patch_targets": [_COMPOSE_STAGE_TS],
                    "forbidden_patch_targets": ["routing rewrites"],
                    "rerun_matrix": ["failed_scenario", "accepted_smoke_pack"],
                }
            },
        }
        handoff = {
            "repair_mode": "lead-handoff",
            "loop_id": "demo_loop",
            "iteration_id": "iteration_00",
            "quality_score": 42,
            "target_score": 88,
            "loop_decision": "partial",
            "deterministic_gate_ok": False,
            "deterministic_gate_reason": "repair_targets_remaining=P0:1",
            "artifact_refs": artifact_refs,
            "issue_codes": ["business_direct_answer_missing"],
            "rerun_matrix": ["failed_scenario", "accepted_smoke_pack"],
            "human_meaning": {"user_intent_summary": "User needs a direct answer."},
            "top_repair_targets": [],
            "candidate_files": [],
            "lead_instructions": [],
            "auto_coder_gate": blocked_gate,
        }

        rendered = dcl.build_lead_coder_handoff_markdown(handoff)

        expected_fragments = (
            "## Auto-Coder Gate",
            "target_missing_evidence_paths:pack:s01",
            "## Auto-Coder Catalog Contracts",
            "direct_answer_surface_v1",
        )
        for fragment in expected_fragments:
            self.assertIn(fragment, rendered)

    def test_analyst_priority_targets_become_lead_repair_targets(self) -> None:
        """Analyst priority targets merged into an empty payload drive the handoff."""
        merged = _merge_priority_targets(
            _blank_repair_targets(),
            _priority_target(
                "svk_pivot",
                "s03_summary",
                "P0",
                "bundle_reuse_gap",
                "Reuse the confirmed SVK value-flow bundle in the final summary.",
            ),
            _priority_target(
                "biz_scope",
                "s02_money",
                "P1",
                "field_mapping_gap",
                "Separate cash source/recipient labels from client/supplier labels.",
            ),
        )

        handoff = dcl.build_lead_coder_handoff(
            loop_state={"loop_id": "demo"},
            iteration_id="iteration_00",
            pack_dir=Path("pack"),
            analyst_verdict_path=Path("analyst_verdict.json"),
            repair_targets_path=Path("semantic_repair_targets.json"),
            business_audit_path=Path("business_audit.md"),
            analyst_verdict={"quality_score": 73, "loop_decision": "continue"},
            repair_targets=merged,
            target_score=88,
            loop_decision="continue",
            analyst_accepted_gate=False,
            accepted_gate=False,
            deterministic_gate_ok=True,
            deterministic_gate_reason="deterministic_gate_passed",
            requires_user_decision=False,
            user_decision_type="none",
            user_decision_prompt=None,
        )

        self.assertEqual(merged["target_count"], 2)
        self.assertEqual(merged["severity_counts"]["P0"], 1)
        self.assertEqual(
            handoff["assigned_primary_focus"]["problem_type"],
            "bundle_reuse_gap",
        )
        self.assertEqual(
            handoff["top_repair_targets"][0]["target_id"],
            "svk_pivot:s03_summary",
        )
        self.assertIn(
            "llm_normalizer/backend/src/services/assistantMcpDiscoveryResponseCandidate.ts",
            handoff["candidate_files"],
        )

    def test_stale_analyst_validation_target_is_suppressed_by_step_state(self) -> None:
        """A P0 target for an already validated step is dropped; the P2 one stays."""
        step_index = {
            "legacy_canaries:s02_acc60": _validated_step(
                runtime_factual_answer_validated=False,
                guarded_insufficiency_validated=True,
            )
        }

        merged = _merge_priority_targets(
            _blank_repair_targets(step_index),
            _priority_target(
                "legacy_canaries",
                "s02_acc60",
                "P0",
                "evidence_gap",
                (
                    "partial heuristic answer without runtime_factual_answer_validated "
                    "or guarded_insufficiency_validated must not pass silently"
                ),
            ),
            _priority_target(
                "biz_scope",
                "s03_best_year",
                "P2",
                "presentation_gap",
                "Clarify why this year leads without implying pure profit.",
            ),
        )

        self.assertEqual(merged["suppressed_analyst_priority_target_count"], 1)
        self.assertEqual(merged["target_count"], 1)
        self.assertEqual(merged["targets"][0]["target_id"], "biz_scope:s03_best_year")
        self.assertEqual(merged["severity_counts"]["P0"], 0)
        self.assertEqual(merged["severity_counts"]["P2"], 1)

    def test_bounded_mcp_evidence_gap_target_is_suppressed_by_step_state(self) -> None:
        """An evidence-gap target is dropped when the step has a validated bounded MCP answer."""
        step_index = {
            "biz_scope:s03_best_year": _validated_step(
                bounded_mcp_answer_validated=True,
                mcp_discovery_response_applied=True,
                mcp_discovery_response_candidate_status="ready_for_guarded_use",
                assistant_text_excerpt=(
                    "Коротко: самый доходный год в доступном денежном контуре 1С — 2015. "
                    "Важно: входящие уперлись в лимит выборки MCP; это проверенный срез, "
                    "не чистая бухгалтерская прибыль."
                ),
            )
        }

        merged = _merge_priority_targets(
            _blank_repair_targets(step_index),
            _priority_target(
                "biz_scope",
                "s03_best_year",
                "P0",
                "evidence_gap",
                (
                    "Убрать asserted winner-year как подтвержденный факт, пока yearly ranking "
                    "не имеет exact validated compute; legacy metadata says unsupported/blocked."
                ),
            ),
        )

        self.assertEqual(merged["suppressed_analyst_priority_target_count"], 1)
        self.assertEqual(merged["target_count"], 0)
        self.assertEqual(merged["severity_counts"], {"P0": 0, "P1": 0, "P2": 0})

    def test_runtime_exact_followup_target_is_suppressed_when_focus_is_proven(self) -> None:
        """A follow-up target is dropped when the step is validated with a focus object."""
        step_index = {
            "svk_pivot:s02_svk_docs": _validated_step(
                runtime_factual_answer_validated=True,
                assistant_text_excerpt="Контрагент: Группа СВК. Найдено документов: 19.",
                extracted_filters={"counterparty": "Группа СВК"},
                focus_object={"label": "Группа СВК"},
            )
        }

        merged = _merge_priority_targets(
            _blank_repair_targets(step_index),
            _priority_target(
                "svk_pivot",
                "s02_svk_docs",
                "P1",
                "followup_action_resolution_gap",
                (
                    "Добавить pack-level validation на object-centric carryover: docs follow-up "
                    "и bundle reuse должны быть явно проверены через stable counterparty/focus."
                ),
            ),
        )

        self.assertEqual(merged["suppressed_analyst_priority_target_count"], 1)
        self.assertEqual(merged["target_count"], 0)
        self.assertEqual(merged["severity_counts"], {"P0": 0, "P1": 0, "P2": 0})


if __name__ == "__main__":
    unittest.main()