Учитывать результаты детекторов в repair gate

2026-05-24 15:03:41 +03:00 · 2026-05-24 15:03:41 +03:00 · 21bc9e953b
parent 55e2eee12e
commit 21bc9e953b
5 changed files with 225 additions and 10 deletions
--- a/docs/orchestration/schemas/auto_coder_gate.schema.json
+++ b/docs/orchestration/schemas/auto_coder_gate.schema.json
@@ -81,6 +81,18 @@
        ]
      }
    },
+    "detector_results_summary": {
+      "type": "object",
+      "additionalProperties": true,
+      "properties": {
+        "status": {
+          "type": "string"
+        },
+        "signal_ok_for_auto_coder": {
+          "type": "boolean"
+        }
+      }
+    },
    "blocking_reasons": {
      "type": "array",
      "items": {
--- a/docs/orchestration/schemas/business_audit_contract.schema.json
+++ b/docs/orchestration/schemas/business_audit_contract.schema.json
@@ -103,6 +103,18 @@
      "type": "object",
      "additionalProperties": true
    },
+    "detector_results_summary": {
+      "type": "object",
+      "additionalProperties": true,
+      "properties": {
+        "status": {
+          "type": "string"
+        },
+        "signal_ok_for_auto_coder": {
+          "type": "boolean"
+        }
+      }
+    },
    "rerun_matrix": {
      "type": "array",
      "items": {
--- a/docs/orchestration/schemas/domain_loop_lead_coder_handoff.schema.json
+++ b/docs/orchestration/schemas/domain_loop_lead_coder_handoff.schema.json
@@ -94,6 +94,18 @@
        "type": "string"
      }
    },
+    "detector_results_summary": {
+      "type": "object",
+      "additionalProperties": true,
+      "properties": {
+        "status": {
+          "type": "string"
+        },
+        "signal_ok_for_auto_coder": {
+          "type": "boolean"
+        }
+      }
+    },
    "human_meaning": {
      "type": "object",
      "additionalProperties": true
--- a/scripts/domain_case_loop.py
+++ b/scripts/domain_case_loop.py
@@ -4581,6 +4581,7 @@ def select_primary_repair_focus(repair_targets: dict[str, Any]) -> dict[str, Any
 def evaluate_auto_coder_gate(
    repair_targets: dict[str, Any],
    assigned_focus: dict[str, Any] | None,
+    detector_results: dict[str, Any] | None = None,
 ) -> dict[str, Any]:
    catalog = load_issue_catalog()
    issue_codes = normalize_string_list((assigned_focus or {}).get("issue_codes"))
@@ -4593,6 +4594,7 @@ def evaluate_auto_coder_gate(
    catalog_allowed_patch_targets: list[str] = []
    catalog_forbidden_patch_targets: list[str] = []
    issue_catalog_contracts: dict[str, Any] = {}
+    detector_results_summary = summarize_detector_results(detector_results)

    if not assigned_focus:
        blocking_reasons.append("missing_assigned_focus")
@@ -4703,6 +4705,8 @@ def evaluate_auto_coder_gate(
            blocking_reasons.append(f"target_missing_rerun_matrix:{target_id}")
        elif "accepted_smoke_pack" not in normalize_string_list(target.get("rerun_matrix")):
            blocking_reasons.append(f"target_missing_accepted_smoke_pack:{target_id}")
+    if detector_results is not None and not detector_results_summary["signal_ok_for_auto_coder"]:
+        blocking_reasons.append(f"detector_results_no_repair_signal:{detector_results_summary['status']}")

    allowed = not blocking_reasons
    return {
@@ -4717,6 +4721,7 @@ def evaluate_auto_coder_gate(
        "rerun_matrix": rerun_matrix,
        "allowlisted_issue_codes": sorted(AUTO_CODER_ALLOWED_ISSUE_CODES),
        "issue_catalog_contracts": issue_catalog_contracts,
+        "detector_results_summary": detector_results_summary,
        "blocking_reasons": blocking_reasons,
        "reason": "auto_coder_gate_passed" if allowed else ";".join(blocking_reasons),
        "policy": {
@@ -4726,6 +4731,7 @@ def evaluate_auto_coder_gate(
            "requires_target_evidence_paths": True,
            "requires_accepted_smoke_pack": True,
            "requires_catalog_limited_patch_scope": True,
+            "requires_detector_results_signal_when_available": True,
            "lead_owns_merge_and_acceptance": True,
        },
    }
@@ -5213,6 +5219,48 @@ def build_detector_candidates(repair_targets: dict[str, Any], catalog: dict[str,
    }


+def summarize_detector_results(detector_results: dict[str, Any] | None, *, limit: int = 8) -> dict[str, Any]:  # Flatten a detector-results payload into status, counts, per-status detector names and the auto-coder signal flag.
+    if not isinstance(detector_results, dict):  # None or any non-dict payload is reported as "not_run"
+        return {
+            "status": "not_run",
+            "detector_count": 0,
+            "pass": 0,
+            "fail": 0,
+            "review": 0,
+            "skipped": 0,
+            "failed_detectors": [],
+            "review_detectors": [],
+            "skipped_detectors": [],
+            "signal_ok_for_auto_coder": False,  # a "not_run" summary never carries a repair signal
+        }
+    summary = detector_results.get("summary") if isinstance(detector_results.get("summary"), dict) else {}  # a missing or non-dict "summary" is treated as empty
+    results = detector_results.get("results") if isinstance(detector_results.get("results"), list) else []  # a missing or non-list "results" is treated as empty
+
+    def _detectors_with_status(status: str) -> list[str]:  # unique detector names whose item status equals `status`, in first-seen order
+        names: list[str] = []
+        for item in results:
+            if not isinstance(item, dict) or str(item.get("status") or "") != status:  # skip non-dict items and items with a different status
+                continue
+            detector_name = str(item.get("detector") or "").strip()  # a missing or falsy name becomes "" and is dropped by the check below
+            if detector_name and detector_name not in names:  # de-duplicate while keeping order
+                names.append(detector_name)
+        return names[:limit]  # truncate with a slice; `limit` defaults to 8
+
+    status = str(summary.get("status") or "skipped")  # a missing or falsy summary status falls back to "skipped"
+    return {
+        "status": status,
+        "detector_count": int(summary.get("detector_count") or len(results)),  # falls back to len(results) when the summary count is missing or falsy
+        "pass": int(summary.get("pass") or 0),  # each counter below defaults to 0 when missing or falsy
+        "fail": int(summary.get("fail") or 0),
+        "review": int(summary.get("review") or 0),
+        "skipped": int(summary.get("skipped") or 0),
+        "failed_detectors": _detectors_with_status("fail"),
+        "review_detectors": _detectors_with_status("review"),
+        "skipped_detectors": _detectors_with_status("skipped"),
+        "signal_ok_for_auto_coder": status in {"fail", "review"},  # only "fail" or "review" counts as a signal; every other status yields False
+    }
+
+
 def build_blocking_issue_contract(target: dict[str, Any], catalog: dict[str, Any]) -> dict[str, Any]:
    issue_code = str(target.get("issue_code") or target.get("problem_type") or "other").strip()
    entry = issue_catalog_entry(issue_code, catalog)
@@ -5257,6 +5305,7 @@ def build_business_audit_contract(
    rerun_matrix_path: Path | None = None,
    detector_candidates_path: Path | None = None,
    detector_results_path: Path | None = None,
+    detector_results: dict[str, Any] | None = None,
 ) -> dict[str, Any]:
    catalog = load_issue_catalog()
    targets = repair_targets.get("targets") if isinstance(repair_targets.get("targets"), list) else []
@@ -5298,6 +5347,7 @@ def build_business_audit_contract(
            "severity_counts": repair_targets.get("severity_counts") or {},
            "priority_foci": _limited_dict_items(repair_targets.get("priority_foci"), limit=8),
        },
+        "detector_results_summary": summarize_detector_results(detector_results),
        "rerun_matrix": rerun_matrix,
        "artifact_refs": {
            "business_audit_md": repo_relative(business_audit_markdown_path),
@@ -5410,6 +5460,7 @@ def build_lead_coder_handoff(
    rerun_matrix_path: Path | None = None,
    detector_candidates_path: Path | None = None,
    detector_results_path: Path | None = None,
+    detector_results: dict[str, Any] | None = None,
    analyst_verdict: dict[str, Any],
    repair_targets: dict[str, Any],
    target_score: int,
@@ -5485,6 +5536,7 @@ def build_lead_coder_handoff(
        "artifact_refs": artifact_refs,
        "issue_codes": issue_codes,
        "rerun_matrix": rerun_matrix,
+        "detector_results_summary": summarize_detector_results(detector_results),
        "human_meaning": {
            "user_intent_summary": analyst_verdict.get("user_intent_summary"),
            "expected_direct_answer": analyst_verdict.get("expected_direct_answer"),
@@ -5501,7 +5553,7 @@ def build_lead_coder_handoff(
        "candidate_files": candidate_files,
        "lead_instructions": [
            "Read business_audit.md first and judge the user-facing answer before debug metadata.",
-            "Use business_audit.json, issue_catalog_snapshot.json, rerun_matrix.json, and detector_candidates.json as the repair contract.",
+            "Use business_audit.json, issue_catalog_snapshot.json, rerun_matrix.json, detector_candidates.json, and detector_results.json as the repair contract.",
            "Inspect analyst_verdict.json and repair_targets.json only after the semantic defect is clear.",
            "Patch only inside allowed_patch_targets for the issue_code unless Lead Codex explicitly expands scope.",
            "Do not touch forbidden_patch_targets and do not repair by masking detector symptoms.",
@@ -5516,6 +5568,9 @@ def build_lead_coder_handoff_markdown(handoff: dict[str, Any]) -> str:
    artifact_refs = handoff.get("artifact_refs") if isinstance(handoff.get("artifact_refs"), dict) else {}
    human_meaning = handoff.get("human_meaning") if isinstance(handoff.get("human_meaning"), dict) else {}
    auto_coder_gate = handoff.get("auto_coder_gate") if isinstance(handoff.get("auto_coder_gate"), dict) else {}
+    detector_summary = (
+        handoff.get("detector_results_summary") if isinstance(handoff.get("detector_results_summary"), dict) else {}
+    )
    lines = [
        "# Lead Codex repair handoff",
        "",
@@ -5544,6 +5599,18 @@ def build_lead_coder_handoff_markdown(handoff: dict[str, Any]) -> str:
        f"- rerun_matrix: `{', '.join(normalize_string_list(handoff.get('rerun_matrix'))) or 'n/a'}`",
        "",
    ]
+    if detector_summary:
+        lines.extend(
+            [
+                "## Detector Results",
+                f"- status: `{detector_summary.get('status') or 'n/a'}`",
+                f"- counts: `pass={detector_summary.get('pass') or 0}, fail={detector_summary.get('fail') or 0}, review={detector_summary.get('review') or 0}, skipped={detector_summary.get('skipped') or 0}`",
+                f"- failed_detectors: `{', '.join(normalize_string_list(detector_summary.get('failed_detectors'))) or 'none'}`",
+                f"- review_detectors: `{', '.join(normalize_string_list(detector_summary.get('review_detectors'))) or 'none'}`",
+                f"- skipped_detectors: `{', '.join(normalize_string_list(detector_summary.get('skipped_detectors'))) or 'none'}`",
+                "",
+            ]
+        )
    if auto_coder_gate:
        lines.extend(
            [
@@ -5799,6 +5866,7 @@ def build_loop_summary(loop_state: dict[str, Any]) -> str:
        "## Iterations",
    ]
    for item in loop_state.get("iterations", []):
+        detector_summary = item.get("detector_results_summary") if isinstance(item.get("detector_results_summary"), dict) else {}
        lines.extend(
            [
                f"- `{item['iteration_id']}`",
@@ -5822,6 +5890,8 @@ def build_loop_summary(loop_state: dict[str, Any]) -> str:
                f"  detector_candidates: `{item.get('detector_candidates_path') or 'n/a'}`",
                f"  detector_results: `{item.get('detector_results_path') or 'n/a'}`",
                f"  detector_results_status: `{item.get('detector_results_status') or 'n/a'}`",
+                f"  detector_failed: `{', '.join(normalize_string_list(detector_summary.get('failed_detectors'))) or 'none'}`",
+                f"  detector_review: `{', '.join(normalize_string_list(detector_summary.get('review_detectors'))) or 'none'}`",
                f"  auto_coder_gate: `{item.get('auto_coder_gate_path') or 'n/a'}`",
                f"  lead_coder_handoff: `{item.get('lead_coder_handoff_path') or 'n/a'}`",
                f"  repair_target_count: `{item.get('repair_target_count')}`",
@@ -5987,6 +6057,14 @@ def handle_run_pack_loop(args: argparse.Namespace) -> int:
            "items": collect_rerun_matrix(repair_targets),
        }
        detector_candidates = build_detector_candidates(repair_targets)
+        write_json(detector_candidates_path, detector_candidates)
+        detector_results = agent_detector_runner.build_detector_results(
+            pack_dir,
+            detector_candidates_path=detector_candidates_path,
+            include_default_global=False,
+        )
+        write_json(detector_results_path, detector_results)
+        detector_results_summary = summarize_detector_results(detector_results)
        business_audit_contract = build_business_audit_contract(
            analyst_verdict=analyst_verdict,
            repair_targets=repair_targets,
@@ -6004,17 +6082,11 @@ def handle_run_pack_loop(args: argparse.Namespace) -> int:
            rerun_matrix_path=rerun_matrix_path,
            detector_candidates_path=detector_candidates_path,
            detector_results_path=detector_results_path,
+            detector_results=detector_results,
        )
        write_json(business_audit_json_path, business_audit_contract)
        write_json(issue_catalog_snapshot_path, issue_catalog_snapshot)
        write_json(rerun_matrix_path, rerun_matrix_contract)
-        write_json(detector_candidates_path, detector_candidates)
-        detector_results = agent_detector_runner.build_detector_results(
-            pack_dir,
-            detector_candidates_path=detector_candidates_path,
-            include_default_global=False,
-        )
-        write_json(detector_results_path, detector_results)
        repair_target_count = int(repair_targets.get("target_count") or 0) if isinstance(repair_targets, dict) else 0
        repair_target_severity_counts = (
            repair_targets.get("severity_counts")
@@ -6051,7 +6123,8 @@ def handle_run_pack_loop(args: argparse.Namespace) -> int:
            "rerun_matrix_path": str(rerun_matrix_path),
            "detector_candidates_path": str(detector_candidates_path),
            "detector_results_path": str(detector_results_path),
-            "detector_results_status": detector_results.get("summary", {}).get("status"),
+            "detector_results_status": detector_results_summary.get("status"),
+            "detector_results_summary": detector_results_summary,
            "repair_target_count": repair_target_count,
            "repair_target_severity_counts": repair_target_severity_counts,
            "coder_status": None,
@@ -6098,6 +6171,7 @@ def handle_run_pack_loop(args: argparse.Namespace) -> int:
                rerun_matrix_path=rerun_matrix_path,
                detector_candidates_path=detector_candidates_path,
                detector_results_path=detector_results_path,
+                detector_results=detector_results,
                analyst_verdict=analyst_verdict,
                repair_targets=repair_targets,
                target_score=target_score,
@@ -6137,7 +6211,7 @@ def handle_run_pack_loop(args: argparse.Namespace) -> int:

        assigned_focus = select_primary_repair_focus(repair_targets)
        auto_coder_gate_path = iteration_dir / "auto_coder_gate.json"
-        auto_coder_gate = evaluate_auto_coder_gate(repair_targets, assigned_focus)
+        auto_coder_gate = evaluate_auto_coder_gate(repair_targets, assigned_focus, detector_results=detector_results)
        write_json(auto_coder_gate_path, auto_coder_gate)
        iteration_record["auto_coder_gate_path"] = str(auto_coder_gate_path)
        if not bool(auto_coder_gate.get("allowed")):
@@ -6153,6 +6227,7 @@ def handle_run_pack_loop(args: argparse.Namespace) -> int:
                rerun_matrix_path=rerun_matrix_path,
                detector_candidates_path=detector_candidates_path,
                detector_results_path=detector_results_path,
+                detector_results=detector_results,
                analyst_verdict=analyst_verdict,
                repair_targets=repair_targets,
                target_score=target_score,
--- a/scripts/test_domain_case_loop_lead_handoff.py
+++ b/scripts/test_domain_case_loop_lead_handoff.py
@@ -69,6 +69,11 @@ class DomainCaseLoopLeadHandoffTests(unittest.TestCase):
                issue_catalog_snapshot_path=iteration_dir / "issue_catalog_snapshot.json",
                rerun_matrix_path=iteration_dir / "rerun_matrix.json",
                detector_candidates_path=iteration_dir / "detector_candidates.json",
+                detector_results_path=iteration_dir / "detector_results.json",
+                detector_results={
+                    "summary": {"status": "fail", "detector_count": 1, "pass": 0, "fail": 1, "review": 0, "skipped": 0},
+                    "results": [{"detector": "first_line_not_direct_answer", "status": "fail"}],
+                },
                analyst_verdict=analyst_verdict,
                repair_targets=repair_targets,
                target_score=88,
@@ -96,6 +101,9 @@ class DomainCaseLoopLeadHandoffTests(unittest.TestCase):
        self.assertIn("business_audit", saved["artifact_refs"])
        self.assertIn("business_audit_json", saved["artifact_refs"])
        self.assertIn("issue_catalog_snapshot", saved["artifact_refs"])
+        self.assertIn("detector_results", saved["artifact_refs"])
+        self.assertEqual(saved["detector_results_summary"]["status"], "fail")
+        self.assertEqual(saved["detector_results_summary"]["failed_detectors"], ["first_line_not_direct_answer"])
        self.assertIn("business_direct_answer_missing", saved["issue_codes"])
        self.assertIn("failed_scenario", saved["rerun_matrix"])
        self.assertTrue(latest_handoff_exists)
@@ -140,6 +148,11 @@ class DomainCaseLoopLeadHandoffTests(unittest.TestCase):
            issue_catalog_snapshot_path=Path("issue_catalog_snapshot.json"),
            rerun_matrix_path=Path("rerun_matrix.json"),
            detector_candidates_path=Path("detector_candidates.json"),
+            detector_results_path=Path("detector_results.json"),
+            detector_results={
+                "summary": {"status": "review", "detector_count": 1, "pass": 0, "fail": 0, "review": 1, "skipped": 0},
+                "results": [{"detector": "missing_revenue_cogs_margin_fields", "status": "review"}],
+            },
        )

        self.assertEqual(contract["overall_status"], "partial")
@@ -147,6 +160,9 @@ class DomainCaseLoopLeadHandoffTests(unittest.TestCase):
        self.assertEqual(contract["blocking_issues"][0]["expected_business_answer_contract"], "margin_profitability_v1")
        self.assertIn("failed_margin_scenario", contract["rerun_matrix"])
        self.assertIn("detector_candidates_json", contract["artifact_refs"])
+        self.assertIn("detector_results_json", contract["artifact_refs"])
+        self.assertEqual(contract["detector_results_summary"]["status"], "review")
+        self.assertEqual(contract["detector_results_summary"]["review_detectors"], ["missing_revenue_cogs_margin_fields"])

    def test_auto_coder_gate_blocks_non_allowlisted_issue_codes(self) -> None:
        repair_targets = {
@@ -209,6 +225,81 @@ class DomainCaseLoopLeadHandoffTests(unittest.TestCase):
            "direct_answer_surface_v1",
        )

+    def test_auto_coder_gate_blocks_when_detector_results_have_no_repair_signal(self) -> None:  # Detector results with summary status "pass" must keep the gate closed.
+        repair_targets = {  # one target, "pack:s01", for issue code business_direct_answer_missing
+            "targets": [
+                {
+                    "target_id": "pack:s01",
+                    "issue_code": "business_direct_answer_missing",
+                    "root_cause_layers": ["answer_surface"],
+                    "expected_business_answer_contract": "direct_answer_surface_v1",
+                    "evidence_paths": ["artifacts/domain_runs/pack/steps/s01/output.md"],
+                    "allowed_patch_targets": ["llm_normalizer/backend/src/services/address_runtime/composeStage.ts"],
+                    "forbidden_patch_targets": ["routing rewrites", "fake evidence", "global runtime rewrite"],
+                    "rerun_matrix": ["failed_scenario", "direct_answer_surface_pack", "accepted_smoke_pack"],
+                }
+            ],
+        }
+        assigned_focus = {  # focus referencing the same target id and issue code as the target above
+            "focus_id": "answer_shape|composeStage",
+            "issue_codes": ["business_direct_answer_missing"],
+            "root_cause_layers": ["answer_surface"],
+            "allowed_patch_targets": ["llm_normalizer/backend/src/services/address_runtime/composeStage.ts"],
+            "forbidden_patch_targets": ["routing rewrites", "fake evidence", "global runtime rewrite"],
+            "rerun_matrix": ["failed_scenario", "direct_answer_surface_pack", "accepted_smoke_pack"],
+            "target_ids": ["pack:s01"],
+        }
+
+        gate = dcl.evaluate_auto_coder_gate(
+            repair_targets,
+            assigned_focus,
+            detector_results={  # a single detector that passed
+                "summary": {"status": "pass", "detector_count": 1, "pass": 1, "fail": 0, "review": 0, "skipped": 0},
+                "results": [{"detector": "first_line_not_direct_answer", "status": "pass"}],
+            },
+        )
+
+        self.assertFalse(gate["allowed"])  # gate must refuse the auto-coder
+        self.assertEqual(gate["detector_results_summary"]["status"], "pass")  # summary echoes the detector status
+        self.assertIn("detector_results_no_repair_signal:pass", gate["blocking_reasons"])  # blocking reason is suffixed with that status
+
+    def test_auto_coder_gate_allows_when_detector_results_confirm_failure(self) -> None:  # Detector results with summary status "fail" must let the gate pass.
+        repair_targets = {  # one target, "pack:s01", for issue code business_direct_answer_missing
+            "targets": [
+                {
+                    "target_id": "pack:s01",
+                    "issue_code": "business_direct_answer_missing",
+                    "root_cause_layers": ["answer_surface"],
+                    "expected_business_answer_contract": "direct_answer_surface_v1",
+                    "evidence_paths": ["artifacts/domain_runs/pack/steps/s01/output.md"],
+                    "allowed_patch_targets": ["llm_normalizer/backend/src/services/address_runtime/composeStage.ts"],
+                    "forbidden_patch_targets": ["routing rewrites", "fake evidence", "global runtime rewrite"],
+                    "rerun_matrix": ["failed_scenario", "direct_answer_surface_pack", "accepted_smoke_pack"],
+                }
+            ],
+        }
+        assigned_focus = {  # focus referencing the same target id and issue code as the target above
+            "focus_id": "answer_shape|composeStage",
+            "issue_codes": ["business_direct_answer_missing"],
+            "root_cause_layers": ["answer_surface"],
+            "allowed_patch_targets": ["llm_normalizer/backend/src/services/address_runtime/composeStage.ts"],
+            "forbidden_patch_targets": ["routing rewrites", "fake evidence", "global runtime rewrite"],
+            "rerun_matrix": ["failed_scenario", "direct_answer_surface_pack", "accepted_smoke_pack"],
+            "target_ids": ["pack:s01"],
+        }
+
+        gate = dcl.evaluate_auto_coder_gate(
+            repair_targets,
+            assigned_focus,
+            detector_results={  # a single detector that failed
+                "summary": {"status": "fail", "detector_count": 1, "pass": 0, "fail": 1, "review": 0, "skipped": 0},
+                "results": [{"detector": "first_line_not_direct_answer", "status": "fail"}],
+            },
+        )
+
+        self.assertTrue(gate["allowed"])  # expected: gate is open when the detector run reports "fail"
+        self.assertEqual(gate["detector_results_summary"]["failed_detectors"], ["first_line_not_direct_answer"])  # failing detector name is surfaced in the summary
+
    def test_auto_coder_gate_blocks_broad_or_blind_patch_scope(self) -> None:
        repair_targets = {
            "targets": [
@@ -302,6 +393,7 @@ class DomainCaseLoopLeadHandoffTests(unittest.TestCase):
                "business_audit": "artifacts/domain_runs/demo/business_audit.md",
                "analyst_verdict": "artifacts/domain_runs/demo/analyst_verdict.json",
                "repair_targets": "artifacts/domain_runs/demo/repair_targets.json",
+                "detector_results": "artifacts/domain_runs/demo/detector_results.json",
                "auto_coder_gate": "artifacts/domain_runs/demo/auto_coder_gate.json",
                "pack_dir": "artifacts/domain_runs/demo/pack",
            },
@@ -311,6 +403,16 @@ class DomainCaseLoopLeadHandoffTests(unittest.TestCase):
            "top_repair_targets": [],
            "candidate_files": [],
            "lead_instructions": [],
+            "detector_results_summary": {
+                "status": "fail",
+                "pass": 0,
+                "fail": 1,
+                "review": 0,
+                "skipped": 0,
+                "failed_detectors": ["first_line_not_direct_answer"],
+                "review_detectors": [],
+                "skipped_detectors": [],
+            },
            "auto_coder_gate": {
                "allowed": False,
                "reason": "target_missing_evidence_paths:pack:s01",
@@ -331,6 +433,8 @@ class DomainCaseLoopLeadHandoffTests(unittest.TestCase):
        markdown = dcl.build_lead_coder_handoff_markdown(handoff)

        self.assertIn("## Auto-Coder Gate", markdown)
+        self.assertIn("## Detector Results", markdown)
+        self.assertIn("first_line_not_direct_answer", markdown)
        self.assertIn("target_missing_evidence_paths:pack:s01", markdown)
        self.assertIn("## Auto-Coder Catalog Contracts", markdown)
        self.assertIn("direct_answer_surface_v1", markdown)