Учитывать результаты детекторов в repair gate
This commit is contained in:
parent
55e2eee12e
commit
21bc9e953b
|
|
@ -81,6 +81,18 @@
|
|||
]
|
||||
}
|
||||
},
|
||||
"detector_results_summary": {
|
||||
"type": "object",
|
||||
"additionalProperties": true,
|
||||
"properties": {
|
||||
"status": {
|
||||
"type": "string"
|
||||
},
|
||||
"signal_ok_for_auto_coder": {
|
||||
"type": "boolean"
|
||||
}
|
||||
}
|
||||
},
|
||||
"blocking_reasons": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
|
|
|
|||
|
|
@ -103,6 +103,18 @@
|
|||
"type": "object",
|
||||
"additionalProperties": true
|
||||
},
|
||||
"detector_results_summary": {
|
||||
"type": "object",
|
||||
"additionalProperties": true,
|
||||
"properties": {
|
||||
"status": {
|
||||
"type": "string"
|
||||
},
|
||||
"signal_ok_for_auto_coder": {
|
||||
"type": "boolean"
|
||||
}
|
||||
}
|
||||
},
|
||||
"rerun_matrix": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
|
|
|
|||
|
|
@ -94,6 +94,18 @@
|
|||
"type": "string"
|
||||
}
|
||||
},
|
||||
"detector_results_summary": {
|
||||
"type": "object",
|
||||
"additionalProperties": true,
|
||||
"properties": {
|
||||
"status": {
|
||||
"type": "string"
|
||||
},
|
||||
"signal_ok_for_auto_coder": {
|
||||
"type": "boolean"
|
||||
}
|
||||
}
|
||||
},
|
||||
"human_meaning": {
|
||||
"type": "object",
|
||||
"additionalProperties": true
|
||||
|
|
|
|||
|
|
@ -4581,6 +4581,7 @@ def select_primary_repair_focus(repair_targets: dict[str, Any]) -> dict[str, Any
|
|||
def evaluate_auto_coder_gate(
|
||||
repair_targets: dict[str, Any],
|
||||
assigned_focus: dict[str, Any] | None,
|
||||
detector_results: dict[str, Any] | None = None,
|
||||
) -> dict[str, Any]:
|
||||
catalog = load_issue_catalog()
|
||||
issue_codes = normalize_string_list((assigned_focus or {}).get("issue_codes"))
|
||||
|
|
@ -4593,6 +4594,7 @@ def evaluate_auto_coder_gate(
|
|||
catalog_allowed_patch_targets: list[str] = []
|
||||
catalog_forbidden_patch_targets: list[str] = []
|
||||
issue_catalog_contracts: dict[str, Any] = {}
|
||||
detector_results_summary = summarize_detector_results(detector_results)
|
||||
|
||||
if not assigned_focus:
|
||||
blocking_reasons.append("missing_assigned_focus")
|
||||
|
|
@ -4703,6 +4705,8 @@ def evaluate_auto_coder_gate(
|
|||
blocking_reasons.append(f"target_missing_rerun_matrix:{target_id}")
|
||||
elif "accepted_smoke_pack" not in normalize_string_list(target.get("rerun_matrix")):
|
||||
blocking_reasons.append(f"target_missing_accepted_smoke_pack:{target_id}")
|
||||
if detector_results is not None and not detector_results_summary["signal_ok_for_auto_coder"]:
|
||||
blocking_reasons.append(f"detector_results_no_repair_signal:{detector_results_summary['status']}")
|
||||
|
||||
allowed = not blocking_reasons
|
||||
return {
|
||||
|
|
@ -4717,6 +4721,7 @@ def evaluate_auto_coder_gate(
|
|||
"rerun_matrix": rerun_matrix,
|
||||
"allowlisted_issue_codes": sorted(AUTO_CODER_ALLOWED_ISSUE_CODES),
|
||||
"issue_catalog_contracts": issue_catalog_contracts,
|
||||
"detector_results_summary": detector_results_summary,
|
||||
"blocking_reasons": blocking_reasons,
|
||||
"reason": "auto_coder_gate_passed" if allowed else ";".join(blocking_reasons),
|
||||
"policy": {
|
||||
|
|
@ -4726,6 +4731,7 @@ def evaluate_auto_coder_gate(
|
|||
"requires_target_evidence_paths": True,
|
||||
"requires_accepted_smoke_pack": True,
|
||||
"requires_catalog_limited_patch_scope": True,
|
||||
"requires_detector_results_signal_when_available": True,
|
||||
"lead_owns_merge_and_acceptance": True,
|
||||
},
|
||||
}
|
||||
|
|
@ -5213,6 +5219,48 @@ def build_detector_candidates(repair_targets: dict[str, Any], catalog: dict[str,
|
|||
}
|
||||
|
||||
|
||||
def summarize_detector_results(detector_results: dict[str, Any] | None, *, limit: int = 8) -> dict[str, Any]:
|
||||
if not isinstance(detector_results, dict):
|
||||
return {
|
||||
"status": "not_run",
|
||||
"detector_count": 0,
|
||||
"pass": 0,
|
||||
"fail": 0,
|
||||
"review": 0,
|
||||
"skipped": 0,
|
||||
"failed_detectors": [],
|
||||
"review_detectors": [],
|
||||
"skipped_detectors": [],
|
||||
"signal_ok_for_auto_coder": False,
|
||||
}
|
||||
summary = detector_results.get("summary") if isinstance(detector_results.get("summary"), dict) else {}
|
||||
results = detector_results.get("results") if isinstance(detector_results.get("results"), list) else []
|
||||
|
||||
def _detectors_with_status(status: str) -> list[str]:
|
||||
names: list[str] = []
|
||||
for item in results:
|
||||
if not isinstance(item, dict) or str(item.get("status") or "") != status:
|
||||
continue
|
||||
detector_name = str(item.get("detector") or "").strip()
|
||||
if detector_name and detector_name not in names:
|
||||
names.append(detector_name)
|
||||
return names[:limit]
|
||||
|
||||
status = str(summary.get("status") or "skipped")
|
||||
return {
|
||||
"status": status,
|
||||
"detector_count": int(summary.get("detector_count") or len(results)),
|
||||
"pass": int(summary.get("pass") or 0),
|
||||
"fail": int(summary.get("fail") or 0),
|
||||
"review": int(summary.get("review") or 0),
|
||||
"skipped": int(summary.get("skipped") or 0),
|
||||
"failed_detectors": _detectors_with_status("fail"),
|
||||
"review_detectors": _detectors_with_status("review"),
|
||||
"skipped_detectors": _detectors_with_status("skipped"),
|
||||
"signal_ok_for_auto_coder": status in {"fail", "review"},
|
||||
}
|
||||
|
||||
|
||||
def build_blocking_issue_contract(target: dict[str, Any], catalog: dict[str, Any]) -> dict[str, Any]:
|
||||
issue_code = str(target.get("issue_code") or target.get("problem_type") or "other").strip()
|
||||
entry = issue_catalog_entry(issue_code, catalog)
|
||||
|
|
@ -5257,6 +5305,7 @@ def build_business_audit_contract(
|
|||
rerun_matrix_path: Path | None = None,
|
||||
detector_candidates_path: Path | None = None,
|
||||
detector_results_path: Path | None = None,
|
||||
detector_results: dict[str, Any] | None = None,
|
||||
) -> dict[str, Any]:
|
||||
catalog = load_issue_catalog()
|
||||
targets = repair_targets.get("targets") if isinstance(repair_targets.get("targets"), list) else []
|
||||
|
|
@ -5298,6 +5347,7 @@ def build_business_audit_contract(
|
|||
"severity_counts": repair_targets.get("severity_counts") or {},
|
||||
"priority_foci": _limited_dict_items(repair_targets.get("priority_foci"), limit=8),
|
||||
},
|
||||
"detector_results_summary": summarize_detector_results(detector_results),
|
||||
"rerun_matrix": rerun_matrix,
|
||||
"artifact_refs": {
|
||||
"business_audit_md": repo_relative(business_audit_markdown_path),
|
||||
|
|
@ -5410,6 +5460,7 @@ def build_lead_coder_handoff(
|
|||
rerun_matrix_path: Path | None = None,
|
||||
detector_candidates_path: Path | None = None,
|
||||
detector_results_path: Path | None = None,
|
||||
detector_results: dict[str, Any] | None = None,
|
||||
analyst_verdict: dict[str, Any],
|
||||
repair_targets: dict[str, Any],
|
||||
target_score: int,
|
||||
|
|
@ -5485,6 +5536,7 @@ def build_lead_coder_handoff(
|
|||
"artifact_refs": artifact_refs,
|
||||
"issue_codes": issue_codes,
|
||||
"rerun_matrix": rerun_matrix,
|
||||
"detector_results_summary": summarize_detector_results(detector_results),
|
||||
"human_meaning": {
|
||||
"user_intent_summary": analyst_verdict.get("user_intent_summary"),
|
||||
"expected_direct_answer": analyst_verdict.get("expected_direct_answer"),
|
||||
|
|
@ -5501,7 +5553,7 @@ def build_lead_coder_handoff(
|
|||
"candidate_files": candidate_files,
|
||||
"lead_instructions": [
|
||||
"Read business_audit.md first and judge the user-facing answer before debug metadata.",
|
||||
"Use business_audit.json, issue_catalog_snapshot.json, rerun_matrix.json, and detector_candidates.json as the repair contract.",
|
||||
"Use business_audit.json, issue_catalog_snapshot.json, rerun_matrix.json, detector_candidates.json, and detector_results.json as the repair contract.",
|
||||
"Inspect analyst_verdict.json and repair_targets.json only after the semantic defect is clear.",
|
||||
"Patch only inside allowed_patch_targets for the issue_code unless Lead Codex explicitly expands scope.",
|
||||
"Do not touch forbidden_patch_targets and do not repair by masking detector symptoms.",
|
||||
|
|
@ -5516,6 +5568,9 @@ def build_lead_coder_handoff_markdown(handoff: dict[str, Any]) -> str:
|
|||
artifact_refs = handoff.get("artifact_refs") if isinstance(handoff.get("artifact_refs"), dict) else {}
|
||||
human_meaning = handoff.get("human_meaning") if isinstance(handoff.get("human_meaning"), dict) else {}
|
||||
auto_coder_gate = handoff.get("auto_coder_gate") if isinstance(handoff.get("auto_coder_gate"), dict) else {}
|
||||
detector_summary = (
|
||||
handoff.get("detector_results_summary") if isinstance(handoff.get("detector_results_summary"), dict) else {}
|
||||
)
|
||||
lines = [
|
||||
"# Lead Codex repair handoff",
|
||||
"",
|
||||
|
|
@ -5544,6 +5599,18 @@ def build_lead_coder_handoff_markdown(handoff: dict[str, Any]) -> str:
|
|||
f"- rerun_matrix: `{', '.join(normalize_string_list(handoff.get('rerun_matrix'))) or 'n/a'}`",
|
||||
"",
|
||||
]
|
||||
if detector_summary:
|
||||
lines.extend(
|
||||
[
|
||||
"## Detector Results",
|
||||
f"- status: `{detector_summary.get('status') or 'n/a'}`",
|
||||
f"- counts: `pass={detector_summary.get('pass') or 0}, fail={detector_summary.get('fail') or 0}, review={detector_summary.get('review') or 0}, skipped={detector_summary.get('skipped') or 0}`",
|
||||
f"- failed_detectors: `{', '.join(normalize_string_list(detector_summary.get('failed_detectors'))) or 'none'}`",
|
||||
f"- review_detectors: `{', '.join(normalize_string_list(detector_summary.get('review_detectors'))) or 'none'}`",
|
||||
f"- skipped_detectors: `{', '.join(normalize_string_list(detector_summary.get('skipped_detectors'))) or 'none'}`",
|
||||
"",
|
||||
]
|
||||
)
|
||||
if auto_coder_gate:
|
||||
lines.extend(
|
||||
[
|
||||
|
|
@ -5799,6 +5866,7 @@ def build_loop_summary(loop_state: dict[str, Any]) -> str:
|
|||
"## Iterations",
|
||||
]
|
||||
for item in loop_state.get("iterations", []):
|
||||
detector_summary = item.get("detector_results_summary") if isinstance(item.get("detector_results_summary"), dict) else {}
|
||||
lines.extend(
|
||||
[
|
||||
f"- `{item['iteration_id']}`",
|
||||
|
|
@ -5822,6 +5890,8 @@ def build_loop_summary(loop_state: dict[str, Any]) -> str:
|
|||
f" detector_candidates: `{item.get('detector_candidates_path') or 'n/a'}`",
|
||||
f" detector_results: `{item.get('detector_results_path') or 'n/a'}`",
|
||||
f" detector_results_status: `{item.get('detector_results_status') or 'n/a'}`",
|
||||
f" detector_failed: `{', '.join(normalize_string_list(detector_summary.get('failed_detectors'))) or 'none'}`",
|
||||
f" detector_review: `{', '.join(normalize_string_list(detector_summary.get('review_detectors'))) or 'none'}`",
|
||||
f" auto_coder_gate: `{item.get('auto_coder_gate_path') or 'n/a'}`",
|
||||
f" lead_coder_handoff: `{item.get('lead_coder_handoff_path') or 'n/a'}`",
|
||||
f" repair_target_count: `{item.get('repair_target_count')}`",
|
||||
|
|
@ -5987,6 +6057,14 @@ def handle_run_pack_loop(args: argparse.Namespace) -> int:
|
|||
"items": collect_rerun_matrix(repair_targets),
|
||||
}
|
||||
detector_candidates = build_detector_candidates(repair_targets)
|
||||
write_json(detector_candidates_path, detector_candidates)
|
||||
detector_results = agent_detector_runner.build_detector_results(
|
||||
pack_dir,
|
||||
detector_candidates_path=detector_candidates_path,
|
||||
include_default_global=False,
|
||||
)
|
||||
write_json(detector_results_path, detector_results)
|
||||
detector_results_summary = summarize_detector_results(detector_results)
|
||||
business_audit_contract = build_business_audit_contract(
|
||||
analyst_verdict=analyst_verdict,
|
||||
repair_targets=repair_targets,
|
||||
|
|
@ -6004,17 +6082,11 @@ def handle_run_pack_loop(args: argparse.Namespace) -> int:
|
|||
rerun_matrix_path=rerun_matrix_path,
|
||||
detector_candidates_path=detector_candidates_path,
|
||||
detector_results_path=detector_results_path,
|
||||
detector_results=detector_results,
|
||||
)
|
||||
write_json(business_audit_json_path, business_audit_contract)
|
||||
write_json(issue_catalog_snapshot_path, issue_catalog_snapshot)
|
||||
write_json(rerun_matrix_path, rerun_matrix_contract)
|
||||
write_json(detector_candidates_path, detector_candidates)
|
||||
detector_results = agent_detector_runner.build_detector_results(
|
||||
pack_dir,
|
||||
detector_candidates_path=detector_candidates_path,
|
||||
include_default_global=False,
|
||||
)
|
||||
write_json(detector_results_path, detector_results)
|
||||
repair_target_count = int(repair_targets.get("target_count") or 0) if isinstance(repair_targets, dict) else 0
|
||||
repair_target_severity_counts = (
|
||||
repair_targets.get("severity_counts")
|
||||
|
|
@ -6051,7 +6123,8 @@ def handle_run_pack_loop(args: argparse.Namespace) -> int:
|
|||
"rerun_matrix_path": str(rerun_matrix_path),
|
||||
"detector_candidates_path": str(detector_candidates_path),
|
||||
"detector_results_path": str(detector_results_path),
|
||||
"detector_results_status": detector_results.get("summary", {}).get("status"),
|
||||
"detector_results_status": detector_results_summary.get("status"),
|
||||
"detector_results_summary": detector_results_summary,
|
||||
"repair_target_count": repair_target_count,
|
||||
"repair_target_severity_counts": repair_target_severity_counts,
|
||||
"coder_status": None,
|
||||
|
|
@ -6098,6 +6171,7 @@ def handle_run_pack_loop(args: argparse.Namespace) -> int:
|
|||
rerun_matrix_path=rerun_matrix_path,
|
||||
detector_candidates_path=detector_candidates_path,
|
||||
detector_results_path=detector_results_path,
|
||||
detector_results=detector_results,
|
||||
analyst_verdict=analyst_verdict,
|
||||
repair_targets=repair_targets,
|
||||
target_score=target_score,
|
||||
|
|
@ -6137,7 +6211,7 @@ def handle_run_pack_loop(args: argparse.Namespace) -> int:
|
|||
|
||||
assigned_focus = select_primary_repair_focus(repair_targets)
|
||||
auto_coder_gate_path = iteration_dir / "auto_coder_gate.json"
|
||||
auto_coder_gate = evaluate_auto_coder_gate(repair_targets, assigned_focus)
|
||||
auto_coder_gate = evaluate_auto_coder_gate(repair_targets, assigned_focus, detector_results=detector_results)
|
||||
write_json(auto_coder_gate_path, auto_coder_gate)
|
||||
iteration_record["auto_coder_gate_path"] = str(auto_coder_gate_path)
|
||||
if not bool(auto_coder_gate.get("allowed")):
|
||||
|
|
@ -6153,6 +6227,7 @@ def handle_run_pack_loop(args: argparse.Namespace) -> int:
|
|||
rerun_matrix_path=rerun_matrix_path,
|
||||
detector_candidates_path=detector_candidates_path,
|
||||
detector_results_path=detector_results_path,
|
||||
detector_results=detector_results,
|
||||
analyst_verdict=analyst_verdict,
|
||||
repair_targets=repair_targets,
|
||||
target_score=target_score,
|
||||
|
|
|
|||
|
|
@ -69,6 +69,11 @@ class DomainCaseLoopLeadHandoffTests(unittest.TestCase):
|
|||
issue_catalog_snapshot_path=iteration_dir / "issue_catalog_snapshot.json",
|
||||
rerun_matrix_path=iteration_dir / "rerun_matrix.json",
|
||||
detector_candidates_path=iteration_dir / "detector_candidates.json",
|
||||
detector_results_path=iteration_dir / "detector_results.json",
|
||||
detector_results={
|
||||
"summary": {"status": "fail", "detector_count": 1, "pass": 0, "fail": 1, "review": 0, "skipped": 0},
|
||||
"results": [{"detector": "first_line_not_direct_answer", "status": "fail"}],
|
||||
},
|
||||
analyst_verdict=analyst_verdict,
|
||||
repair_targets=repair_targets,
|
||||
target_score=88,
|
||||
|
|
@ -96,6 +101,9 @@ class DomainCaseLoopLeadHandoffTests(unittest.TestCase):
|
|||
self.assertIn("business_audit", saved["artifact_refs"])
|
||||
self.assertIn("business_audit_json", saved["artifact_refs"])
|
||||
self.assertIn("issue_catalog_snapshot", saved["artifact_refs"])
|
||||
self.assertIn("detector_results", saved["artifact_refs"])
|
||||
self.assertEqual(saved["detector_results_summary"]["status"], "fail")
|
||||
self.assertEqual(saved["detector_results_summary"]["failed_detectors"], ["first_line_not_direct_answer"])
|
||||
self.assertIn("business_direct_answer_missing", saved["issue_codes"])
|
||||
self.assertIn("failed_scenario", saved["rerun_matrix"])
|
||||
self.assertTrue(latest_handoff_exists)
|
||||
|
|
@ -140,6 +148,11 @@ class DomainCaseLoopLeadHandoffTests(unittest.TestCase):
|
|||
issue_catalog_snapshot_path=Path("issue_catalog_snapshot.json"),
|
||||
rerun_matrix_path=Path("rerun_matrix.json"),
|
||||
detector_candidates_path=Path("detector_candidates.json"),
|
||||
detector_results_path=Path("detector_results.json"),
|
||||
detector_results={
|
||||
"summary": {"status": "review", "detector_count": 1, "pass": 0, "fail": 0, "review": 1, "skipped": 0},
|
||||
"results": [{"detector": "missing_revenue_cogs_margin_fields", "status": "review"}],
|
||||
},
|
||||
)
|
||||
|
||||
self.assertEqual(contract["overall_status"], "partial")
|
||||
|
|
@ -147,6 +160,9 @@ class DomainCaseLoopLeadHandoffTests(unittest.TestCase):
|
|||
self.assertEqual(contract["blocking_issues"][0]["expected_business_answer_contract"], "margin_profitability_v1")
|
||||
self.assertIn("failed_margin_scenario", contract["rerun_matrix"])
|
||||
self.assertIn("detector_candidates_json", contract["artifact_refs"])
|
||||
self.assertIn("detector_results_json", contract["artifact_refs"])
|
||||
self.assertEqual(contract["detector_results_summary"]["status"], "review")
|
||||
self.assertEqual(contract["detector_results_summary"]["review_detectors"], ["missing_revenue_cogs_margin_fields"])
|
||||
|
||||
def test_auto_coder_gate_blocks_non_allowlisted_issue_codes(self) -> None:
|
||||
repair_targets = {
|
||||
|
|
@ -209,6 +225,81 @@ class DomainCaseLoopLeadHandoffTests(unittest.TestCase):
|
|||
"direct_answer_surface_v1",
|
||||
)
|
||||
|
||||
def test_auto_coder_gate_blocks_when_detector_results_have_no_repair_signal(self) -> None:
    # Detector results whose summary status is "pass" carry no repair signal,
    # so the gate is expected to refuse and record the matching reason.
    target = {
        "target_id": "pack:s01",
        "issue_code": "business_direct_answer_missing",
        "root_cause_layers": ["answer_surface"],
        "expected_business_answer_contract": "direct_answer_surface_v1",
        "evidence_paths": ["artifacts/domain_runs/pack/steps/s01/output.md"],
        "allowed_patch_targets": ["llm_normalizer/backend/src/services/address_runtime/composeStage.ts"],
        "forbidden_patch_targets": ["routing rewrites", "fake evidence", "global runtime rewrite"],
        "rerun_matrix": ["failed_scenario", "direct_answer_surface_pack", "accepted_smoke_pack"],
    }
    repair_targets = {"targets": [target]}
    assigned_focus = {
        "focus_id": "answer_shape|composeStage",
        "issue_codes": ["business_direct_answer_missing"],
        "root_cause_layers": ["answer_surface"],
        "allowed_patch_targets": ["llm_normalizer/backend/src/services/address_runtime/composeStage.ts"],
        "forbidden_patch_targets": ["routing rewrites", "fake evidence", "global runtime rewrite"],
        "rerun_matrix": ["failed_scenario", "direct_answer_surface_pack", "accepted_smoke_pack"],
        "target_ids": ["pack:s01"],
    }
    passing_detector_results = {
        "summary": {"status": "pass", "detector_count": 1, "pass": 1, "fail": 0, "review": 0, "skipped": 0},
        "results": [{"detector": "first_line_not_direct_answer", "status": "pass"}],
    }

    gate = dcl.evaluate_auto_coder_gate(
        repair_targets,
        assigned_focus,
        detector_results=passing_detector_results,
    )

    self.assertFalse(gate["allowed"])
    self.assertEqual(gate["detector_results_summary"]["status"], "pass")
    self.assertIn("detector_results_no_repair_signal:pass", gate["blocking_reasons"])
|
||||
|
||||
def test_auto_coder_gate_allows_when_detector_results_confirm_failure(self) -> None:
    # Detector results whose summary status is "fail" confirm the defect, so
    # the gate is expected to allow and surface the failing detector name.
    target = {
        "target_id": "pack:s01",
        "issue_code": "business_direct_answer_missing",
        "root_cause_layers": ["answer_surface"],
        "expected_business_answer_contract": "direct_answer_surface_v1",
        "evidence_paths": ["artifacts/domain_runs/pack/steps/s01/output.md"],
        "allowed_patch_targets": ["llm_normalizer/backend/src/services/address_runtime/composeStage.ts"],
        "forbidden_patch_targets": ["routing rewrites", "fake evidence", "global runtime rewrite"],
        "rerun_matrix": ["failed_scenario", "direct_answer_surface_pack", "accepted_smoke_pack"],
    }
    repair_targets = {"targets": [target]}
    assigned_focus = {
        "focus_id": "answer_shape|composeStage",
        "issue_codes": ["business_direct_answer_missing"],
        "root_cause_layers": ["answer_surface"],
        "allowed_patch_targets": ["llm_normalizer/backend/src/services/address_runtime/composeStage.ts"],
        "forbidden_patch_targets": ["routing rewrites", "fake evidence", "global runtime rewrite"],
        "rerun_matrix": ["failed_scenario", "direct_answer_surface_pack", "accepted_smoke_pack"],
        "target_ids": ["pack:s01"],
    }
    failing_detector_results = {
        "summary": {"status": "fail", "detector_count": 1, "pass": 0, "fail": 1, "review": 0, "skipped": 0},
        "results": [{"detector": "first_line_not_direct_answer", "status": "fail"}],
    }

    gate = dcl.evaluate_auto_coder_gate(
        repair_targets,
        assigned_focus,
        detector_results=failing_detector_results,
    )

    self.assertTrue(gate["allowed"])
    self.assertEqual(gate["detector_results_summary"]["failed_detectors"], ["first_line_not_direct_answer"])
|
||||
|
||||
def test_auto_coder_gate_blocks_broad_or_blind_patch_scope(self) -> None:
|
||||
repair_targets = {
|
||||
"targets": [
|
||||
|
|
@ -302,6 +393,7 @@ class DomainCaseLoopLeadHandoffTests(unittest.TestCase):
|
|||
"business_audit": "artifacts/domain_runs/demo/business_audit.md",
|
||||
"analyst_verdict": "artifacts/domain_runs/demo/analyst_verdict.json",
|
||||
"repair_targets": "artifacts/domain_runs/demo/repair_targets.json",
|
||||
"detector_results": "artifacts/domain_runs/demo/detector_results.json",
|
||||
"auto_coder_gate": "artifacts/domain_runs/demo/auto_coder_gate.json",
|
||||
"pack_dir": "artifacts/domain_runs/demo/pack",
|
||||
},
|
||||
|
|
@ -311,6 +403,16 @@ class DomainCaseLoopLeadHandoffTests(unittest.TestCase):
|
|||
"top_repair_targets": [],
|
||||
"candidate_files": [],
|
||||
"lead_instructions": [],
|
||||
"detector_results_summary": {
|
||||
"status": "fail",
|
||||
"pass": 0,
|
||||
"fail": 1,
|
||||
"review": 0,
|
||||
"skipped": 0,
|
||||
"failed_detectors": ["first_line_not_direct_answer"],
|
||||
"review_detectors": [],
|
||||
"skipped_detectors": [],
|
||||
},
|
||||
"auto_coder_gate": {
|
||||
"allowed": False,
|
||||
"reason": "target_missing_evidence_paths:pack:s01",
|
||||
|
|
@ -331,6 +433,8 @@ class DomainCaseLoopLeadHandoffTests(unittest.TestCase):
|
|||
markdown = dcl.build_lead_coder_handoff_markdown(handoff)
|
||||
|
||||
self.assertIn("## Auto-Coder Gate", markdown)
|
||||
self.assertIn("## Detector Results", markdown)
|
||||
self.assertIn("first_line_not_direct_answer", markdown)
|
||||
self.assertIn("target_missing_evidence_paths:pack:s01", markdown)
|
||||
self.assertIn("## Auto-Coder Catalog Contracts", markdown)
|
||||
self.assertIn("direct_answer_surface_v1", markdown)
|
||||
|
|
|
|||
Loading…
Reference in New Issue