# Listing metadata: 637 lines, 30 KiB, Python.
from __future__ import annotations

import json
import sys
import tempfile
import unittest
from pathlib import Path
from unittest import mock

sys.path.insert(0, str(Path(__file__).resolve().parent))

import domain_case_loop as dcl


class DomainCaseLoopLeadHandoffTests(unittest.TestCase):
    """Tests for ``domain_case_loop`` (imported as ``dcl``).

    Exercises repair-mode normalization, lead-coder handoff construction and
    persistence, the business-audit contract, the auto-coder gate, handoff
    markdown rendering, and merging of analyst priority targets into repair
    targets.
    """

    # Shared values for the "answer surface" gate fixtures. Tuples keep the
    # class-level defaults immutable; the helper copies them into fresh lists.
    _COMPOSE_STAGE_PATH = "llm_normalizer/backend/src/services/address_runtime/composeStage.ts"
    _ANSWER_SURFACE_FORBIDDEN = ("routing rewrites", "fake evidence", "global runtime rewrite")
    _ANSWER_SURFACE_RERUN = ("failed_scenario", "direct_answer_surface_pack", "accepted_smoke_pack")

    @classmethod
    def _answer_surface_fixture(
        cls,
        *,
        focus_id: str = "answer_shape|composeStage",
        allowed_patch_targets: list[str] | None = None,
        forbidden_patch_targets: list[str] | None = None,
        rerun_matrix: list[str] | None = None,
        include_evidence: bool = True,
    ) -> tuple[dict[str, object], dict[str, object]]:
        """Build the ``(repair_targets, assigned_focus)`` pair used by the gate tests.

        Both dictionaries describe a single ``business_direct_answer_missing``
        target with id ``pack:s01``. Every call returns fresh dict and list
        objects, so tests cannot leak state into each other.

        Args:
            focus_id: Value stored under ``assigned_focus["focus_id"]``.
            allowed_patch_targets: Overrides the default single composeStage path.
            forbidden_patch_targets: Overrides the default forbidden-target list.
            rerun_matrix: Overrides the default rerun matrix.
            include_evidence: When false, the target carries no
                ``evidence_paths`` key at all.

        Returns:
            A ``(repair_targets, assigned_focus)`` tuple.
        """
        allowed = [cls._COMPOSE_STAGE_PATH] if allowed_patch_targets is None else allowed_patch_targets
        forbidden = cls._ANSWER_SURFACE_FORBIDDEN if forbidden_patch_targets is None else forbidden_patch_targets
        rerun = cls._ANSWER_SURFACE_RERUN if rerun_matrix is None else rerun_matrix

        target: dict[str, object] = {
            "target_id": "pack:s01",
            "issue_code": "business_direct_answer_missing",
            "root_cause_layers": ["answer_surface"],
            "expected_business_answer_contract": "direct_answer_surface_v1",
        }
        if include_evidence:
            target["evidence_paths"] = ["artifacts/domain_runs/pack/steps/s01/output.md"]
        # Keyword order is preserved, so the key order matches the inline
        # literals these fixtures replace.
        target.update(
            allowed_patch_targets=list(allowed),
            forbidden_patch_targets=list(forbidden),
            rerun_matrix=list(rerun),
        )

        assigned_focus: dict[str, object] = {
            "focus_id": focus_id,
            "issue_codes": ["business_direct_answer_missing"],
            "root_cause_layers": ["answer_surface"],
            "allowed_patch_targets": list(allowed),
            "forbidden_patch_targets": list(forbidden),
            "rerun_matrix": list(rerun),
            "target_ids": ["pack:s01"],
        }
        return {"targets": [target]}, assigned_focus

    def test_normalize_repair_mode_defaults_to_lead_handoff(self) -> None:
        """``None`` and ``"lead_codex"`` map to lead-handoff; ``"auto_coder"`` to auto-coder."""
        self.assertEqual(dcl.normalize_repair_mode(None), "lead-handoff")
        self.assertEqual(dcl.normalize_repair_mode("lead_codex"), "lead-handoff")
        self.assertEqual(dcl.normalize_repair_mode("auto_coder"), "auto-coder")

    def test_lead_handoff_captures_business_audit_and_primary_focus(self) -> None:
        """A built and saved handoff round-trips through JSON with the expected fields."""
        with tempfile.TemporaryDirectory() as tmp:
            root = Path(tmp)
            pack_dir = root / "pack"
            iteration_dir = root / "loop" / "iterations" / "iteration_00"
            loop_dir = root / "loop"
            business_audit_path = iteration_dir / "business_audit.md"
            analyst_verdict_path = iteration_dir / "analyst_verdict.json"
            repair_targets_path = pack_dir / "repair_targets.json"
            repair_targets = {
                "target_count": 1,
                "severity_counts": {"P0": 1},
                "priority_foci": [
                    {
                        "focus_id": "answer_shape",
                        "severity": "P0",
                        "issue_code": "business_direct_answer_missing",
                        "summary": "Direct answer is buried below service scaffolding.",
                        "candidate_files": [
                            "llm_normalizer/backend/src/services/address_runtime/composeStage.ts"
                        ],
                    }
                ],
                "targets": [
                    {
                        "severity": "P0",
                        "issue_code": "business_direct_answer_missing",
                        "step_id": "q01",
                    }
                ],
            }
            analyst_verdict = {
                "quality_score": 42,
                "loop_decision": "partial",
                "user_intent_summary": "User asked for a direct business answer.",
                "expected_direct_answer": "Direct first-line answer.",
                "actual_direct_answer": "Scaffolded long answer.",
                "root_cause_layers": ["answer_shape_mismatch"],
            }

            handoff = dcl.build_lead_coder_handoff(
                loop_state={"loop_id": "demo"},
                iteration_id="iteration_00",
                pack_dir=pack_dir,
                analyst_verdict_path=analyst_verdict_path,
                repair_targets_path=repair_targets_path,
                business_audit_path=business_audit_path,
                business_audit_json_path=iteration_dir / "business_audit.json",
                issue_catalog_snapshot_path=iteration_dir / "issue_catalog_snapshot.json",
                rerun_matrix_path=iteration_dir / "rerun_matrix.json",
                detector_candidates_path=iteration_dir / "detector_candidates.json",
                detector_results_path=iteration_dir / "detector_results.json",
                detector_results={
                    "summary": {"status": "fail", "detector_count": 1, "pass": 0, "fail": 1, "review": 0, "skipped": 0},
                    "results": [{"detector": "first_line_not_direct_answer", "status": "fail"}],
                },
                analyst_verdict=analyst_verdict,
                repair_targets=repair_targets,
                target_score=88,
                loop_decision="partial",
                analyst_accepted_gate=False,
                accepted_gate=False,
                deterministic_gate_ok=False,
                deterministic_gate_reason="repair_targets_remaining=P0:1",
                requires_user_decision=False,
                user_decision_type="none",
                user_decision_prompt=None,
            )
            paths = dcl.save_lead_coder_handoff(
                loop_dir=loop_dir,
                iteration_dir=iteration_dir,
                handoff=handoff,
            )

            # Everything that touches the disk is captured before the
            # temporary directory is removed.
            saved = json.loads((iteration_dir / "lead_coder_handoff.json").read_text(encoding="utf-8"))
            latest_handoff_exists = Path(paths["latest_lead_coder_handoff_path"]).exists()

        self.assertEqual(saved["repair_mode"], "lead-handoff")
        self.assertEqual(saved["status"], "lead_coder_repair_required")
        self.assertEqual(saved["assigned_primary_focus"]["focus_id"], "answer_shape")
        self.assertIn("business_audit", saved["artifact_refs"])
        self.assertIn("business_audit_json", saved["artifact_refs"])
        self.assertIn("issue_catalog_snapshot", saved["artifact_refs"])
        self.assertIn("detector_results", saved["artifact_refs"])
        self.assertEqual(saved["detector_results_summary"]["status"], "fail")
        self.assertEqual(saved["detector_results_summary"]["failed_detectors"], ["first_line_not_direct_answer"])
        self.assertIn("business_direct_answer_missing", saved["issue_codes"])
        self.assertIn("failed_scenario", saved["rerun_matrix"])
        self.assertTrue(latest_handoff_exists)

    def test_business_audit_contract_exposes_repair_issue_contract(self) -> None:
        """The business-audit contract surfaces blocking issues, rerun matrix and detector summary."""
        repair_targets = {
            "target_count": 1,
            "severity_counts": {"P0": 1},
            "priority_foci": [],
            "targets": [
                {
                    "target_id": "margin_pack:s01",
                    "scenario_id": "margin_pack",
                    "step_id": "s01",
                    "severity": "P0",
                    "issue_code": "margin_domain_leak_accounting_route",
                    "question_resolved": "Which item had the best margin?",
                    "fix_goal": "Route the question to margin profitability instead of accounting noise.",
                    "evidence_paths": ["artifacts/domain_runs/margin_pack/steps/s01/output.md"],
                }
            ],
        }
        contract = dcl.build_business_audit_contract(
            analyst_verdict={
                "quality_score": 31,
                "loop_decision": "partial",
                "user_intent_summary": "User needs item margin ranking.",
                "expected_direct_answer": "Best item by gross margin.",
                "actual_direct_answer": "Accounting route answer.",
            },
            repair_targets=repair_targets,
            target_score=88,
            loop_decision="partial",
            analyst_accepted_gate=False,
            accepted_gate=False,
            deterministic_gate_ok=False,
            deterministic_gate_reason="P0 repair target remains",
            business_audit_markdown_path=Path("business_audit.md"),
            analyst_verdict_path=Path("analyst_verdict.json"),
            repair_targets_path=Path("repair_targets.json"),
            business_audit_json_path=Path("business_audit.json"),
            issue_catalog_snapshot_path=Path("issue_catalog_snapshot.json"),
            rerun_matrix_path=Path("rerun_matrix.json"),
            detector_candidates_path=Path("detector_candidates.json"),
            detector_results_path=Path("detector_results.json"),
            detector_results={
                "summary": {"status": "review", "detector_count": 1, "pass": 0, "fail": 0, "review": 1, "skipped": 0},
                "results": [{"detector": "missing_revenue_cogs_margin_fields", "status": "review"}],
            },
        )

        self.assertEqual(contract["overall_status"], "partial")
        self.assertEqual(contract["blocking_issues"][0]["issue_code"], "margin_domain_leak_accounting_route")
        self.assertEqual(contract["blocking_issues"][0]["expected_business_answer_contract"], "margin_profitability_v1")
        self.assertIn("failed_margin_scenario", contract["rerun_matrix"])
        self.assertIn("detector_candidates_json", contract["artifact_refs"])
        self.assertIn("detector_results_json", contract["artifact_refs"])
        self.assertEqual(contract["detector_results_summary"]["status"], "review")
        self.assertEqual(contract["detector_results_summary"]["review_detectors"], ["missing_revenue_cogs_margin_fields"])

    def test_auto_coder_gate_blocks_non_allowlisted_issue_codes(self) -> None:
        """The gate rejects ``margin_domain_leak_accounting_route`` as not allowlisted."""
        repair_targets = {
            "targets": [
                {
                    "target_id": "margin_pack:s01",
                    "issue_code": "margin_domain_leak_accounting_route",
                    "allowed_patch_targets": ["llm_normalizer/backend/src/services/addressIntentResolver.ts"],
                    "forbidden_patch_targets": ["global orchestration rewrite"],
                    "rerun_matrix": ["failed_margin_scenario", "accepted_smoke_pack"],
                }
            ],
        }
        assigned_focus = {
            "focus_id": "route|addressIntentResolver",
            "issue_codes": ["margin_domain_leak_accounting_route"],
            "root_cause_layers": ["intent", "route"],
            "allowed_patch_targets": ["llm_normalizer/backend/src/services/addressIntentResolver.ts"],
            "forbidden_patch_targets": ["global orchestration rewrite"],
            "rerun_matrix": ["failed_margin_scenario", "accepted_smoke_pack"],
            "target_ids": ["margin_pack:s01"],
        }

        gate = dcl.evaluate_auto_coder_gate(repair_targets, assigned_focus)

        self.assertFalse(gate["allowed"])
        self.assertIn("issue_code_not_allowlisted:margin_domain_leak_accounting_route", gate["blocking_reasons"])

    def test_auto_coder_gate_allows_complete_answer_surface_contract(self) -> None:
        """A fully specified answer-surface target passes the gate."""
        repair_targets, assigned_focus = self._answer_surface_fixture()

        gate = dcl.evaluate_auto_coder_gate(repair_targets, assigned_focus)

        self.assertTrue(gate["allowed"])
        self.assertEqual(gate["reason"], "auto_coder_gate_passed")
        self.assertEqual(
            gate["issue_catalog_contracts"]["business_direct_answer_missing"]["expected_answer_contract"],
            "direct_answer_surface_v1",
        )

    def test_auto_coder_gate_blocks_when_detector_results_have_no_repair_signal(self) -> None:
        """Detector results that all pass block the gate with a no-repair-signal reason."""
        repair_targets, assigned_focus = self._answer_surface_fixture()

        gate = dcl.evaluate_auto_coder_gate(
            repair_targets,
            assigned_focus,
            detector_results={
                "summary": {"status": "pass", "detector_count": 1, "pass": 1, "fail": 0, "review": 0, "skipped": 0},
                "results": [{"detector": "first_line_not_direct_answer", "status": "pass"}],
            },
        )

        self.assertFalse(gate["allowed"])
        self.assertEqual(gate["detector_results_summary"]["status"], "pass")
        self.assertIn("detector_results_no_repair_signal:pass", gate["blocking_reasons"])

    def test_auto_coder_gate_allows_when_detector_results_confirm_failure(self) -> None:
        """Failing detector results keep the gate open and are listed in its summary."""
        repair_targets, assigned_focus = self._answer_surface_fixture()

        gate = dcl.evaluate_auto_coder_gate(
            repair_targets,
            assigned_focus,
            detector_results={
                "summary": {"status": "fail", "detector_count": 1, "pass": 0, "fail": 1, "review": 0, "skipped": 0},
                "results": [{"detector": "first_line_not_direct_answer", "status": "fail"}],
            },
        )

        self.assertTrue(gate["allowed"])
        self.assertEqual(gate["detector_results_summary"]["failed_detectors"], ["first_line_not_direct_answer"])

    def test_auto_coder_gate_blocks_broad_or_blind_patch_scope(self) -> None:
        """A directory-wide patch target and a target without evidence paths both block."""
        repair_targets, assigned_focus = self._answer_surface_fixture(
            focus_id="answer_shape|services",
            allowed_patch_targets=["llm_normalizer/backend/src/services/"],
            include_evidence=False,
        )

        gate = dcl.evaluate_auto_coder_gate(repair_targets, assigned_focus)

        self.assertFalse(gate["allowed"])
        self.assertIn("broad_allowed_patch_target:llm_normalizer/backend/src/services/", gate["blocking_reasons"])
        self.assertIn("target_missing_evidence_paths:pack:s01", gate["blocking_reasons"])

    def test_auto_coder_gate_blocks_catalog_issue_without_answer_contract(self) -> None:
        """A catalog entry lacking an expected answer contract blocks the gate."""

        def stub_catalog() -> dict[str, object]:
            # Returns a fresh catalog on every call; the issue entry has no
            # expected-answer-contract field.
            return {
                "schema_version": "agent_issue_catalog_v1",
                "issues": {
                    "business_direct_answer_missing": {
                        "severity": "P0",
                        "root_layers": ["answer_surface"],
                        "allowed_patch_targets": ["llm_normalizer/backend/src/services/address_runtime/composeStage.ts"],
                        "forbidden_patch_targets": ["routing rewrites"],
                        "rerun_matrix": ["failed_scenario", "accepted_smoke_pack"],
                    }
                },
            }

        repair_targets, assigned_focus = self._answer_surface_fixture(
            forbidden_patch_targets=["routing rewrites"],
            rerun_matrix=["failed_scenario", "accepted_smoke_pack"],
        )

        # patch.object restores the real loader on exit, even if the gate raises.
        with mock.patch.object(dcl, "load_issue_catalog", new=stub_catalog):
            gate = dcl.evaluate_auto_coder_gate(repair_targets, assigned_focus)

        self.assertFalse(gate["allowed"])
        self.assertIn(
            "catalog_missing_expected_answer_contract:business_direct_answer_missing",
            gate["blocking_reasons"],
        )

    def test_lead_handoff_markdown_surfaces_auto_coder_gate_blockers(self) -> None:
        """The rendered markdown includes gate, detector and catalog-contract sections."""
        handoff = {
            "repair_mode": "lead-handoff",
            "loop_id": "demo_loop",
            "iteration_id": "iteration_00",
            "quality_score": 42,
            "target_score": 88,
            "loop_decision": "partial",
            "deterministic_gate_ok": False,
            "deterministic_gate_reason": "repair_targets_remaining=P0:1",
            "artifact_refs": {
                "business_audit": "artifacts/domain_runs/demo/business_audit.md",
                "analyst_verdict": "artifacts/domain_runs/demo/analyst_verdict.json",
                "repair_targets": "artifacts/domain_runs/demo/repair_targets.json",
                "detector_results": "artifacts/domain_runs/demo/detector_results.json",
                "auto_coder_gate": "artifacts/domain_runs/demo/auto_coder_gate.json",
                "pack_dir": "artifacts/domain_runs/demo/pack",
            },
            "issue_codes": ["business_direct_answer_missing"],
            "rerun_matrix": ["failed_scenario", "accepted_smoke_pack"],
            "human_meaning": {"user_intent_summary": "User needs a direct answer."},
            "top_repair_targets": [],
            "candidate_files": [],
            "lead_instructions": [],
            "detector_results_summary": {
                "status": "fail",
                "pass": 0,
                "fail": 1,
                "review": 0,
                "skipped": 0,
                "failed_detectors": ["first_line_not_direct_answer"],
                "review_detectors": [],
                "skipped_detectors": [],
            },
            "auto_coder_gate": {
                "allowed": False,
                "reason": "target_missing_evidence_paths:pack:s01",
                "focus_id": "answer_shape|composeStage",
                "blocking_reasons": ["target_missing_evidence_paths:pack:s01"],
                "issue_catalog_contracts": {
                    "business_direct_answer_missing": {
                        "expected_answer_contract": "direct_answer_surface_v1",
                        "root_layers": ["answer_surface"],
                        "allowed_patch_targets": ["llm_normalizer/backend/src/services/address_runtime/composeStage.ts"],
                        "forbidden_patch_targets": ["routing rewrites"],
                        "rerun_matrix": ["failed_scenario", "accepted_smoke_pack"],
                    }
                },
            },
        }

        markdown = dcl.build_lead_coder_handoff_markdown(handoff)

        self.assertIn("## Auto-Coder Gate", markdown)
        self.assertIn("## Detector Results", markdown)
        self.assertIn("first_line_not_direct_answer", markdown)
        self.assertIn("target_missing_evidence_paths:pack:s01", markdown)
        self.assertIn("## Auto-Coder Catalog Contracts", markdown)
        self.assertIn("direct_answer_surface_v1", markdown)

    def test_analyst_priority_targets_become_lead_repair_targets(self) -> None:
        """Analyst priority targets are merged into repair targets and drive the handoff."""
        repair_targets = {
            "pack_id": "demo_pack",
            "domain": "demo",
            "target_count": 0,
            "severity_counts": {"P0": 0, "P1": 0, "P2": 0},
            "priority_foci": [],
            "targets": [],
        }
        analyst_verdict = {
            "priority_targets": [
                {
                    "scenario_id": "svk_pivot",
                    "step_id": "s03_summary",
                    "severity": "P0",
                    "problem_type": "bundle_reuse_gap",
                    "fix_goal": "Reuse the confirmed SVK value-flow bundle in the final summary.",
                },
                {
                    "scenario_id": "biz_scope",
                    "step_id": "s02_money",
                    "severity": "P1",
                    "problem_type": "field_mapping_gap",
                    "fix_goal": "Separate cash source/recipient labels from client/supplier labels.",
                },
            ]
        }

        merged = dcl.merge_analyst_priority_repair_targets(repair_targets, analyst_verdict)
        handoff = dcl.build_lead_coder_handoff(
            loop_state={"loop_id": "demo"},
            iteration_id="iteration_00",
            pack_dir=Path("pack"),
            analyst_verdict_path=Path("analyst_verdict.json"),
            repair_targets_path=Path("semantic_repair_targets.json"),
            business_audit_path=Path("business_audit.md"),
            analyst_verdict={"quality_score": 73, "loop_decision": "continue"},
            repair_targets=merged,
            target_score=88,
            loop_decision="continue",
            analyst_accepted_gate=False,
            accepted_gate=False,
            deterministic_gate_ok=True,
            deterministic_gate_reason="deterministic_gate_passed",
            requires_user_decision=False,
            user_decision_type="none",
            user_decision_prompt=None,
        )

        self.assertEqual(merged["target_count"], 2)
        self.assertEqual(merged["severity_counts"]["P0"], 1)
        self.assertEqual(handoff["assigned_primary_focus"]["problem_type"], "bundle_reuse_gap")
        self.assertEqual(handoff["top_repair_targets"][0]["target_id"], "svk_pivot:s03_summary")
        self.assertIn(
            "llm_normalizer/backend/src/services/assistantMcpDiscoveryResponseCandidate.ts",
            handoff["candidate_files"],
        )

    def test_stale_analyst_validation_target_is_suppressed_by_step_state(self) -> None:
        """An analyst target for a step already marked validated is suppressed; the other is kept."""
        repair_targets = {
            "pack_id": "demo_pack",
            "domain": "demo",
            "target_count": 0,
            "severity_counts": {"P0": 0, "P1": 0, "P2": 0},
            "priority_foci": [],
            "targets": [],
            "step_validation_index": {
                "legacy_canaries:s02_acc60": {
                    "acceptance_status": "validated",
                    "violated_invariants": [],
                    "warnings": [],
                    "runtime_factual_answer_validated": False,
                    "guarded_insufficiency_validated": True,
                }
            },
        }
        analyst_verdict = {
            "priority_targets": [
                {
                    "scenario_id": "legacy_canaries",
                    "step_id": "s02_acc60",
                    "severity": "P0",
                    "problem_type": "evidence_gap",
                    "fix_goal": (
                        "partial heuristic answer without runtime_factual_answer_validated "
                        "or guarded_insufficiency_validated must not pass silently"
                    ),
                },
                {
                    "scenario_id": "biz_scope",
                    "step_id": "s03_best_year",
                    "severity": "P2",
                    "problem_type": "presentation_gap",
                    "fix_goal": "Clarify why this year leads without implying pure profit.",
                },
            ]
        }

        merged = dcl.merge_analyst_priority_repair_targets(repair_targets, analyst_verdict)

        self.assertEqual(merged["suppressed_analyst_priority_target_count"], 1)
        self.assertEqual(merged["target_count"], 1)
        self.assertEqual(merged["targets"][0]["target_id"], "biz_scope:s03_best_year")
        self.assertEqual(merged["severity_counts"]["P0"], 0)
        self.assertEqual(merged["severity_counts"]["P2"], 1)

    def test_bounded_mcp_evidence_gap_target_is_suppressed_by_step_state(self) -> None:
        """An evidence-gap target is suppressed when the step has a validated bounded MCP answer."""
        repair_targets = {
            "pack_id": "demo_pack",
            "domain": "demo",
            "target_count": 0,
            "severity_counts": {"P0": 0, "P1": 0, "P2": 0},
            "priority_foci": [],
            "targets": [],
            "step_validation_index": {
                "biz_scope:s03_best_year": {
                    "acceptance_status": "validated",
                    "violated_invariants": [],
                    "warnings": [],
                    "bounded_mcp_answer_validated": True,
                    "mcp_discovery_response_applied": True,
                    "mcp_discovery_response_candidate_status": "ready_for_guarded_use",
                    "assistant_text_excerpt": (
                        "Коротко: самый доходный год в доступном денежном контуре 1С — 2015. "
                        "Важно: входящие уперлись в лимит выборки MCP; это проверенный срез, "
                        "не чистая бухгалтерская прибыль."
                    ),
                }
            },
        }
        analyst_verdict = {
            "priority_targets": [
                {
                    "scenario_id": "biz_scope",
                    "step_id": "s03_best_year",
                    "severity": "P0",
                    "problem_type": "evidence_gap",
                    "fix_goal": (
                        "Убрать asserted winner-year как подтвержденный факт, пока yearly ranking "
                        "не имеет exact validated compute; legacy metadata says unsupported/blocked."
                    ),
                }
            ]
        }

        merged = dcl.merge_analyst_priority_repair_targets(repair_targets, analyst_verdict)

        self.assertEqual(merged["suppressed_analyst_priority_target_count"], 1)
        self.assertEqual(merged["target_count"], 0)
        self.assertEqual(merged["severity_counts"], {"P0": 0, "P1": 0, "P2": 0})

    def test_runtime_exact_followup_target_is_suppressed_when_focus_is_proven(self) -> None:
        """A follow-up resolution target is suppressed when the step state already shows the focus object."""
        repair_targets = {
            "pack_id": "demo_pack",
            "domain": "demo",
            "target_count": 0,
            "severity_counts": {"P0": 0, "P1": 0, "P2": 0},
            "priority_foci": [],
            "targets": [],
            "step_validation_index": {
                "svk_pivot:s02_svk_docs": {
                    "acceptance_status": "validated",
                    "violated_invariants": [],
                    "warnings": [],
                    "runtime_factual_answer_validated": True,
                    "assistant_text_excerpt": "Контрагент: Группа СВК. Найдено документов: 19.",
                    "extracted_filters": {"counterparty": "Группа СВК"},
                    "focus_object": {"label": "Группа СВК"},
                }
            },
        }
        analyst_verdict = {
            "priority_targets": [
                {
                    "scenario_id": "svk_pivot",
                    "step_id": "s02_svk_docs",
                    "severity": "P1",
                    "problem_type": "followup_action_resolution_gap",
                    "fix_goal": (
                        "Добавить pack-level validation на object-centric carryover: docs follow-up "
                        "и bundle reuse должны быть явно проверены через stable counterparty/focus."
                    ),
                }
            ]
        }

        merged = dcl.merge_analyst_priority_repair_targets(repair_targets, analyst_verdict)

        self.assertEqual(merged["suppressed_analyst_priority_target_count"], 1)
        self.assertEqual(merged["target_count"], 0)
        self.assertEqual(merged["severity_counts"], {"P0": 0, "P1": 0, "P2": 0})


# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()