# Listing metadata: 290 lines, 12 KiB, Python.
from __future__ import annotations
|
||
|
||
import json
|
||
import sys
|
||
import tempfile
|
||
import unittest
|
||
from pathlib import Path
|
||
|
||
|
||
sys.path.insert(0, str(Path(__file__).resolve().parent))
|
||
|
||
import domain_case_loop as dcl
|
||
|
||
|
||
class DomainCaseLoopLeadHandoffTests(unittest.TestCase):
    """Tests for the lead-handoff helpers exposed by ``domain_case_loop``.

    The cases cover repair-mode normalisation, building and saving a lead
    coder handoff, and merging analyst priority targets into the repair
    targets payload (including suppression driven by per-step validation
    state).
    """

    @staticmethod
    def _blank_repair_targets() -> dict:
        """Return a fresh repair-targets payload with no targets and zeroed counts."""
        return {
            "pack_id": "demo_pack",
            "domain": "demo",
            "target_count": 0,
            "severity_counts": {"P0": 0, "P1": 0, "P2": 0},
            "priority_foci": [],
            "targets": [],
        }

    def test_normalize_repair_mode_defaults_to_lead_handoff(self) -> None:
        """Missing and legacy mode names normalise to their hyphenated form."""
        expectations = (
            (None, "lead-handoff"),
            ("lead_codex", "lead-handoff"),
            ("auto_coder", "auto-coder"),
        )
        for raw_mode, normalized in expectations:
            self.assertEqual(dcl.normalize_repair_mode(raw_mode), normalized)

    def test_lead_handoff_captures_business_audit_and_primary_focus(self) -> None:
        """A saved handoff records mode, status, primary focus and the audit ref."""
        primary_focus = {
            "focus_id": "answer_shape",
            "severity": "P0",
            "issue_code": "business_direct_answer_missing",
            "summary": "Direct answer is buried below service scaffolding.",
            "candidate_files": [
                "llm_normalizer/backend/src/services/address_runtime/composeStage.ts"
            ],
        }
        repair_targets = {
            "target_count": 1,
            "severity_counts": {"P0": 1},
            "priority_foci": [primary_focus],
            "targets": [
                {
                    "severity": "P0",
                    "issue_code": "business_direct_answer_missing",
                    "step_id": "q01",
                }
            ],
        }
        analyst_verdict = {
            "quality_score": 42,
            "loop_decision": "partial",
            "user_intent_summary": "User asked for a direct business answer.",
            "expected_direct_answer": "Direct first-line answer.",
            "actual_direct_answer": "Scaffolded long answer.",
            "root_cause_layers": ["answer_shape_mismatch"],
        }

        with tempfile.TemporaryDirectory() as scratch:
            workspace = Path(scratch)
            pack_dir = workspace / "pack"
            loop_dir = workspace / "loop"
            iteration_dir = loop_dir / "iterations" / "iteration_00"

            handoff_kwargs = {
                "loop_state": {"loop_id": "demo"},
                "iteration_id": "iteration_00",
                "pack_dir": pack_dir,
                "analyst_verdict_path": iteration_dir / "analyst_verdict.json",
                "repair_targets_path": pack_dir / "repair_targets.json",
                "business_audit_path": iteration_dir / "business_audit.md",
                "analyst_verdict": analyst_verdict,
                "repair_targets": repair_targets,
                "target_score": 88,
                "loop_decision": "partial",
                "analyst_accepted_gate": False,
                "accepted_gate": False,
                "deterministic_gate_ok": False,
                "deterministic_gate_reason": "repair_targets_remaining=P0:1",
                "requires_user_decision": False,
                "user_decision_type": "none",
                "user_decision_prompt": None,
            }
            handoff = dcl.build_lead_coder_handoff(**handoff_kwargs)
            paths = dcl.save_lead_coder_handoff(
                loop_dir=loop_dir,
                iteration_dir=iteration_dir,
                handoff=handoff,
            )

            # Capture everything that lives on disk before the temporary
            # directory is removed on leaving the ``with`` block.
            saved_file = iteration_dir / "lead_coder_handoff.json"
            saved = json.loads(saved_file.read_text(encoding="utf-8"))
            latest_handoff_exists = Path(paths["latest_lead_coder_handoff_path"]).exists()

        self.assertEqual(saved["repair_mode"], "lead-handoff")
        self.assertEqual(saved["status"], "lead_coder_repair_required")
        self.assertEqual(saved["assigned_primary_focus"]["focus_id"], "answer_shape")
        self.assertIn("business_audit", saved["artifact_refs"])
        self.assertTrue(latest_handoff_exists)

    def test_analyst_priority_targets_become_lead_repair_targets(self) -> None:
        """Analyst priority targets are merged and surface in the handoff."""
        svk_target = {
            "scenario_id": "svk_pivot",
            "step_id": "s03_summary",
            "severity": "P0",
            "problem_type": "bundle_reuse_gap",
            "fix_goal": "Reuse the confirmed SVK value-flow bundle in the final summary.",
        }
        biz_target = {
            "scenario_id": "biz_scope",
            "step_id": "s02_money",
            "severity": "P1",
            "problem_type": "field_mapping_gap",
            "fix_goal": "Separate cash source/recipient labels from client/supplier labels.",
        }
        analyst_verdict = {"priority_targets": [svk_target, biz_target]}
        repair_targets = self._blank_repair_targets()

        merged = dcl.merge_analyst_priority_repair_targets(repair_targets, analyst_verdict)
        handoff_kwargs = {
            "loop_state": {"loop_id": "demo"},
            "iteration_id": "iteration_00",
            "pack_dir": Path("pack"),
            "analyst_verdict_path": Path("analyst_verdict.json"),
            "repair_targets_path": Path("semantic_repair_targets.json"),
            "business_audit_path": Path("business_audit.md"),
            "analyst_verdict": {"quality_score": 73, "loop_decision": "continue"},
            "repair_targets": merged,
            "target_score": 88,
            "loop_decision": "continue",
            "analyst_accepted_gate": False,
            "accepted_gate": False,
            "deterministic_gate_ok": True,
            "deterministic_gate_reason": "deterministic_gate_passed",
            "requires_user_decision": False,
            "user_decision_type": "none",
            "user_decision_prompt": None,
        }
        handoff = dcl.build_lead_coder_handoff(**handoff_kwargs)

        self.assertEqual(merged["target_count"], 2)
        self.assertEqual(merged["severity_counts"]["P0"], 1)
        self.assertEqual(handoff["assigned_primary_focus"]["problem_type"], "bundle_reuse_gap")
        self.assertEqual(handoff["top_repair_targets"][0]["target_id"], "svk_pivot:s03_summary")
        self.assertIn(
            "llm_normalizer/backend/src/services/assistantMcpDiscoveryResponseCandidate.ts",
            handoff["candidate_files"],
        )

    def test_stale_analyst_validation_target_is_suppressed_by_step_state(self) -> None:
        """A target whose step is already validated is suppressed; the other stays."""
        validated_step = {
            "acceptance_status": "validated",
            "violated_invariants": [],
            "warnings": [],
            "runtime_factual_answer_validated": False,
            "guarded_insufficiency_validated": True,
        }
        repair_targets = {
            **self._blank_repair_targets(),
            "step_validation_index": {"legacy_canaries:s02_acc60": validated_step},
        }
        stale_target = {
            "scenario_id": "legacy_canaries",
            "step_id": "s02_acc60",
            "severity": "P0",
            "problem_type": "evidence_gap",
            "fix_goal": (
                "partial heuristic answer without runtime_factual_answer_validated "
                "or guarded_insufficiency_validated must not pass silently"
            ),
        }
        live_target = {
            "scenario_id": "biz_scope",
            "step_id": "s03_best_year",
            "severity": "P2",
            "problem_type": "presentation_gap",
            "fix_goal": "Clarify why this year leads without implying pure profit.",
        }
        analyst_verdict = {"priority_targets": [stale_target, live_target]}

        merged = dcl.merge_analyst_priority_repair_targets(repair_targets, analyst_verdict)

        self.assertEqual(merged["suppressed_analyst_priority_target_count"], 1)
        self.assertEqual(merged["target_count"], 1)
        self.assertEqual(merged["targets"][0]["target_id"], "biz_scope:s03_best_year")
        self.assertEqual(merged["severity_counts"]["P0"], 0)
        self.assertEqual(merged["severity_counts"]["P2"], 1)

    def test_bounded_mcp_evidence_gap_target_is_suppressed_by_step_state(self) -> None:
        """An evidence-gap target on a bounded-MCP-validated step is suppressed."""
        validated_step = {
            "acceptance_status": "validated",
            "violated_invariants": [],
            "warnings": [],
            "bounded_mcp_answer_validated": True,
            "mcp_discovery_response_applied": True,
            "mcp_discovery_response_candidate_status": "ready_for_guarded_use",
            "assistant_text_excerpt": (
                "Коротко: самый доходный год в доступном денежном контуре 1С — 2015. "
                "Важно: входящие уперлись в лимит выборки MCP; это проверенный срез, "
                "не чистая бухгалтерская прибыль."
            ),
        }
        repair_targets = {
            **self._blank_repair_targets(),
            "step_validation_index": {"biz_scope:s03_best_year": validated_step},
        }
        evidence_gap_target = {
            "scenario_id": "biz_scope",
            "step_id": "s03_best_year",
            "severity": "P0",
            "problem_type": "evidence_gap",
            "fix_goal": (
                "Убрать asserted winner-year как подтвержденный факт, пока yearly ranking "
                "не имеет exact validated compute; legacy metadata says unsupported/blocked."
            ),
        }
        analyst_verdict = {"priority_targets": [evidence_gap_target]}

        merged = dcl.merge_analyst_priority_repair_targets(repair_targets, analyst_verdict)

        self.assertEqual(merged["suppressed_analyst_priority_target_count"], 1)
        self.assertEqual(merged["target_count"], 0)
        self.assertEqual(merged["severity_counts"], {"P0": 0, "P1": 0, "P2": 0})

    def test_runtime_exact_followup_target_is_suppressed_when_focus_is_proven(self) -> None:
        """A follow-up target on a runtime-validated step with a focus object is suppressed."""
        validated_step = {
            "acceptance_status": "validated",
            "violated_invariants": [],
            "warnings": [],
            "runtime_factual_answer_validated": True,
            "assistant_text_excerpt": "Контрагент: Группа СВК. Найдено документов: 19.",
            "extracted_filters": {"counterparty": "Группа СВК"},
            "focus_object": {"label": "Группа СВК"},
        }
        repair_targets = {
            **self._blank_repair_targets(),
            "step_validation_index": {"svk_pivot:s02_svk_docs": validated_step},
        }
        followup_target = {
            "scenario_id": "svk_pivot",
            "step_id": "s02_svk_docs",
            "severity": "P1",
            "problem_type": "followup_action_resolution_gap",
            "fix_goal": (
                "Добавить pack-level validation на object-centric carryover: docs follow-up "
                "и bundle reuse должны быть явно проверены через stable counterparty/focus."
            ),
        }
        analyst_verdict = {"priority_targets": [followup_target]}

        merged = dcl.merge_analyst_priority_repair_targets(repair_targets, analyst_verdict)

        self.assertEqual(merged["suppressed_analyst_priority_target_count"], 1)
        self.assertEqual(merged["target_count"], 0)
        self.assertEqual(merged["severity_counts"], {"P0": 0, "P1": 0, "P2": 0})
|
||
|
||
|
||
# Run the suite when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|