NODEDC_1C/scripts/test_domain_case_loop_lead_...

486 lines
22 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from __future__ import annotations
import json
import sys
import tempfile
import unittest
from pathlib import Path
sys.path.insert(0, str(Path(__file__).resolve().parent))
import domain_case_loop as dcl
class DomainCaseLoopLeadHandoffTests(unittest.TestCase):
    """Tests for the lead-handoff, business-audit and auto-coder gate helpers of ``dcl``."""

    @staticmethod
    def _empty_repair_targets(step_validation_index=None) -> dict:
        """Return a fresh repair-targets payload that contains no targets.

        Args:
            step_validation_index: Optional mapping that, when given, is stored
                under the ``"step_validation_index"`` key after the base keys.

        Returns:
            A new dict on every call, so tests never share mutable state.
        """
        payload = {
            "pack_id": "demo_pack",
            "domain": "demo",
            "target_count": 0,
            "severity_counts": {"P0": 0, "P1": 0, "P2": 0},
            "priority_foci": [],
            "targets": [],
        }
        if step_validation_index is not None:
            payload["step_validation_index"] = step_validation_index
        return payload

    def test_normalize_repair_mode_defaults_to_lead_handoff(self) -> None:
        """``None`` and ``"lead_codex"`` map to lead-handoff; ``"auto_coder"`` to auto-coder."""
        self.assertEqual(dcl.normalize_repair_mode(None), "lead-handoff")
        self.assertEqual(dcl.normalize_repair_mode("lead_codex"), "lead-handoff")
        self.assertEqual(dcl.normalize_repair_mode("auto_coder"), "auto-coder")

    def test_lead_handoff_captures_business_audit_and_primary_focus(self) -> None:
        """Build and save a lead-coder handoff, then check the persisted JSON payload."""
        with tempfile.TemporaryDirectory() as tmp:
            root = Path(tmp)
            pack_dir = root / "pack"
            iteration_dir = root / "loop" / "iterations" / "iteration_00"
            loop_dir = root / "loop"
            business_audit_path = iteration_dir / "business_audit.md"
            analyst_verdict_path = iteration_dir / "analyst_verdict.json"
            repair_targets_path = pack_dir / "repair_targets.json"
            repair_targets = {
                "target_count": 1,
                "severity_counts": {"P0": 1},
                "priority_foci": [
                    {
                        "focus_id": "answer_shape",
                        "severity": "P0",
                        "issue_code": "business_direct_answer_missing",
                        "summary": "Direct answer is buried below service scaffolding.",
                        "candidate_files": [
                            "llm_normalizer/backend/src/services/address_runtime/composeStage.ts"
                        ],
                    }
                ],
                "targets": [
                    {
                        "severity": "P0",
                        "issue_code": "business_direct_answer_missing",
                        "step_id": "q01",
                    }
                ],
            }
            analyst_verdict = {
                "quality_score": 42,
                "loop_decision": "partial",
                "user_intent_summary": "User asked for a direct business answer.",
                "expected_direct_answer": "Direct first-line answer.",
                "actual_direct_answer": "Scaffolded long answer.",
                "root_cause_layers": ["answer_shape_mismatch"],
            }
            handoff = dcl.build_lead_coder_handoff(
                loop_state={"loop_id": "demo"},
                iteration_id="iteration_00",
                pack_dir=pack_dir,
                analyst_verdict_path=analyst_verdict_path,
                repair_targets_path=repair_targets_path,
                business_audit_path=business_audit_path,
                business_audit_json_path=iteration_dir / "business_audit.json",
                issue_catalog_snapshot_path=iteration_dir / "issue_catalog_snapshot.json",
                rerun_matrix_path=iteration_dir / "rerun_matrix.json",
                detector_candidates_path=iteration_dir / "detector_candidates.json",
                analyst_verdict=analyst_verdict,
                repair_targets=repair_targets,
                target_score=88,
                loop_decision="partial",
                analyst_accepted_gate=False,
                accepted_gate=False,
                deterministic_gate_ok=False,
                deterministic_gate_reason="repair_targets_remaining=P0:1",
                requires_user_decision=False,
                user_decision_type="none",
                user_decision_prompt=None,
            )
            paths = dcl.save_lead_coder_handoff(
                loop_dir=loop_dir,
                iteration_dir=iteration_dir,
                handoff=handoff,
            )
            # Read everything that lives on disk before the temporary directory
            # is removed on leaving the ``with`` block.
            saved = json.loads((iteration_dir / "lead_coder_handoff.json").read_text(encoding="utf-8"))
            latest_handoff_exists = Path(paths["latest_lead_coder_handoff_path"]).exists()

        self.assertEqual(saved["repair_mode"], "lead-handoff")
        self.assertEqual(saved["status"], "lead_coder_repair_required")
        self.assertEqual(saved["assigned_primary_focus"]["focus_id"], "answer_shape")
        self.assertIn("business_audit", saved["artifact_refs"])
        self.assertIn("business_audit_json", saved["artifact_refs"])
        self.assertIn("issue_catalog_snapshot", saved["artifact_refs"])
        self.assertIn("business_direct_answer_missing", saved["issue_codes"])
        self.assertIn("failed_scenario", saved["rerun_matrix"])
        self.assertTrue(latest_handoff_exists)

    def test_business_audit_contract_exposes_repair_issue_contract(self) -> None:
        """The audit contract lists the blocking issue with its answer contract and rerun matrix."""
        repair_targets = {
            "target_count": 1,
            "severity_counts": {"P0": 1},
            "priority_foci": [],
            "targets": [
                {
                    "target_id": "margin_pack:s01",
                    "scenario_id": "margin_pack",
                    "step_id": "s01",
                    "severity": "P0",
                    "issue_code": "margin_domain_leak_accounting_route",
                    "question_resolved": "Which item had the best margin?",
                    "fix_goal": "Route the question to margin profitability instead of accounting noise.",
                    "evidence_paths": ["artifacts/domain_runs/margin_pack/steps/s01/output.md"],
                }
            ],
        }
        contract = dcl.build_business_audit_contract(
            analyst_verdict={
                "quality_score": 31,
                "loop_decision": "partial",
                "user_intent_summary": "User needs item margin ranking.",
                "expected_direct_answer": "Best item by gross margin.",
                "actual_direct_answer": "Accounting route answer.",
            },
            repair_targets=repair_targets,
            target_score=88,
            loop_decision="partial",
            analyst_accepted_gate=False,
            accepted_gate=False,
            deterministic_gate_ok=False,
            deterministic_gate_reason="P0 repair target remains",
            business_audit_markdown_path=Path("business_audit.md"),
            analyst_verdict_path=Path("analyst_verdict.json"),
            repair_targets_path=Path("repair_targets.json"),
            business_audit_json_path=Path("business_audit.json"),
            issue_catalog_snapshot_path=Path("issue_catalog_snapshot.json"),
            rerun_matrix_path=Path("rerun_matrix.json"),
            detector_candidates_path=Path("detector_candidates.json"),
        )
        first_blocking_issue = contract["blocking_issues"][0]
        self.assertEqual(contract["overall_status"], "partial")
        self.assertEqual(first_blocking_issue["issue_code"], "margin_domain_leak_accounting_route")
        self.assertEqual(first_blocking_issue["expected_business_answer_contract"], "margin_profitability_v1")
        self.assertIn("failed_margin_scenario", contract["rerun_matrix"])
        self.assertIn("detector_candidates_json", contract["artifact_refs"])

    def test_auto_coder_gate_blocks_non_allowlisted_issue_codes(self) -> None:
        """The gate refuses a focus whose issue code is reported as not allowlisted."""
        repair_targets = {
            "targets": [
                {
                    "target_id": "margin_pack:s01",
                    "issue_code": "margin_domain_leak_accounting_route",
                    "allowed_patch_targets": ["llm_normalizer/backend/src/services/addressIntentResolver.ts"],
                    "forbidden_patch_targets": ["global orchestration rewrite"],
                    "rerun_matrix": ["failed_margin_scenario", "accepted_smoke_pack"],
                }
            ],
        }
        assigned_focus = {
            "focus_id": "route|addressIntentResolver",
            "issue_codes": ["margin_domain_leak_accounting_route"],
            "root_cause_layers": ["intent", "route"],
            "allowed_patch_targets": ["llm_normalizer/backend/src/services/addressIntentResolver.ts"],
            "forbidden_patch_targets": ["global orchestration rewrite"],
            "rerun_matrix": ["failed_margin_scenario", "accepted_smoke_pack"],
            "target_ids": ["margin_pack:s01"],
        }
        gate = dcl.evaluate_auto_coder_gate(repair_targets, assigned_focus)
        self.assertFalse(gate["allowed"])
        self.assertIn("issue_code_not_allowlisted:margin_domain_leak_accounting_route", gate["blocking_reasons"])

    def test_auto_coder_gate_allows_complete_answer_surface_contract(self) -> None:
        """A fully specified answer-surface target with a single-file patch scope passes the gate."""
        repair_targets = {
            "targets": [
                {
                    "target_id": "pack:s01",
                    "issue_code": "business_direct_answer_missing",
                    "root_cause_layers": ["answer_surface"],
                    "expected_business_answer_contract": "direct_answer_surface_v1",
                    "evidence_paths": ["artifacts/domain_runs/pack/steps/s01/output.md"],
                    "allowed_patch_targets": ["llm_normalizer/backend/src/services/address_runtime/composeStage.ts"],
                    "forbidden_patch_targets": ["routing rewrites", "fake evidence", "global runtime rewrite"],
                    "rerun_matrix": ["failed_scenario", "direct_answer_surface_pack", "accepted_smoke_pack"],
                }
            ],
        }
        assigned_focus = {
            "focus_id": "answer_shape|composeStage",
            "issue_codes": ["business_direct_answer_missing"],
            "root_cause_layers": ["answer_surface"],
            "allowed_patch_targets": ["llm_normalizer/backend/src/services/address_runtime/composeStage.ts"],
            "forbidden_patch_targets": ["routing rewrites", "fake evidence", "global runtime rewrite"],
            "rerun_matrix": ["failed_scenario", "direct_answer_surface_pack", "accepted_smoke_pack"],
            "target_ids": ["pack:s01"],
        }
        gate = dcl.evaluate_auto_coder_gate(repair_targets, assigned_focus)
        self.assertTrue(gate["allowed"])
        self.assertEqual(gate["reason"], "auto_coder_gate_passed")
        self.assertEqual(
            gate["issue_catalog_contracts"]["business_direct_answer_missing"]["expected_answer_contract"],
            "direct_answer_surface_v1",
        )

    def test_auto_coder_gate_blocks_broad_or_blind_patch_scope(self) -> None:
        """A directory-wide patch target and a target without evidence paths both block the gate."""
        repair_targets = {
            "targets": [
                {
                    "target_id": "pack:s01",
                    "issue_code": "business_direct_answer_missing",
                    "root_cause_layers": ["answer_surface"],
                    "expected_business_answer_contract": "direct_answer_surface_v1",
                    "allowed_patch_targets": ["llm_normalizer/backend/src/services/"],
                    "forbidden_patch_targets": ["routing rewrites", "fake evidence", "global runtime rewrite"],
                    "rerun_matrix": ["failed_scenario", "direct_answer_surface_pack", "accepted_smoke_pack"],
                }
            ],
        }
        assigned_focus = {
            "focus_id": "answer_shape|services",
            "issue_codes": ["business_direct_answer_missing"],
            "root_cause_layers": ["answer_surface"],
            "allowed_patch_targets": ["llm_normalizer/backend/src/services/"],
            "forbidden_patch_targets": ["routing rewrites", "fake evidence", "global runtime rewrite"],
            "rerun_matrix": ["failed_scenario", "direct_answer_surface_pack", "accepted_smoke_pack"],
            "target_ids": ["pack:s01"],
        }
        gate = dcl.evaluate_auto_coder_gate(repair_targets, assigned_focus)
        self.assertFalse(gate["allowed"])
        self.assertIn("broad_allowed_patch_target:llm_normalizer/backend/src/services/", gate["blocking_reasons"])
        self.assertIn("target_missing_evidence_paths:pack:s01", gate["blocking_reasons"])

    def test_auto_coder_gate_blocks_catalog_issue_without_answer_contract(self) -> None:
        """With a stubbed catalog entry lacking an answer contract, the gate must block."""
        # Local import: the module header imports ``unittest`` only, which does
        # not guarantee that the ``unittest.mock`` submodule is loaded.
        from unittest import mock

        def catalog_without_answer_contract() -> dict:
            # Build a new dict on each call so no state leaks between calls.
            return {
                "schema_version": "agent_issue_catalog_v1",
                "issues": {
                    "business_direct_answer_missing": {
                        "severity": "P0",
                        "root_layers": ["answer_surface"],
                        "allowed_patch_targets": ["llm_normalizer/backend/src/services/address_runtime/composeStage.ts"],
                        "forbidden_patch_targets": ["routing rewrites"],
                        "rerun_matrix": ["failed_scenario", "accepted_smoke_pack"],
                    }
                },
            }

        repair_targets = {
            "targets": [
                {
                    "target_id": "pack:s01",
                    "issue_code": "business_direct_answer_missing",
                    "root_cause_layers": ["answer_surface"],
                    "expected_business_answer_contract": "direct_answer_surface_v1",
                    "evidence_paths": ["artifacts/domain_runs/pack/steps/s01/output.md"],
                    "allowed_patch_targets": ["llm_normalizer/backend/src/services/address_runtime/composeStage.ts"],
                    "forbidden_patch_targets": ["routing rewrites"],
                    "rerun_matrix": ["failed_scenario", "accepted_smoke_pack"],
                }
            ],
        }
        assigned_focus = {
            "focus_id": "answer_shape|composeStage",
            "issue_codes": ["business_direct_answer_missing"],
            "root_cause_layers": ["answer_surface"],
            "allowed_patch_targets": ["llm_normalizer/backend/src/services/address_runtime/composeStage.ts"],
            "forbidden_patch_targets": ["routing rewrites"],
            "rerun_matrix": ["failed_scenario", "accepted_smoke_pack"],
            "target_ids": ["pack:s01"],
        }
        # ``patch.object`` restores the real loader on exit, even if the gate raises.
        with mock.patch.object(dcl, "load_issue_catalog", new=catalog_without_answer_contract):
            gate = dcl.evaluate_auto_coder_gate(repair_targets, assigned_focus)

        self.assertFalse(gate["allowed"])
        self.assertIn(
            "catalog_missing_expected_answer_contract:business_direct_answer_missing",
            gate["blocking_reasons"],
        )

    def test_analyst_priority_targets_become_lead_repair_targets(self) -> None:
        """Analyst priority targets are merged into repair targets and drive the handoff focus."""
        repair_targets = self._empty_repair_targets()
        analyst_verdict = {
            "priority_targets": [
                {
                    "scenario_id": "svk_pivot",
                    "step_id": "s03_summary",
                    "severity": "P0",
                    "problem_type": "bundle_reuse_gap",
                    "fix_goal": "Reuse the confirmed SVK value-flow bundle in the final summary.",
                },
                {
                    "scenario_id": "biz_scope",
                    "step_id": "s02_money",
                    "severity": "P1",
                    "problem_type": "field_mapping_gap",
                    "fix_goal": "Separate cash source/recipient labels from client/supplier labels.",
                },
            ]
        }
        merged = dcl.merge_analyst_priority_repair_targets(repair_targets, analyst_verdict)
        handoff = dcl.build_lead_coder_handoff(
            loop_state={"loop_id": "demo"},
            iteration_id="iteration_00",
            pack_dir=Path("pack"),
            analyst_verdict_path=Path("analyst_verdict.json"),
            repair_targets_path=Path("semantic_repair_targets.json"),
            business_audit_path=Path("business_audit.md"),
            analyst_verdict={"quality_score": 73, "loop_decision": "continue"},
            repair_targets=merged,
            target_score=88,
            loop_decision="continue",
            analyst_accepted_gate=False,
            accepted_gate=False,
            deterministic_gate_ok=True,
            deterministic_gate_reason="deterministic_gate_passed",
            requires_user_decision=False,
            user_decision_type="none",
            user_decision_prompt=None,
        )
        self.assertEqual(merged["target_count"], 2)
        self.assertEqual(merged["severity_counts"]["P0"], 1)
        self.assertEqual(handoff["assigned_primary_focus"]["problem_type"], "bundle_reuse_gap")
        self.assertEqual(handoff["top_repair_targets"][0]["target_id"], "svk_pivot:s03_summary")
        self.assertIn(
            "llm_normalizer/backend/src/services/assistantMcpDiscoveryResponseCandidate.ts",
            handoff["candidate_files"],
        )

    def test_stale_analyst_validation_target_is_suppressed_by_step_state(self) -> None:
        """A P0 analyst target for an already validated step is suppressed; the other is kept."""
        repair_targets = self._empty_repair_targets(
            {
                "legacy_canaries:s02_acc60": {
                    "acceptance_status": "validated",
                    "violated_invariants": [],
                    "warnings": [],
                    "runtime_factual_answer_validated": False,
                    "guarded_insufficiency_validated": True,
                }
            }
        )
        analyst_verdict = {
            "priority_targets": [
                {
                    "scenario_id": "legacy_canaries",
                    "step_id": "s02_acc60",
                    "severity": "P0",
                    "problem_type": "evidence_gap",
                    "fix_goal": (
                        "partial heuristic answer without runtime_factual_answer_validated "
                        "or guarded_insufficiency_validated must not pass silently"
                    ),
                },
                {
                    "scenario_id": "biz_scope",
                    "step_id": "s03_best_year",
                    "severity": "P2",
                    "problem_type": "presentation_gap",
                    "fix_goal": "Clarify why this year leads without implying pure profit.",
                },
            ]
        }
        merged = dcl.merge_analyst_priority_repair_targets(repair_targets, analyst_verdict)
        self.assertEqual(merged["suppressed_analyst_priority_target_count"], 1)
        self.assertEqual(merged["target_count"], 1)
        self.assertEqual(merged["targets"][0]["target_id"], "biz_scope:s03_best_year")
        self.assertEqual(merged["severity_counts"]["P0"], 0)
        self.assertEqual(merged["severity_counts"]["P2"], 1)

    def test_bounded_mcp_evidence_gap_target_is_suppressed_by_step_state(self) -> None:
        """An evidence-gap target is suppressed when the step carries a validated bounded MCP answer."""
        repair_targets = self._empty_repair_targets(
            {
                "biz_scope:s03_best_year": {
                    "acceptance_status": "validated",
                    "violated_invariants": [],
                    "warnings": [],
                    "bounded_mcp_answer_validated": True,
                    "mcp_discovery_response_applied": True,
                    "mcp_discovery_response_candidate_status": "ready_for_guarded_use",
                    "assistant_text_excerpt": (
                        "Коротко: самый доходный год в доступном денежном контуре 1С — 2015. "
                        "Важно: входящие уперлись в лимит выборки MCP; это проверенный срез, "
                        "не чистая бухгалтерская прибыль."
                    ),
                }
            }
        )
        analyst_verdict = {
            "priority_targets": [
                {
                    "scenario_id": "biz_scope",
                    "step_id": "s03_best_year",
                    "severity": "P0",
                    "problem_type": "evidence_gap",
                    "fix_goal": (
                        "Убрать asserted winner-year как подтвержденный факт, пока yearly ranking "
                        "не имеет exact validated compute; legacy metadata says unsupported/blocked."
                    ),
                }
            ]
        }
        merged = dcl.merge_analyst_priority_repair_targets(repair_targets, analyst_verdict)
        self.assertEqual(merged["suppressed_analyst_priority_target_count"], 1)
        self.assertEqual(merged["target_count"], 0)
        self.assertEqual(merged["severity_counts"], {"P0": 0, "P1": 0, "P2": 0})

    def test_runtime_exact_followup_target_is_suppressed_when_focus_is_proven(self) -> None:
        """A follow-up resolution target is suppressed when the validated step already has its focus."""
        repair_targets = self._empty_repair_targets(
            {
                "svk_pivot:s02_svk_docs": {
                    "acceptance_status": "validated",
                    "violated_invariants": [],
                    "warnings": [],
                    "runtime_factual_answer_validated": True,
                    "assistant_text_excerpt": "Контрагент: Группа СВК. Найдено документов: 19.",
                    "extracted_filters": {"counterparty": "Группа СВК"},
                    "focus_object": {"label": "Группа СВК"},
                }
            }
        )
        analyst_verdict = {
            "priority_targets": [
                {
                    "scenario_id": "svk_pivot",
                    "step_id": "s02_svk_docs",
                    "severity": "P1",
                    "problem_type": "followup_action_resolution_gap",
                    "fix_goal": (
                        "Добавить pack-level validation на object-centric carryover: docs follow-up "
                        "и bundle reuse должны быть явно проверены через stable counterparty/focus."
                    ),
                }
            ]
        }
        merged = dcl.merge_analyst_priority_repair_targets(repair_targets, analyst_verdict)
        self.assertEqual(merged["suppressed_analyst_priority_target_count"], 1)
        self.assertEqual(merged["target_count"], 0)
        self.assertEqual(merged["severity_counts"], {"P0": 0, "P1": 0, "P2": 0})
# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()