# NODEDC_1C/scripts/test_agent_reliability_cont...
# 349 lines, 15 KiB, Python

from __future__ import annotations
import json
import sys
import tempfile
import unittest
from pathlib import Path
sys.path.insert(0, str(Path(__file__).resolve().parent))
import agent_reliability_contract_healthcheck as health
class AgentReliabilityContractHealthcheckTests(unittest.TestCase):
    """Tests for the ``agent_reliability_contract_healthcheck`` module.

    The first test calls ``health.build_healthcheck()`` with no arguments
    (presumably against the repository's own contract files -- confirm in the
    healthcheck module).  Every other test writes a small JSON fixture into a
    temporary directory, runs one ``health.check_*`` function on it, and
    asserts that the expected failure code is reported.
    """

    # Narrow, single-file patch target reused by the issue-catalog fixtures.
    _COMPOSE_STAGE_TARGET = (
        "llm_normalizer/backend/src/services/address_runtime/composeStage.ts"
    )

    @staticmethod
    def _write_json(path: Path, payload: dict) -> Path:
        """Serialize *payload* as JSON into *path* (UTF-8) and return *path*."""
        path.write_text(json.dumps(payload), encoding="utf-8")
        return path

    @classmethod
    def _issue_catalog(cls, direct_answer_issue: dict) -> dict:
        """Return an issue-catalog payload built around *direct_answer_issue*.

        *direct_answer_issue* becomes the ``business_direct_answer_missing``
        entry and is the entry each catalog test varies.  The two remaining
        entries are fixed: each declares an ``expected_answer_contract`` and a
        single-file ``allowed_patch_targets`` list.
        """
        return {
            "schema_version": "agent_issue_catalog_v1",
            "issues": {
                "business_direct_answer_missing": direct_answer_issue,
                "business_next_step_missing": {
                    "severity": "P2",
                    "business_meaning": "Next step is missing.",
                    "root_layers": ["answer_surface"],
                    "expected_answer_contract": "limited_answer_next_action_v1",
                    "detectors": ["limited_answer_without_next_action"],
                    "allowed_patch_targets": [cls._COMPOSE_STAGE_TARGET],
                    "forbidden_patch_targets": ["route masking"],
                    "rerun_matrix": ["failed_scenario", "accepted_smoke_pack"],
                },
                "technical_garbage_in_answer": {
                    "severity": "P0",
                    "business_meaning": "Debug text leaked.",
                    "root_layers": ["answer_surface"],
                    "expected_answer_contract": "technical_garbage_free_answer_v1",
                    "detectors": ["runtime_tokens_in_user_answer"],
                    "allowed_patch_targets": [cls._COMPOSE_STAGE_TARGET],
                    "forbidden_patch_targets": ["route masking"],
                    "rerun_matrix": ["failed_scenario", "accepted_smoke_pack"],
                },
            },
        }

    def test_repo_contract_healthcheck_passes(self) -> None:
        """The default healthcheck passes and lists the known contract and pack."""
        result = health.build_healthcheck()
        self.assertEqual(result["status"], "pass")
        self.assertEqual(result["failures"], [])
        self.assertIn("margin_profitability_v1", result["answer_contracts"]["contract_ids"])
        self.assertIn(
            "agent_margin_profitability_reliability_20260524",
            result["domain_scenario_packs"]["pack_ids"],
        )

    def test_issue_catalog_healthcheck_blocks_auto_coder_issue_without_contract(self) -> None:
        """An issue entry lacking ``expected_answer_contract`` is reported."""
        with tempfile.TemporaryDirectory() as tmp:
            catalog_path = self._write_json(
                Path(tmp) / "issue_catalog.json",
                self._issue_catalog(
                    {
                        "severity": "P0",
                        "business_meaning": "Direct answer is missing.",
                        "root_layers": ["answer_surface"],
                        # "expected_answer_contract" is deliberately absent:
                        # that omission is the condition under test.
                        "detectors": ["first_line_not_direct_answer"],
                        "allowed_patch_targets": [self._COMPOSE_STAGE_TARGET],
                        "forbidden_patch_targets": ["routing rewrites"],
                        "rerun_matrix": ["failed_scenario", "accepted_smoke_pack"],
                    }
                ),
            )
            _, failures, _ = health.check_issue_catalog(catalog_path)
            self.assertIn(
                "auto_coder_issue_missing_answer_contract:business_direct_answer_missing",
                failures,
            )

    def test_issue_catalog_healthcheck_blocks_broad_auto_coder_patch_scope(self) -> None:
        """A directory-wide ``allowed_patch_targets`` entry is reported as too broad."""
        with tempfile.TemporaryDirectory() as tmp:
            catalog_path = self._write_json(
                Path(tmp) / "issue_catalog.json",
                self._issue_catalog(
                    {
                        "severity": "P0",
                        "business_meaning": "Direct answer is missing.",
                        "root_layers": ["answer_surface"],
                        "expected_answer_contract": "direct_answer_surface_v1",
                        "detectors": ["first_line_not_direct_answer"],
                        # A whole directory rather than a single file: the
                        # condition under test.
                        "allowed_patch_targets": ["llm_normalizer/backend/src/services/"],
                        "forbidden_patch_targets": ["routing rewrites"],
                        "rerun_matrix": ["failed_scenario", "accepted_smoke_pack"],
                    }
                ),
            )
            _, failures, _ = health.check_issue_catalog(catalog_path)
            self.assertIn(
                "auto_coder_issue_broad_allowed_patch_target:business_direct_answer_missing:llm_normalizer/backend/src/services/",
                failures,
            )

    def test_detector_registry_blocks_missing_catalog_detector(self) -> None:
        """A detector named by the catalog but absent from the registry is reported."""
        with tempfile.TemporaryDirectory() as tmp:
            registry_path = self._write_json(
                Path(tmp) / "detector_registry.json",
                {
                    "schema_version": "agent_detector_registry_v1",
                    "detectors": {
                        "known_detector": {
                            "kind": "answer_text_shape",
                            "automation_level": "semi_automatic",
                            "description": "Known detector.",
                            "issue_codes": ["business_direct_answer_missing"],
                            "inputs": ["output.md"],
                            "check": {"first_line_should_be": "business_answer"},
                        }
                    },
                },
            )
            # The catalog references a detector the registry does not define.
            issue_catalog = {
                "schema_version": "agent_issue_catalog_v1",
                "issues": {
                    "business_direct_answer_missing": {
                        "detectors": ["missing_detector"],
                    }
                },
            }
            _, failures, _ = health.check_detector_registry(
                registry_path,
                issue_catalog,
                include_contracts=False,
            )
            self.assertIn(
                "detector_registry_missing_catalog_detector:business_direct_answer_missing:missing_detector",
                failures,
            )

    def test_detector_registry_blocks_unknown_issue_link(self) -> None:
        """A detector linked to an unknown issue code yields both link failures."""
        with tempfile.TemporaryDirectory() as tmp:
            registry_path = self._write_json(
                Path(tmp) / "detector_registry.json",
                {
                    "schema_version": "agent_detector_registry_v1",
                    "detectors": {
                        "first_line_not_direct_answer": {
                            "kind": "answer_text_shape",
                            "automation_level": "semi_automatic",
                            "description": "Direct answer detector.",
                            # Not a key of the catalog passed below.
                            "issue_codes": ["unknown_issue_code"],
                            "inputs": ["output.md"],
                            "check": {"first_line_should_be": "business_answer"},
                        }
                    },
                },
            )
            issue_catalog = {
                "schema_version": "agent_issue_catalog_v1",
                "issues": {
                    "business_direct_answer_missing": {
                        "detectors": ["first_line_not_direct_answer"],
                    }
                },
            }
            _, failures, _ = health.check_detector_registry(
                registry_path,
                issue_catalog,
                include_contracts=False,
            )
            # The registry points at an issue code the catalog does not have...
            self.assertIn(
                "detector_registry_unknown_issue_code:first_line_not_direct_answer:unknown_issue_code",
                failures,
            )
            # ...and does not link back to the issue that uses the detector.
            self.assertIn(
                "detector_registry_missing_issue_link:first_line_not_direct_answer:business_direct_answer_missing",
                failures,
            )

    def test_answer_contract_healthcheck_blocks_missing_required_fields(self) -> None:
        """A contract with an empty ``required_fields`` list is reported by file path."""
        with tempfile.TemporaryDirectory() as tmp:
            contracts_dir = Path(tmp) / "contracts"
            contracts_dir.mkdir()
            self._write_json(
                contracts_dir / "demo_contract.json",
                {
                    "schema_version": "business_answer_contract_v1",
                    "contract_id": "demo_contract",
                    "domain": "demo",
                    "answer_surface": {"required_fields": []},
                    "detectors": ["demo_detector"],
                },
            )
            _, failures, _, contract_ids = health.check_answer_contracts(contracts_dir)
            self.assertEqual(contract_ids, {"demo_contract"})
            # Normalise separators before comparing: a path rendered in native
            # form uses "\\" on Windows and "/" elsewhere, so a hard-coded
            # backslash suffix could only ever match on Windows.
            expected_suffix = "contracts/demo_contract.json"
            self.assertTrue(
                any(
                    failure.replace("\\", "/").endswith(expected_suffix)
                    for failure in failures
                ),
                failures,
            )

    def test_domain_scenario_pack_blocks_unknown_source_contract(self) -> None:
        """A pack and a step that name an unknown contract are both reported."""
        with tempfile.TemporaryDirectory() as tmp:
            orchestration_dir = Path(tmp)
            self._write_json(
                orchestration_dir / "demo_pack.json",
                {
                    "schema_version": "domain_scenario_pack_v1",
                    "pack_id": "demo_pack",
                    "domain": "demo",
                    "source_contract_id": "missing_contract",
                    "detectors_under_test": ["demo_detector"],
                    "acceptance": {"min_score": 80},
                    "scenarios": [
                        {
                            "scenario_id": "demo_scenario",
                            "steps": [
                                {
                                    "step_id": "step_01",
                                    "question": "Question?",
                                    "expected_business_answer_contract": "missing_contract",
                                }
                            ],
                        }
                    ],
                },
            )
            # "missing_contract" is not in the set of contract ids passed in.
            _, failures, _ = health.check_domain_scenario_packs(
                orchestration_dir,
                {"known_contract"},
                {"detectors": {"demo_detector": {}}},
            )
            self.assertTrue(
                any(
                    failure.startswith("domain_scenario_pack_unknown_source_contract:")
                    and failure.endswith("demo_pack.json:missing_contract")
                    for failure in failures
                ),
                failures,
            )
            self.assertTrue(
                any(
                    failure.startswith("domain_scenario_pack_step_unknown_contract:")
                    and failure.endswith("demo_pack.json:demo_scenario:step_01:missing_contract")
                    for failure in failures
                ),
                failures,
            )

    def test_domain_scenario_pack_blocks_wrong_domain_trap_without_forbidden_patterns(self) -> None:
        """A ``wrong_domain_trap`` step without forbidden patterns is reported."""
        with tempfile.TemporaryDirectory() as tmp:
            orchestration_dir = Path(tmp)
            self._write_json(
                orchestration_dir / "demo_pack.json",
                {
                    "schema_version": "domain_scenario_pack_v1",
                    "pack_id": "demo_pack",
                    "domain": "demo",
                    "source_contract_id": "demo_contract",
                    "detectors_under_test": ["demo_detector"],
                    "acceptance": {"min_score": 80},
                    "scenarios": [
                        {
                            "scenario_id": "demo_scenario",
                            "steps": [
                                {
                                    "step_id": "step_01",
                                    "question": "Question?",
                                    "expected_business_answer_contract": "demo_contract",
                                    # Tagged as a trap, but the step carries no
                                    # forbidden-pattern data: the condition
                                    # under test.
                                    "semantic_tags": ["wrong_domain_trap"],
                                }
                            ],
                        }
                    ],
                },
            )
            _, failures, _ = health.check_domain_scenario_packs(
                orchestration_dir,
                {"demo_contract"},
                {"detectors": {"demo_detector": {}}},
            )
            self.assertTrue(
                any(
                    failure.startswith("domain_scenario_pack_wrong_domain_trap_missing_forbidden_patterns:")
                    and failure.endswith("demo_pack.json:demo_scenario:step_01")
                    for failure in failures
                ),
                failures,
            )
if __name__ == "__main__":
    # Executed as a script: hand control to unittest's command-line runner.
    unittest.main()