222 lines
10 KiB
Python
222 lines
10 KiB
Python
from __future__ import annotations
|
|
|
|
import json
|
|
import sys
|
|
import tempfile
|
|
import unittest
|
|
from pathlib import Path
|
|
|
|
|
|
sys.path.insert(0, str(Path(__file__).resolve().parent))
|
|
|
|
import agent_reliability_contract_healthcheck as health
|
|
|
|
|
|
class AgentReliabilityContractHealthcheckTests(unittest.TestCase):
    """Exercise the healthcheck entry points against small JSON fixtures.

    Each fixture-based test writes its input into a temporary directory,
    runs one ``health.check_*`` function, and asserts that a specific failure
    code string is present in the returned failures collection.
    """

    def test_repo_contract_healthcheck_passes(self) -> None:
        """``build_healthcheck()`` with default arguments reports a clean pass."""
        result = health.build_healthcheck()

        self.assertEqual(result["status"], "pass")
        self.assertEqual(result["failures"], [])

    def test_issue_catalog_healthcheck_blocks_auto_coder_issue_without_contract(self) -> None:
        """An issue without ``expected_answer_contract`` is reported as a failure.

        Only ``business_direct_answer_missing`` omits the key; the other two
        issues in the fixture carry it.
        """
        with tempfile.TemporaryDirectory() as tmp:
            catalog_path = Path(tmp) / "issue_catalog.json"
            catalog_path.write_text(
                json.dumps(
                    {
                        "schema_version": "agent_issue_catalog_v1",
                        "issues": {
                            "business_direct_answer_missing": {
                                "severity": "P0",
                                "business_meaning": "Direct answer is missing.",
                                "root_layers": ["answer_surface"],
                                "detectors": ["first_line_not_direct_answer"],
                                "allowed_patch_targets": [
                                    "llm_normalizer/backend/src/services/address_runtime/composeStage.ts"
                                ],
                                "forbidden_patch_targets": ["routing rewrites"],
                                "rerun_matrix": ["failed_scenario", "accepted_smoke_pack"],
                            },
                            "business_next_step_missing": {
                                "severity": "P2",
                                "business_meaning": "Next step is missing.",
                                "root_layers": ["answer_surface"],
                                "expected_answer_contract": "limited_answer_next_action_v1",
                                "detectors": ["limited_answer_without_next_action"],
                                "allowed_patch_targets": [
                                    "llm_normalizer/backend/src/services/address_runtime/composeStage.ts"
                                ],
                                "forbidden_patch_targets": ["route masking"],
                                "rerun_matrix": ["failed_scenario", "accepted_smoke_pack"],
                            },
                            "technical_garbage_in_answer": {
                                "severity": "P0",
                                "business_meaning": "Debug text leaked.",
                                "root_layers": ["answer_surface"],
                                "expected_answer_contract": "technical_garbage_free_answer_v1",
                                "detectors": ["runtime_tokens_in_user_answer"],
                                "allowed_patch_targets": [
                                    "llm_normalizer/backend/src/services/address_runtime/composeStage.ts"
                                ],
                                "forbidden_patch_targets": ["route masking"],
                                "rerun_matrix": ["failed_scenario", "accepted_smoke_pack"],
                            },
                        },
                    }
                ),
                encoding="utf-8",
            )

            _, failures, _ = health.check_issue_catalog(catalog_path)

            self.assertIn(
                "auto_coder_issue_missing_answer_contract:business_direct_answer_missing",
                failures,
            )

    def test_issue_catalog_healthcheck_blocks_broad_auto_coder_patch_scope(self) -> None:
        """A directory-wide ``allowed_patch_targets`` entry is reported as a failure.

        ``business_direct_answer_missing`` lists the directory
        ``llm_normalizer/backend/src/services/`` instead of a single file; the
        other two issues in the fixture list one file each.
        """
        with tempfile.TemporaryDirectory() as tmp:
            catalog_path = Path(tmp) / "issue_catalog.json"
            catalog_path.write_text(
                json.dumps(
                    {
                        "schema_version": "agent_issue_catalog_v1",
                        "issues": {
                            "business_direct_answer_missing": {
                                "severity": "P0",
                                "business_meaning": "Direct answer is missing.",
                                "root_layers": ["answer_surface"],
                                "expected_answer_contract": "direct_answer_surface_v1",
                                "detectors": ["first_line_not_direct_answer"],
                                "allowed_patch_targets": ["llm_normalizer/backend/src/services/"],
                                "forbidden_patch_targets": ["routing rewrites"],
                                "rerun_matrix": ["failed_scenario", "accepted_smoke_pack"],
                            },
                            "business_next_step_missing": {
                                "severity": "P2",
                                "business_meaning": "Next step is missing.",
                                "root_layers": ["answer_surface"],
                                "expected_answer_contract": "limited_answer_next_action_v1",
                                "detectors": ["limited_answer_without_next_action"],
                                "allowed_patch_targets": [
                                    "llm_normalizer/backend/src/services/address_runtime/composeStage.ts"
                                ],
                                "forbidden_patch_targets": ["route masking"],
                                "rerun_matrix": ["failed_scenario", "accepted_smoke_pack"],
                            },
                            "technical_garbage_in_answer": {
                                "severity": "P0",
                                "business_meaning": "Debug text leaked.",
                                "root_layers": ["answer_surface"],
                                "expected_answer_contract": "technical_garbage_free_answer_v1",
                                "detectors": ["runtime_tokens_in_user_answer"],
                                "allowed_patch_targets": [
                                    "llm_normalizer/backend/src/services/address_runtime/composeStage.ts"
                                ],
                                "forbidden_patch_targets": ["route masking"],
                                "rerun_matrix": ["failed_scenario", "accepted_smoke_pack"],
                            },
                        },
                    }
                ),
                encoding="utf-8",
            )

            _, failures, _ = health.check_issue_catalog(catalog_path)

            self.assertIn(
                "auto_coder_issue_broad_allowed_patch_target:business_direct_answer_missing:llm_normalizer/backend/src/services/",
                failures,
            )

    def test_detector_registry_blocks_missing_catalog_detector(self) -> None:
        """A catalog detector absent from the registry is reported as a failure.

        The registry defines only ``known_detector`` while the catalog issue
        references ``missing_detector``.
        """
        with tempfile.TemporaryDirectory() as tmp:
            registry_path = Path(tmp) / "detector_registry.json"
            registry_path.write_text(
                json.dumps(
                    {
                        "schema_version": "agent_detector_registry_v1",
                        "detectors": {
                            "known_detector": {
                                "kind": "answer_text_shape",
                                "automation_level": "semi_automatic",
                                "description": "Known detector.",
                                "issue_codes": ["business_direct_answer_missing"],
                                "inputs": ["output.md"],
                                "check": {"first_line_should_be": "business_answer"},
                            }
                        },
                    }
                ),
                encoding="utf-8",
            )
            issue_catalog = {
                "schema_version": "agent_issue_catalog_v1",
                "issues": {
                    "business_direct_answer_missing": {
                        "detectors": ["missing_detector"],
                    }
                },
            }

            _, failures, _ = health.check_detector_registry(
                registry_path,
                issue_catalog,
                include_contracts=False,
            )

            self.assertIn(
                "detector_registry_missing_catalog_detector:business_direct_answer_missing:missing_detector",
                failures,
            )

    def test_detector_registry_blocks_unknown_issue_link(self) -> None:
        """A detector linked to an unknown issue code yields two failures.

        The registry detector ``first_line_not_direct_answer`` lists only
        ``unknown_issue_code``, while the catalog issue
        ``business_direct_answer_missing`` references that detector. The test
        expects both the unknown-issue-code failure and the missing-issue-link
        failure.
        """
        with tempfile.TemporaryDirectory() as tmp:
            registry_path = Path(tmp) / "detector_registry.json"
            registry_path.write_text(
                json.dumps(
                    {
                        "schema_version": "agent_detector_registry_v1",
                        "detectors": {
                            "first_line_not_direct_answer": {
                                "kind": "answer_text_shape",
                                "automation_level": "semi_automatic",
                                "description": "Direct answer detector.",
                                "issue_codes": ["unknown_issue_code"],
                                "inputs": ["output.md"],
                                "check": {"first_line_should_be": "business_answer"},
                            }
                        },
                    }
                ),
                encoding="utf-8",
            )
            issue_catalog = {
                "schema_version": "agent_issue_catalog_v1",
                "issues": {
                    "business_direct_answer_missing": {
                        "detectors": ["first_line_not_direct_answer"],
                    }
                },
            }

            _, failures, _ = health.check_detector_registry(
                registry_path,
                issue_catalog,
                include_contracts=False,
            )

            self.assertIn(
                "detector_registry_unknown_issue_code:first_line_not_direct_answer:unknown_issue_code",
                failures,
            )
            self.assertIn(
                "detector_registry_missing_issue_link:first_line_not_direct_answer:business_direct_answer_missing",
                failures,
            )


if __name__ == "__main__":
    # Allow running this test module directly as a script.
    unittest.main()