136 lines
6.7 KiB
Python
136 lines
6.7 KiB
Python
from __future__ import annotations
|
|
|
|
import json
|
|
import sys
|
|
import tempfile
|
|
import unittest
|
|
from pathlib import Path
|
|
|
|
|
|
# Put the directory containing this file at the front of sys.path.
# NOTE(review): presumably this lets the healthcheck module imported below be
# found next to this test file when the tests run from another working
# directory — confirm the module lives alongside this file.
sys.path.insert(0, str(Path(__file__).resolve().parent))
|
|
|
|
import agent_reliability_contract_healthcheck as health
|
|
|
|
|
|
class AgentReliabilityContractHealthcheckTests(unittest.TestCase):
    """Tests for the repo healthcheck and the issue-catalog checks.

    The catalog tests write a three-issue catalog into a temporary directory
    and assert that ``health.check_issue_catalog`` lists a specific failure
    code for the ``business_direct_answer_missing`` entry.
    """

    @staticmethod
    def _write_catalog(directory: Path, direct_answer_issue: dict) -> Path:
        """Write a catalog JSON file into *directory* and return its path.

        The catalog always holds the same ``business_next_step_missing`` and
        ``technical_garbage_in_answer`` entries; only the
        ``business_direct_answer_missing`` entry is supplied by the caller.
        """
        next_step_issue = {
            "severity": "P2",
            "business_meaning": "Next step is missing.",
            "root_layers": ["answer_surface"],
            "expected_answer_contract": "limited_answer_next_action_v1",
            "detectors": ["limited_answer_without_next_action"],
            "allowed_patch_targets": [
                "llm_normalizer/backend/src/services/address_runtime/composeStage.ts"
            ],
            "forbidden_patch_targets": ["route masking"],
            "rerun_matrix": ["failed_scenario", "accepted_smoke_pack"],
        }
        technical_garbage_issue = {
            "severity": "P0",
            "business_meaning": "Debug text leaked.",
            "root_layers": ["answer_surface"],
            "expected_answer_contract": "technical_garbage_free_answer_v1",
            "detectors": ["runtime_tokens_in_user_answer"],
            "allowed_patch_targets": [
                "llm_normalizer/backend/src/services/address_runtime/composeStage.ts"
            ],
            "forbidden_patch_targets": ["route masking"],
            "rerun_matrix": ["failed_scenario", "accepted_smoke_pack"],
        }
        # Insertion order is kept deliberately so the serialized JSON lists
        # the issues in a fixed order, caller-supplied entry first.
        catalog = {
            "schema_version": "agent_issue_catalog_v1",
            "issues": {
                "business_direct_answer_missing": direct_answer_issue,
                "business_next_step_missing": next_step_issue,
                "technical_garbage_in_answer": technical_garbage_issue,
            },
        }
        destination = directory / "issue_catalog.json"
        destination.write_text(json.dumps(catalog), encoding="utf-8")
        return destination

    def test_repo_contract_healthcheck_passes(self) -> None:
        """The healthcheck built by ``health.build_healthcheck`` passes with no failures."""
        report = health.build_healthcheck()

        self.assertEqual(report["status"], "pass")
        self.assertEqual(report["failures"], [])

    def test_issue_catalog_healthcheck_blocks_auto_coder_issue_without_contract(self) -> None:
        """An entry without ``expected_answer_contract`` yields the missing-contract failure."""
        # This entry intentionally omits "expected_answer_contract".
        issue_without_contract = {
            "severity": "P0",
            "business_meaning": "Direct answer is missing.",
            "root_layers": ["answer_surface"],
            "detectors": ["first_line_not_direct_answer"],
            "allowed_patch_targets": [
                "llm_normalizer/backend/src/services/address_runtime/composeStage.ts"
            ],
            "forbidden_patch_targets": ["routing rewrites"],
            "rerun_matrix": ["failed_scenario", "accepted_smoke_pack"],
        }
        expected_failure = (
            "auto_coder_issue_missing_answer_contract:business_direct_answer_missing"
        )

        with tempfile.TemporaryDirectory() as scratch:
            catalog_file = self._write_catalog(Path(scratch), issue_without_contract)

            # The catalog must be checked while the temporary directory exists.
            _, failures, _ = health.check_issue_catalog(catalog_file)

            self.assertIn(expected_failure, failures)

    def test_issue_catalog_healthcheck_blocks_broad_auto_coder_patch_scope(self) -> None:
        """A directory-level allowed patch target yields the broad-patch-target failure."""
        # This entry's only allowed patch target is a directory, not a file.
        issue_with_broad_target = {
            "severity": "P0",
            "business_meaning": "Direct answer is missing.",
            "root_layers": ["answer_surface"],
            "expected_answer_contract": "direct_answer_surface_v1",
            "detectors": ["first_line_not_direct_answer"],
            "allowed_patch_targets": ["llm_normalizer/backend/src/services/"],
            "forbidden_patch_targets": ["routing rewrites"],
            "rerun_matrix": ["failed_scenario", "accepted_smoke_pack"],
        }
        expected_failure = (
            "auto_coder_issue_broad_allowed_patch_target:business_direct_answer_missing:llm_normalizer/backend/src/services/"
        )

        with tempfile.TemporaryDirectory() as scratch:
            catalog_file = self._write_catalog(Path(scratch), issue_with_broad_target)

            # The catalog must be checked while the temporary directory exists.
            _, failures, _ = health.check_issue_catalog(catalog_file)

            self.assertIn(expected_failure, failures)
|
|
|
|
|
|
if __name__ == "__main__":
    # Run the test cases in this module when the file is executed directly.
    unittest.main()
|