# NODEDC_1C/scripts/test_agent_reliability_cont...
#
# 136 lines
# 6.7 KiB
# Python

from __future__ import annotations
import json
import sys
import tempfile
import unittest
from pathlib import Path
sys.path.insert(0, str(Path(__file__).resolve().parent))
import agent_reliability_contract_healthcheck as health
class AgentReliabilityContractHealthcheckTests(unittest.TestCase):
    """Tests for the ``agent_reliability_contract_healthcheck`` module.

    The first test calls ``health.build_healthcheck()`` and expects a report
    whose status is ``"pass"`` and whose failure list is empty.

    The other two tests write a small issue catalog to a temporary JSON file,
    change exactly one field of the ``business_direct_answer_missing`` entry,
    and expect ``health.check_issue_catalog()`` to list the matching failure
    code.
    """

    # Id of the catalog entry that the negative tests deliberately break.
    _BROKEN_ISSUE_ID = "business_direct_answer_missing"
    # Single-file patch target used by every entry of the baseline catalog.
    _COMPOSE_STAGE_TARGET = (
        "llm_normalizer/backend/src/services/address_runtime/composeStage.ts"
    )
    # Directory-level patch target used by the broad-scope test.
    _BROAD_TARGET = "llm_normalizer/backend/src/services/"

    @classmethod
    def _baseline_catalog(cls) -> dict:
        """Return a freshly built issue catalog shared by the negative tests.

        Every entry carries an ``expected_answer_contract`` and a single-file
        ``allowed_patch_targets`` list. A new object graph is built on each
        call, so a test may mutate the result without affecting other tests.

        Key order is significant: ``json.dumps`` keeps insertion order, and
        the order below is the one the serialized fixture is written in.
        """
        rerun_matrix = ("failed_scenario", "accepted_smoke_pack")
        return {
            "schema_version": "agent_issue_catalog_v1",
            "issues": {
                cls._BROKEN_ISSUE_ID: {
                    "severity": "P0",
                    "business_meaning": "Direct answer is missing.",
                    "root_layers": ["answer_surface"],
                    "expected_answer_contract": "direct_answer_surface_v1",
                    "detectors": ["first_line_not_direct_answer"],
                    "allowed_patch_targets": [cls._COMPOSE_STAGE_TARGET],
                    "forbidden_patch_targets": ["routing rewrites"],
                    "rerun_matrix": list(rerun_matrix),
                },
                "business_next_step_missing": {
                    "severity": "P2",
                    "business_meaning": "Next step is missing.",
                    "root_layers": ["answer_surface"],
                    "expected_answer_contract": "limited_answer_next_action_v1",
                    "detectors": ["limited_answer_without_next_action"],
                    "allowed_patch_targets": [cls._COMPOSE_STAGE_TARGET],
                    "forbidden_patch_targets": ["route masking"],
                    "rerun_matrix": list(rerun_matrix),
                },
                "technical_garbage_in_answer": {
                    "severity": "P0",
                    "business_meaning": "Debug text leaked.",
                    "root_layers": ["answer_surface"],
                    "expected_answer_contract": "technical_garbage_free_answer_v1",
                    "detectors": ["runtime_tokens_in_user_answer"],
                    "allowed_patch_targets": [cls._COMPOSE_STAGE_TARGET],
                    "forbidden_patch_targets": ["route masking"],
                    "rerun_matrix": list(rerun_matrix),
                },
            },
        }

    @staticmethod
    def _catalog_failures(catalog: dict):
        """Serialize *catalog* to a temporary file and return its failures.

        The catalog is written as UTF-8 JSON to ``issue_catalog.json`` inside
        a temporary directory and handed to ``health.check_issue_catalog()``.
        That call returns a 3-tuple; only its second element (the failures)
        is returned here.
        """
        with tempfile.TemporaryDirectory() as tmp:
            catalog_path = Path(tmp) / "issue_catalog.json"
            catalog_path.write_text(json.dumps(catalog), encoding="utf-8")
            # Run the check before the temporary directory is removed.
            _, failures, _ = health.check_issue_catalog(catalog_path)
        return failures

    def test_repo_contract_healthcheck_passes(self) -> None:
        """``build_healthcheck()`` reports status ``pass`` and no failures."""
        result = health.build_healthcheck()
        self.assertEqual(result["status"], "pass")
        self.assertEqual(result["failures"], [])

    def test_issue_catalog_healthcheck_blocks_auto_coder_issue_without_contract(
        self,
    ) -> None:
        """An entry without ``expected_answer_contract`` is reported."""
        catalog = self._baseline_catalog()
        # The only deviation from the baseline: drop the answer contract.
        del catalog["issues"][self._BROKEN_ISSUE_ID]["expected_answer_contract"]

        self.assertIn(
            "auto_coder_issue_missing_answer_contract:business_direct_answer_missing",
            self._catalog_failures(catalog),
        )

    def test_issue_catalog_healthcheck_blocks_broad_auto_coder_patch_scope(
        self,
    ) -> None:
        """A directory-level ``allowed_patch_targets`` entry is reported."""
        catalog = self._baseline_catalog()
        # The only deviation from the baseline: widen the patch scope from a
        # single file to a whole directory.
        catalog["issues"][self._BROKEN_ISSUE_ID]["allowed_patch_targets"] = [
            self._BROAD_TARGET
        ]

        self.assertIn(
            "auto_coder_issue_broad_allowed_patch_target:business_direct_answer_missing:llm_normalizer/backend/src/services/",
            self._catalog_failures(catalog),
        )
if __name__ == "__main__":
    # Allow running this file directly as a script; unittest discovers the
    # TestCase classes defined in this module and runs them.
    unittest.main()