NODEDC_1C/scripts/test_scenario_acceptance_po...

273 lines
12 KiB
Python

from __future__ import annotations
import sys
import unittest
from pathlib import Path
# Put this file's own directory at the front of sys.path before importing the
# module under test, so the import does not depend on the working directory
# the tests are launched from.
# NOTE(review): presumably scenario_acceptance_policy.py sits next to this
# file — confirm.
sys.path.insert(0, str(Path(__file__).resolve().parent))
import scenario_acceptance_policy as sap
class ScenarioAcceptancePolicyTests(unittest.TestCase):
    """Acceptance-policy checks driven by hand-built single-step scenarios.

    Every test assembles a one-step scenario spec, the recorded output for
    that step and a review summary, feeds them to
    ``sap.build_scenario_acceptance_matrix`` and then to
    ``sap.derive_truth_harness_pack_state``, and asserts on the resulting
    matrix rows and pack state.
    """

    @staticmethod
    def _one_step_summary(
        overall_status: str,
        *,
        passed: int = 0,
        warned: int = 0,
        failed: int = 0,
    ) -> dict[str, object]:
        """Return a ``live_strict_replay`` review summary covering one step."""
        return {
            "review_source": "live_strict_replay",
            "overall_status": overall_status,
            "steps_total": 1,
            "steps_passed": passed,
            "steps_with_warning": warned,
            "steps_failed": failed,
        }

    @staticmethod
    def _evaluate(spec, state, summary):
        """Build the acceptance matrix, then derive the pack state from it.

        Returns the pair ``(matrix, pack)``.
        """
        matrix = sap.build_scenario_acceptance_matrix(spec, state, summary)
        pack = sap.derive_truth_harness_pack_state(spec, state, summary, matrix)
        return matrix, pack

    def test_marks_partial_when_selected_object_and_temporal_p0_findings_exist(self) -> None:
        # A critical selected-object step whose recorded output carries an
        # as-of-date inventory intent plus two critical findings.
        step = {
            "step_id": "step_01",
            "title": "Selected object supplier",
            "question_template": 'По выбранному объекту "Стол": кто поставил?',
            "criticality": "critical",
            "expected_intents": ["inventory_purchase_provenance_for_item"],
            "semantic_tags": ["selected_object"],
        }
        spec = {
            "scenario_id": "demo_phase7",
            "domain": "inventory_demo",
            "title": "Demo",
            "steps": [step],
        }
        findings = [
            {"code": "wrong_intent", "severity": "critical"},
            {"code": "wrong_filter:as_of_date", "severity": "critical"},
        ]
        output = {
            "review_status": "fail",
            "reply_type": "factual",
            "detected_intent": "inventory_on_hand_as_of_date",
            "capability_id": "confirmed_inventory_on_hand_as_of_date",
            "review_findings": findings,
        }
        state = {
            "session_id": "asst-demo",
            "step_outputs": {"step_01": output},
        }
        summary = self._one_step_summary("fail", failed=1)

        _, pack = self._evaluate(spec, state, summary)

        self.assertEqual(pack["final_status"], "partial")
        invariants = pack["invariants"]
        self.assertFalse(invariants["selected_object_continuity_ok"])
        self.assertFalse(invariants["temporal_honesty_ok"])
        self.assertEqual(pack["unresolved_p0_count"], 2)
        self.assertTrue(invariants["meta_context_integrity_ok"])

    def test_accepts_when_all_review_and_acceptance_invariants_are_green(self) -> None:
        # A passing critical step with no findings; the MCP discovery fields
        # recorded on the step output are expected to show up on the matrix row.
        step = {
            "step_id": "step_01",
            "title": "Inventory root",
            "question_template": "какие остатки на складе на март 2021",
            "criticality": "critical",
            "expected_intents": ["inventory_on_hand_as_of_date"],
            "semantic_tags": ["inventory_root"],
        }
        spec = {
            "scenario_id": "demo_phase7_green",
            "domain": "inventory_demo",
            "title": "Demo green",
            "steps": [step],
        }
        output = {
            "review_status": "pass",
            "reply_type": "factual",
            "detected_intent": "inventory_on_hand_as_of_date",
            "capability_id": "confirmed_inventory_on_hand_as_of_date",
            "mcp_discovery_catalog_chain_alignment_status": "selected_matches_top",
            "mcp_discovery_catalog_chain_top_match": "inventory_stock_snapshot",
            "mcp_discovery_catalog_chain_selected_matches_top": True,
            "mcp_discovery_route_candidate_status": "ready_for_reviewed_execution",
            "mcp_discovery_route_candidate_fact_family": "inventory_stock_snapshot",
            "mcp_discovery_route_candidate_action_family": "stock_snapshot",
            "mcp_discovery_route_candidate_missing_axes": [],
            "mcp_discovery_route_candidate_executable_now": True,
            "review_findings": [],
        }
        state = {
            "session_id": "asst-green",
            "step_outputs": {"step_01": output},
        }
        summary = self._one_step_summary("pass", passed=1)

        matrix, pack = self._evaluate(spec, state, summary)

        self.assertEqual(pack["final_status"], "accepted")
        self.assertTrue(pack["acceptance_gate_passed"])
        self.assertTrue(pack["critical_path_green"])
        self.assertTrue(all(pack["invariants"].values()))

        row = matrix["rows"][0]
        self.assertEqual(
            row["mcp_discovery_catalog_chain_alignment_status"],
            "selected_matches_top",
        )
        self.assertEqual(
            row["mcp_discovery_catalog_chain_top_match"],
            "inventory_stock_snapshot",
        )
        self.assertTrue(row["mcp_discovery_catalog_chain_selected_matches_top"])
        self.assertEqual(
            row["mcp_discovery_route_candidate_status"],
            "ready_for_reviewed_execution",
        )
        self.assertTrue(row["mcp_discovery_route_candidate_executable_now"])

    def test_flags_meta_context_integrity_when_meta_step_leaks_technical_answer_shape(self) -> None:
        # A meta-capability step recorded with a factual reply type and a
        # forbidden-answer-pattern warning.
        step = {
            "step_id": "step_meta",
            "title": "Capability meta",
            "question_template": "что ты умеешь?",
            "criticality": "warning",
            "semantic_tags": ["meta_capability"],
        }
        spec = {
            "scenario_id": "demo_phase7_meta",
            "domain": "inventory_demo",
            "title": "Demo meta",
            "steps": [step],
        }
        output = {
            "review_status": "warning",
            "reply_type": "factual_with_explanation",
            "detected_intent": None,
            "capability_id": None,
            "review_findings": [
                {"code": "forbidden_answer_pattern_hit", "severity": "warning"},
            ],
        }
        state = {
            "session_id": "asst-meta",
            "step_outputs": {"step_meta": output},
        }
        summary = self._one_step_summary("warning", warned=1)

        matrix, pack = self._evaluate(spec, state, summary)

        self.assertFalse(pack["invariants"]["meta_context_integrity_ok"])
        row = matrix["rows"][0]
        self.assertTrue(row["meta_context_step"])
        self.assertIn("meta_context_integrity", row["invariant_failures"])

    def test_flags_catalog_alignment_invariant_when_planner_diverges_from_catalog_top(self) -> None:
        # The recorded output says the selected chain lies outside the catalog
        # match set and carries a catalog-alignment-divergence warning.
        step = {
            "step_id": "step_01",
            "title": "Catalog alignment",
            "question_template": "проверь цепочку MCP",
            "criticality": "critical",
            "semantic_tags": ["planner_alignment"],
        }
        spec = {
            "scenario_id": "demo_planner_alignment",
            "domain": "planner_autonomy",
            "title": "Planner alignment",
            "steps": [step],
        }
        output = {
            "review_status": "warning",
            "reply_type": "factual",
            "detected_intent": "counterparty_turnover",
            "capability_id": "confirmed_counterparty_turnover",
            "mcp_discovery_catalog_chain_alignment_status": "selected_outside_match_set",
            "mcp_discovery_catalog_chain_top_match": "value_flow_comparison",
            "mcp_discovery_catalog_chain_selected_matches_top": False,
            "review_findings": [
                {"code": "catalog_alignment_divergence", "severity": "warning"},
            ],
        }
        state = {
            "session_id": "asst-align",
            "step_outputs": {"step_01": output},
        }
        summary = self._one_step_summary("warning", warned=1)

        matrix, pack = self._evaluate(spec, state, summary)

        self.assertEqual(pack["final_status"], "partial")
        self.assertFalse(pack["invariants"]["catalog_alignment_ok"])
        self.assertEqual(pack["unresolved_p1_count"], 1)
        self.assertIn("catalog_alignment", matrix["rows"][0]["invariant_failures"])

    def test_flags_route_candidate_handoff_invariant_when_expected_candidate_is_wrong(self) -> None:
        # A failed route-candidate-handoff step whose only finding is a
        # critical wrong_route_candidate_status.
        step = {
            "step_id": "step_01",
            "title": "Candidate needs scope",
            "question_template": "show unfamiliar 1c route candidate",
            "criticality": "critical",
            "semantic_tags": ["route_candidate_handoff"],
        }
        spec = {
            "scenario_id": "demo_route_candidate",
            "domain": "open_world_autonomy",
            "title": "Route candidate",
            "steps": [step],
        }
        output = {
            "review_status": "fail",
            "reply_type": "clarification_required",
            "mcp_discovery_route_candidate_status": "ready_for_reviewed_execution",
            "mcp_discovery_route_candidate_missing_axes": [],
            "mcp_discovery_route_candidate_executable_now": True,
            "review_findings": [
                {"code": "wrong_route_candidate_status", "severity": "critical"},
            ],
        }
        state = {
            "session_id": "asst-route",
            "step_outputs": {"step_01": output},
        }
        summary = self._one_step_summary("fail", failed=1)

        matrix, pack = self._evaluate(spec, state, summary)

        self.assertFalse(pack["invariants"]["route_candidate_handoff_ok"])
        self.assertIn("route_candidate_handoff", matrix["rows"][0]["invariant_failures"])
if __name__ == "__main__":  # executed as a script rather than imported
    # Hand control to unittest's command-line runner for this module.
    unittest.main()