169 lines
6.7 KiB
Python
169 lines
6.7 KiB
Python
from __future__ import annotations

import sys
import unittest
from pathlib import Path

# Put the directory containing this file at the front of sys.path before the
# project import below runs (presumably scenario_acceptance_policy lives next
# to this test file — confirm). The order of these two statements matters.
sys.path.insert(0, str(Path(__file__).resolve().parent))

import scenario_acceptance_policy as sap
|
|
|
|
|
|
class ScenarioAcceptancePolicyTests(unittest.TestCase):
    """Checks the acceptance matrix and pack state built by ``scenario_acceptance_policy``.

    Every test feeds a one-step scenario (spec, recorded step output, review
    summary) through ``sap.build_scenario_acceptance_matrix`` and
    ``sap.derive_truth_harness_pack_state`` and asserts on what comes back.
    """

    @staticmethod
    def _review_summary(
        overall_status: str,
        *,
        passed: int = 0,
        warned: int = 0,
        failed: int = 0,
    ) -> dict:
        """Build the review summary dict for a single-step ``live_strict_replay`` run."""
        return {
            "review_source": "live_strict_replay",
            "overall_status": overall_status,
            "steps_total": 1,
            "steps_passed": passed,
            "steps_with_warning": warned,
            "steps_failed": failed,
        }

    @staticmethod
    def _evaluate(scenario_spec: dict, run_state: dict, summary: dict) -> tuple:
        """Return ``(acceptance_matrix, pack_state)`` for the given scenario inputs.

        The matrix is built first because the pack-state derivation receives it
        as its fourth positional argument.
        """
        matrix = sap.build_scenario_acceptance_matrix(scenario_spec, run_state, summary)
        pack = sap.derive_truth_harness_pack_state(scenario_spec, run_state, summary, matrix)
        return matrix, pack

    def test_marks_partial_when_selected_object_and_temporal_p0_findings_exist(self) -> None:
        """A failed critical selected-object step with two critical findings gives ``partial``."""
        supplier_step = {
            "step_id": "step_01",
            "title": "Selected object supplier",
            "question_template": 'По выбранному объекту "Стол": кто поставил?',
            "criticality": "critical",
            "expected_intents": ["inventory_purchase_provenance_for_item"],
            "semantic_tags": ["selected_object"],
        }
        scenario_spec = {
            "scenario_id": "demo_phase7",
            "domain": "inventory_demo",
            "title": "Demo",
            "steps": [supplier_step],
        }
        failed_output = {
            "review_status": "fail",
            "reply_type": "factual",
            "detected_intent": "inventory_on_hand_as_of_date",
            "capability_id": "confirmed_inventory_on_hand_as_of_date",
            "review_findings": [
                {"code": "wrong_intent", "severity": "critical"},
                {"code": "wrong_filter:as_of_date", "severity": "critical"},
            ],
        }
        run_state = {
            "session_id": "asst-demo",
            "step_outputs": {"step_01": failed_output},
        }
        summary = self._review_summary("fail", failed=1)

        _matrix, pack_state = self._evaluate(scenario_spec, run_state, summary)

        self.assertEqual(pack_state["final_status"], "partial")
        invariants = pack_state["invariants"]
        self.assertFalse(invariants["selected_object_continuity_ok"])
        self.assertFalse(invariants["temporal_honesty_ok"])
        self.assertEqual(pack_state["unresolved_p0_count"], 2)
        self.assertTrue(pack_state["invariants"]["meta_context_integrity_ok"])

    def test_accepts_when_all_review_and_acceptance_invariants_are_green(self) -> None:
        """A passing step with no findings is accepted and its catalog-chain fields reach the matrix row."""
        inventory_step = {
            "step_id": "step_01",
            "title": "Inventory root",
            "question_template": "какие остатки на складе на март 2021",
            "criticality": "critical",
            "expected_intents": ["inventory_on_hand_as_of_date"],
            "semantic_tags": ["inventory_root"],
        }
        scenario_spec = {
            "scenario_id": "demo_phase7_green",
            "domain": "inventory_demo",
            "title": "Demo green",
            "steps": [inventory_step],
        }
        passing_output = {
            "review_status": "pass",
            "reply_type": "factual",
            "detected_intent": "inventory_on_hand_as_of_date",
            "capability_id": "confirmed_inventory_on_hand_as_of_date",
            "mcp_discovery_catalog_chain_alignment_status": "selected_matches_top",
            "mcp_discovery_catalog_chain_top_match": "inventory_stock_snapshot",
            "mcp_discovery_catalog_chain_selected_matches_top": True,
            "review_findings": [],
        }
        run_state = {
            "session_id": "asst-green",
            "step_outputs": {"step_01": passing_output},
        }
        summary = self._review_summary("pass", passed=1)

        matrix, pack_state = self._evaluate(scenario_spec, run_state, summary)

        self.assertEqual(pack_state["final_status"], "accepted")
        for flag in ("acceptance_gate_passed", "critical_path_green"):
            self.assertTrue(pack_state[flag])
        self.assertTrue(all(pack_state["invariants"].values()))

        # The catalog-chain values from the step output must appear on the first matrix row.
        expected_row_values = (
            ("mcp_discovery_catalog_chain_alignment_status", "selected_matches_top"),
            ("mcp_discovery_catalog_chain_top_match", "inventory_stock_snapshot"),
        )
        for column, expected in expected_row_values:
            self.assertEqual(matrix["rows"][0][column], expected)
        self.assertTrue(matrix["rows"][0]["mcp_discovery_catalog_chain_selected_matches_top"])

    def test_flags_meta_context_integrity_when_meta_step_leaks_technical_answer_shape(self) -> None:
        """A meta-capability step with a forbidden-answer-pattern finding fails meta-context integrity."""
        meta_step = {
            "step_id": "step_meta",
            "title": "Capability meta",
            "question_template": "что ты умеешь?",
            "criticality": "warning",
            "semantic_tags": ["meta_capability"],
        }
        scenario_spec = {
            "scenario_id": "demo_phase7_meta",
            "domain": "inventory_demo",
            "title": "Demo meta",
            "steps": [meta_step],
        }
        warned_output = {
            "review_status": "warning",
            "reply_type": "factual_with_explanation",
            "detected_intent": None,
            "capability_id": None,
            "review_findings": [
                {"code": "forbidden_answer_pattern_hit", "severity": "warning"},
            ],
        }
        run_state = {
            "session_id": "asst-meta",
            "step_outputs": {"step_meta": warned_output},
        }
        summary = self._review_summary("warning", warned=1)

        matrix, pack_state = self._evaluate(scenario_spec, run_state, summary)

        self.assertFalse(pack_state["invariants"]["meta_context_integrity_ok"])
        first_row = matrix["rows"][0]
        self.assertTrue(first_row["meta_context_step"])
        self.assertIn("meta_context_integrity", first_row["invariant_failures"])
|
|
|
|
|
|
# Run the tests in this module when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|