"""Tests for scenario step-state building and truth-harness step review.

Covers two areas:

* ``domain_case_loop.build_scenario_step_state`` -- MCP catalog alignment
  debug fields and the ``business_first_review`` section of the step state.
* ``domain_truth_harness.evaluate_truth_step`` -- findings raised for catalog
  alignment and for business-review issue codes.
"""

from __future__ import annotations

import sys
import unittest
from pathlib import Path

# Make the sibling modules importable when this file is run directly.
sys.path.insert(0, str(Path(__file__).resolve().parent))

import domain_case_loop as dcl  # noqa: E402
import domain_truth_harness as dth  # noqa: E402


_ALIGNMENT_QUESTION = "show planner alignment"
_TOP_YEAR_QUESTION = "какой у нас самый доходный год"

# A "Коротко:" lead-in followed by 220 repetitions of filler text.
_NOISY_ANSWER = (
    "Коротко: Ограниченный бизнес-обзор по подтвержденным строкам 1С. "
    + ("лишний текст " * 220)
)


def _build_step_state(
    *,
    scenario_id: str,
    domain: str,
    title: str,
    question: str,
    reply_type: str,
    text: str,
    debug_payload: dict | None = None,
):
    """Call ``dcl.build_scenario_step_state`` for a single first step.

    The step is always ``step_01`` at index 1 with no dependencies, an empty
    analysis context, an empty session summary and no entries. ``question``
    is used both as the step's question template and as the resolved question.
    The return value of ``build_scenario_step_state`` is passed through as-is.
    """
    step_definition = {
        "step_id": "step_01",
        "title": title,
        "depends_on": [],
        "question_template": question,
    }
    assistant_message = {
        "reply_type": reply_type,
        "text": text,
        "message_id": "msg-1",
        "trace_id": "trace-1",
    }
    artifact = {
        "assistant_message": assistant_message,
        "technical_debug_payload": {} if debug_payload is None else debug_payload,
        "session_summary": {},
    }
    return dcl.build_scenario_step_state(
        scenario_id=scenario_id,
        domain=domain,
        step=step_definition,
        step_index=1,
        question_resolved=question,
        analysis_context={},
        turn_artifact=artifact,
        entries=[],
    )


def _business_overview_step_state(question: str, answer_text: str):
    """Build a ``business_overview`` step state for a partial-coverage reply."""
    return _build_step_state(
        scenario_id="business_surface_demo",
        domain="business_overview",
        title="Top year",
        question=question,
        reply_type="partial_coverage",
        text=answer_text,
    )


def _critical_step(question: str, **expectations) -> dict:
    """Return a critical truth-harness step with no reply-type restriction.

    Extra keyword arguments are appended after the base keys, in call order.
    """
    step = {
        "step_id": "step_01",
        "question_template": question,
        "criticality": "critical",
        "allowed_reply_types": [],
    }
    step.update(expectations)
    return step


def _alignment_step_state(
    status: str, top_match: str, selected_matches_top: bool
) -> dict:
    """Return a hand-written step state that differs only in alignment fields."""
    return {
        "question_resolved": _ALIGNMENT_QUESTION,
        "reply_type": "factual",
        "assistant_text": "Confirmed answer",
        "actual_direct_answer": "Confirmed answer",
        "detected_intent": "counterparty_turnover",
        "selected_recipe": "counterparty_turnover_by_period",
        "capability_id": "confirmed_counterparty_turnover",
        "mcp_discovery_catalog_chain_alignment_status": status,
        "mcp_discovery_catalog_chain_top_match": top_match,
        "mcp_discovery_catalog_chain_selected_matches_top": selected_matches_top,
        "extracted_filters": {},
    }


def _review(step: dict, step_state):
    """Run ``dth.evaluate_truth_step`` with empty results and bindings."""
    return dth.evaluate_truth_step(
        step=step,
        step_state=step_state,
        step_results={},
        bindings={},
        runtime_bindings={},
    )


class DomainCaseLoopStepStateTests(unittest.TestCase):
    """Step-state construction and truth-harness review behaviour."""

    def test_preserves_mcp_catalog_alignment_debug_fields(self) -> None:
        """Alignment fields from the debug payload appear on the step state."""
        step_state = _build_step_state(
            scenario_id="planner_alignment_demo",
            domain="planner_autonomy",
            title="Alignment visibility",
            question=_ALIGNMENT_QUESTION,
            reply_type="factual",
            text="Confirmed answer",
            debug_payload={
                "detected_mode": "address_query",
                "detected_intent": "counterparty_turnover",
                "selected_recipe": "counterparty_turnover_by_period",
                "capability_id": "confirmed_counterparty_turnover",
                "mcp_discovery_catalog_chain_alignment_status": "selected_matches_top",
                "mcp_discovery_catalog_chain_top_match": "value_flow",
                "mcp_discovery_catalog_chain_selected_matches_top": True,
            },
        )

        self.assertEqual(
            step_state["mcp_discovery_catalog_chain_alignment_status"],
            "selected_matches_top",
        )
        self.assertEqual(
            step_state["mcp_discovery_catalog_chain_top_match"],
            "value_flow",
        )
        self.assertTrue(
            step_state["mcp_discovery_catalog_chain_selected_matches_top"]
        )

    def test_truth_harness_warns_on_catalog_alignment_divergence(self) -> None:
        """A selection outside the match set yields exactly one warning."""
        reviewed = _review(
            _critical_step(_ALIGNMENT_QUESTION),
            _alignment_step_state(
                "selected_outside_match_set",
                "value_flow_comparison",
                False,
            ),
        )

        self.assertEqual(reviewed["review_status"], "warning")
        self.assertEqual(reviewed["warning_findings_count"], 1)
        first_finding = reviewed["review_findings"][0]
        self.assertEqual(first_finding["code"], "catalog_alignment_divergence")
        self.assertEqual(first_finding["severity"], "warning")

    def test_truth_harness_checks_expected_catalog_alignment_fields(self) -> None:
        """A mismatching expected top match fails the step with one critical."""
        reviewed = _review(
            _critical_step(
                _ALIGNMENT_QUESTION,
                expected_catalog_alignment_status="selected_matches_top",
                expected_catalog_chain_top_match="value_flow_comparison",
                expected_catalog_selected_matches_top=True,
            ),
            _alignment_step_state("selected_matches_top", "value_flow", True),
        )

        self.assertEqual(reviewed["review_status"], "fail")
        self.assertEqual(reviewed["critical_findings_count"], 1)
        first_finding = reviewed["review_findings"][0]
        self.assertEqual(first_finding["code"], "wrong_catalog_chain_top_match")

    def test_business_first_review_flags_dirty_direct_answer_surface(self) -> None:
        """A long, filler-heavy reply is flagged by the business-first review."""
        step_state = _business_overview_step_state(_TOP_YEAR_QUESTION, _NOISY_ANSWER)
        review = step_state["business_first_review"]

        self.assertFalse(review["direct_answer_first_ok"])
        self.assertFalse(review["business_usefulness_ok"])
        for expected_code in (
            "business_direct_answer_missing",
            "answer_layering_noise",
            "business_answer_too_verbose",
        ):
            self.assertIn(expected_code, review["issue_codes"])
        self.assertIn(
            "business_direct_answer_missing",
            step_state["violated_invariants"],
        )

    def test_business_first_review_accepts_compact_direct_answer_surface(self) -> None:
        """A short reply that leads with the answer produces no issue codes."""
        compact_answer = (
            "Коротко: самый доходный год в доступном денежном контуре 1С — 2015: 136 723 459,73 руб.\n"
            "Метод: считаю по подтвержденным входящим поступлениям."
        )
        step_state = _business_overview_step_state(_TOP_YEAR_QUESTION, compact_answer)
        review = step_state["business_first_review"]

        self.assertTrue(review["direct_answer_first_ok"])
        self.assertTrue(review["business_usefulness_ok"])
        self.assertEqual(review["issue_codes"], [])

    def test_business_first_review_separates_direct_answer_from_later_technical_leak(self) -> None:
        """A technical line after a direct answer is reported on its own."""
        question = "\u043a\u0430\u043a\u043e\u0439 \u0443 \u043d\u0430\u0441 \u0441\u0430\u043c\u044b\u0439 \u0434\u043e\u0445\u043e\u0434\u043d\u044b\u0439 \u0433\u043e\u0434"
        answer_with_leak = (
            "2015 \u2014 \u0441\u0430\u043c\u044b\u0439 \u0434\u043e\u0445\u043e\u0434\u043d\u044b\u0439 \u0433\u043e\u0434 \u043f\u043e \u043f\u043e\u0434\u0442\u0432\u0435\u0440\u0436\u0434\u0435\u043d\u043d\u044b\u043c \u0432\u0445\u043e\u0434\u044f\u0449\u0438\u043c \u0434\u0435\u043d\u044c\u0433\u0430\u043c.\n"
            "service: capability_id=business_overview_route_template_v1"
        )
        step_state = _business_overview_step_state(question, answer_with_leak)
        review = step_state["business_first_review"]

        self.assertTrue(review["direct_answer_first_ok"])
        self.assertTrue(review["technical_garbage_present"])
        self.assertIn("technical_garbage_in_answer", review["issue_codes"])
        self.assertNotIn("business_direct_answer_missing", review["issue_codes"])

    def test_truth_harness_promotes_business_review_issues_to_findings(self) -> None:
        """Business-review issue codes surface as prefixed harness findings."""
        step_state = _business_overview_step_state(_TOP_YEAR_QUESTION, _NOISY_ANSWER)
        reviewed = _review(_critical_step(_TOP_YEAR_QUESTION), step_state)

        finding_codes = [finding["code"] for finding in reviewed["review_findings"]]
        self.assertIn(
            "business_review:business_direct_answer_missing", finding_codes
        )
        self.assertIn("business_review:answer_layering_noise", finding_codes)
        self.assertEqual(reviewed["review_status"], "fail")


if __name__ == "__main__":
    unittest.main()