NODEDC_1C/scripts/test_domain_case_loop_step_...

261 lines
12 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from __future__ import annotations
import sys
import unittest
from pathlib import Path
sys.path.insert(0, str(Path(__file__).resolve().parent))
import domain_case_loop as dcl
import domain_truth_harness as dth
class DomainCaseLoopStepStateTests(unittest.TestCase):
    """Tests for ``dcl.build_scenario_step_state`` and ``dth.evaluate_truth_step``.

    The cases cover two areas: the ``mcp_discovery_catalog_chain_*`` alignment
    fields and the ``business_first_review`` section of a built step state.
    Test methods are described with ``#`` comments rather than docstrings so
    that the runner keeps reporting them by method name.
    """

    # Question used by the business-overview scenarios.
    _TOP_YEAR_QUESTION = "какой у нас самый доходный год"

    # Answer padded with 220 repetitions of filler text; used by the tests that
    # expect the business review to report issues.
    _NOISY_ANSWER = "Коротко: Ограниченный бизнес-обзор по подтвержденным строкам 1С. " + ("лишний текст " * 220)

    @staticmethod
    def _turn_artifact(reply_type: str, text: str, debug_payload: dict) -> dict:
        """Return a turn artifact with fixed message/trace ids and an empty session summary."""
        return {
            "assistant_message": {
                "reply_type": reply_type,
                "text": text,
                "message_id": "msg-1",
                "trace_id": "trace-1",
            },
            "technical_debug_payload": debug_payload,
            "session_summary": {},
        }

    def _business_step_state(self, question: str, answer_text: str):
        """Build the step state of the ``business_surface_demo`` scenario.

        ``question`` is used both as the step's question template and as the
        resolved question; ``answer_text`` becomes the assistant message text
        of a ``partial_coverage`` reply with an empty debug payload.
        """
        return dcl.build_scenario_step_state(
            scenario_id="business_surface_demo",
            domain="business_overview",
            step={
                "step_id": "step_01",
                "title": "Top year",
                "depends_on": [],
                "question_template": question,
            },
            step_index=1,
            question_resolved=question,
            analysis_context={},
            turn_artifact=self._turn_artifact("partial_coverage", answer_text, {}),
            entries=[],
        )

    @staticmethod
    def _critical_truth_step(question: str, **expectations: object) -> dict:
        """Return a critical ``step_01`` definition, optionally extended with extra keys."""
        return {
            "step_id": "step_01",
            "question_template": question,
            "criticality": "critical",
            "allowed_reply_types": [],
            **expectations,
        }

    @staticmethod
    def _alignment_step_state(alignment_status: str, top_match: str, selected_matches_top: bool) -> dict:
        """Return a hand-written step state that differs only in its alignment fields."""
        return {
            "question_resolved": "show planner alignment",
            "reply_type": "factual",
            "assistant_text": "Confirmed answer",
            "actual_direct_answer": "Confirmed answer",
            "detected_intent": "counterparty_turnover",
            "selected_recipe": "counterparty_turnover_by_period",
            "capability_id": "confirmed_counterparty_turnover",
            "mcp_discovery_catalog_chain_alignment_status": alignment_status,
            "mcp_discovery_catalog_chain_top_match": top_match,
            "mcp_discovery_catalog_chain_selected_matches_top": selected_matches_top,
            "extracted_filters": {},
        }

    @staticmethod
    def _review_truth_step(truth_step: dict, observed_state):
        """Call ``dth.evaluate_truth_step`` with empty step results and bindings."""
        return dth.evaluate_truth_step(
            step=truth_step,
            step_state=observed_state,
            step_results={},
            bindings={},
            runtime_bindings={},
        )

    def test_preserves_mcp_catalog_alignment_debug_fields(self) -> None:
        # The alignment values placed in the debug payload must be readable
        # under the same keys on the built step state.
        alignment_question = "show planner alignment"
        debug_payload = {
            "detected_mode": "address_query",
            "detected_intent": "counterparty_turnover",
            "selected_recipe": "counterparty_turnover_by_period",
            "capability_id": "confirmed_counterparty_turnover",
            "mcp_discovery_catalog_chain_alignment_status": "selected_matches_top",
            "mcp_discovery_catalog_chain_top_match": "value_flow",
            "mcp_discovery_catalog_chain_selected_matches_top": True,
        }
        built_state = dcl.build_scenario_step_state(
            scenario_id="planner_alignment_demo",
            domain="planner_autonomy",
            step={
                "step_id": "step_01",
                "title": "Alignment visibility",
                "depends_on": [],
                "question_template": alignment_question,
            },
            step_index=1,
            question_resolved=alignment_question,
            analysis_context={},
            turn_artifact=self._turn_artifact("factual", "Confirmed answer", debug_payload),
            entries=[],
        )

        self.assertEqual(built_state["mcp_discovery_catalog_chain_alignment_status"], "selected_matches_top")
        self.assertEqual(built_state["mcp_discovery_catalog_chain_top_match"], "value_flow")
        self.assertTrue(built_state["mcp_discovery_catalog_chain_selected_matches_top"])

    def test_truth_harness_warns_on_catalog_alignment_divergence(self) -> None:
        # The step declares no expected_* alignment keys; the observed state
        # reports "selected_outside_match_set". The review must come back as a
        # warning with exactly one finding.
        outcome = self._review_truth_step(
            self._critical_truth_step("show planner alignment"),
            self._alignment_step_state("selected_outside_match_set", "value_flow_comparison", False),
        )

        self.assertEqual(outcome["review_status"], "warning")
        self.assertEqual(outcome["warning_findings_count"], 1)
        first_finding = outcome["review_findings"][0]
        self.assertEqual(first_finding["code"], "catalog_alignment_divergence")
        self.assertEqual(first_finding["severity"], "warning")

    def test_truth_harness_checks_expected_catalog_alignment_fields(self) -> None:
        # The step expects top match "value_flow_comparison" while the observed
        # state carries "value_flow"; the review must fail with one critical
        # finding for the top-match mismatch.
        truth_step = self._critical_truth_step(
            "show planner alignment",
            expected_catalog_alignment_status="selected_matches_top",
            expected_catalog_chain_top_match="value_flow_comparison",
            expected_catalog_selected_matches_top=True,
        )
        outcome = self._review_truth_step(
            truth_step,
            self._alignment_step_state("selected_matches_top", "value_flow", True),
        )

        self.assertEqual(outcome["review_status"], "fail")
        self.assertEqual(outcome["critical_findings_count"], 1)
        self.assertEqual(outcome["review_findings"][0]["code"], "wrong_catalog_chain_top_match")

    def test_business_first_review_flags_dirty_direct_answer_surface(self) -> None:
        # A generic opening followed by a long run of filler text must be
        # reported by the business-first review.
        built_state = self._business_step_state(self._TOP_YEAR_QUESTION, self._NOISY_ANSWER)
        surface_review = built_state["business_first_review"]

        self.assertFalse(surface_review["direct_answer_first_ok"])
        self.assertFalse(surface_review["business_usefulness_ok"])
        issue_codes = surface_review["issue_codes"]
        for expected_code in (
            "business_direct_answer_missing",
            "answer_layering_noise",
            "business_answer_too_verbose",
        ):
            self.assertIn(expected_code, issue_codes)
        self.assertIn("business_direct_answer_missing", built_state["violated_invariants"])

    def test_business_first_review_accepts_compact_direct_answer_surface(self) -> None:
        # A short answer that names the year and amount up front must produce
        # no review issues.
        compact_answer = "Коротко: самый доходный год в доступном денежном контуре 1С — 2015: 136 723 459,73 руб.\nМетод: считаю по подтвержденным входящим поступлениям."
        built_state = self._business_step_state(self._TOP_YEAR_QUESTION, compact_answer)
        surface_review = built_state["business_first_review"]

        self.assertTrue(surface_review["direct_answer_first_ok"])
        self.assertTrue(surface_review["business_usefulness_ok"])
        self.assertEqual(surface_review["issue_codes"], [])

    def test_business_first_review_separates_direct_answer_from_later_technical_leak(self) -> None:
        # The first line answers the question; the second line is a service
        # line carrying a capability id. The review must flag the technical
        # leak without also reporting a missing direct answer.
        question = "\u043a\u0430\u043a\u043e\u0439 \u0443 \u043d\u0430\u0441 \u0441\u0430\u043c\u044b\u0439 \u0434\u043e\u0445\u043e\u0434\u043d\u044b\u0439 \u0433\u043e\u0434"
        leaky_answer = "2015 \u2014 \u0441\u0430\u043c\u044b\u0439 \u0434\u043e\u0445\u043e\u0434\u043d\u044b\u0439 \u0433\u043e\u0434 \u043f\u043e \u043f\u043e\u0434\u0442\u0432\u0435\u0440\u0436\u0434\u0435\u043d\u043d\u044b\u043c \u0432\u0445\u043e\u0434\u044f\u0449\u0438\u043c \u0434\u0435\u043d\u044c\u0433\u0430\u043c.\nservice: capability_id=business_overview_route_template_v1"
        built_state = self._business_step_state(question, leaky_answer)
        surface_review = built_state["business_first_review"]

        self.assertTrue(surface_review["direct_answer_first_ok"])
        self.assertTrue(surface_review["technical_garbage_present"])
        self.assertIn("technical_garbage_in_answer", surface_review["issue_codes"])
        self.assertNotIn("business_direct_answer_missing", surface_review["issue_codes"])

    def test_truth_harness_promotes_business_review_issues_to_findings(self) -> None:
        # Issues recorded by the business-first review must reappear as
        # "business_review:"-prefixed findings once the built state goes
        # through the truth harness, and the step must fail.
        built_state = self._business_step_state(self._TOP_YEAR_QUESTION, self._NOISY_ANSWER)
        outcome = self._review_truth_step(
            self._critical_truth_step(self._TOP_YEAR_QUESTION),
            built_state,
        )

        finding_codes = [finding["code"] for finding in outcome["review_findings"]]
        self.assertIn("business_review:business_direct_answer_missing", finding_codes)
        self.assertIn("business_review:answer_layering_noise", finding_codes)
        self.assertEqual(outcome["review_status"], "fail")
# Run the test cases of this module when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()