"""Unit tests for scenario step-state construction and truth-harness review.

The tests feed hand-built turn artifacts into
``domain_case_loop.build_scenario_step_state`` and hand-built step states into
``domain_truth_harness.evaluate_truth_step`` and assert on the returned
mappings.
"""

from __future__ import annotations

import sys
import unittest
from pathlib import Path

# The modules under test live next to this file; make them importable when the
# test module is executed directly.
sys.path.insert(0, str(Path(__file__).resolve().parent))

import domain_case_loop as dcl
import domain_truth_harness as dth


# Question shared by the business-first review tests.
_TOP_YEAR_QUESTION = "какой у нас самый доходный год"

# A reply that opens with a generic overview sentence and is then padded with
# 220 repetitions of filler text.
_VERBOSE_OVERVIEW_TEXT = (
    "Коротко: Ограниченный бизнес-обзор по подтвержденным строкам 1С. "
    + ("лишний текст " * 220)
)


def _build_step_state(
    *,
    scenario_id: str,
    domain: str,
    step: dict,
    reply_type: str,
    text: str,
    debug_payload: dict | None = None,
    session_summary: dict | None = None,
    analysis_context: dict | None = None,
    step_index: int = 1,
    entries: list | None = None,
):
    """Call ``dcl.build_scenario_step_state`` with a freshly built turn artifact.

    The assistant message always carries ``message_id="msg-1"`` and
    ``trace_id="trace-1"``. ``question_resolved`` is taken from
    ``step["question_template"]``. Omitted mappings default to ``{}`` and
    omitted ``entries`` to ``[]``.
    """
    turn_artifact = {
        "assistant_message": {
            "reply_type": reply_type,
            "text": text,
            "message_id": "msg-1",
            "trace_id": "trace-1",
        },
        "technical_debug_payload": {} if debug_payload is None else debug_payload,
        "session_summary": {} if session_summary is None else session_summary,
    }
    return dcl.build_scenario_step_state(
        scenario_id=scenario_id,
        domain=domain,
        step=step,
        step_index=step_index,
        question_resolved=step["question_template"],
        analysis_context={} if analysis_context is None else analysis_context,
        turn_artifact=turn_artifact,
        entries=[] if entries is None else entries,
    )


def _top_year_step_state(text: str):
    """Build the step state for the "top year" business question with *text* as the reply."""
    return _build_step_state(
        scenario_id="business_surface_demo",
        domain="business_overview",
        step={
            "step_id": "step_01",
            "title": "Top year",
            "depends_on": [],
            "question_template": _TOP_YEAR_QUESTION,
        },
        reply_type="partial_coverage",
        text=text,
    )


def _evaluate_truth_step(step: dict, step_state):
    """Run ``dth.evaluate_truth_step`` with empty step results and bindings."""
    return dth.evaluate_truth_step(
        step=step,
        step_state=step_state,
        step_results={},
        bindings={},
        runtime_bindings={},
    )


class DomainCaseLoopStepStateTests(unittest.TestCase):
    """Checks on step-state fields, invariants and truth-harness findings."""

    def test_preserves_mcp_catalog_alignment_debug_fields(self) -> None:
        """Catalog alignment debug fields are copied into the step state."""
        state = _build_step_state(
            scenario_id="planner_alignment_demo",
            domain="planner_autonomy",
            step={
                "step_id": "step_01",
                "title": "Alignment visibility",
                "depends_on": [],
                "question_template": "show planner alignment",
            },
            reply_type="factual",
            text="Confirmed answer",
            debug_payload={
                "detected_mode": "address_query",
                "detected_intent": "counterparty_turnover",
                "selected_recipe": "counterparty_turnover_by_period",
                "capability_id": "confirmed_counterparty_turnover",
                "mcp_discovery_catalog_chain_alignment_status": "selected_matches_top",
                "mcp_discovery_catalog_chain_top_match": "value_flow",
                "mcp_discovery_catalog_chain_selected_matches_top": True,
            },
        )

        self.assertEqual(
            state["mcp_discovery_catalog_chain_alignment_status"],
            "selected_matches_top",
        )
        self.assertEqual(state["mcp_discovery_catalog_chain_top_match"], "value_flow")
        self.assertTrue(state["mcp_discovery_catalog_chain_selected_matches_top"])

    def test_analysis_context_date_is_not_implicit_business_filter(self) -> None:
        """An ``as_of_date`` in the analysis context alone raises no date invariants."""
        state = _build_step_state(
            scenario_id="stage_pack_demo",
            domain="agentic_loop",
            step={
                "step_id": "step_01",
                "title": "All-time summary",
                "depends_on": [],
                "question_template": "all-time money summary",
            },
            analysis_context={"as_of_date": "2026-05-09", "source": "stage_pack"},
            reply_type="factual_with_explanation",
            text="Short: all-time confirmed money summary.",
        )

        violated = state["violated_invariants"]
        self.assertNotIn("missing_required_filter", violated)
        self.assertNotIn("wrong_as_of_date", violated)

    def test_analysis_context_date_is_required_for_explicit_date_carryover(self) -> None:
        """A ``date_scope`` carryover step with a mismatching filter date is flagged."""
        state = _build_step_state(
            scenario_id="date_carryover_demo",
            domain="inventory",
            step={
                "step_id": "step_01",
                "title": "Date carryover",
                "depends_on": [],
                "question_template": "stock on that date",
                "required_carryover_invariants": ["date_scope"],
            },
            analysis_context={"as_of_date": "2021-03-31"},
            reply_type="factual",
            text="Short: stock confirmed.",
            debug_payload={
                "detected_mode": "address_query",
                "detected_intent": "inventory_on_hand_as_of_date",
                "selected_recipe": "address_inventory_on_hand_as_of_date_v1",
                "capability_id": "confirmed_inventory_on_hand_as_of_date",
                "capability_route_mode": "exact",
                "fallback_type": "none",
                "extracted_filters": {"as_of_date": "2020-03-31"},
            },
        )

        self.assertIn("wrong_as_of_date", state["violated_invariants"])

    def test_temporal_reset_question_skips_carried_date_scope(self) -> None:
        """All-time questions reset temporal scope; carryover can omit the date scope."""
        for question in (
            "show money za all time",
            "сколько всего денег за все доступное время",
        ):
            self.assertTrue(dcl.question_resets_temporal_scope(question))

        previous_state = {
            "semantic_memory": {
                "date_scope": {
                    "as_of_date": "2020-12-31",
                    "period_from": "2020-10-01",
                    "period_to": "2020-12-31",
                },
                "organization_scope": {"label": "ООО Альтернатива Плюс"},
            }
        }
        carried = dcl.carry_forward_analysis_context(
            previous_state,
            {},
            prefer_carryover=True,
            carry_date_scope=False,
        )

        self.assertNotIn("as_of_date", carried)
        self.assertEqual(carried["organization_scope"], {"label": "ООО Альтернатива Плюс"})

    def test_merge_scenario_date_scope_keeps_current_scope_over_stale_previous(self) -> None:
        """The current-turn date scope wins over the carried-over one."""
        stale_scope = {
            "as_of_date": "2020-12-31",
            "period_from": "2020-10-01",
            "period_to": "2020-12-31",
            "source": "scenario_state_carryover",
        }
        current_scope = {
            "as_of_date": "2021-03-31",
            "period_from": "2021-03-01",
            "period_to": "2021-03-31",
            "source": "current_turn",
        }

        merged = dcl.merge_scenario_date_scope(
            stale_scope,
            current_scope,
            depends_on=["previous_step"],
        )

        self.assertEqual(merged["as_of_date"], "2021-03-31")
        self.assertEqual(merged["source"], "current_turn")

    def test_mcp_business_overview_all_time_scope_overrides_stale_session_date(self) -> None:
        """An applied business-overview chain without a period yields an all-time scope."""
        state = _build_step_state(
            scenario_id="business_overview_demo",
            domain="agentic_loop",
            step={
                "step_id": "step_01",
                "title": "All-time money",
                "depends_on": ["previous_step"],
                "question_template": "all-time money summary",
                "expected_intents": ["business_overview"],
            },
            reply_type="partial_coverage",
            text="Short: all-time confirmed money summary.",
            debug_payload={
                "detected_mode": "address_query",
                "detected_intent": "inventory_supplier_stock_overlap_as_of_date",
                "selected_recipe": "address_inventory_supplier_stock_overlap_as_of_date_v1",
                "capability_id": "inventory_inventory_supplier_stock_overlap_as_of_date",
                "mcp_discovery_response_applied": True,
                "mcp_discovery_selected_chain_id": "business_overview",
                "mcp_discovery_catalog_chain_top_match": "business_overview",
                "mcp_discovery_response_candidate_v1": {
                    "candidate_status": "ready_for_guarded_use",
                    "reply_type": "partial_coverage",
                },
                "assistant_mcp_discovery_entry_point_v1": {
                    "bridge": {
                        "pilot": {
                            "derived_business_overview": {
                                "period_scope": None,
                            }
                        }
                    }
                },
            },
            # The session still remembers a date scope and result set from an earlier turn.
            session_summary={
                "address_navigation_state": {
                    "session_context": {
                        "active_result_set_id": "rs-stale",
                        "date_scope": {
                            "as_of_date": "2020-12-31",
                            "period_from": "2020-10-01",
                            "period_to": "2020-12-31",
                        },
                    }
                }
            },
        )

        self.assertEqual(state["date_scope"]["scope"], "all_time")
        self.assertIsNone(state["date_scope"]["as_of_date"])
        self.assertEqual(state["active_result_set_id"], "mcp-discovery-msg-1")
        self.assertNotIn("wrong_date_scope_state", state["violated_invariants"])

    def test_applied_ready_mcp_discovery_chain_satisfies_expected_intent(self) -> None:
        """An applied, ready MCP discovery chain counts as the effective intent."""
        state = _build_step_state(
            scenario_id="business_overview_demo",
            domain="agentic_loop",
            step={
                "step_id": "step_01",
                "title": "Business overview",
                "depends_on": [],
                "question_template": "business overview for 2020",
                "expected_intents": ["business_overview"],
            },
            reply_type="partial_coverage",
            text="Short: business overview from confirmed 1C rows.",
            debug_payload={
                "detected_mode": "address_query",
                "detected_intent": "inventory_supplier_stock_overlap_as_of_date",
                "selected_recipe": "address_inventory_supplier_stock_overlap_as_of_date_v1",
                "capability_id": "inventory_inventory_supplier_stock_overlap_as_of_date",
                "mcp_discovery_response_applied": True,
                "mcp_discovery_selected_chain_id": "business_overview",
                "mcp_discovery_catalog_chain_top_match": "business_overview",
                "mcp_discovery_response_candidate_v1": {
                    "candidate_status": "ready_for_guarded_use",
                    "reply_type": "partial_coverage",
                },
            },
        )

        self.assertEqual(state["mcp_discovery_effective_intents"], ["business_overview"])
        self.assertNotIn("wrong_intent", state["violated_invariants"])

    def test_ready_bounded_mcp_answer_can_validate_without_exact_route(self) -> None:
        """A ready bounded MCP answer is validated with ``partial`` execution status."""
        state = _build_step_state(
            scenario_id="business_overview_demo",
            domain="agentic_loop",
            step={
                "step_id": "step_01",
                "title": "Business overview",
                "depends_on": [],
                "question_template": "business overview for 2020",
                "expected_intents": ["business_overview"],
                "required_answer_shape": "direct_answer_first",
            },
            reply_type="partial_coverage",
            text="Short: confirmed bounded business overview from 1C rows.",
            debug_payload={
                "detected_mode": "address_query",
                "detected_intent": "inventory_supplier_stock_overlap_as_of_date",
                "selected_recipe": "address_inventory_supplier_stock_overlap_as_of_date_v1",
                "capability_id": "inventory_inventory_supplier_stock_overlap_as_of_date",
                "mcp_discovery_response_applied": True,
                "mcp_discovery_selected_chain_id": "business_overview",
                "mcp_discovery_catalog_chain_top_match": "business_overview",
                "mcp_discovery_response_candidate_v1": {
                    "candidate_status": "ready_for_guarded_use",
                    "reply_type": "partial_coverage",
                },
            },
        )

        self.assertEqual(state["execution_status"], "partial")
        self.assertTrue(state["bounded_mcp_answer_validated"])
        self.assertEqual(state["acceptance_status"], "validated")

    def test_required_answer_patterns_block_generic_bounded_mcp_summary(self) -> None:
        """A reply missing a required answer pattern is rejected."""
        state = _build_step_state(
            scenario_id="summary_demo",
            domain="agentic_loop",
            step={
                "step_id": "step_01",
                "title": "Summary",
                "depends_on": [],
                "question_template": "summarize company and SVK separately",
                "required_answer_shape": "direct_answer_first",
                "required_answer_patterns_all": ["SVK", "company"],
            },
            reply_type="partial_coverage",
            # Mentions "company" but never "SVK".
            text="Short: company money summary only.",
            debug_payload={
                "mcp_discovery_response_applied": True,
                "mcp_discovery_selected_chain_id": "business_overview",
                "mcp_discovery_catalog_chain_top_match": "business_overview",
                "mcp_discovery_response_candidate_v1": {
                    "candidate_status": "ready_for_guarded_use",
                    "reply_type": "partial_coverage",
                },
            },
        )

        self.assertIn("required_answer_patterns_all_missing", state["violated_invariants"])
        self.assertFalse(state["bounded_mcp_answer_validated"])
        self.assertEqual(state["acceptance_status"], "rejected")

    def test_memory_checkpoint_can_validate_honest_no_scope_answer(self) -> None:
        """A memory checkpoint step accepts an honest "nothing selected" reply."""
        question = "is any company or counterparty selected in the current dialog?"
        state = _build_step_state(
            scenario_id="memory_demo",
            domain="agentic_loop",
            step={
                "step_id": "step_01",
                "title": "Memory checkpoint",
                "depends_on": [],
                "question_template": question,
                "semantic_tags": ["memory", "scope_guard"],
                "required_answer_shape": "direct_answer_first",
            },
            reply_type="partial_coverage",
            text="В текущем диалоге не выбрана компания или контрагент; память не выдумываю.",
            debug_payload={
                "detected_mode": "address_query",
                "detected_intent": "customer_revenue_and_payments",
                "fallback_type": "no_rows",
            },
        )

        self.assertEqual(state["execution_status"], "partial")
        self.assertTrue(state["memory_checkpoint_validated"])
        self.assertEqual(state["acceptance_status"], "validated")

    def test_deterministic_chat_memory_checkpoint_validates_without_exact_capability(self) -> None:
        """A deterministic chat memory recap validates the memory checkpoint."""
        state = _build_step_state(
            scenario_id="memory_demo",
            domain="agentic_loop",
            step={
                "step_id": "step_01",
                "title": "Memory checkpoint",
                "depends_on": [],
                "question_template": "current dialog memory checkpoint",
                "semantic_tags": ["memory", "scope_guard"],
                "required_answer_shape": "direct_answer_first",
            },
            reply_type="factual_with_explanation",
            text=(
                "Коротко: в текущем диалоге я не вижу выбранной компании, контрагента или позиции. "
                "Память про «Группа СВК» в этом диалоге не подтверждена."
            ),
            debug_payload={
                "detected_mode": "chat",
                "fallback_type": "none",
                "living_router_reason": "memory_recap_followup_detected",
                "living_chat_response_source": "deterministic_memory_recap_contract",
            },
        )

        self.assertEqual(state["execution_status"], "partial")
        self.assertTrue(state["memory_checkpoint_validated"])
        self.assertEqual(state["acceptance_status"], "validated")

    def test_confirmed_runtime_factual_answer_can_validate_without_exact_route_mode(self) -> None:
        """A confirmed factual answer validates even with a heuristic route mode."""
        state = _build_step_state(
            scenario_id="runtime_factual_demo",
            domain="agentic_loop",
            step={
                "step_id": "step_01",
                "title": "Account 60 tails",
                "depends_on": [],
                "question_template": "show account 60 tails",
                "required_answer_shape": "direct_answer_first",
            },
            reply_type="factual",
            text="Коротко: по счету 60 найдено 8 строк хвостов; контрагентов с сигналом: 6.",
            debug_payload={
                "detected_mode": "address_query",
                "detected_intent": "open_items_by_counterparty_or_contract",
                "selected_recipe": "address_open_items_by_party_or_contract_v1",
                "capability_id": "address_open_items_by_counterparty_or_contract",
                "capability_route_mode": "heuristic",
                "fallback_type": "none",
                "mcp_call_status": "matched_non_empty",
                "response_type": "FACTUAL_LIST",
                "result_mode": "confirmed_balance",
            },
        )

        self.assertEqual(state["execution_status"], "partial")
        self.assertTrue(state["runtime_factual_answer_validated"])
        self.assertEqual(state["acceptance_status"], "validated")

    def test_exact_confirmed_document_followup_sets_runtime_factual_validation(self) -> None:
        """An exact, confirmed document follow-up is validated as a factual answer."""
        state = _build_step_state(
            scenario_id="svk_pivot",
            domain="agentic_loop",
            step={
                "step_id": "s02_svk_docs",
                "title": "Counterparty documents follow-up",
                "depends_on": ["s01_svk_money"],
                "question_template": "show documents by this chain",
                "semantic_tags": ["counterparty", "documents", "scope_guard"],
                "required_answer_shape": "direct_answer_first",
            },
            step_index=2,
            analysis_context={"as_of_date": "2026-05-09"},
            reply_type="factual",
            text="Контрагент: Группа СВК. Найдено документов: 19.",
            debug_payload={
                "detected_mode": "address_query",
                "detected_intent": "list_documents_by_counterparty",
                "selected_recipe": "address_documents_by_counterparty_v1",
                "capability_id": "documents_drilldown",
                "capability_route_mode": "exact",
                "fallback_type": "none",
                "mcp_call_status": "matched_non_empty",
                "response_type": "FACTUAL_LIST",
                "truth_mode": "confirmed",
                "answer_shape": "confirmed_factual",
                "coverage_status": "full",
                "evidence_grade": "strong",
                "extracted_filters": {"counterparty": "Группа СВК", "as_of_date": "2026-05-09"},
                "focus_object": {
                    "object_type": "counterparty",
                    "object_id": "counterparty:группа свк",
                    "label": "Группа СВК",
                },
            },
            entries=[{"item": "2021-11-10T12:00:07Z"}],
        )

        self.assertEqual(state["execution_status"], "exact")
        self.assertTrue(state["runtime_factual_answer_validated"])
        self.assertEqual(state["acceptance_status"], "validated")

    def test_heuristic_open_items_guarded_insufficiency_validates_separately(self) -> None:
        """A limited answer that states its limitation validates as guarded insufficiency."""
        answer_text = (
            "\u041a\u043e\u0440\u043e\u0442\u043a\u043e: \u0442\u043e\u0447\u043d\u044b\u0439 "
            "\u043e\u0442\u043a\u0440\u044b\u0442\u044b\u0439 \u043e\u0441\u0442\u0430\u0442\u043e\u043a "
            "\u043f\u043e \u0441\u0447\u0435\u0442\u0443 60 \u043d\u0435 "
            "\u043f\u043e\u0434\u0442\u0432\u0435\u0440\u0436\u0434\u0435\u043d; \u043d\u0438\u0436\u0435 "
            "\u0442\u043e\u043b\u044c\u043a\u043e \u043f\u0440\u0435\u0434\u0432\u0430\u0440\u0438\u0442\u0435\u043b\u044c\u043d\u044b\u0435 "
            "\u0441\u0438\u0433\u043d\u0430\u043b\u044b \u043f\u043e \u0434\u0432\u0438\u0436\u0435\u043d\u0438\u044f\u043c: 8 "
            "\u0441\u0442\u0440\u043e\u043a.\n"
            "\u042d\u0442\u043e \u043d\u0435 \u043f\u043e\u0434\u0442\u0432\u0435\u0440\u0436\u0434\u0435\u043d\u043d\u043e\u0435 "
            "\u0441\u0430\u043b\u044c\u0434\u043e: \u0442\u0435\u043a\u0443\u0449\u0438\u0439 "
            "\u043a\u043e\u043d\u0442\u0443\u0440 \u0432\u0438\u0434\u0438\u0442 "
            "\u0434\u0432\u0438\u0436\u0435\u043d\u0438\u044f-\u043a\u0430\u043d\u0434\u0438\u0434\u0430\u0442\u044b, "
            "\u043d\u043e \u043d\u0435 \u0434\u043e\u043a\u0430\u0437\u044b\u0432\u0430\u0435\u0442 "
            "\u043e\u0441\u0442\u0430\u0442\u043e\u043a."
        )
        state = _build_step_state(
            scenario_id="runtime_factual_demo",
            domain="agentic_loop",
            step={
                "step_id": "step_01",
                "title": "Account 60 limited tails",
                "depends_on": [],
                "question_template": "show account 60 tails; say if exact data is unavailable",
                "required_answer_shape": "direct_answer_first",
            },
            reply_type="factual",
            text=answer_text,
            debug_payload={
                "detected_mode": "address_query",
                "detected_intent": "open_items_by_counterparty_or_contract",
                "selected_recipe": "address_open_items_by_party_or_contract_v1",
                "capability_id": "address_open_items_by_counterparty_or_contract",
                "capability_route_mode": "heuristic",
                "fallback_type": "none",
                "mcp_call_status": "matched_non_empty",
                "response_type": "FACTUAL_LIST",
                "result_mode": "heuristic_candidates",
                "balance_confirmed": False,
                "truth_mode": "limited",
                "answer_shape": "limited_with_reason",
            },
        )

        self.assertEqual(state["execution_status"], "partial")
        self.assertEqual(state["truth_mode"], "limited")
        self.assertEqual(state["answer_shape"], "limited_with_reason")
        self.assertFalse(state["runtime_factual_answer_validated"])
        self.assertTrue(state["guarded_insufficiency_validated"])
        self.assertEqual(state["acceptance_status"], "validated")

    def test_heuristic_open_items_without_limitation_is_rejected(self) -> None:
        """A limited result presented without any stated limitation is rejected."""
        state = _build_step_state(
            scenario_id="runtime_factual_demo",
            domain="agentic_loop",
            step={
                "step_id": "step_01",
                "title": "Account 60 unguarded tails",
                "depends_on": [],
                "question_template": "show account 60 tails",
                "required_answer_shape": "direct_answer_first",
            },
            reply_type="factual",
            text="Short: account 60 has 8 open-item rows and 6 counterparties.",
            debug_payload={
                "detected_mode": "address_query",
                "detected_intent": "open_items_by_counterparty_or_contract",
                "selected_recipe": "address_open_items_by_party_or_contract_v1",
                "capability_id": "address_open_items_by_counterparty_or_contract",
                "capability_route_mode": "heuristic",
                "fallback_type": "none",
                "mcp_call_status": "matched_non_empty",
                "response_type": "FACTUAL_LIST",
                "result_mode": "heuristic_candidates",
                "balance_confirmed": False,
                "truth_mode": "limited",
                "answer_shape": "limited_with_reason",
            },
        )

        self.assertEqual(state["execution_status"], "partial")
        self.assertFalse(state["runtime_factual_answer_validated"])
        self.assertFalse(state["guarded_insufficiency_validated"])
        self.assertEqual(state["acceptance_status"], "rejected")

    def test_truth_harness_warns_on_catalog_alignment_divergence(self) -> None:
        """A selected chain outside the match set produces a single warning finding."""
        verdict = _evaluate_truth_step(
            step={
                "step_id": "step_01",
                "question_template": "show planner alignment",
                "criticality": "critical",
                "allowed_reply_types": [],
            },
            step_state={
                "question_resolved": "show planner alignment",
                "reply_type": "factual",
                "assistant_text": "Confirmed answer",
                "actual_direct_answer": "Confirmed answer",
                "detected_intent": "counterparty_turnover",
                "selected_recipe": "counterparty_turnover_by_period",
                "capability_id": "confirmed_counterparty_turnover",
                "mcp_discovery_catalog_chain_alignment_status": "selected_outside_match_set",
                "mcp_discovery_catalog_chain_top_match": "value_flow_comparison",
                "mcp_discovery_catalog_chain_selected_matches_top": False,
                "extracted_filters": {},
            },
        )

        self.assertEqual(verdict["review_status"], "warning")
        self.assertEqual(verdict["warning_findings_count"], 1)
        first_finding = verdict["review_findings"][0]
        self.assertEqual(first_finding["code"], "catalog_alignment_divergence")
        self.assertEqual(first_finding["severity"], "warning")

    def test_truth_harness_checks_expected_catalog_alignment_fields(self) -> None:
        """A top match differing from the expected one fails the review."""
        verdict = _evaluate_truth_step(
            step={
                "step_id": "step_01",
                "question_template": "show planner alignment",
                "criticality": "critical",
                "allowed_reply_types": [],
                "expected_catalog_alignment_status": "selected_matches_top",
                "expected_catalog_chain_top_match": "value_flow_comparison",
                "expected_catalog_selected_matches_top": True,
            },
            step_state={
                "question_resolved": "show planner alignment",
                "reply_type": "factual",
                "assistant_text": "Confirmed answer",
                "actual_direct_answer": "Confirmed answer",
                "detected_intent": "counterparty_turnover",
                "selected_recipe": "counterparty_turnover_by_period",
                "capability_id": "confirmed_counterparty_turnover",
                "mcp_discovery_catalog_chain_alignment_status": "selected_matches_top",
                # Differs from the expected "value_flow_comparison".
                "mcp_discovery_catalog_chain_top_match": "value_flow",
                "mcp_discovery_catalog_chain_selected_matches_top": True,
                "extracted_filters": {},
            },
        )

        self.assertEqual(verdict["review_status"], "fail")
        self.assertEqual(verdict["critical_findings_count"], 1)
        self.assertEqual(verdict["review_findings"][0]["code"], "wrong_catalog_chain_top_match")

    def test_business_first_review_flags_dirty_direct_answer_surface(self) -> None:
        """A generic, heavily padded reply fails the business-first review."""
        state = _top_year_step_state(_VERBOSE_OVERVIEW_TEXT)

        review = state["business_first_review"]
        self.assertFalse(review["direct_answer_first_ok"])
        self.assertFalse(review["business_usefulness_ok"])
        for issue_code in (
            "business_direct_answer_missing",
            "answer_layering_noise",
            "business_answer_too_verbose",
        ):
            self.assertIn(issue_code, review["issue_codes"])
        self.assertIn("business_direct_answer_missing", state["violated_invariants"])

    def test_business_first_review_accepts_compact_direct_answer_surface(self) -> None:
        """A short reply that leads with the answer passes with no issue codes."""
        state = _top_year_step_state(
            "Коротко: самый доходный год в доступном денежном контуре 1С — 2015: 136 723 459,73 руб.\n"
            "Метод: считаю по подтвержденным входящим поступлениям."
        )

        review = state["business_first_review"]
        self.assertTrue(review["direct_answer_first_ok"])
        self.assertTrue(review["business_usefulness_ok"])
        self.assertEqual(review["issue_codes"], [])

    def test_business_first_review_separates_direct_answer_from_later_technical_leak(self) -> None:
        """A direct answer followed by a technical line is flagged only for the leak."""
        state = _top_year_step_state(
            "2015 \u2014 \u0441\u0430\u043c\u044b\u0439 \u0434\u043e\u0445\u043e\u0434\u043d\u044b\u0439 \u0433\u043e\u0434 \u043f\u043e \u043f\u043e\u0434\u0442\u0432\u0435\u0440\u0436\u0434\u0435\u043d\u043d\u044b\u043c \u0432\u0445\u043e\u0434\u044f\u0449\u0438\u043c \u0434\u0435\u043d\u044c\u0433\u0430\u043c.\n"
            "service: capability_id=business_overview_route_template_v1"
        )

        review = state["business_first_review"]
        self.assertTrue(review["direct_answer_first_ok"])
        self.assertTrue(review["technical_garbage_present"])
        self.assertIn("technical_garbage_in_answer", review["issue_codes"])
        self.assertNotIn("business_direct_answer_missing", review["issue_codes"])

    def test_truth_harness_promotes_business_review_issues_to_findings(self) -> None:
        """Business review issue codes surface as ``business_review:`` findings."""
        state = _top_year_step_state(_VERBOSE_OVERVIEW_TEXT)

        verdict = _evaluate_truth_step(
            step={
                "step_id": "step_01",
                "question_template": _TOP_YEAR_QUESTION,
                "criticality": "critical",
                "allowed_reply_types": [],
            },
            step_state=state,
        )

        finding_codes = [finding["code"] for finding in verdict["review_findings"]]
        self.assertIn("business_review:business_direct_answer_missing", finding_codes)
        self.assertIn("business_review:answer_layering_noise", finding_codes)
        self.assertEqual(verdict["review_status"], "fail")


if __name__ == "__main__":
    unittest.main()