NODEDC_1C/scripts/test_domain_case_loop_step_...

976 lines
47 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from __future__ import annotations
import sys
import unittest
from pathlib import Path
sys.path.insert(0, str(Path(__file__).resolve().parent))
import domain_case_loop as dcl
import domain_truth_harness as dth
class DomainCaseLoopStepStateTests(unittest.TestCase):
    """Checks for ``domain_case_loop`` step state and ``domain_truth_harness`` review.

    The tests feed hand-written turn artifacts into
    ``dcl.build_scenario_step_state`` (and related ``dcl`` / ``dth`` entry
    points) and assert on selected keys of the returned mappings.

    Fixture builders below always return freshly constructed dicts/lists so no
    state is shared between tests, and they insert keys in a fixed order
    (core keys first, per-test extras last).
    """

    _TOP_YEAR_QUESTION = "какой у нас самый доходный год"
    _NOISY_OVERVIEW_TEXT = (
        "Коротко: Ограниченный бизнес-обзор по подтвержденным строкам 1С. " + ("лишний текст " * 220)
    )

    # ------------------------------------------------------------------
    # Fixture builders (not collected as tests: names do not start with "test").
    # ------------------------------------------------------------------

    @staticmethod
    def _step_state(
        *,
        scenario_id: str,
        domain: str,
        title: str,
        question: str,
        reply_type: str,
        text: str,
        debug: dict[str, object] | None = None,
        step_extra: dict[str, object] | None = None,
        step_id: str = "step_01",
        depends_on: tuple[str, ...] | list[str] = (),
        step_index: int = 1,
        analysis_context: dict[str, object] | None = None,
        session_summary: dict[str, object] | None = None,
        entries: list[object] | None = None,
    ):
        """Call ``dcl.build_scenario_step_state`` with a single-message turn artifact.

        ``question`` is used both as the step's ``question_template`` and as
        ``question_resolved``. The assistant message always carries
        ``message_id="msg-1"`` and ``trace_id="trace-1"``. ``step_extra`` keys
        are appended after the four core step keys. Omitted mappings and
        ``entries`` default to new empty containers.
        """
        step_spec: dict[str, object] = {
            "step_id": step_id,
            "title": title,
            "depends_on": list(depends_on),
            "question_template": question,
        }
        if step_extra:
            step_spec.update(step_extra)

        artifact = {
            "assistant_message": {
                "reply_type": reply_type,
                "text": text,
                "message_id": "msg-1",
                "trace_id": "trace-1",
            },
            "technical_debug_payload": {} if debug is None else debug,
            "session_summary": {} if session_summary is None else session_summary,
        }
        return dcl.build_scenario_step_state(
            scenario_id=scenario_id,
            domain=domain,
            step=step_spec,
            step_index=step_index,
            question_resolved=question,
            analysis_context={} if analysis_context is None else analysis_context,
            turn_artifact=artifact,
            entries=[] if entries is None else entries,
        )

    @classmethod
    def _top_year_state(cls, text: str):
        """Build the step state for the "most profitable year" question with ``text`` as the reply."""
        return cls._step_state(
            scenario_id="business_surface_demo",
            domain="business_overview",
            title="Top year",
            question=cls._TOP_YEAR_QUESTION,
            reply_type="partial_coverage",
            text=text,
        )

    @staticmethod
    def _truth_review(question: str, step_state, **expectations: object):
        """Run ``dth.evaluate_truth_step`` for a critical ``step_01`` with empty bindings.

        ``expectations`` are appended to the step definition after the core keys.
        """
        step_spec: dict[str, object] = {
            "step_id": "step_01",
            "question_template": question,
            "criticality": "critical",
            "allowed_reply_types": [],
        }
        step_spec.update(expectations)
        return dth.evaluate_truth_step(
            step=step_spec,
            step_state=step_state,
            step_results={},
            bindings={},
            runtime_bindings={},
        )

    @staticmethod
    def _route_candidate_repair_targets():
        """Build repair targets for one accepted step whose route candidate needs enablement."""
        step_output = {
            "status": "accepted",
            "acceptance_status": "accepted",
            "execution_status": "validated",
            "question_resolved": "можно ли построить анализ качества поставщиков за 2020?",
            "reply_type": "partial_coverage",
            "mcp_discovery_selected_chain_id": "business_overview",
            "mcp_discovery_route_candidate_status": "needs_route_enablement",
            "mcp_discovery_route_candidate_fact_family": "supplier_quality",
            "mcp_discovery_route_candidate_action_family": "risk_or_quality_analysis",
            "mcp_discovery_route_candidate_missing_axes": [],
            "mcp_discovery_route_candidate_executable_now": False,
            "mcp_discovery_route_candidate_enablement_reason": "Reviewed supplier-quality route is not wired yet.",
            "mcp_discovery_route_candidate_next_action": "Enable a reviewed supplier-quality route before claiming this fact.",
        }
        scenario = {
            "scenario_id": "route_candidate_demo",
            "title": "Route candidate demo",
            "artifact_dir": "artifacts/domain_runs/route_candidate_demo",
            "scenario_state": {"step_outputs": {"step_01": step_output}},
        }
        pack = {"pack_id": "route_candidate_pack", "domain": "open_world", "final_status": "accepted"}
        return dcl.build_deterministic_repair_targets(pack, [scenario])

    @staticmethod
    def _applied_overview_debug(*, with_detected_route: bool = True, **extra: object) -> dict[str, object]:
        """Debug payload with an applied ``business_overview`` discovery chain.

        When ``with_detected_route`` is true the payload starts with the
        detected inventory-overlap route keys; ``extra`` keys go last.
        """
        payload: dict[str, object] = {}
        if with_detected_route:
            payload["detected_mode"] = "address_query"
            payload["detected_intent"] = "inventory_supplier_stock_overlap_as_of_date"
            payload["selected_recipe"] = "address_inventory_supplier_stock_overlap_as_of_date_v1"
            payload["capability_id"] = "inventory_inventory_supplier_stock_overlap_as_of_date"
        payload["mcp_discovery_response_applied"] = True
        payload["mcp_discovery_selected_chain_id"] = "business_overview"
        payload["mcp_discovery_catalog_chain_top_match"] = "business_overview"
        payload["mcp_discovery_response_candidate_v1"] = {
            "candidate_status": "ready_for_guarded_use",
            "reply_type": "partial_coverage",
        }
        payload.update(extra)
        return payload

    @staticmethod
    def _open_items_debug(result_mode: str, **extra: object) -> dict[str, object]:
        """Debug payload for the heuristic open-items route; ``extra`` keys follow ``result_mode``."""
        payload: dict[str, object] = {
            "detected_mode": "address_query",
            "detected_intent": "open_items_by_counterparty_or_contract",
            "selected_recipe": "address_open_items_by_party_or_contract_v1",
            "capability_id": "address_open_items_by_counterparty_or_contract",
            "capability_route_mode": "heuristic",
            "fallback_type": "none",
            "mcp_call_status": "matched_non_empty",
            "response_type": "FACTUAL_LIST",
            "result_mode": result_mode,
        }
        payload.update(extra)
        return payload

    # ------------------------------------------------------------------
    # MCP discovery fields, repair targets and lead handoff.
    # ------------------------------------------------------------------

    def test_preserves_mcp_catalog_alignment_debug_fields(self) -> None:
        """Catalog-alignment and route-candidate debug fields are copied into the step state."""
        state = self._step_state(
            scenario_id="planner_alignment_demo",
            domain="planner_autonomy",
            title="Alignment visibility",
            question="show planner alignment",
            reply_type="factual",
            text="Confirmed answer",
            debug={
                "detected_mode": "address_query",
                "detected_intent": "counterparty_turnover",
                "selected_recipe": "counterparty_turnover_by_period",
                "capability_id": "confirmed_counterparty_turnover",
                "mcp_discovery_catalog_chain_alignment_status": "selected_matches_top",
                "mcp_discovery_catalog_chain_top_match": "value_flow",
                "mcp_discovery_catalog_chain_selected_matches_top": True,
                "mcp_discovery_route_candidate_status": "needs_user_scope",
                "mcp_discovery_route_candidate_fact_family": "value_flow",
                "mcp_discovery_route_candidate_action_family": "turnover",
                "mcp_discovery_route_candidate_missing_axes": ["organization"],
                "mcp_discovery_route_candidate_provided_axes": ["period"],
                "mcp_discovery_route_candidate_executable_now": False,
                "mcp_discovery_route_candidate_enablement_reason": "Missing scope axes: organization",
                "mcp_discovery_route_candidate_next_action": "Ask the user for the missing scope axes before MCP execution.",
            },
        )

        self.assertEqual(state["mcp_discovery_catalog_chain_alignment_status"], "selected_matches_top")
        self.assertEqual(state["mcp_discovery_catalog_chain_top_match"], "value_flow")
        self.assertTrue(state["mcp_discovery_catalog_chain_selected_matches_top"])
        self.assertEqual(state["mcp_discovery_route_candidate_status"], "needs_user_scope")
        self.assertEqual(state["mcp_discovery_route_candidate_missing_axes"], ["organization"])
        self.assertEqual(state["mcp_discovery_route_candidate_provided_axes"], ["period"])
        self.assertFalse(state["mcp_discovery_route_candidate_executable_now"])

    def test_repair_targets_promote_route_candidate_enablement_gaps(self) -> None:
        """A ``needs_route_enablement`` candidate yields exactly one P1 repair target."""
        targets = self._route_candidate_repair_targets()

        self.assertEqual(targets["target_count"], 1)
        self.assertEqual(targets["severity_counts"]["P1"], 1)
        first = targets["targets"][0]
        self.assertEqual(first["problem_type"], "route_candidate_enablement_gap")
        self.assertEqual(first["target_source"], "route_candidate_enablement")
        self.assertEqual(first["route_candidate"]["candidate_status"], "needs_route_enablement")
        self.assertEqual(targets["route_candidate_status_counts"], {"needs_route_enablement": 1})
        self.assertEqual(targets["route_candidate_groups"][0]["selected_chain_id"], "business_overview")

    def test_lead_handoff_surfaces_route_candidate_groups(self) -> None:
        """The lead-coder handoff and its markdown expose the route-candidate groups."""
        targets = self._route_candidate_repair_targets()
        pack_dir = Path("artifacts/domain_runs/route_candidate_pack")

        handoff = dcl.build_lead_coder_handoff(
            loop_state={"loop_id": "route_candidate_loop"},
            iteration_id="iteration_00",
            pack_dir=pack_dir,
            analyst_verdict_path=pack_dir / "analyst_verdict.json",
            repair_targets_path=pack_dir / "repair_targets.json",
            business_audit_path=pack_dir / "business_audit.md",
            analyst_verdict={"quality_score": 75},
            repair_targets=targets,
            target_score=88,
            loop_decision="needs_exact_capability",
            analyst_accepted_gate=False,
            accepted_gate=False,
            deterministic_gate_ok=False,
            deterministic_gate_reason="repair_targets_remaining=P0:0,P1:1",
            requires_user_decision=False,
            user_decision_type="none",
            user_decision_prompt=None,
        )
        rendered = dcl.build_lead_coder_handoff_markdown(handoff)

        self.assertEqual(handoff["route_candidate_groups"][0]["candidate_status"], "needs_route_enablement")
        self.assertEqual(
            handoff["route_candidate_enablement_targets"][0]["problem_type"],
            "route_candidate_enablement_gap",
        )
        self.assertIn("## Route Candidate Handoff Groups", rendered)
        self.assertIn("route_candidate_demo:step_01", rendered)

    # ------------------------------------------------------------------
    # Date scope handling.
    # ------------------------------------------------------------------

    def test_analysis_context_date_is_not_implicit_business_filter(self) -> None:
        """An ``as_of_date`` in the analysis context alone raises no date invariants."""
        state = self._step_state(
            scenario_id="stage_pack_demo",
            domain="agentic_loop",
            title="All-time summary",
            question="all-time money summary",
            analysis_context={"as_of_date": "2026-05-09", "source": "stage_pack"},
            reply_type="factual_with_explanation",
            text="Short: all-time confirmed money summary.",
        )

        self.assertNotIn("missing_required_filter", state["violated_invariants"])
        self.assertNotIn("wrong_as_of_date", state["violated_invariants"])

    def test_analysis_context_date_is_required_for_explicit_date_carryover(self) -> None:
        """With a ``date_scope`` carryover invariant, a mismatching extracted date is flagged."""
        state = self._step_state(
            scenario_id="date_carryover_demo",
            domain="inventory",
            title="Date carryover",
            question="stock on that date",
            step_extra={"required_carryover_invariants": ["date_scope"]},
            analysis_context={"as_of_date": "2021-03-31"},
            reply_type="factual",
            text="Short: stock confirmed.",
            debug={
                "detected_mode": "address_query",
                "detected_intent": "inventory_on_hand_as_of_date",
                "selected_recipe": "address_inventory_on_hand_as_of_date_v1",
                "capability_id": "confirmed_inventory_on_hand_as_of_date",
                "capability_route_mode": "exact",
                "fallback_type": "none",
                "extracted_filters": {"as_of_date": "2020-03-31"},
            },
        )

        self.assertIn("wrong_as_of_date", state["violated_invariants"])

    def test_temporal_reset_question_skips_carried_date_scope(self) -> None:
        """All-time phrasings reset temporal scope; carryover without date scope keeps the organization."""
        for phrase in (
            "show money za all time",
            "сколько всего денег за все доступное время",
        ):
            self.assertTrue(dcl.question_resets_temporal_scope(phrase))

        previous_state = {
            "semantic_memory": {
                "date_scope": {
                    "as_of_date": "2020-12-31",
                    "period_from": "2020-10-01",
                    "period_to": "2020-12-31",
                },
                "organization_scope": {"label": "ООО Альтернатива Плюс"},
            }
        }
        carried = dcl.carry_forward_analysis_context(
            previous_state,
            {},
            prefer_carryover=True,
            carry_date_scope=False,
        )

        self.assertNotIn("as_of_date", carried)
        self.assertEqual(carried["organization_scope"], {"label": "ООО Альтернатива Плюс"})

    def test_merge_scenario_date_scope_keeps_current_scope_over_stale_previous(self) -> None:
        """The current-turn date scope wins over the carried-over one."""
        stale_scope = {
            "as_of_date": "2020-12-31",
            "period_from": "2020-10-01",
            "period_to": "2020-12-31",
            "source": "scenario_state_carryover",
        }
        current_scope = {
            "as_of_date": "2021-03-31",
            "period_from": "2021-03-01",
            "period_to": "2021-03-31",
            "source": "current_turn",
        }

        merged = dcl.merge_scenario_date_scope(stale_scope, current_scope, depends_on=["previous_step"])

        self.assertEqual(merged["as_of_date"], "2021-03-31")
        self.assertEqual(merged["source"], "current_turn")

    def test_mcp_business_overview_all_time_scope_overrides_stale_session_date(self) -> None:
        """A discovery overview with ``period_scope=None`` yields an all-time date scope."""
        stale_session = {
            "address_navigation_state": {
                "session_context": {
                    "active_result_set_id": "rs-stale",
                    "date_scope": {
                        "as_of_date": "2020-12-31",
                        "period_from": "2020-10-01",
                        "period_to": "2020-12-31",
                    },
                }
            }
        }
        state = self._step_state(
            scenario_id="business_overview_demo",
            domain="agentic_loop",
            title="All-time money",
            depends_on=["previous_step"],
            question="all-time money summary",
            step_extra={"expected_intents": ["business_overview"]},
            reply_type="partial_coverage",
            text="Short: all-time confirmed money summary.",
            debug=self._applied_overview_debug(
                assistant_mcp_discovery_entry_point_v1={
                    "bridge": {"pilot": {"derived_business_overview": {"period_scope": None}}}
                },
            ),
            session_summary=stale_session,
        )

        self.assertEqual(state["date_scope"]["scope"], "all_time")
        self.assertIsNone(state["date_scope"]["as_of_date"])
        self.assertEqual(state["active_result_set_id"], "mcp-discovery-msg-1")
        self.assertNotIn("wrong_date_scope_state", state["violated_invariants"])

    # ------------------------------------------------------------------
    # Validation of bounded / memory / runtime-factual answers.
    # ------------------------------------------------------------------

    def test_applied_ready_mcp_discovery_chain_satisfies_expected_intent(self) -> None:
        """An applied, ready discovery chain counts as the effective intent."""
        state = self._step_state(
            scenario_id="business_overview_demo",
            domain="agentic_loop",
            title="Business overview",
            question="business overview for 2020",
            step_extra={"expected_intents": ["business_overview"]},
            reply_type="partial_coverage",
            text="Short: business overview from confirmed 1C rows.",
            debug=self._applied_overview_debug(),
        )

        self.assertEqual(state["mcp_discovery_effective_intents"], ["business_overview"])
        self.assertNotIn("wrong_intent", state["violated_invariants"])

    def test_ready_bounded_mcp_answer_can_validate_without_exact_route(self) -> None:
        """A ready bounded discovery answer is validated with ``partial`` execution status."""
        state = self._step_state(
            scenario_id="business_overview_demo",
            domain="agentic_loop",
            title="Business overview",
            question="business overview for 2020",
            step_extra={
                "expected_intents": ["business_overview"],
                "required_answer_shape": "direct_answer_first",
            },
            reply_type="partial_coverage",
            text="Short: confirmed bounded business overview from 1C rows.",
            debug=self._applied_overview_debug(),
        )

        self.assertEqual(state["execution_status"], "partial")
        self.assertTrue(state["bounded_mcp_answer_validated"])
        self.assertEqual(state["acceptance_status"], "validated")

    def test_required_answer_patterns_block_generic_bounded_mcp_summary(self) -> None:
        """A bounded answer missing a required pattern is rejected."""
        state = self._step_state(
            scenario_id="summary_demo",
            domain="agentic_loop",
            title="Summary",
            question="summarize company and SVK separately",
            step_extra={
                "required_answer_shape": "direct_answer_first",
                "required_answer_patterns_all": ["SVK", "company"],
            },
            reply_type="partial_coverage",
            text="Short: company money summary only.",
            debug=self._applied_overview_debug(with_detected_route=False),
        )

        self.assertIn("required_answer_patterns_all_missing", state["violated_invariants"])
        self.assertFalse(state["bounded_mcp_answer_validated"])
        self.assertEqual(state["acceptance_status"], "rejected")

    def test_memory_checkpoint_can_validate_honest_no_scope_answer(self) -> None:
        """A memory/scope-guard step accepts an honest "nothing selected" reply."""
        state = self._step_state(
            scenario_id="memory_demo",
            domain="agentic_loop",
            title="Memory checkpoint",
            question="is any company or counterparty selected in the current dialog?",
            step_extra={
                "semantic_tags": ["memory", "scope_guard"],
                "required_answer_shape": "direct_answer_first",
            },
            reply_type="partial_coverage",
            text="В текущем диалоге не выбрана компания или контрагент; память не выдумываю.",
            debug={
                "detected_mode": "address_query",
                "detected_intent": "customer_revenue_and_payments",
                "fallback_type": "no_rows",
            },
        )

        self.assertEqual(state["execution_status"], "partial")
        self.assertTrue(state["memory_checkpoint_validated"])
        self.assertEqual(state["acceptance_status"], "validated")

    def test_deterministic_chat_memory_checkpoint_validates_without_exact_capability(self) -> None:
        """A chat-mode deterministic memory recap validates the memory checkpoint."""
        recap_text = (
            "Коротко: в текущем диалоге я не вижу выбранной компании, контрагента или позиции. "
            "Память про «Группа СВК» в этом диалоге не подтверждена."
        )
        state = self._step_state(
            scenario_id="memory_demo",
            domain="agentic_loop",
            title="Memory checkpoint",
            question="current dialog memory checkpoint",
            step_extra={
                "semantic_tags": ["memory", "scope_guard"],
                "required_answer_shape": "direct_answer_first",
            },
            reply_type="factual_with_explanation",
            text=recap_text,
            debug={
                "detected_mode": "chat",
                "fallback_type": "none",
                "living_router_reason": "memory_recap_followup_detected",
                "living_chat_response_source": "deterministic_memory_recap_contract",
            },
        )

        self.assertEqual(state["execution_status"], "partial")
        self.assertTrue(state["memory_checkpoint_validated"])
        self.assertEqual(state["acceptance_status"], "validated")

    def test_confirmed_runtime_factual_answer_can_validate_without_exact_route_mode(self) -> None:
        """A heuristic-route answer with ``confirmed_balance`` result mode is validated."""
        state = self._step_state(
            scenario_id="runtime_factual_demo",
            domain="agentic_loop",
            title="Account 60 tails",
            question="show account 60 tails",
            step_extra={"required_answer_shape": "direct_answer_first"},
            reply_type="factual",
            text="Коротко: по счету 60 найдено 8 строк хвостов; контрагентов с сигналом: 6.",
            debug=self._open_items_debug("confirmed_balance"),
        )

        self.assertEqual(state["execution_status"], "partial")
        self.assertTrue(state["runtime_factual_answer_validated"])
        self.assertEqual(state["acceptance_status"], "validated")

    def test_exact_confirmed_document_followup_sets_runtime_factual_validation(self) -> None:
        """An exact-route confirmed document follow-up is validated as a runtime factual answer."""
        state = self._step_state(
            scenario_id="svk_pivot",
            domain="agentic_loop",
            step_id="s02_svk_docs",
            title="Counterparty documents follow-up",
            depends_on=["s01_svk_money"],
            question="show documents by this chain",
            step_extra={
                "semantic_tags": ["counterparty", "documents", "scope_guard"],
                "required_answer_shape": "direct_answer_first",
            },
            step_index=2,
            analysis_context={"as_of_date": "2026-05-09"},
            reply_type="factual",
            text="Контрагент: Группа СВК. Найдено документов: 19.",
            debug={
                "detected_mode": "address_query",
                "detected_intent": "list_documents_by_counterparty",
                "selected_recipe": "address_documents_by_counterparty_v1",
                "capability_id": "documents_drilldown",
                "capability_route_mode": "exact",
                "fallback_type": "none",
                "mcp_call_status": "matched_non_empty",
                "response_type": "FACTUAL_LIST",
                "truth_mode": "confirmed",
                "answer_shape": "confirmed_factual",
                "coverage_status": "full",
                "evidence_grade": "strong",
                "extracted_filters": {"counterparty": "Группа СВК", "as_of_date": "2026-05-09"},
                "focus_object": {
                    "object_type": "counterparty",
                    "object_id": "counterparty:группа свк",
                    "label": "Группа СВК",
                },
            },
            entries=[{"item": "2021-11-10T12:00:07Z"}],
        )

        self.assertEqual(state["execution_status"], "exact")
        self.assertTrue(state["runtime_factual_answer_validated"])
        self.assertEqual(state["acceptance_status"], "validated")

    def test_heuristic_open_items_guarded_insufficiency_validates_separately(self) -> None:
        """A limited heuristic answer that states its limitation validates as guarded insufficiency."""
        answer_text = (
            "\u041a\u043e\u0440\u043e\u0442\u043a\u043e: \u0442\u043e\u0447\u043d\u044b\u0439 "
            "\u043e\u0442\u043a\u0440\u044b\u0442\u044b\u0439 \u043e\u0441\u0442\u0430\u0442\u043e\u043a "
            "\u043f\u043e \u0441\u0447\u0435\u0442\u0443 60 \u043d\u0435 "
            "\u043f\u043e\u0434\u0442\u0432\u0435\u0440\u0436\u0434\u0435\u043d; \u043d\u0438\u0436\u0435 "
            "\u0442\u043e\u043b\u044c\u043a\u043e \u043f\u0440\u0435\u0434\u0432\u0430\u0440\u0438\u0442\u0435\u043b\u044c\u043d\u044b\u0435 "
            "\u0441\u0438\u0433\u043d\u0430\u043b\u044b \u043f\u043e \u0434\u0432\u0438\u0436\u0435\u043d\u0438\u044f\u043c: 8 "
            "\u0441\u0442\u0440\u043e\u043a.\n"
            "\u042d\u0442\u043e \u043d\u0435 \u043f\u043e\u0434\u0442\u0432\u0435\u0440\u0436\u0434\u0435\u043d\u043d\u043e\u0435 "
            "\u0441\u0430\u043b\u044c\u0434\u043e: \u0442\u0435\u043a\u0443\u0449\u0438\u0439 "
            "\u043a\u043e\u043d\u0442\u0443\u0440 \u0432\u0438\u0434\u0438\u0442 "
            "\u0434\u0432\u0438\u0436\u0435\u043d\u0438\u044f-\u043a\u0430\u043d\u0434\u0438\u0434\u0430\u0442\u044b, "
            "\u043d\u043e \u043d\u0435 \u0434\u043e\u043a\u0430\u0437\u044b\u0432\u0430\u0435\u0442 "
            "\u043e\u0441\u0442\u0430\u0442\u043e\u043a."
        )
        state = self._step_state(
            scenario_id="runtime_factual_demo",
            domain="agentic_loop",
            title="Account 60 limited tails",
            question="show account 60 tails; say if exact data is unavailable",
            step_extra={"required_answer_shape": "direct_answer_first"},
            reply_type="factual",
            text=answer_text,
            debug=self._open_items_debug(
                "heuristic_candidates",
                balance_confirmed=False,
                truth_mode="limited",
                answer_shape="limited_with_reason",
            ),
        )

        self.assertEqual(state["execution_status"], "partial")
        self.assertEqual(state["truth_mode"], "limited")
        self.assertEqual(state["answer_shape"], "limited_with_reason")
        self.assertFalse(state["runtime_factual_answer_validated"])
        self.assertTrue(state["guarded_insufficiency_validated"])
        self.assertEqual(state["acceptance_status"], "validated")

    def test_heuristic_open_items_without_limitation_is_rejected(self) -> None:
        """The same limited heuristic payload with an unguarded answer text is rejected."""
        state = self._step_state(
            scenario_id="runtime_factual_demo",
            domain="agentic_loop",
            title="Account 60 unguarded tails",
            question="show account 60 tails",
            step_extra={"required_answer_shape": "direct_answer_first"},
            reply_type="factual",
            text="Short: account 60 has 8 open-item rows and 6 counterparties.",
            debug=self._open_items_debug(
                "heuristic_candidates",
                balance_confirmed=False,
                truth_mode="limited",
                answer_shape="limited_with_reason",
            ),
        )

        self.assertEqual(state["execution_status"], "partial")
        self.assertFalse(state["runtime_factual_answer_validated"])
        self.assertFalse(state["guarded_insufficiency_validated"])
        self.assertEqual(state["acceptance_status"], "rejected")

    # ------------------------------------------------------------------
    # Truth harness review.
    # ------------------------------------------------------------------

    def test_truth_harness_warns_on_catalog_alignment_divergence(self) -> None:
        """A selection outside the catalog match set produces a single warning finding."""
        observed = {
            "question_resolved": "show planner alignment",
            "reply_type": "factual",
            "assistant_text": "Confirmed answer",
            "actual_direct_answer": "Confirmed answer",
            "detected_intent": "counterparty_turnover",
            "selected_recipe": "counterparty_turnover_by_period",
            "capability_id": "confirmed_counterparty_turnover",
            "mcp_discovery_catalog_chain_alignment_status": "selected_outside_match_set",
            "mcp_discovery_catalog_chain_top_match": "value_flow_comparison",
            "mcp_discovery_catalog_chain_selected_matches_top": False,
            "extracted_filters": {},
        }

        outcome = self._truth_review("show planner alignment", observed)

        self.assertEqual(outcome["review_status"], "warning")
        self.assertEqual(outcome["warning_findings_count"], 1)
        self.assertEqual(outcome["review_findings"][0]["code"], "catalog_alignment_divergence")
        self.assertEqual(outcome["review_findings"][0]["severity"], "warning")

    def test_truth_harness_checks_expected_catalog_alignment_fields(self) -> None:
        """A top-match different from the expected one fails the review with one critical finding."""
        observed = {
            "question_resolved": "show planner alignment",
            "reply_type": "factual",
            "assistant_text": "Confirmed answer",
            "actual_direct_answer": "Confirmed answer",
            "detected_intent": "counterparty_turnover",
            "selected_recipe": "counterparty_turnover_by_period",
            "capability_id": "confirmed_counterparty_turnover",
            "mcp_discovery_catalog_chain_alignment_status": "selected_matches_top",
            "mcp_discovery_catalog_chain_top_match": "value_flow",
            "mcp_discovery_catalog_chain_selected_matches_top": True,
            "extracted_filters": {},
        }

        outcome = self._truth_review(
            "show planner alignment",
            observed,
            expected_catalog_alignment_status="selected_matches_top",
            expected_catalog_chain_top_match="value_flow_comparison",
            expected_catalog_selected_matches_top=True,
        )

        self.assertEqual(outcome["review_status"], "fail")
        self.assertEqual(outcome["critical_findings_count"], 1)
        self.assertEqual(outcome["review_findings"][0]["code"], "wrong_catalog_chain_top_match")

    def test_truth_harness_checks_expected_route_candidate_fields(self) -> None:
        """Missing an expected route-candidate axis fails the review with one critical finding."""
        observed = {
            "question_resolved": "show route candidate",
            "reply_type": "clarification_required",
            "assistant_text": "Please choose organization.",
            "actual_direct_answer": "Please choose organization.",
            "detected_intent": "counterparty_turnover",
            "selected_recipe": None,
            "capability_id": None,
            "mcp_discovery_route_candidate_status": "needs_user_scope",
            "mcp_discovery_route_candidate_missing_axes": ["organization"],
            "mcp_discovery_route_candidate_executable_now": False,
            "extracted_filters": {},
        }

        outcome = self._truth_review(
            "show route candidate",
            observed,
            expected_route_candidate_status="needs_user_scope",
            expected_route_candidate_executable_now=False,
            expected_route_candidate_missing_axes=["organization", "period"],
        )

        self.assertEqual(outcome["review_status"], "fail")
        self.assertEqual(outcome["critical_findings_count"], 1)
        self.assertEqual(outcome["review_findings"][0]["code"], "missing_route_candidate_axes")

    # ------------------------------------------------------------------
    # Business-first answer surface review.
    # ------------------------------------------------------------------

    def test_business_first_review_flags_dirty_direct_answer_surface(self) -> None:
        """A generic, very long reply is flagged as missing a direct answer, noisy and verbose."""
        state = self._top_year_state(self._NOISY_OVERVIEW_TEXT)
        verdict = state["business_first_review"]

        self.assertFalse(verdict["direct_answer_first_ok"])
        self.assertFalse(verdict["business_usefulness_ok"])
        self.assertIn("business_direct_answer_missing", verdict["issue_codes"])
        self.assertIn("answer_layering_noise", verdict["issue_codes"])
        self.assertIn("business_answer_too_verbose", verdict["issue_codes"])
        self.assertIn("business_direct_answer_missing", state["violated_invariants"])

    def test_business_first_review_accepts_compact_direct_answer_surface(self) -> None:
        """A compact reply that leads with the answer raises no business-review issues."""
        state = self._top_year_state(
            "Коротко: самый доходный год в доступном денежном контуре 1С — 2015: 136 723 459,73 руб.\nМетод: считаю по подтвержденным входящим поступлениям."
        )
        verdict = state["business_first_review"]

        self.assertTrue(verdict["direct_answer_first_ok"])
        self.assertTrue(verdict["business_usefulness_ok"])
        self.assertEqual(verdict["issue_codes"], [])

    def test_business_first_review_separates_direct_answer_from_later_technical_leak(self) -> None:
        """A direct answer followed by a technical line keeps the direct-answer flag but reports the leak."""
        state = self._top_year_state(
            "2015 \u2014 \u0441\u0430\u043c\u044b\u0439 \u0434\u043e\u0445\u043e\u0434\u043d\u044b\u0439 \u0433\u043e\u0434 \u043f\u043e \u043f\u043e\u0434\u0442\u0432\u0435\u0440\u0436\u0434\u0435\u043d\u043d\u044b\u043c \u0432\u0445\u043e\u0434\u044f\u0449\u0438\u043c \u0434\u0435\u043d\u044c\u0433\u0430\u043c.\nservice: capability_id=business_overview_route_template_v1"
        )
        verdict = state["business_first_review"]

        self.assertTrue(verdict["direct_answer_first_ok"])
        self.assertTrue(verdict["technical_garbage_present"])
        self.assertIn("technical_garbage_in_answer", verdict["issue_codes"])
        self.assertNotIn("business_direct_answer_missing", verdict["issue_codes"])

    def test_truth_harness_promotes_business_review_issues_to_findings(self) -> None:
        """Business-review issue codes surface as ``business_review:``-prefixed findings."""
        state = self._top_year_state(self._NOISY_OVERVIEW_TEXT)

        outcome = self._truth_review(self._TOP_YEAR_QUESTION, state)
        finding_codes = [finding["code"] for finding in outcome["review_findings"]]

        self.assertIn("business_review:business_direct_answer_missing", finding_codes)
        self.assertIn("business_review:answer_layering_noise", finding_codes)
        self.assertEqual(outcome["review_status"], "fail")
# Run the suite only when this file is executed as a script, not on import.
if __name__ == "__main__":
    unittest.main()