# Tests for the helpers imported from scripts.domain_case_loop: analysis-context
# carry-over, scenario pack loading, acceptance matrix rendering, step contract
# validation, repair target construction and the deterministic loop gate.
from __future__ import annotations

import json
import sys
from pathlib import Path

# The parent of this file's directory must be on sys.path before the
# `scripts` package below can be imported.
sys.path.insert(0, str(Path(__file__).resolve().parents[1]))

from scripts.domain_case_loop import (
    build_deterministic_repair_targets,
    build_scenario_acceptance_matrix,
    carry_forward_analysis_context,
    derive_pack_final_status,
    evaluate_analyst_gate,
    evaluate_deterministic_loop_gate,
    load_scenario_pack,
    merge_scenario_date_scope,
    validate_step_contract,
)


def _anchored_scenario_state(as_of_date: str) -> dict:
    """Return a scenario state whose semantic memory holds only a date anchor."""
    return {
        "semantic_memory": {
            "date_scope": {"as_of_date": as_of_date},
        }
    }


def _write_contract(tmp_path, contract: dict) -> Path:
    """Serialize *contract* as pretty-printed UTF-8 JSON under *tmp_path*.

    Returns the path of the written ``active_domain_contract.json`` file.
    """
    target = tmp_path / "active_domain_contract.json"
    serialized = json.dumps(contract, ensure_ascii=False, indent=2)
    target.write_text(serialized + "\n", encoding="utf-8")
    return target


def _accepted_provenance_results() -> list:
    """Return a fresh result list with one accepted provenance scenario."""
    return [
        {
            "scenario_id": "inventory_selected_item_provenance",
            "final_status": "accepted",
            "session_id": "asst-demo",
            "artifact_dir": "artifacts/domain_runs/demo",
        }
    ]


def test_carry_forward_analysis_context_preserves_followup_anchor() -> None:
    """An analysis context that already has an anchor keeps its own values."""
    state = _anchored_scenario_state("2020-03-31")
    current_context = {"as_of_date": "2026-04-13", "source": "current_analysis"}

    result = carry_forward_analysis_context(state, current_context)

    assert result["as_of_date"] == "2026-04-13"
    assert result["source"] == "current_analysis"


def test_carry_forward_analysis_context_fills_missing_anchor() -> None:
    """An empty analysis context receives the anchor stored in the state."""
    state = _anchored_scenario_state("2020-03-31")

    result = carry_forward_analysis_context(state, {})

    assert result["as_of_date"] == "2020-03-31"
    assert result["source"] == "scenario_state_carryover"


def test_merge_scenario_date_scope_preserves_historical_anchor_on_followup() -> None:
    """A dependent step keeps the earlier date but reports the current source."""
    earlier_scope = {"as_of_date": "2020-03-31", "source": "exact_anchor"}
    later_scope = {"as_of_date": "2026-04-13", "source": "current_analysis"}

    result = merge_scenario_date_scope(
        earlier_scope,
        later_scope,
        depends_on=["step_01_anchor"],
    )

    assert result["as_of_date"] == "2020-03-31"
    assert result["source"] == "current_analysis"


def test_load_scenario_pack_accepts_active_domain_contract(tmp_path) -> None:
    """An active domain contract file loads as a domain scenario pack."""
    contract = {
        "schema_version": "active_domain_contract_v1",
        "status": "active",
        "domain_id": "inventory_stock_supplier_provenance",
        "runtime_domain": "inventory_stock",
        "title": "Warehouse domain",
        "default_analysis_context": {"as_of_date": "2021-09-30"},
        "observed_anchors": {
            "warehouse": "Основной склад",
            "organization": "ООО \\Альтернатива Плюс\\",
            "historical_as_of_date": "2019-03-31",
            "current_as_of_date_example": "2021-09-30",
            "focus_item_historical": "Столешница 600*3050*26 дуб ниагара",
        },
        "question_pool": {
            "questions": [
                {"question_id": "Q01", "node_id": "N01_stock_snapshot", "text": "Q1"},
                {"question_id": "Q19", "node_id": "N03_selected_item_supplier", "text": "Q19"},
            ]
        },
        "scenario_tree": {
            "critical_edges": [
                {
                    "edge_id": "E01_snapshot_to_selected_item_supplier",
                    "from_node": "N01_stock_snapshot",
                    "to_node": "N03_selected_item_supplier",
                    "primary_user_path": True,
                }
            ]
        },
        "orchestration_pack": {
            "pack_id": "inventory_active_contract_smoke",
            "scenarios": [
                {
                    "scenario_id": "inventory_selected_item_provenance",
                    "title": "Selected item provenance",
                    "question_ids": ["Q01", "Q19"],
                    "steps": [
                        {
                            "step_id": "step_01_snapshot",
                            "question_id": "Q01",
                            "node_id": "N01_stock_snapshot",
                            "question": "Какие товары сейчас лежат на складе",
                        },
                        {
                            "step_id": "step_02_supplier",
                            "question_id": "Q19",
                            "node_id": "N03_selected_item_supplier",
                            "question": "По выбранному объекту \"Столешница 600*3050*26 дуб ниагара\": кто это поставил нам",
                        },
                    ],
                }
            ],
        },
    }
    contract_file = _write_contract(tmp_path, contract)

    loaded = load_scenario_pack(contract_file)

    assert loaded["schema_version"] == "domain_scenario_pack_v1"
    assert loaded["source_schema_version"] == "active_domain_contract_v1"
    assert loaded["domain"] == "inventory_stock"

    bindings = loaded["bindings"]
    assert bindings["observed_warehouse"] == "Основной склад"
    assert bindings["focus_item_historical"] == "Столешница 600*3050*26 дуб ниагара"

    first_scenario = loaded["scenarios"][0]
    assert first_scenario["question_ids"] == ["Q01", "Q19"]
    assert first_scenario["steps"][1]["question_id"] == "Q19"


def test_load_scenario_pack_enriches_step_with_node_contract_defaults(tmp_path) -> None:
    """A step that names a critical node picks up that node's expectations."""
    contract = {
        "schema_version": "active_domain_contract_v1",
        "status": "active",
        "domain_id": "inventory_stock_supplier_provenance",
        "runtime_domain": "inventory_stock",
        "title": "Warehouse domain",
        "question_pool": {
            "questions": [
                {"question_id": "Q19", "node_id": "N03_selected_item_supplier", "text": "Q19"},
            ]
        },
        "scenario_tree": {
            "critical_nodes": [
                {
                    "node_id": "N03_selected_item_supplier",
                    "expected_intents": ["inventory_purchase_provenance_for_item"],
                    "expected_answer_shape": "direct_supplier_answer_first_then_evidence",
                    "required_carryover_invariants": ["focus_object", "date_scope"],
                }
            ]
        },
        "orchestration_pack": {
            "pack_id": "inventory_active_contract_smoke",
            "scenarios": [
                {
                    "scenario_id": "inventory_selected_item_provenance",
                    "title": "Selected item provenance",
                    "steps": [
                        {
                            "step_id": "step_02_supplier",
                            "question_id": "Q19",
                            "node_id": "N03_selected_item_supplier",
                            "question": "По выбранному объекту \"...\": кто это поставил нам",
                        }
                    ],
                }
            ],
        },
    }
    contract_file = _write_contract(tmp_path, contract)

    loaded = load_scenario_pack(contract_file)
    enriched_step = loaded["scenarios"][0]["steps"][0]

    assert enriched_step["expected_intents"] == ["inventory_purchase_provenance_for_item"]
    assert enriched_step["required_answer_shape"] == "direct_supplier_answer_first_then_evidence"
    for invariant in ("focus_object", "date_scope"):
        assert invariant in enriched_step["required_carryover_invariants"]


def test_build_scenario_acceptance_matrix_marks_green_edge_when_covering_scenario_is_accepted() -> None:
    """An accepted scenario covering an edge and a path renders both green."""
    pack = {
        "pack_id": "inventory_active_contract_smoke",
        "domain": "inventory_stock",
        "source_contract": {
            "domain_id": "inventory_stock_supplier_provenance",
            "title": "Warehouse domain",
        },
        "question_pool": {
            "questions": [
                {"question_id": "Q01", "node_id": "N01_stock_snapshot"},
                {"question_id": "Q19", "node_id": "N03_selected_item_supplier"},
            ]
        },
        "scenario_tree": {
            "critical_nodes": [
                {
                    "node_id": "N03_selected_item_supplier",
                    "covers_question_ids": ["Q19"],
                    "required_wording_families": ["canonical"],
                }
            ],
            "critical_edges": [
                {
                    "edge_id": "E01_snapshot_to_selected_item_supplier",
                    "from_node": "N01_stock_snapshot",
                    "to_node": "N03_selected_item_supplier",
                    "primary_user_path": True,
                }
            ],
            "primary_user_paths": [
                {
                    "path_id": "P01_snapshot_to_supplier",
                    "nodes": ["N01_stock_snapshot", "N03_selected_item_supplier"],
                }
            ],
        },
        "scenarios": [
            {
                "scenario_id": "inventory_selected_item_provenance",
                "question_ids": ["Q01", "Q19"],
                "steps": [
                    {
                        "step_id": "step_01_snapshot",
                        "question_id": "Q01",
                        "node_id": "N01_stock_snapshot",
                        "paraphrase_family": "canonical",
                    },
                    {
                        "step_id": "step_02_supplier",
                        "question_id": "Q19",
                        "node_id": "N03_selected_item_supplier",
                        "paraphrase_family": "canonical",
                    },
                ],
            }
        ],
    }

    rendered = build_scenario_acceptance_matrix(pack, _accepted_provenance_results())

    expected_fragments = (
        "E01_snapshot_to_selected_item_supplier",
        "| E01_snapshot_to_selected_item_supplier | green |",
        "| P01_snapshot_to_supplier | green |",
    )
    for fragment in expected_fragments:
        assert fragment in rendered


def test_build_scenario_acceptance_matrix_marks_partial_when_wording_family_is_missing() -> None:
    """A node missing a required wording family is rendered as partial."""
    pack = {
        "pack_id": "inventory_active_contract_smoke",
        "domain": "inventory_stock",
        "source_contract": {
            "domain_id": "inventory_stock_supplier_provenance",
            "title": "Warehouse domain",
        },
        "question_pool": {
            "questions": [
                {"question_id": "Q19", "node_id": "N03_selected_item_supplier"},
            ]
        },
        "scenario_tree": {
            "critical_nodes": [
                {
                    "node_id": "N03_selected_item_supplier",
                    "covers_question_ids": ["Q19"],
                    "required_wording_families": ["canonical", "ui_selected_object_colloquial"],
                }
            ]
        },
        "scenarios": [
            {
                "scenario_id": "inventory_selected_item_provenance",
                "question_ids": ["Q19"],
                "steps": [
                    {
                        "step_id": "step_01_supplier",
                        "question_id": "Q19",
                        "node_id": "N03_selected_item_supplier",
                        "paraphrase_family": "canonical",
                    }
                ],
            }
        ],
    }

    rendered = build_scenario_acceptance_matrix(pack, _accepted_provenance_results())

    expected_fragments = (
        "| N03_selected_item_supplier | partial |",
        "missing_wording_families",
        "ui_selected_object_colloquial",
    )
    for fragment in expected_fragments:
        assert fragment in rendered


def test_derive_pack_final_status_downgrades_accepted_when_matrix_contains_partial_coverage() -> None:
    """An accepted scenario with a missing wording family yields a partial pack."""
    pack = {
        "pack_id": "inventory_active_contract_smoke",
        "domain": "inventory_stock",
        "scenarios": [
            {
                "scenario_id": "inventory_selected_item_provenance",
                "question_ids": ["Q19"],
                "steps": [
                    {
                        "step_id": "step_01_supplier",
                        "question_id": "Q19",
                        "node_id": "N03_selected_item_supplier",
                        "paraphrase_family": "canonical",
                    }
                ],
            },
        ],
        "scenario_tree": {
            "critical_nodes": [
                {
                    "node_id": "N03_selected_item_supplier",
                    "covers_question_ids": ["Q19"],
                    "required_wording_families": ["canonical", "ui_selected_object_colloquial"],
                }
            ]
        },
        "question_pool": {
            "questions": [
                {"question_id": "Q19", "node_id": "N03_selected_item_supplier"},
            ]
        },
    }

    final_status = derive_pack_final_status(pack, _accepted_provenance_results())

    assert final_status == "partial"


def test_evaluate_analyst_gate_requires_temporal_honesty_field_truth_and_layering() -> None:
    """A verdict with ``temporal_honesty_ok`` False is not accepted.

    The quality score (91) is above the target (80); the remaining returned
    fields are expected to equal the values supplied in the verdict.
    """
    verdict = {
        "quality_score": 91,
        "unresolved_p0_count": 0,
        "regression_detected": False,
        "direct_answer_ok": True,
        "business_usefulness_ok": True,
        "temporal_honesty_ok": False,
        "field_truth_ok": True,
        "answer_layering_ok": True,
        "loop_decision": "accepted",
        "requires_user_decision": False,
        "user_decision_type": "none",
        "user_decision_prompt": None,
    }

    gate_result = evaluate_analyst_gate(verdict, target_score=80)
    # Unpacking also pins the arity of the returned value to five items.
    accepted, decision, needs_user, decision_type, decision_prompt = gate_result

    assert accepted is False
    assert decision == "accepted"
    assert needs_user is False
    assert decision_type == "none"
    assert decision_prompt is None


def test_validate_step_contract_rejects_wrong_month_filter_even_when_execution_is_exact() -> None:
    """Filters extracted for the wrong period reject an otherwise exact step."""
    step = {
        "execution_status": "exact",
        "status": "exact",
        "node_role": "root",
        "analysis_context": {"as_of_date": "2016-05-31"},
        "expected_intents": ["inventory_on_hand_as_of_date"],
        "detected_intent": "inventory_on_hand_as_of_date",
        "expected_capability": "confirmed_inventory_on_hand_as_of_date",
        "capability_id": "confirmed_inventory_on_hand_as_of_date",
        "expected_recipe": "address_inventory_on_hand_as_of_date_v1",
        "selected_recipe": "address_inventory_on_hand_as_of_date_v1",
        "expected_result_mode": "confirmed_balance",
        "result_mode": "confirmed_balance",
        "required_filters": {
            "as_of_date": "2016-05-31",
            "period_from": "2016-05-01",
            "period_to": "2016-05-31",
        },
        "required_answer_shape": "item_list_with_quantity_cost_warehouse_organization",
        "required_carryover_invariants": [],
        "required_state_objects": [],
        "forbidden_capabilities": [],
        "forbidden_recipes": [],
        "actual_direct_answer": "На 31.12.2016 на складе подтверждено 4 позиций.",
        "top_non_empty_lines": ["На 31.12.2016 на складе подтверждено 4 позиций."],
        "extracted_filters": {
            "as_of_date": "2016-12-31",
            "period_from": "2016-01-01",
            "period_to": "2016-12-31",
        },
        "date_scope": {"as_of_date": "2016-12-31"},
        "focus_object": None,
    }

    outcome = validate_step_contract(step)

    assert outcome["acceptance_status"] == "rejected"
    for violation in ("wrong_as_of_date", "wrong_period_from", "wrong_period_to"):
        assert violation in outcome["violated_invariants"]
    assert outcome["hard_fail"] is True


def test_validate_step_contract_rejects_selected_object_followup_without_focus_object_and_with_wrong_route() -> None:
    """A follow-up routed to a forbidden capability without a focus object fails."""
    step = {
        "execution_status": "exact",
        "status": "exact",
        "node_role": "critical_child",
        "analysis_context": {"as_of_date": "2019-03-31"},
        "expected_intents": ["inventory_purchase_provenance_for_item"],
        "detected_intent": "inventory_on_hand_as_of_date",
        "expected_capability": "inventory_purchase_provenance_for_item",
        "capability_id": "confirmed_inventory_on_hand_as_of_date",
        "expected_result_mode": "confirmed_balance",
        "result_mode": "confirmed_balance",
        "required_filters": {"as_of_date": "2019-03-31"},
        "required_answer_shape": "direct_supplier_answer_first_then_evidence",
        "required_carryover_invariants": ["focus_object", "date_scope"],
        "required_state_objects": [],
        "forbidden_capabilities": ["confirmed_inventory_on_hand_as_of_date"],
        "forbidden_recipes": ["address_inventory_on_hand_as_of_date_v1"],
        "selected_recipe": "address_inventory_on_hand_as_of_date_v1",
        "actual_direct_answer": "На 31.03.2019 на складе подтверждено 16 позиций.",
        "top_non_empty_lines": ["На 31.03.2019 на складе подтверждено 16 позиций."],
        "extracted_filters": {"as_of_date": "2019-03-31"},
        "date_scope": {"as_of_date": "2019-03-31"},
        "focus_object": None,
    }

    outcome = validate_step_contract(step)

    assert outcome["acceptance_status"] == "rejected"
    expected_violations = (
        "wrong_intent",
        "wrong_followup_action",
        "forbidden_capability_selected",
        "forbidden_recipe_selected",
        "focus_object_missing",
    )
    for violation in expected_violations:
        assert violation in outcome["violated_invariants"]


def test_validate_step_contract_rejects_top_level_noise_as_direct_answer() -> None:
    """A status line placed ahead of the supplier line is flagged as noise."""
    step = {
        "execution_status": "exact",
        "status": "exact",
        "node_role": "critical_child",
        "analysis_context": {"as_of_date": "2019-03-31"},
        "expected_intents": ["inventory_purchase_provenance_for_item"],
        "detected_intent": "inventory_purchase_provenance_for_item",
        "expected_capability": "inventory_purchase_provenance_for_item",
        "capability_id": "inventory_inventory_purchase_provenance_for_item",
        "expected_result_mode": "confirmed_balance",
        "result_mode": "confirmed_balance",
        "required_filters": {"as_of_date": "2019-03-31"},
        "required_answer_shape": "direct_supplier_answer_first_then_evidence",
        "required_carryover_invariants": [],
        "required_state_objects": [],
        "forbidden_capabilities": [],
        "forbidden_recipes": [],
        "selected_recipe": "address_inventory_purchase_provenance_for_item_v1",
        "actual_direct_answer": "Статус результата: подтверждено.",
        "top_non_empty_lines": [
            "Статус результата: подтверждено.",
            "Поставщик: Торговый дом \\Союз\\.",
        ],
        "extracted_filters": {"as_of_date": "2019-03-31"},
        "date_scope": {"as_of_date": "2019-03-31"},
        "focus_object": {"object_id": "item:1", "label": "Столешница"},
    }

    outcome = validate_step_contract(step)

    assert outcome["acceptance_status"] == "rejected"
    for violation in ("direct_answer_missing", "top_level_noise_present"):
        assert violation in outcome["violated_invariants"]


def test_build_deterministic_repair_targets_marks_followup_router_gap_as_p0() -> None:
    """A hard-failed follow-up with a wrong action yields one P0 target."""
    pack_summary = {
        "pack_id": "demo_pack",
        "domain": "inventory_stock",
        "final_status": "partial",
    }
    rejected_step = {
        "step_id": "step_02_supplier",
        "question_resolved": 'По выбранному объекту "Столешница": кто поставил',
        "execution_status": "exact",
        "acceptance_status": "rejected",
        "reply_type": "factual",
        "selected_recipe": "address_inventory_on_hand_as_of_date_v1",
        "capability_id": "confirmed_inventory_on_hand_as_of_date",
        "violated_invariants": [
            "wrong_followup_action",
            "focus_object_missing",
            "forbidden_capability_selected",
        ],
        "warnings": [],
        "hard_fail": True,
    }
    scenario_runs = [
        {
            "scenario_id": "inventory_selected_item_provenance",
            "title": "Selected item provenance",
            "artifact_dir": "artifacts/domain_runs/demo/scenarios/inventory_selected_item_provenance",
            "scenario_state": {
                "step_outputs": {
                    "step_02_supplier": rejected_step,
                }
            },
        }
    ]

    report = build_deterministic_repair_targets(pack_summary, scenario_runs)

    assert report["target_count"] == 1
    only_target = report["targets"][0]
    assert only_target["severity"] == "P0"
    assert only_target["problem_type"] == "followup_action_resolution_gap"
    for layer in ("followup_action_resolution_gap", "object_memory_gap"):
        assert layer in only_target["root_cause_layers"]
    assert "addressIntentResolver.ts" in " ".join(only_target["candidate_files"])


def test_build_deterministic_repair_targets_marks_anchor_gap_as_p1() -> None:
    """A partial step that did not match its anchor yields one P1 target."""
    pack_summary = {
        "pack_id": "demo_pack",
        "domain": "inventory_stock",
        "final_status": "partial",
    }
    unmatched_step = {
        "step_id": "step_02_selected_item_buyer_ui",
        "question_resolved": 'По выбранному объекту "Шкаф": кому был продан товар',
        "execution_status": "partial",
        "acceptance_status": "rejected",
        "reply_type": "partial_coverage",
        "fallback_type": "partial",
        "mcp_call_status": "materialized_but_not_anchor_matched",
        "selected_recipe": "address_inventory_sale_trace_for_item_v1",
        "capability_id": "inventory_inventory_sale_trace_for_item",
        "violated_invariants": [],
        "warnings": [],
        "hard_fail": False,
    }
    scenario_runs = [
        {
            "scenario_id": "inventory_sale_trace",
            "title": "Sale trace",
            "artifact_dir": "artifacts/domain_runs/demo/scenarios/inventory_sale_trace",
            "scenario_state": {
                "step_outputs": {
                    "step_02_selected_item_buyer_ui": unmatched_step,
                }
            },
        }
    ]

    report = build_deterministic_repair_targets(pack_summary, scenario_runs)

    assert report["target_count"] == 1
    only_target = report["targets"][0]
    assert only_target["severity"] == "P1"
    assert only_target["problem_type"] == "domain_anchor_gap"
    assert only_target["root_cause_layers"] == ["domain_anchor_gap"]
    assert "addressQueryService.ts" in " ".join(only_target["candidate_files"])


def test_evaluate_deterministic_loop_gate_rejects_partial_pack_even_without_targets() -> None:
    """A partial pack fails the gate even when no P0/P1 targets remain."""
    pack_summary = {"final_status": "partial"}
    target_summary = {"severity_counts": {"P0": 0, "P1": 0}}

    passed, why = evaluate_deterministic_loop_gate(pack_summary, target_summary)

    assert passed is False
    assert why == "pack_final_status=partial"


def test_evaluate_deterministic_loop_gate_rejects_remaining_p1_targets() -> None:
    """An accepted pack still fails the gate while P1 targets remain."""
    pack_summary = {"final_status": "accepted"}
    target_summary = {"severity_counts": {"P0": 0, "P1": 2}}

    passed, why = evaluate_deterministic_loop_gate(pack_summary, target_summary)

    assert passed is False
    assert why == "repair_targets_remaining=P0:0,P1:2"


def test_evaluate_deterministic_loop_gate_accepts_clean_pack_without_remaining_p0_p1() -> None:
    """An accepted pack with only a warning-level count passes the gate."""
    pack_summary = {"final_status": "accepted"}
    target_summary = {"severity_counts": {"P0": 0, "P1": 0, "warning": 1}}

    passed, why = evaluate_deterministic_loop_gate(pack_summary, target_summary)

    assert passed is True
    assert why == "deterministic_gate_passed"