# NODEDC_1C/tests/test_domain_case_loop.py
#
# 502 lines
# 20 KiB
# Python
# Raw Blame History
#
# This file contains ambiguous Unicode characters
#
# This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from __future__ import annotations
import json
import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
from scripts.domain_case_loop import (
build_scenario_acceptance_matrix,
carry_forward_analysis_context,
derive_pack_final_status,
evaluate_analyst_gate,
load_scenario_pack,
merge_scenario_date_scope,
validate_step_contract,
)
def test_carry_forward_analysis_context_preserves_followup_anchor() -> None:
    """An explicit analysis context keeps its own ``as_of_date`` and ``source``.

    The scenario state remembers a different (older) ``as_of_date``; the
    carried context must still report the values supplied by the caller.
    """
    remembered_scope = {"as_of_date": "2020-03-31"}
    state = {"semantic_memory": {"date_scope": remembered_scope}}
    incoming_context = {"as_of_date": "2026-04-13", "source": "current_analysis"}

    result = carry_forward_analysis_context(state, incoming_context)

    assert result["as_of_date"] == "2026-04-13"
    assert result["source"] == "current_analysis"
def test_carry_forward_analysis_context_fills_missing_anchor() -> None:
    """An empty analysis context is filled from the scenario state's date scope.

    The carried context must take the remembered ``as_of_date`` and label its
    ``source`` as ``scenario_state_carryover``.
    """
    remembered_scope = {"as_of_date": "2020-03-31"}
    state = {"semantic_memory": {"date_scope": remembered_scope}}

    result = carry_forward_analysis_context(state, {})

    assert result["as_of_date"] == "2020-03-31"
    assert result["source"] == "scenario_state_carryover"
def test_merge_scenario_date_scope_preserves_historical_anchor_on_followup() -> None:
    """A dependent step keeps the previous ``as_of_date`` when scopes are merged.

    The merge is requested with a non-empty ``depends_on`` list. The expected
    result combines the earlier date with the ``source`` of the current scope.
    """
    historical_scope = {"as_of_date": "2020-03-31", "source": "exact_anchor"}
    followup_scope = {"as_of_date": "2026-04-13", "source": "current_analysis"}

    result = merge_scenario_date_scope(
        historical_scope,
        followup_scope,
        depends_on=["step_01_anchor"],
    )

    assert result["as_of_date"] == "2020-03-31"
    assert result["source"] == "current_analysis"
def test_load_scenario_pack_accepts_active_domain_contract(tmp_path) -> None:
    """Load an ``active_domain_contract_v1`` manifest and inspect the resulting pack.

    The manifest is written to ``tmp_path`` as UTF-8 JSON with a trailing
    newline. The assertions pin the reported schema versions, the domain, two
    binding values and the question ids of the single scenario.
    """
    observed_anchors = {
        "warehouse": "Основной склад",
        "organization": "ООО \\Альтернатива Плюс\\",
        "historical_as_of_date": "2019-03-31",
        "current_as_of_date_example": "2021-09-30",
        "focus_item_historical": "Столешница 600*3050*26 дуб ниагара",
    }
    pool_questions = [
        {"question_id": "Q01", "node_id": "N01_stock_snapshot", "text": "Q1"},
        {"question_id": "Q19", "node_id": "N03_selected_item_supplier", "text": "Q19"},
    ]
    snapshot_to_supplier_edge = {
        "edge_id": "E01_snapshot_to_selected_item_supplier",
        "from_node": "N01_stock_snapshot",
        "to_node": "N03_selected_item_supplier",
        "primary_user_path": True,
    }
    snapshot_step = {
        "step_id": "step_01_snapshot",
        "question_id": "Q01",
        "node_id": "N01_stock_snapshot",
        "question": "Какие товары сейчас лежат на складе",
    }
    supplier_step = {
        "step_id": "step_02_supplier",
        "question_id": "Q19",
        "node_id": "N03_selected_item_supplier",
        "question": "По выбранному объекту \"Столешница 600*3050*26 дуб ниагара\": кто это поставил нам",
    }
    scenario = {
        "scenario_id": "inventory_selected_item_provenance",
        "title": "Selected item provenance",
        "question_ids": ["Q01", "Q19"],
        "steps": [snapshot_step, supplier_step],
    }
    # Key order is kept stable so the serialized manifest stays the same.
    contract = {
        "schema_version": "active_domain_contract_v1",
        "status": "active",
        "domain_id": "inventory_stock_supplier_provenance",
        "runtime_domain": "inventory_stock",
        "title": "Warehouse domain",
        "default_analysis_context": {"as_of_date": "2021-09-30"},
        "observed_anchors": observed_anchors,
        "question_pool": {"questions": pool_questions},
        "scenario_tree": {"critical_edges": [snapshot_to_supplier_edge]},
        "orchestration_pack": {
            "pack_id": "inventory_active_contract_smoke",
            "scenarios": [scenario],
        },
    }

    contract_file = tmp_path / "active_domain_contract.json"
    serialized = json.dumps(contract, ensure_ascii=False, indent=2)
    contract_file.write_text(serialized + "\n", encoding="utf-8")

    pack = load_scenario_pack(contract_file)

    assert pack["schema_version"] == "domain_scenario_pack_v1"
    assert pack["source_schema_version"] == "active_domain_contract_v1"
    assert pack["domain"] == "inventory_stock"

    bindings = pack["bindings"]
    assert bindings["observed_warehouse"] == "Основной склад"
    assert bindings["focus_item_historical"] == "Столешница 600*3050*26 дуб ниагара"

    loaded_scenario = pack["scenarios"][0]
    assert loaded_scenario["question_ids"] == ["Q01", "Q19"]
    assert loaded_scenario["steps"][1]["question_id"] == "Q19"
def test_load_scenario_pack_enriches_step_with_node_contract_defaults(tmp_path) -> None:
    """A step without its own contract fields receives them from its critical node.

    The manifest step only names ``step_id``, ``question_id``, ``node_id`` and
    ``question``. After loading, the step is expected to carry the node's
    intents, its answer shape (under ``required_answer_shape``) and its
    carryover invariants.
    """
    supplier_node = {
        "node_id": "N03_selected_item_supplier",
        "expected_intents": ["inventory_purchase_provenance_for_item"],
        "expected_answer_shape": "direct_supplier_answer_first_then_evidence",
        "required_carryover_invariants": ["focus_object", "date_scope"],
    }
    bare_step = {
        "step_id": "step_02_supplier",
        "question_id": "Q19",
        "node_id": "N03_selected_item_supplier",
        "question": "По выбранному объекту \"...\": кто это поставил нам",
    }
    scenario = {
        "scenario_id": "inventory_selected_item_provenance",
        "title": "Selected item provenance",
        "steps": [bare_step],
    }
    # Key order is kept stable so the serialized manifest stays the same.
    contract = {
        "schema_version": "active_domain_contract_v1",
        "status": "active",
        "domain_id": "inventory_stock_supplier_provenance",
        "runtime_domain": "inventory_stock",
        "title": "Warehouse domain",
        "question_pool": {
            "questions": [
                {"question_id": "Q19", "node_id": "N03_selected_item_supplier", "text": "Q19"},
            ]
        },
        "scenario_tree": {"critical_nodes": [supplier_node]},
        "orchestration_pack": {
            "pack_id": "inventory_active_contract_smoke",
            "scenarios": [scenario],
        },
    }

    contract_file = tmp_path / "active_domain_contract.json"
    serialized = json.dumps(contract, ensure_ascii=False, indent=2)
    contract_file.write_text(serialized + "\n", encoding="utf-8")

    pack = load_scenario_pack(contract_file)
    enriched = pack["scenarios"][0]["steps"][0]

    assert enriched["expected_intents"] == ["inventory_purchase_provenance_for_item"]
    assert enriched["required_answer_shape"] == "direct_supplier_answer_first_then_evidence"
    assert "focus_object" in enriched["required_carryover_invariants"]
    assert "date_scope" in enriched["required_carryover_invariants"]
def test_build_scenario_acceptance_matrix_marks_green_edge_when_covering_scenario_is_accepted() -> None:
    """An accepted scenario that walks both nodes marks the edge and path green.

    The pack has one critical edge and one primary user path between the
    snapshot node and the supplier node. The only scenario visits both nodes
    and is reported as ``accepted``.
    """
    snapshot_node = "N01_stock_snapshot"
    supplier_node = "N03_selected_item_supplier"

    tree = {
        "critical_nodes": [
            {
                "node_id": supplier_node,
                "covers_question_ids": ["Q19"],
                "required_wording_families": ["canonical"],
            }
        ],
        "critical_edges": [
            {
                "edge_id": "E01_snapshot_to_selected_item_supplier",
                "from_node": snapshot_node,
                "to_node": supplier_node,
                "primary_user_path": True,
            }
        ],
        "primary_user_paths": [
            {"path_id": "P01_snapshot_to_supplier", "nodes": [snapshot_node, supplier_node]}
        ],
    }
    covering_scenario = {
        "scenario_id": "inventory_selected_item_provenance",
        "question_ids": ["Q01", "Q19"],
        "steps": [
            {
                "step_id": "step_01_snapshot",
                "question_id": "Q01",
                "node_id": snapshot_node,
                "paraphrase_family": "canonical",
            },
            {
                "step_id": "step_02_supplier",
                "question_id": "Q19",
                "node_id": supplier_node,
                "paraphrase_family": "canonical",
            },
        ],
    }
    pack = {
        "pack_id": "inventory_active_contract_smoke",
        "domain": "inventory_stock",
        "source_contract": {"domain_id": "inventory_stock_supplier_provenance", "title": "Warehouse domain"},
        "question_pool": {
            "questions": [
                {"question_id": "Q01", "node_id": snapshot_node},
                {"question_id": "Q19", "node_id": supplier_node},
            ]
        },
        "scenario_tree": tree,
        "scenarios": [covering_scenario],
    }
    results = [
        {
            "scenario_id": "inventory_selected_item_provenance",
            "final_status": "accepted",
            "session_id": "asst-demo",
            "artifact_dir": "artifacts/domain_runs/demo",
        }
    ]

    rendered = build_scenario_acceptance_matrix(pack, results)

    # The matrix is checked as text: edge id present, then its green row,
    # then the green row of the primary path.
    assert "E01_snapshot_to_selected_item_supplier" in rendered
    assert "| E01_snapshot_to_selected_item_supplier | green |" in rendered
    assert "| P01_snapshot_to_supplier | green |" in rendered
def test_build_scenario_acceptance_matrix_marks_partial_when_wording_family_is_missing() -> None:
    """A node with an uncovered wording family is reported as partial.

    The critical node requires two wording families, but the accepted scenario
    only exercises ``canonical``. The rendered matrix must flag the node as
    partial and name the missing family.
    """
    supplier_node = "N03_selected_item_supplier"

    node_contract = {
        "node_id": supplier_node,
        "covers_question_ids": ["Q19"],
        "required_wording_families": ["canonical", "ui_selected_object_colloquial"],
    }
    canonical_only_scenario = {
        "scenario_id": "inventory_selected_item_provenance",
        "question_ids": ["Q19"],
        "steps": [
            {
                "step_id": "step_01_supplier",
                "question_id": "Q19",
                "node_id": supplier_node,
                "paraphrase_family": "canonical",
            }
        ],
    }
    pack = {
        "pack_id": "inventory_active_contract_smoke",
        "domain": "inventory_stock",
        "source_contract": {"domain_id": "inventory_stock_supplier_provenance", "title": "Warehouse domain"},
        "question_pool": {
            "questions": [
                {"question_id": "Q19", "node_id": supplier_node},
            ]
        },
        "scenario_tree": {"critical_nodes": [node_contract]},
        "scenarios": [canonical_only_scenario],
    }
    results = [
        {
            "scenario_id": "inventory_selected_item_provenance",
            "final_status": "accepted",
            "session_id": "asst-demo",
            "artifact_dir": "artifacts/domain_runs/demo",
        }
    ]

    rendered = build_scenario_acceptance_matrix(pack, results)

    assert "| N03_selected_item_supplier | partial |" in rendered
    assert "missing_wording_families" in rendered
    assert "ui_selected_object_colloquial" in rendered
def test_derive_pack_final_status_downgrades_accepted_when_matrix_contains_partial_coverage() -> None:
    """The pack status is ``partial`` when a required wording family is uncovered.

    The single scenario is reported as ``accepted``, yet the critical node
    requires a second wording family that no step exercises.
    """
    supplier_node = "N03_selected_item_supplier"

    canonical_only_scenario = {
        "scenario_id": "inventory_selected_item_provenance",
        "question_ids": ["Q19"],
        "steps": [
            {
                "step_id": "step_01_supplier",
                "question_id": "Q19",
                "node_id": supplier_node,
                "paraphrase_family": "canonical",
            }
        ],
    }
    node_contract = {
        "node_id": supplier_node,
        "covers_question_ids": ["Q19"],
        "required_wording_families": ["canonical", "ui_selected_object_colloquial"],
    }
    pack = {
        "pack_id": "inventory_active_contract_smoke",
        "domain": "inventory_stock",
        "scenarios": [canonical_only_scenario],
        "scenario_tree": {"critical_nodes": [node_contract]},
        "question_pool": {
            "questions": [
                {"question_id": "Q19", "node_id": supplier_node},
            ]
        },
    }
    results = [
        {
            "scenario_id": "inventory_selected_item_provenance",
            "final_status": "accepted",
            "session_id": "asst-demo",
            "artifact_dir": "artifacts/domain_runs/demo",
        }
    ]

    final_status = derive_pack_final_status(pack, results)

    assert final_status == "partial"
def test_evaluate_analyst_gate_requires_temporal_honesty_field_truth_and_layering() -> None:
    """A verdict with ``temporal_honesty_ok`` set to False is not accepted by the gate.

    The score (91) is above the target (80) and every other quality flag is
    True. The gate must still return ``accepted`` as False, while the other
    four returned values equal what the verdict itself carries.
    """
    # NOTE(review): the test name also mentions field truth and layering, but
    # only the temporal-honesty flag is switched off here — confirm whether the
    # other two flags are covered elsewhere.
    quality_flags = {
        "direct_answer_ok": True,
        "business_usefulness_ok": True,
        "temporal_honesty_ok": False,
        "field_truth_ok": True,
        "answer_layering_ok": True,
    }
    verdict = {
        "quality_score": 91,
        "unresolved_p0_count": 0,
        "regression_detected": False,
        **quality_flags,
        "loop_decision": "accepted",
        "requires_user_decision": False,
        "user_decision_type": "none",
        "user_decision_prompt": None,
    }

    gate_result = evaluate_analyst_gate(verdict, target_score=80)
    (
        accepted,
        loop_decision,
        requires_user_decision,
        user_decision_type,
        user_decision_prompt,
    ) = gate_result

    assert accepted is False
    assert loop_decision == "accepted"
    assert requires_user_decision is False
    assert user_decision_type == "none"
    assert user_decision_prompt is None
def test_validate_step_contract_rejects_wrong_month_filter_even_when_execution_is_exact() -> None:
    """Mismatched date filters reject the step despite an ``exact`` execution status.

    The step requires May 2016 filters, while the extracted filters and the
    answer text refer to 31 December 2016. Intent, capability, recipe and
    result mode all match their expected values, so only the filters differ.
    """
    required_filters = {
        "as_of_date": "2016-05-31",
        "period_from": "2016-05-01",
        "period_to": "2016-05-31",
    }
    extracted_filters = {
        "as_of_date": "2016-12-31",
        "period_from": "2016-01-01",
        "period_to": "2016-12-31",
    }
    answer_line = "На 31.12.2016 на складе подтверждено 4 позиций."
    step = {
        "execution_status": "exact",
        "status": "exact",
        "node_role": "root",
        "analysis_context": {"as_of_date": "2016-05-31"},
        "expected_intents": ["inventory_on_hand_as_of_date"],
        "detected_intent": "inventory_on_hand_as_of_date",
        "expected_capability": "confirmed_inventory_on_hand_as_of_date",
        "capability_id": "confirmed_inventory_on_hand_as_of_date",
        "expected_recipe": "address_inventory_on_hand_as_of_date_v1",
        "selected_recipe": "address_inventory_on_hand_as_of_date_v1",
        "expected_result_mode": "confirmed_balance",
        "result_mode": "confirmed_balance",
        "required_filters": required_filters,
        "required_answer_shape": "item_list_with_quantity_cost_warehouse_organization",
        "required_carryover_invariants": [],
        "required_state_objects": [],
        "forbidden_capabilities": [],
        "forbidden_recipes": [],
        "actual_direct_answer": answer_line,
        "top_non_empty_lines": [answer_line],
        "extracted_filters": extracted_filters,
        "date_scope": {"as_of_date": "2016-12-31"},
        "focus_object": None,
    }

    outcome = validate_step_contract(step)

    assert outcome["acceptance_status"] == "rejected"
    violations = outcome["violated_invariants"]
    for expected_code in ("wrong_as_of_date", "wrong_period_from", "wrong_period_to"):
        assert expected_code in violations
    assert outcome["hard_fail"] is True
def test_validate_step_contract_rejects_selected_object_followup_without_focus_object_and_with_wrong_route() -> None:
    """A follow-up routed to the stock-snapshot capability is rejected.

    The step expects the purchase-provenance intent and capability, but the
    detected intent, capability and recipe are the on-hand ones, which are also
    listed as forbidden. ``focus_object`` is None although it is a required
    carryover invariant.
    """
    answer_line = "На 31.03.2019 на складе подтверждено 16 позиций."
    step = {
        "execution_status": "exact",
        "status": "exact",
        "node_role": "critical_child",
        "analysis_context": {"as_of_date": "2019-03-31"},
        "expected_intents": ["inventory_purchase_provenance_for_item"],
        "detected_intent": "inventory_on_hand_as_of_date",
        "expected_capability": "inventory_purchase_provenance_for_item",
        "capability_id": "confirmed_inventory_on_hand_as_of_date",
        "expected_result_mode": "confirmed_balance",
        "result_mode": "confirmed_balance",
        "required_filters": {"as_of_date": "2019-03-31"},
        "required_answer_shape": "direct_supplier_answer_first_then_evidence",
        "required_carryover_invariants": ["focus_object", "date_scope"],
        "required_state_objects": [],
        "forbidden_capabilities": ["confirmed_inventory_on_hand_as_of_date"],
        "forbidden_recipes": ["address_inventory_on_hand_as_of_date_v1"],
        "selected_recipe": "address_inventory_on_hand_as_of_date_v1",
        "actual_direct_answer": answer_line,
        "top_non_empty_lines": [answer_line],
        "extracted_filters": {"as_of_date": "2019-03-31"},
        "date_scope": {"as_of_date": "2019-03-31"},
        "focus_object": None,
    }

    outcome = validate_step_contract(step)

    assert outcome["acceptance_status"] == "rejected"
    violations = outcome["violated_invariants"]
    expected_codes = (
        "wrong_intent",
        "wrong_followup_action",
        "forbidden_capability_selected",
        "forbidden_recipe_selected",
        "focus_object_missing",
    )
    for expected_code in expected_codes:
        assert expected_code in violations
def test_validate_step_contract_rejects_top_level_noise_as_direct_answer() -> None:
    """A status line in place of the direct answer rejects the step.

    The first top-level line is a result-status line and the supplier only
    appears on the second line. The step is expected to be rejected with both
    ``direct_answer_missing`` and ``top_level_noise_present``.
    """
    status_line = "Статус результата: подтверждено."
    supplier_line = "Поставщик: Торговый дом \\Союз\\."
    step = {
        "execution_status": "exact",
        "status": "exact",
        "node_role": "critical_child",
        "analysis_context": {"as_of_date": "2019-03-31"},
        "expected_intents": ["inventory_purchase_provenance_for_item"],
        "detected_intent": "inventory_purchase_provenance_for_item",
        "expected_capability": "inventory_purchase_provenance_for_item",
        # NOTE(review): this id has an extra "inventory_" prefix compared with
        # expected_capability — confirm it is intentional and not a typo.
        "capability_id": "inventory_inventory_purchase_provenance_for_item",
        "expected_result_mode": "confirmed_balance",
        "result_mode": "confirmed_balance",
        "required_filters": {"as_of_date": "2019-03-31"},
        "required_answer_shape": "direct_supplier_answer_first_then_evidence",
        "required_carryover_invariants": [],
        "required_state_objects": [],
        "forbidden_capabilities": [],
        "forbidden_recipes": [],
        "selected_recipe": "address_inventory_purchase_provenance_for_item_v1",
        "actual_direct_answer": status_line,
        "top_non_empty_lines": [status_line, supplier_line],
        "extracted_filters": {"as_of_date": "2019-03-31"},
        "date_scope": {"as_of_date": "2019-03-31"},
        "focus_object": {"object_id": "item:1", "label": "Столешница"},
    }

    outcome = validate_step_contract(step)

    assert outcome["acceptance_status"] == "rejected"
    violations = outcome["violated_invariants"]
    for expected_code in ("direct_answer_missing", "top_level_noise_present"):
        assert expected_code in violations