# Listing metadata from the code viewer: 502 lines, 20 KiB, Python.
from __future__ import annotations

import json
import sys
from pathlib import Path

# Prepend the directory one level above this file's own directory to the
# import search path before importing from the `scripts` package below.
# NOTE(review): presumably that directory is the repository root - confirm.
sys.path.insert(0, str(Path(__file__).resolve().parents[1]))

from scripts.domain_case_loop import (
    build_scenario_acceptance_matrix,
    carry_forward_analysis_context,
    derive_pack_final_status,
    evaluate_analyst_gate,
    load_scenario_pack,
    merge_scenario_date_scope,
    validate_step_contract,
)
|
||
|
||
|
||
def test_carry_forward_analysis_context_preserves_followup_anchor() -> None:
    """An explicit analysis context is expected to survive the carry-forward.

    The scenario state remembers a 2020 date scope, while the incoming context
    already carries its own ``as_of_date`` and ``source``; both incoming values
    must be present in the result.
    """
    remembered_scope = {"as_of_date": "2020-03-31"}
    state = {"semantic_memory": {"date_scope": remembered_scope}}
    incoming_context = {"as_of_date": "2026-04-13", "source": "current_analysis"}

    result = carry_forward_analysis_context(state, incoming_context)

    assert result["as_of_date"] == "2026-04-13"
    assert result["source"] == "current_analysis"
|
||
|
||
|
||
def test_carry_forward_analysis_context_fills_missing_anchor() -> None:
    """An empty analysis context is expected to be filled from scenario state.

    With no incoming values, the result must expose the remembered
    ``as_of_date`` and report ``scenario_state_carryover`` as its source.
    """
    remembered_scope = {"as_of_date": "2020-03-31"}
    state = {"semantic_memory": {"date_scope": remembered_scope}}
    empty_context = {}

    result = carry_forward_analysis_context(state, empty_context)

    assert result["as_of_date"] == "2020-03-31"
    assert result["source"] == "scenario_state_carryover"
|
||
|
||
|
||
def test_merge_scenario_date_scope_preserves_historical_anchor_on_followup() -> None:
    """A dependent follow-up step is expected to keep the earlier anchor date.

    The merge is called with a non-empty ``depends_on``; the result must keep
    the previous ``as_of_date`` while reporting the current scope's ``source``.
    """
    earlier_scope = {"as_of_date": "2020-03-31", "source": "exact_anchor"}
    later_scope = {"as_of_date": "2026-04-13", "source": "current_analysis"}
    upstream_steps = ["step_01_anchor"]

    result = merge_scenario_date_scope(earlier_scope, later_scope, depends_on=upstream_steps)

    assert result["as_of_date"] == "2020-03-31"
    assert result["source"] == "current_analysis"
|
||
|
||
|
||
def test_load_scenario_pack_accepts_active_domain_contract(tmp_path) -> None:
    """Load an ``active_domain_contract_v1`` manifest and inspect the resulting pack.

    The manifest is serialised to UTF-8 JSON under ``tmp_path`` and read back
    through ``load_scenario_pack``. The assertions pin the reported schema
    versions, the domain, two bindings whose values match ``observed_anchors``
    entries, and the question ids of the first scenario and its second step.
    """
    # The manifest is assembled section by section; key order matches the
    # order in which the sections appear in the serialised document.
    observed_anchors = {
        "warehouse": "Основной склад",
        "organization": "ООО \\Альтернатива Плюс\\",
        "historical_as_of_date": "2019-03-31",
        "current_as_of_date_example": "2021-09-30",
        "focus_item_historical": "Столешница 600*3050*26 дуб ниагара",
    }
    question_pool = {
        "questions": [
            {"question_id": "Q01", "node_id": "N01_stock_snapshot", "text": "Q1"},
            {"question_id": "Q19", "node_id": "N03_selected_item_supplier", "text": "Q19"},
        ]
    }
    scenario_tree = {
        "critical_edges": [
            {
                "edge_id": "E01_snapshot_to_selected_item_supplier",
                "from_node": "N01_stock_snapshot",
                "to_node": "N03_selected_item_supplier",
                "primary_user_path": True,
            }
        ]
    }
    snapshot_step = {
        "step_id": "step_01_snapshot",
        "question_id": "Q01",
        "node_id": "N01_stock_snapshot",
        "question": "Какие товары сейчас лежат на складе",
    }
    supplier_step = {
        "step_id": "step_02_supplier",
        "question_id": "Q19",
        "node_id": "N03_selected_item_supplier",
        "question": "По выбранному объекту \"Столешница 600*3050*26 дуб ниагара\": кто это поставил нам",
    }
    orchestration_pack = {
        "pack_id": "inventory_active_contract_smoke",
        "scenarios": [
            {
                "scenario_id": "inventory_selected_item_provenance",
                "title": "Selected item provenance",
                "question_ids": ["Q01", "Q19"],
                "steps": [snapshot_step, supplier_step],
            }
        ],
    }
    contract = {
        "schema_version": "active_domain_contract_v1",
        "status": "active",
        "domain_id": "inventory_stock_supplier_provenance",
        "runtime_domain": "inventory_stock",
        "title": "Warehouse domain",
        "default_analysis_context": {"as_of_date": "2021-09-30"},
        "observed_anchors": observed_anchors,
        "question_pool": question_pool,
        "scenario_tree": scenario_tree,
        "orchestration_pack": orchestration_pack,
    }

    manifest_path = tmp_path / "active_domain_contract.json"
    serialized = json.dumps(contract, ensure_ascii=False, indent=2) + "\n"
    manifest_path.write_text(serialized, encoding="utf-8")

    pack = load_scenario_pack(manifest_path)

    assert pack["schema_version"] == "domain_scenario_pack_v1"
    assert pack["source_schema_version"] == "active_domain_contract_v1"
    assert pack["domain"] == "inventory_stock"
    assert pack["bindings"]["observed_warehouse"] == "Основной склад"
    assert pack["bindings"]["focus_item_historical"] == "Столешница 600*3050*26 дуб ниагара"
    assert pack["scenarios"][0]["question_ids"] == ["Q01", "Q19"]
    assert pack["scenarios"][0]["steps"][1]["question_id"] == "Q19"
|
||
|
||
|
||
def test_load_scenario_pack_enriches_step_with_node_contract_defaults(tmp_path) -> None:
    """A step that names a critical node is expected to inherit that node's contract.

    The step in the manifest carries only ids and a question. After loading,
    it must expose the node's expected intents, the node's
    ``expected_answer_shape`` under ``required_answer_shape``, and both
    carry-over invariants listed on the node.
    """
    supplier_node = {
        "node_id": "N03_selected_item_supplier",
        "expected_intents": ["inventory_purchase_provenance_for_item"],
        "expected_answer_shape": "direct_supplier_answer_first_then_evidence",
        "required_carryover_invariants": ["focus_object", "date_scope"],
    }
    supplier_step = {
        "step_id": "step_02_supplier",
        "question_id": "Q19",
        "node_id": "N03_selected_item_supplier",
        "question": "По выбранному объекту \"...\": кто это поставил нам",
    }
    contract = {
        "schema_version": "active_domain_contract_v1",
        "status": "active",
        "domain_id": "inventory_stock_supplier_provenance",
        "runtime_domain": "inventory_stock",
        "title": "Warehouse domain",
        "question_pool": {
            "questions": [
                {"question_id": "Q19", "node_id": "N03_selected_item_supplier", "text": "Q19"},
            ]
        },
        "scenario_tree": {"critical_nodes": [supplier_node]},
        "orchestration_pack": {
            "pack_id": "inventory_active_contract_smoke",
            "scenarios": [
                {
                    "scenario_id": "inventory_selected_item_provenance",
                    "title": "Selected item provenance",
                    "steps": [supplier_step],
                }
            ],
        },
    }

    manifest_path = tmp_path / "active_domain_contract.json"
    serialized = json.dumps(contract, ensure_ascii=False, indent=2) + "\n"
    manifest_path.write_text(serialized, encoding="utf-8")

    pack = load_scenario_pack(manifest_path)
    loaded_step = pack["scenarios"][0]["steps"][0]

    assert loaded_step["expected_intents"] == ["inventory_purchase_provenance_for_item"]
    assert loaded_step["required_answer_shape"] == "direct_supplier_answer_first_then_evidence"
    assert "focus_object" in loaded_step["required_carryover_invariants"]
    assert "date_scope" in loaded_step["required_carryover_invariants"]
|
||
|
||
|
||
def test_build_scenario_acceptance_matrix_marks_green_edge_when_covering_scenario_is_accepted() -> None:
    """An accepted scenario covering an edge is expected to turn edge and path green.

    The single scenario walks both nodes of the critical edge with the only
    required wording family and is reported as ``accepted``. The rendered
    matrix must contain green rows for the edge and for the primary user path.
    """
    snapshot_node = "N01_stock_snapshot"
    supplier_node = "N03_selected_item_supplier"

    scenario_tree = {
        "critical_nodes": [
            {
                "node_id": supplier_node,
                "covers_question_ids": ["Q19"],
                "required_wording_families": ["canonical"],
            }
        ],
        "critical_edges": [
            {
                "edge_id": "E01_snapshot_to_selected_item_supplier",
                "from_node": snapshot_node,
                "to_node": supplier_node,
                "primary_user_path": True,
            }
        ],
        "primary_user_paths": [
            {"path_id": "P01_snapshot_to_supplier", "nodes": [snapshot_node, supplier_node]}
        ],
    }
    scenario = {
        "scenario_id": "inventory_selected_item_provenance",
        "question_ids": ["Q01", "Q19"],
        "steps": [
            {
                "step_id": "step_01_snapshot",
                "question_id": "Q01",
                "node_id": snapshot_node,
                "paraphrase_family": "canonical",
            },
            {
                "step_id": "step_02_supplier",
                "question_id": "Q19",
                "node_id": supplier_node,
                "paraphrase_family": "canonical",
            },
        ],
    }
    pack = {
        "pack_id": "inventory_active_contract_smoke",
        "domain": "inventory_stock",
        "source_contract": {"domain_id": "inventory_stock_supplier_provenance", "title": "Warehouse domain"},
        "question_pool": {
            "questions": [
                {"question_id": "Q01", "node_id": snapshot_node},
                {"question_id": "Q19", "node_id": supplier_node},
            ]
        },
        "scenario_tree": scenario_tree,
        "scenarios": [scenario],
    }
    scenario_results = [
        {
            "scenario_id": "inventory_selected_item_provenance",
            "final_status": "accepted",
            "session_id": "asst-demo",
            "artifact_dir": "artifacts/domain_runs/demo",
        }
    ]

    rendered = build_scenario_acceptance_matrix(pack, scenario_results)

    assert "E01_snapshot_to_selected_item_supplier" in rendered
    assert "| E01_snapshot_to_selected_item_supplier | green |" in rendered
    assert "| P01_snapshot_to_supplier | green |" in rendered
|
||
|
||
|
||
def test_build_scenario_acceptance_matrix_marks_partial_when_wording_family_is_missing() -> None:
    """A node lacking one required wording family is expected to be only partial.

    The critical node requires two wording families but the accepted scenario
    exercises only ``canonical``. The rendered matrix must mark the node
    ``partial`` and name the missing family.
    """
    supplier_node = "N03_selected_item_supplier"

    critical_node = {
        "node_id": supplier_node,
        "covers_question_ids": ["Q19"],
        "required_wording_families": ["canonical", "ui_selected_object_colloquial"],
    }
    scenario = {
        "scenario_id": "inventory_selected_item_provenance",
        "question_ids": ["Q19"],
        "steps": [
            {
                "step_id": "step_01_supplier",
                "question_id": "Q19",
                "node_id": supplier_node,
                "paraphrase_family": "canonical",
            }
        ],
    }
    pack = {
        "pack_id": "inventory_active_contract_smoke",
        "domain": "inventory_stock",
        "source_contract": {"domain_id": "inventory_stock_supplier_provenance", "title": "Warehouse domain"},
        "question_pool": {
            "questions": [
                {"question_id": "Q19", "node_id": supplier_node},
            ]
        },
        "scenario_tree": {"critical_nodes": [critical_node]},
        "scenarios": [scenario],
    }
    scenario_results = [
        {
            "scenario_id": "inventory_selected_item_provenance",
            "final_status": "accepted",
            "session_id": "asst-demo",
            "artifact_dir": "artifacts/domain_runs/demo",
        }
    ]

    rendered = build_scenario_acceptance_matrix(pack, scenario_results)

    assert "| N03_selected_item_supplier | partial |" in rendered
    assert "missing_wording_families" in rendered
    assert "ui_selected_object_colloquial" in rendered
|
||
|
||
|
||
def test_derive_pack_final_status_downgrades_accepted_when_matrix_contains_partial_coverage() -> None:
    """An accepted scenario with incomplete wording coverage is expected to yield ``partial``.

    The only scenario is reported as ``accepted``, yet its critical node
    requires a wording family that no step exercises; the pack-level status
    must therefore be ``partial``.
    """
    supplier_node = "N03_selected_item_supplier"

    scenario = {
        "scenario_id": "inventory_selected_item_provenance",
        "question_ids": ["Q19"],
        "steps": [
            {
                "step_id": "step_01_supplier",
                "question_id": "Q19",
                "node_id": supplier_node,
                "paraphrase_family": "canonical",
            }
        ],
    }
    critical_node = {
        "node_id": supplier_node,
        "covers_question_ids": ["Q19"],
        "required_wording_families": ["canonical", "ui_selected_object_colloquial"],
    }
    pack = {
        "pack_id": "inventory_active_contract_smoke",
        "domain": "inventory_stock",
        "scenarios": [scenario],
        "scenario_tree": {"critical_nodes": [critical_node]},
        "question_pool": {
            "questions": [
                {"question_id": "Q19", "node_id": supplier_node},
            ]
        },
    }
    scenario_results = [
        {
            "scenario_id": "inventory_selected_item_provenance",
            "final_status": "accepted",
            "session_id": "asst-demo",
            "artifact_dir": "artifacts/domain_runs/demo",
        }
    ]

    final_status = derive_pack_final_status(pack, scenario_results)

    assert final_status == "partial"
|
||
|
||
|
||
def test_evaluate_analyst_gate_requires_temporal_honesty_field_truth_and_layering() -> None:
    """A verdict failing only ``temporal_honesty_ok`` is expected not to be accepted.

    The score (91) is above the target (80) and every other flag is
    favourable. The gate must still report ``accepted`` as ``False`` while
    returning the verdict's loop decision and user-decision fields as given.
    """
    verdict = {
        "quality_score": 91,
        "unresolved_p0_count": 0,
        "regression_detected": False,
        "direct_answer_ok": True,
        "business_usefulness_ok": True,
        "temporal_honesty_ok": False,  # the single failing check in this verdict
        "field_truth_ok": True,
        "answer_layering_ok": True,
        "loop_decision": "accepted",
        "requires_user_decision": False,
        "user_decision_type": "none",
        "user_decision_prompt": None,
    }

    gate_result = evaluate_analyst_gate(verdict, target_score=80)
    accepted, loop_decision, requires_user_decision, user_decision_type, user_decision_prompt = gate_result

    assert accepted is False
    assert loop_decision == "accepted"
    assert requires_user_decision is False
    assert user_decision_type == "none"
    assert user_decision_prompt is None
|
||
|
||
|
||
def test_validate_step_contract_rejects_wrong_month_filter_even_when_execution_is_exact() -> None:
    """Filters for the wrong period are expected to cause a hard rejection.

    Intent, capability, recipe and result mode all match expectations and the
    execution status is ``exact``, but the extracted filters point at
    December 2016 instead of the required May 2016.
    """
    required_filters = {
        "as_of_date": "2016-05-31",
        "period_from": "2016-05-01",
        "period_to": "2016-05-31",
    }
    extracted_filters = {
        "as_of_date": "2016-12-31",
        "period_from": "2016-01-01",
        "period_to": "2016-12-31",
    }
    step = {
        "execution_status": "exact",
        "status": "exact",
        "node_role": "root",
        "analysis_context": {"as_of_date": "2016-05-31"},
        "expected_intents": ["inventory_on_hand_as_of_date"],
        "detected_intent": "inventory_on_hand_as_of_date",
        "expected_capability": "confirmed_inventory_on_hand_as_of_date",
        "capability_id": "confirmed_inventory_on_hand_as_of_date",
        "expected_recipe": "address_inventory_on_hand_as_of_date_v1",
        "selected_recipe": "address_inventory_on_hand_as_of_date_v1",
        "expected_result_mode": "confirmed_balance",
        "result_mode": "confirmed_balance",
        "required_filters": required_filters,
        "required_answer_shape": "item_list_with_quantity_cost_warehouse_organization",
        "required_carryover_invariants": [],
        "required_state_objects": [],
        "forbidden_capabilities": [],
        "forbidden_recipes": [],
        "actual_direct_answer": "На 31.12.2016 на складе подтверждено 4 позиций.",
        "top_non_empty_lines": ["На 31.12.2016 на складе подтверждено 4 позиций."],
        "extracted_filters": extracted_filters,
        "date_scope": {"as_of_date": "2016-12-31"},
        "focus_object": None,
    }

    outcome = validate_step_contract(step)

    assert outcome["acceptance_status"] == "rejected"
    for violation in ("wrong_as_of_date", "wrong_period_from", "wrong_period_to"):
        assert violation in outcome["violated_invariants"]
    assert outcome["hard_fail"] is True
|
||
|
||
|
||
def test_validate_step_contract_rejects_selected_object_followup_without_focus_object_and_with_wrong_route() -> None:
    """A follow-up routed to the snapshot capability without a focus object is expected to be rejected.

    The step expects a provenance intent and capability but reports the
    on-hand snapshot intent, a forbidden capability and a forbidden recipe,
    and carries no ``focus_object`` although it is a required invariant.
    """
    step = {
        "execution_status": "exact",
        "status": "exact",
        "node_role": "critical_child",
        "analysis_context": {"as_of_date": "2019-03-31"},
        "expected_intents": ["inventory_purchase_provenance_for_item"],
        "detected_intent": "inventory_on_hand_as_of_date",
        "expected_capability": "inventory_purchase_provenance_for_item",
        "capability_id": "confirmed_inventory_on_hand_as_of_date",
        "expected_result_mode": "confirmed_balance",
        "result_mode": "confirmed_balance",
        "required_filters": {"as_of_date": "2019-03-31"},
        "required_answer_shape": "direct_supplier_answer_first_then_evidence",
        "required_carryover_invariants": ["focus_object", "date_scope"],
        "required_state_objects": [],
        "forbidden_capabilities": ["confirmed_inventory_on_hand_as_of_date"],
        "forbidden_recipes": ["address_inventory_on_hand_as_of_date_v1"],
        "selected_recipe": "address_inventory_on_hand_as_of_date_v1",
        "actual_direct_answer": "На 31.03.2019 на складе подтверждено 16 позиций.",
        "top_non_empty_lines": ["На 31.03.2019 на складе подтверждено 16 позиций."],
        "extracted_filters": {"as_of_date": "2019-03-31"},
        "date_scope": {"as_of_date": "2019-03-31"},
        "focus_object": None,
    }
    expected_violations = (
        "wrong_intent",
        "wrong_followup_action",
        "forbidden_capability_selected",
        "forbidden_recipe_selected",
        "focus_object_missing",
    )

    outcome = validate_step_contract(step)

    assert outcome["acceptance_status"] == "rejected"
    for violation in expected_violations:
        assert violation in outcome["violated_invariants"]
|
||
|
||
|
||
def test_validate_step_contract_rejects_top_level_noise_as_direct_answer() -> None:
    """A status line in place of the direct answer is expected to be rejected.

    The detected intent matches and a focus object is present, but the first
    non-empty line is a result-status banner; the supplier appears only on
    the second line.
    """
    status_banner = "Статус результата: подтверждено."
    step = {
        "execution_status": "exact",
        "status": "exact",
        "node_role": "critical_child",
        "analysis_context": {"as_of_date": "2019-03-31"},
        "expected_intents": ["inventory_purchase_provenance_for_item"],
        "detected_intent": "inventory_purchase_provenance_for_item",
        "expected_capability": "inventory_purchase_provenance_for_item",
        # NOTE(review): the doubled "inventory_" prefix differs from
        # expected_capability above - confirm this is intentional.
        "capability_id": "inventory_inventory_purchase_provenance_for_item",
        "expected_result_mode": "confirmed_balance",
        "result_mode": "confirmed_balance",
        "required_filters": {"as_of_date": "2019-03-31"},
        "required_answer_shape": "direct_supplier_answer_first_then_evidence",
        "required_carryover_invariants": [],
        "required_state_objects": [],
        "forbidden_capabilities": [],
        "forbidden_recipes": [],
        "selected_recipe": "address_inventory_purchase_provenance_for_item_v1",
        "actual_direct_answer": status_banner,
        "top_non_empty_lines": [
            status_banner,
            "Поставщик: Торговый дом \\Союз\\.",
        ],
        "extracted_filters": {"as_of_date": "2019-03-31"},
        "date_scope": {"as_of_date": "2019-03-31"},
        "focus_object": {"object_id": "item:1", "label": "Столешница"},
    }

    outcome = validate_step_contract(step)

    assert outcome["acceptance_status"] == "rejected"
    for violation in ("direct_answer_missing", "top_level_noise_present"):
        assert violation in outcome["violated_invariants"]
|