884 lines
38 KiB
Python
884 lines
38 KiB
Python
from __future__ import annotations
|
||
|
||
import json
|
||
import sys
|
||
from pathlib import Path
|
||
|
||
sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
|
||
|
||
from scripts.domain_case_loop import (
|
||
build_coder_loop_prompt,
|
||
build_coder_snapshot_paths,
|
||
build_deterministic_repair_targets,
|
||
build_scenario_step_state,
|
||
build_scenario_acceptance_matrix,
|
||
carry_forward_analysis_context,
|
||
derive_pack_final_status,
|
||
evaluate_analyst_gate,
|
||
evaluate_deterministic_loop_gate,
|
||
load_scenario_pack,
|
||
merge_scenario_date_scope,
|
||
select_primary_repair_focus,
|
||
restore_line_collapsed_files_from_snapshot,
|
||
snapshot_coder_candidate_files,
|
||
validate_step_contract,
|
||
)
|
||
|
||
|
||
def test_carry_forward_analysis_context_preserves_followup_anchor() -> None:
    """A context that already carries its own as_of_date and source is returned with both intact."""
    remembered_scope = {"as_of_date": "2020-03-31"}
    scenario_state = {"semantic_memory": {"date_scope": remembered_scope}}
    analysis_context = {"as_of_date": "2026-04-13", "source": "current_analysis"}

    result = carry_forward_analysis_context(scenario_state, analysis_context)

    assert result["as_of_date"] == "2026-04-13"
    assert result["source"] == "current_analysis"
|
||
|
||
|
||
def test_carry_forward_analysis_context_fills_missing_anchor() -> None:
    """An empty context is filled from the scenario state's remembered date scope."""
    remembered_scope = {"as_of_date": "2020-03-31"}
    scenario_state = {"semantic_memory": {"date_scope": remembered_scope}}
    empty_context = {}

    result = carry_forward_analysis_context(scenario_state, empty_context)

    assert result["as_of_date"] == "2020-03-31"
    # The source label marks that the date was taken from the scenario state.
    assert result["source"] == "scenario_state_carryover"
|
||
|
||
|
||
def test_merge_scenario_date_scope_preserves_historical_anchor_on_followup() -> None:
    """For a dependent step the earlier as_of_date wins while the source comes from the current scope."""
    earlier_scope = {"as_of_date": "2020-03-31", "source": "exact_anchor"}
    incoming_scope = {"as_of_date": "2026-04-13", "source": "current_analysis"}
    upstream_steps = ["step_01_anchor"]

    merged = merge_scenario_date_scope(
        earlier_scope,
        incoming_scope,
        depends_on=upstream_steps,
    )

    assert merged["as_of_date"] == "2020-03-31"
    assert merged["source"] == "current_analysis"
|
||
|
||
|
||
def test_load_scenario_pack_accepts_active_domain_contract(tmp_path) -> None:
    """An active_domain_contract_v1 manifest on disk loads as a domain_scenario_pack_v1 pack."""
    observed_anchors = {
        "warehouse": "Основной склад",
        "organization": "ООО \\Альтернатива Плюс\\",
        "historical_as_of_date": "2019-03-31",
        "current_as_of_date_example": "2021-09-30",
        "focus_item_historical": "Столешница 600*3050*26 дуб ниагара",
    }
    question_pool = {
        "questions": [
            {"question_id": "Q01", "node_id": "N01_stock_snapshot", "text": "Q1"},
            {"question_id": "Q19", "node_id": "N03_selected_item_supplier", "text": "Q19"},
        ]
    }
    scenario_tree = {
        "critical_edges": [
            {
                "edge_id": "E01_snapshot_to_selected_item_supplier",
                "from_node": "N01_stock_snapshot",
                "to_node": "N03_selected_item_supplier",
                "primary_user_path": True,
            }
        ]
    }
    snapshot_step = {
        "step_id": "step_01_snapshot",
        "question_id": "Q01",
        "node_id": "N01_stock_snapshot",
        "question": "Какие товары сейчас лежат на складе",
    }
    supplier_step = {
        "step_id": "step_02_supplier",
        "question_id": "Q19",
        "node_id": "N03_selected_item_supplier",
        "question": "По выбранному объекту \"Столешница 600*3050*26 дуб ниагара\": кто это поставил нам",
    }
    orchestration_pack = {
        "pack_id": "inventory_active_contract_smoke",
        "scenarios": [
            {
                "scenario_id": "inventory_selected_item_provenance",
                "title": "Selected item provenance",
                "question_ids": ["Q01", "Q19"],
                "steps": [snapshot_step, supplier_step],
            }
        ],
    }
    contract = {
        "schema_version": "active_domain_contract_v1",
        "status": "active",
        "domain_id": "inventory_stock_supplier_provenance",
        "runtime_domain": "inventory_stock",
        "title": "Warehouse domain",
        "default_analysis_context": {"as_of_date": "2021-09-30"},
        "observed_anchors": observed_anchors,
        "question_pool": question_pool,
        "scenario_tree": scenario_tree,
        "orchestration_pack": orchestration_pack,
    }

    # Serialise with a trailing newline, keeping the Cyrillic text unescaped.
    manifest_path = tmp_path / "active_domain_contract.json"
    serialized = json.dumps(contract, ensure_ascii=False, indent=2) + "\n"
    manifest_path.write_text(serialized, encoding="utf-8")

    pack = load_scenario_pack(manifest_path)

    assert pack["schema_version"] == "domain_scenario_pack_v1"
    assert pack["source_schema_version"] == "active_domain_contract_v1"
    assert pack["domain"] == "inventory_stock"

    bindings = pack["bindings"]
    assert bindings["observed_warehouse"] == "Основной склад"
    assert bindings["focus_item_historical"] == "Столешница 600*3050*26 дуб ниагара"

    first_scenario = pack["scenarios"][0]
    assert first_scenario["question_ids"] == ["Q01", "Q19"]
    assert first_scenario["steps"][1]["question_id"] == "Q19"
|
||
|
||
|
||
def test_load_scenario_pack_enriches_step_with_node_contract_defaults(tmp_path) -> None:
    """A step that only names its node picks up that node's intents, answer shape and invariants."""
    supplier_node = {
        "node_id": "N03_selected_item_supplier",
        "expected_intents": ["inventory_purchase_provenance_for_item"],
        "expected_answer_shape": "direct_supplier_answer_first_then_evidence",
        "required_carryover_invariants": ["focus_object", "date_scope"],
    }
    supplier_step = {
        "step_id": "step_02_supplier",
        "question_id": "Q19",
        "node_id": "N03_selected_item_supplier",
        "question": "По выбранному объекту \"...\": кто это поставил нам",
    }
    contract = {
        "schema_version": "active_domain_contract_v1",
        "status": "active",
        "domain_id": "inventory_stock_supplier_provenance",
        "runtime_domain": "inventory_stock",
        "title": "Warehouse domain",
        "question_pool": {
            "questions": [
                {"question_id": "Q19", "node_id": "N03_selected_item_supplier", "text": "Q19"},
            ]
        },
        "scenario_tree": {"critical_nodes": [supplier_node]},
        "orchestration_pack": {
            "pack_id": "inventory_active_contract_smoke",
            "scenarios": [
                {
                    "scenario_id": "inventory_selected_item_provenance",
                    "title": "Selected item provenance",
                    "steps": [supplier_step],
                }
            ],
        },
    }

    manifest_path = tmp_path / "active_domain_contract.json"
    serialized = json.dumps(contract, ensure_ascii=False, indent=2) + "\n"
    manifest_path.write_text(serialized, encoding="utf-8")

    loaded = load_scenario_pack(manifest_path)
    step = loaded["scenarios"][0]["steps"][0]

    assert step["expected_intents"] == ["inventory_purchase_provenance_for_item"]
    # The node's "expected_answer_shape" is asserted under the step key "required_answer_shape".
    assert step["required_answer_shape"] == "direct_supplier_answer_first_then_evidence"
    for invariant in ("focus_object", "date_scope"):
        assert invariant in step["required_carryover_invariants"]
|
||
|
||
|
||
def test_build_scenario_acceptance_matrix_marks_green_edge_when_covering_scenario_is_accepted() -> None:
    """An accepted scenario that walks both nodes yields green rows for the edge and the user path."""
    scenario_tree = {
        "critical_nodes": [
            {
                "node_id": "N03_selected_item_supplier",
                "covers_question_ids": ["Q19"],
                "required_wording_families": ["canonical"],
            }
        ],
        "critical_edges": [
            {
                "edge_id": "E01_snapshot_to_selected_item_supplier",
                "from_node": "N01_stock_snapshot",
                "to_node": "N03_selected_item_supplier",
                "primary_user_path": True,
            }
        ],
        "primary_user_paths": [
            {"path_id": "P01_snapshot_to_supplier", "nodes": ["N01_stock_snapshot", "N03_selected_item_supplier"]}
        ],
    }
    scenario = {
        "scenario_id": "inventory_selected_item_provenance",
        "question_ids": ["Q01", "Q19"],
        "steps": [
            {
                "step_id": "step_01_snapshot",
                "question_id": "Q01",
                "node_id": "N01_stock_snapshot",
                "paraphrase_family": "canonical",
            },
            {
                "step_id": "step_02_supplier",
                "question_id": "Q19",
                "node_id": "N03_selected_item_supplier",
                "paraphrase_family": "canonical",
            },
        ],
    }
    pack = {
        "pack_id": "inventory_active_contract_smoke",
        "domain": "inventory_stock",
        "source_contract": {"domain_id": "inventory_stock_supplier_provenance", "title": "Warehouse domain"},
        "question_pool": {
            "questions": [
                {"question_id": "Q01", "node_id": "N01_stock_snapshot"},
                {"question_id": "Q19", "node_id": "N03_selected_item_supplier"},
            ]
        },
        "scenario_tree": scenario_tree,
        "scenarios": [scenario],
    }
    scenario_results = [
        {
            "scenario_id": "inventory_selected_item_provenance",
            "final_status": "accepted",
            "session_id": "asst-demo",
            "artifact_dir": "artifacts/domain_runs/demo",
        }
    ]

    matrix = build_scenario_acceptance_matrix(pack, scenario_results)

    # The matrix is checked as text: the edge id and both green table rows must appear.
    expected_fragments = (
        "E01_snapshot_to_selected_item_supplier",
        "| E01_snapshot_to_selected_item_supplier | green |",
        "| P01_snapshot_to_supplier | green |",
    )
    for fragment in expected_fragments:
        assert fragment in matrix
|
||
|
||
|
||
def test_build_scenario_acceptance_matrix_marks_partial_when_wording_family_is_missing() -> None:
    """A node needing two wording families but exercised with one is reported as partial."""
    supplier_node = {
        "node_id": "N03_selected_item_supplier",
        "covers_question_ids": ["Q19"],
        "required_wording_families": ["canonical", "ui_selected_object_colloquial"],
    }
    # Only the "canonical" family is exercised by the single step.
    supplier_step = {
        "step_id": "step_01_supplier",
        "question_id": "Q19",
        "node_id": "N03_selected_item_supplier",
        "paraphrase_family": "canonical",
    }
    pack = {
        "pack_id": "inventory_active_contract_smoke",
        "domain": "inventory_stock",
        "source_contract": {"domain_id": "inventory_stock_supplier_provenance", "title": "Warehouse domain"},
        "question_pool": {
            "questions": [
                {"question_id": "Q19", "node_id": "N03_selected_item_supplier"},
            ]
        },
        "scenario_tree": {"critical_nodes": [supplier_node]},
        "scenarios": [
            {
                "scenario_id": "inventory_selected_item_provenance",
                "question_ids": ["Q19"],
                "steps": [supplier_step],
            }
        ],
    }
    scenario_results = [
        {
            "scenario_id": "inventory_selected_item_provenance",
            "final_status": "accepted",
            "session_id": "asst-demo",
            "artifact_dir": "artifacts/domain_runs/demo",
        }
    ]

    matrix = build_scenario_acceptance_matrix(pack, scenario_results)

    expected_fragments = (
        "| N03_selected_item_supplier | partial |",
        "missing_wording_families",
        "ui_selected_object_colloquial",
    )
    for fragment in expected_fragments:
        assert fragment in matrix
|
||
|
||
|
||
def test_derive_pack_final_status_downgrades_accepted_when_matrix_contains_partial_coverage() -> None:
    """An accepted scenario still gives a "partial" pack status when a wording family is uncovered."""
    supplier_step = {
        "step_id": "step_01_supplier",
        "question_id": "Q19",
        "node_id": "N03_selected_item_supplier",
        "paraphrase_family": "canonical",
    }
    supplier_node = {
        "node_id": "N03_selected_item_supplier",
        "covers_question_ids": ["Q19"],
        "required_wording_families": ["canonical", "ui_selected_object_colloquial"],
    }
    pack = {
        "pack_id": "inventory_active_contract_smoke",
        "domain": "inventory_stock",
        "scenarios": [
            {
                "scenario_id": "inventory_selected_item_provenance",
                "question_ids": ["Q19"],
                "steps": [supplier_step],
            },
        ],
        "scenario_tree": {"critical_nodes": [supplier_node]},
        "question_pool": {
            "questions": [
                {"question_id": "Q19", "node_id": "N03_selected_item_supplier"},
            ]
        },
    }
    scenario_results = [
        {
            "scenario_id": "inventory_selected_item_provenance",
            "final_status": "accepted",
            "session_id": "asst-demo",
            "artifact_dir": "artifacts/domain_runs/demo",
        }
    ]

    final_status = derive_pack_final_status(pack, scenario_results)

    assert final_status == "partial"
|
||
|
||
|
||
def test_evaluate_analyst_gate_requires_temporal_honesty_field_truth_and_layering() -> None:
    """A verdict above target but with temporal_honesty_ok=False is not accepted by the gate."""
    verdict = {
        "quality_score": 91,
        "unresolved_p0_count": 0,
        "regression_detected": False,
        "direct_answer_ok": True,
        "business_usefulness_ok": True,
        # The only failing flag in this verdict.
        "temporal_honesty_ok": False,
        "field_truth_ok": True,
        "answer_layering_ok": True,
        "loop_decision": "accepted",
        "requires_user_decision": False,
        "user_decision_type": "none",
        "user_decision_prompt": None,
    }

    gate_result = evaluate_analyst_gate(verdict, target_score=80)
    accepted, loop_decision, needs_user, decision_type, decision_prompt = gate_result

    assert accepted is False
    # The remaining fields equal the values supplied in the verdict.
    assert loop_decision == "accepted"
    assert needs_user is False
    assert decision_type == "none"
    assert decision_prompt is None
|
||
|
||
|
||
def test_validate_step_contract_rejects_wrong_month_filter_even_when_execution_is_exact() -> None:
    """A step whose extracted filters disagree with the required ones is a hard-fail rejection."""
    # Required: May 2016.
    required_filters = {
        "as_of_date": "2016-05-31",
        "period_from": "2016-05-01",
        "period_to": "2016-05-31",
    }
    # Extracted: the whole of 2016, ending on 31 December.
    extracted_filters = {
        "as_of_date": "2016-12-31",
        "period_from": "2016-01-01",
        "period_to": "2016-12-31",
    }
    step_record = {
        "execution_status": "exact",
        "status": "exact",
        "node_role": "root",
        "analysis_context": {"as_of_date": "2016-05-31"},
        "expected_intents": ["inventory_on_hand_as_of_date"],
        "detected_intent": "inventory_on_hand_as_of_date",
        "expected_capability": "confirmed_inventory_on_hand_as_of_date",
        "capability_id": "confirmed_inventory_on_hand_as_of_date",
        "expected_recipe": "address_inventory_on_hand_as_of_date_v1",
        "selected_recipe": "address_inventory_on_hand_as_of_date_v1",
        "expected_result_mode": "confirmed_balance",
        "result_mode": "confirmed_balance",
        "required_filters": required_filters,
        "required_answer_shape": "item_list_with_quantity_cost_warehouse_organization",
        "required_carryover_invariants": [],
        "required_state_objects": [],
        "forbidden_capabilities": [],
        "forbidden_recipes": [],
        "actual_direct_answer": "На 31.12.2016 на складе подтверждено 4 позиций.",
        "top_non_empty_lines": ["На 31.12.2016 на складе подтверждено 4 позиций."],
        "extracted_filters": extracted_filters,
        "date_scope": {"as_of_date": "2016-12-31"},
        "focus_object": None,
    }

    validated = validate_step_contract(step_record)

    assert validated["acceptance_status"] == "rejected"
    for violation in ("wrong_as_of_date", "wrong_period_from", "wrong_period_to"):
        assert violation in validated["violated_invariants"]
    assert validated["hard_fail"] is True
|
||
|
||
|
||
def test_validate_step_contract_rejects_selected_object_followup_without_focus_object_and_with_wrong_route() -> None:
    """A follow-up routed to a forbidden capability and recipe with no focus object is rejected."""
    step_record = {
        "execution_status": "exact",
        "status": "exact",
        "node_role": "critical_child",
        "analysis_context": {"as_of_date": "2019-03-31"},
        "expected_intents": ["inventory_purchase_provenance_for_item"],
        # Detected intent and capability differ from the expected provenance ones.
        "detected_intent": "inventory_on_hand_as_of_date",
        "expected_capability": "inventory_purchase_provenance_for_item",
        "capability_id": "confirmed_inventory_on_hand_as_of_date",
        "expected_result_mode": "confirmed_balance",
        "result_mode": "confirmed_balance",
        "required_filters": {"as_of_date": "2019-03-31"},
        "required_answer_shape": "direct_supplier_answer_first_then_evidence",
        "required_carryover_invariants": ["focus_object", "date_scope"],
        "required_state_objects": [],
        "forbidden_capabilities": ["confirmed_inventory_on_hand_as_of_date"],
        "forbidden_recipes": ["address_inventory_on_hand_as_of_date_v1"],
        "selected_recipe": "address_inventory_on_hand_as_of_date_v1",
        "actual_direct_answer": "На 31.03.2019 на складе подтверждено 16 позиций.",
        "top_non_empty_lines": ["На 31.03.2019 на складе подтверждено 16 позиций."],
        "extracted_filters": {"as_of_date": "2019-03-31"},
        "date_scope": {"as_of_date": "2019-03-31"},
        "focus_object": None,
    }

    validated = validate_step_contract(step_record)

    assert validated["acceptance_status"] == "rejected"
    expected_violations = (
        "wrong_intent",
        "wrong_followup_action",
        "forbidden_capability_selected",
        "forbidden_recipe_selected",
        "focus_object_missing",
    )
    for violation in expected_violations:
        assert violation in validated["violated_invariants"]
|
||
|
||
|
||
def test_validate_step_contract_rejects_top_level_noise_as_direct_answer() -> None:
    """A reply that opens with a status line before the supplier line is rejected."""
    status_line = "Статус результата: подтверждено."
    step_record = {
        "execution_status": "exact",
        "status": "exact",
        "node_role": "critical_child",
        "analysis_context": {"as_of_date": "2019-03-31"},
        "expected_intents": ["inventory_purchase_provenance_for_item"],
        "detected_intent": "inventory_purchase_provenance_for_item",
        "expected_capability": "inventory_purchase_provenance_for_item",
        "capability_id": "inventory_inventory_purchase_provenance_for_item",
        "expected_result_mode": "confirmed_balance",
        "result_mode": "confirmed_balance",
        "required_filters": {"as_of_date": "2019-03-31"},
        "required_answer_shape": "direct_supplier_answer_first_then_evidence",
        "required_carryover_invariants": [],
        "required_state_objects": [],
        "forbidden_capabilities": [],
        "forbidden_recipes": [],
        "selected_recipe": "address_inventory_purchase_provenance_for_item_v1",
        "actual_direct_answer": status_line,
        "top_non_empty_lines": [
            status_line,
            "Поставщик: Торговый дом \\Союз\\.",
        ],
        "extracted_filters": {"as_of_date": "2019-03-31"},
        "date_scope": {"as_of_date": "2019-03-31"},
        "focus_object": {"object_id": "item:1", "label": "Столешница"},
    }

    validated = validate_step_contract(step_record)

    assert validated["acceptance_status"] == "rejected"
    for violation in ("direct_answer_missing", "top_level_noise_present"):
        assert violation in validated["violated_invariants"]
|
||
|
||
|
||
def test_build_deterministic_repair_targets_marks_followup_router_gap_as_p0() -> None:
    """A hard-failed follow-up with routing and focus-object violations becomes one P0 target."""
    pack_summary = {"pack_id": "demo_pack", "domain": "inventory_stock", "final_status": "partial"}
    rejected_step = {
        "step_id": "step_02_supplier",
        "question_resolved": 'По выбранному объекту "Столешница": кто поставил',
        "execution_status": "exact",
        "acceptance_status": "rejected",
        "reply_type": "factual",
        "selected_recipe": "address_inventory_on_hand_as_of_date_v1",
        "capability_id": "confirmed_inventory_on_hand_as_of_date",
        "violated_invariants": [
            "wrong_followup_action",
            "focus_object_missing",
            "forbidden_capability_selected",
        ],
        "warnings": [],
        "hard_fail": True,
    }
    scenario_results = [
        {
            "scenario_id": "inventory_selected_item_provenance",
            "title": "Selected item provenance",
            "artifact_dir": "artifacts/domain_runs/demo/scenarios/inventory_selected_item_provenance",
            "scenario_state": {
                "step_outputs": {
                    "step_02_supplier": rejected_step,
                }
            },
        }
    ]

    repair_targets = build_deterministic_repair_targets(pack_summary, scenario_results)

    assert repair_targets["target_count"] == 1
    target = repair_targets["targets"][0]
    assert target["severity"] == "P0"
    assert target["problem_type"] == "followup_action_resolution_gap"
    for layer in ("followup_action_resolution_gap", "object_memory_gap"):
        assert layer in target["root_cause_layers"]
    # Candidate files are joined so the file name matches regardless of its directory prefix.
    joined_candidates = " ".join(target["candidate_files"])
    assert "addressIntentResolver.ts" in joined_candidates
|
||
|
||
|
||
def test_build_deterministic_repair_targets_marks_anchor_gap_as_p1() -> None:
    """A rejected partial step flagged materialized_but_not_anchor_matched becomes one P1 target."""
    pack_summary = {"pack_id": "demo_pack", "domain": "inventory_stock", "final_status": "partial"}
    rejected_step = {
        "step_id": "step_02_selected_item_buyer_ui",
        "question_resolved": 'По выбранному объекту "Шкаф": кому был продан товар',
        "execution_status": "partial",
        "acceptance_status": "rejected",
        "reply_type": "partial_coverage",
        "fallback_type": "partial",
        "mcp_call_status": "materialized_but_not_anchor_matched",
        "selected_recipe": "address_inventory_sale_trace_for_item_v1",
        "capability_id": "inventory_inventory_sale_trace_for_item",
        # No invariant violations and no hard fail for this step.
        "violated_invariants": [],
        "warnings": [],
        "hard_fail": False,
    }
    scenario_results = [
        {
            "scenario_id": "inventory_sale_trace",
            "title": "Sale trace",
            "artifact_dir": "artifacts/domain_runs/demo/scenarios/inventory_sale_trace",
            "scenario_state": {
                "step_outputs": {
                    "step_02_selected_item_buyer_ui": rejected_step,
                }
            },
        }
    ]

    repair_targets = build_deterministic_repair_targets(pack_summary, scenario_results)

    assert repair_targets["target_count"] == 1
    target = repair_targets["targets"][0]
    assert target["severity"] == "P1"
    assert target["problem_type"] == "domain_anchor_gap"
    assert target["root_cause_layers"] == ["domain_anchor_gap"]
    joined_candidates = " ".join(target["candidate_files"])
    assert "addressQueryService.ts" in joined_candidates
|
||
|
||
|
||
def test_build_deterministic_repair_targets_prioritizes_high_leverage_focus() -> None:
    """With three rejected steps, the top-ranked focus and first target are temporal_honesty_gap."""
    pack_summary = {"pack_id": "demo_pack", "domain": "inventory_stock", "final_status": "partial"}

    unresolved_supplier_step = {
        "step_id": "step_05_unresolved_supplier_link",
        "question_resolved": "Какие товары сейчас висят в остатке без понятной привязки к поставщику",
        "execution_status": "exact",
        "acceptance_status": "rejected",
        "reply_type": "factual",
        "selected_recipe": "address_inventory_supplier_stock_overlap_as_of_date_v1",
        "capability_id": "inventory_inventory_supplier_stock_overlap_as_of_date",
        "violated_invariants": [
            "wrong_as_of_date",
            "missing_required_filter",
            "wrong_date_scope_state",
        ],
        "warnings": [],
        "hard_fail": True,
    }
    stock_now_step = {
        "step_id": "step_01_stock_now",
        "question_resolved": "Какие товары сейчас лежат на складе",
        "execution_status": "exact",
        "acceptance_status": "rejected",
        "reply_type": "factual",
        "selected_recipe": "address_inventory_on_hand_as_of_date_v1",
        "capability_id": "confirmed_inventory_on_hand_as_of_date",
        "violated_invariants": [
            "wrong_as_of_date",
            "missing_required_filter",
        ],
        "warnings": [],
        "hard_fail": True,
    }
    historical_stock_step = {
        "step_id": "step_02_stock_on_historical_date",
        "question_resolved": "Покажи остатки на складе на март 2019",
        "execution_status": "exact",
        "acceptance_status": "rejected",
        "reply_type": "factual",
        "selected_recipe": "address_inventory_on_hand_as_of_date_v1",
        "capability_id": "confirmed_inventory_on_hand_as_of_date",
        "violated_invariants": [
            "wrong_as_of_date",
            "wrong_period_from",
            "wrong_period_to",
        ],
        "warnings": [],
        "hard_fail": True,
    }

    scenario_results = [
        {
            "scenario_id": "inventory_aging_and_unresolved",
            "title": "Aging and unresolved",
            "artifact_dir": "artifacts/domain_runs/demo/scenarios/inventory_aging_and_unresolved",
            "scenario_state": {
                "step_outputs": {
                    "step_05_unresolved_supplier_link": unresolved_supplier_step,
                }
            },
        },
        {
            "scenario_id": "inventory_snapshot_roots",
            "title": "Root stock snapshots",
            "artifact_dir": "artifacts/domain_runs/demo/scenarios/inventory_snapshot_roots",
            "scenario_state": {
                "step_outputs": {
                    "step_01_stock_now": stock_now_step,
                    "step_02_stock_on_historical_date": historical_stock_step,
                }
            },
        },
    ]

    repair_targets = build_deterministic_repair_targets(pack_summary, scenario_results)

    assert repair_targets["target_count"] == 3

    top_focus = repair_targets["priority_foci"][0]
    assert top_focus["problem_type"] == "temporal_honesty_gap"
    assert top_focus["target_count"] == 2

    first_target = repair_targets["targets"][0]
    assert first_target["problem_type"] == "temporal_honesty_gap"
    assert first_target["repair_focus_rank"] == 1
|
||
|
||
|
||
def test_build_coder_loop_prompt_demands_high_leverage_focus_first(tmp_path) -> None:
    """The coder prompt contains the focus-first instructions and the assigned focus id."""
    loop_dir = tmp_path / "loop"
    iteration_dir = loop_dir / "iterations" / "iteration_00"
    pack_dir = iteration_dir / "pack_output" / "pack_run"

    prompt = build_coder_loop_prompt(
        loop_dir=loop_dir,
        iteration_dir=iteration_dir,
        pack_dir=pack_dir,
        repair_targets_path=pack_dir / "repair_targets.json",
        repair_targets_json='{"priority_foci":[{"focus_rank":1,"problem_type":"temporal_honesty_gap","target_count":4}]}',
        assigned_focus={"focus_id": "temporal_honesty_gap|addressFilterExtractor.ts", "problem_type": "temporal_honesty_gap"},
        analyst_verdict_path=iteration_dir / "analyst_verdict.json",
        analyst_verdict_json='{"quality_score":56}',
    )

    required_fragments = (
        "highest-leverage repair focus first",
        "patch the narrowest shared layer",
        "single-line collapses",
        "mandatory for this iteration",
        "temporal_honesty_gap|addressFilterExtractor.ts",
    )
    for fragment in required_fragments:
        assert fragment in prompt
|
||
|
||
|
||
def test_select_primary_repair_focus_returns_top_priority_focus() -> None:
    """Of two ranked foci, the one with focus_rank 1 is returned."""
    repair_targets = {
        "priority_foci": [
            {"focus_id": "focus-1", "focus_rank": 1},
            {"focus_id": "focus-2", "focus_rank": 2},
        ]
    }

    chosen = select_primary_repair_focus(repair_targets)

    assert chosen == {"focus_id": "focus-1", "focus_rank": 1}
|
||
|
||
|
||
def test_build_coder_snapshot_paths_collects_candidate_files_once(tmp_path) -> None:
    """A candidate listed twice appears once, and "../outside.ts" is absent from the result."""
    repo_root = tmp_path
    file_a = repo_root / "llm_normalizer/backend/src/services/addressFilterExtractor.ts"
    file_b = repo_root / "llm_normalizer/backend/src/services/address_runtime/decomposeStage.ts"
    for candidate in (file_a, file_b):
        candidate.parent.mkdir(parents=True, exist_ok=True)
    for candidate in (file_a, file_b):
        candidate.write_text("line1\nline2\n", encoding="utf-8")

    repair_targets = {
        "priority_foci": [
            {
                "candidate_files": [
                    "llm_normalizer/backend/src/services/addressFilterExtractor.ts",
                    "llm_normalizer/backend/src/services/address_runtime/decomposeStage.ts",
                ]
            }
        ],
        "targets": [
            {
                "candidate_files": [
                    "llm_normalizer/backend/src/services/addressFilterExtractor.ts",
                    "../outside.ts",
                ]
            }
        ],
    }

    # Point the module's REPO_ROOT at the temp tree for the call, then restore it.
    loop_module = sys.modules["scripts.domain_case_loop"]
    saved_repo_root = loop_module.REPO_ROOT
    loop_module.REPO_ROOT = repo_root
    try:
        paths = build_coder_snapshot_paths(repair_targets)
    finally:
        loop_module.REPO_ROOT = saved_repo_root

    assert paths == [file_a, file_b]
|
||
|
||
|
||
def test_restore_line_collapsed_files_from_snapshot_recovers_original_text(tmp_path) -> None:
    """A file whose only change is lost newlines is restored from its snapshot."""
    sample = tmp_path / "sample.ts"
    original = "const a = 1;\nconst b = 2;\n"
    collapsed = "const a = 1;const b = 2;"

    sample.write_text(original, encoding="utf-8")
    snapshots = snapshot_coder_candidate_files([sample])
    # Same statements as the original, with the newlines removed.
    sample.write_text(collapsed, encoding="utf-8")

    loop_module = sys.modules["scripts.domain_case_loop"]
    saved_repo_root = loop_module.REPO_ROOT
    loop_module.REPO_ROOT = tmp_path
    try:
        restored = restore_line_collapsed_files_from_snapshot(snapshots)
    finally:
        loop_module.REPO_ROOT = saved_repo_root

    assert restored == ["sample.ts"]
    assert sample.read_text(encoding="utf-8") == original
|
||
|
||
|
||
def test_restore_line_collapsed_files_from_snapshot_keeps_semantic_changes(tmp_path) -> None:
    """A collapsed file that also changes a value is left alone, not restored."""
    sample = tmp_path / "sample.ts"
    original = "const a = 1;\nconst b = 2;\n"
    # Newlines removed and "b = 2" changed to "b = 3".
    edited = "const a = 1;const b = 3;"

    sample.write_text(original, encoding="utf-8")
    snapshots = snapshot_coder_candidate_files([sample])
    sample.write_text(edited, encoding="utf-8")

    loop_module = sys.modules["scripts.domain_case_loop"]
    saved_repo_root = loop_module.REPO_ROOT
    loop_module.REPO_ROOT = tmp_path
    try:
        restored = restore_line_collapsed_files_from_snapshot(snapshots)
    finally:
        loop_module.REPO_ROOT = saved_repo_root

    assert restored == []
    assert sample.read_text(encoding="utf-8") == "const a = 1;const b = 3;"
|
||
|
||
|
||
def test_evaluate_deterministic_loop_gate_rejects_partial_pack_even_without_targets() -> None:
    """A pack with final_status "partial" fails the gate even with zero P0/P1 targets."""
    pack_summary = {"final_status": "partial"}
    repair_targets = {"severity_counts": {"P0": 0, "P1": 0}}

    verdict = evaluate_deterministic_loop_gate(pack_summary, repair_targets)
    passed, reason = verdict

    assert passed is False
    assert reason == "pack_final_status=partial"
|
||
|
||
|
||
def test_evaluate_deterministic_loop_gate_rejects_remaining_p1_targets() -> None:
    """An accepted pack still fails the gate while two P1 targets remain."""
    pack_summary = {"final_status": "accepted"}
    repair_targets = {"severity_counts": {"P0": 0, "P1": 2}}

    verdict = evaluate_deterministic_loop_gate(pack_summary, repair_targets)
    passed, reason = verdict

    assert passed is False
    assert reason == "repair_targets_remaining=P0:0,P1:2"
|
||
|
||
|
||
def test_evaluate_deterministic_loop_gate_accepts_clean_pack_without_remaining_p0_p1() -> None:
    """An accepted pack with no P0/P1 targets passes the gate despite one warning."""
    pack_summary = {"final_status": "accepted"}
    repair_targets = {"severity_counts": {"P0": 0, "P1": 0, "warning": 1}}

    verdict = evaluate_deterministic_loop_gate(pack_summary, repair_targets)
    passed, reason = verdict

    assert passed is True
    assert reason == "deterministic_gate_passed"
|
||
|
||
|
||
def test_build_scenario_step_state_uses_effective_analysis_context_from_turn_artifact() -> None:
    """A 2021-09-30 analysis context answered with March 2019 filters is rejected."""
    question = "Из каких товаров состоит остаток по 41 счету"
    step = {
        "step_id": "step_03_account_41_now",
        "title": "Account 41 current composition",
        "depends_on": [],
        "question_template": question,
        "analysis_context": {},
        "expected_intents": ["inventory_on_hand_as_of_date"],
        "expected_capability": "confirmed_inventory_on_hand_as_of_date",
        "expected_recipe": None,
        "expected_result_mode": "confirmed_balance",
        # The step itself requires only the period bounds, not as_of_date.
        "required_filters": {
            "period_from": "2021-09-01",
            "period_to": "2021-09-30",
        },
        "forbidden_capabilities": [],
        "forbidden_recipes": [],
        "required_state_objects": [],
        "required_answer_shape": "item_list_with_account_41_scope",
        "forbidden_answer_patterns": [],
        "required_carryover_invariants": [],
        "invariant_severity": {},
    }
    debug_payload = {
        "detected_mode": "address_query",
        "detected_intent": "inventory_on_hand_as_of_date",
        "selected_recipe": "address_inventory_on_hand_as_of_date_v1",
        "capability_id": "confirmed_inventory_on_hand_as_of_date",
        "capability_route_mode": "exact",
        "route_expectation_status": "matched",
        "result_mode": "confirmed_balance",
        "response_type": "FACTUAL_LIST",
        "extracted_filters": {
            "as_of_date": "2019-03-31",
            "period_from": "2019-03-01",
            "period_to": "2019-03-31",
        },
        "fallback_type": "none",
        "mcp_call_status": "matched_non_empty",
        "balance_confirmed": True,
    }
    turn_artifact = {
        "scenario": {
            "analysis_context": {
                "as_of_date": "2021-09-30",
                "source": "scenario_manifest",
            }
        },
        "assistant_message": {
            "reply_type": "factual",
            "text": "На 31.03.2019 на складе подтверждено 16 позиций.",
        },
        "technical_debug_payload": debug_payload,
        "session_summary": {
            "address_navigation_state": {
                "session_context": {
                    "date_scope": {
                        "as_of_date": "2019-03-31",
                        "period_from": "2019-03-01",
                        "period_to": "2019-03-31",
                    }
                }
            }
        },
    }

    step_state = build_scenario_step_state(
        scenario_id="inventory_snapshot_roots",
        domain="inventory_stock",
        step=step,
        step_index=3,
        question_resolved=question,
        analysis_context={"as_of_date": "2021-09-30", "source": "scenario_manifest"},
        turn_artifact=turn_artifact,
        entries=[],
    )

    assert step_state["analysis_context"]["as_of_date"] == "2021-09-30"
    for violation in ("wrong_as_of_date", "wrong_period_from", "wrong_period_to"):
        assert violation in step_state["violated_invariants"]
    assert step_state["acceptance_status"] == "rejected"
|