NODEDC_1C/tests/test_domain_case_loop.py

944 lines
40 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from __future__ import annotations
import json
import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
from scripts.domain_case_loop import (
build_coder_loop_prompt,
build_coder_snapshot_paths,
build_deterministic_repair_targets,
build_scenario_step_state,
build_scenario_acceptance_matrix,
carry_forward_analysis_context,
derive_pack_final_status,
evaluate_analyst_gate,
evaluate_deterministic_loop_gate,
load_scenario_pack,
load_shared_local_llm_defaults,
merge_scenario_date_scope,
select_primary_repair_focus,
restore_line_collapsed_files_from_snapshot,
snapshot_coder_candidate_files,
validate_step_contract,
)
def test_carry_forward_analysis_context_preserves_followup_anchor() -> None:
    """A populated analysis context keeps its own date and source despite older scenario memory."""
    remembered_scope = {"as_of_date": "2020-03-31"}
    scenario_state = {"semantic_memory": {"date_scope": remembered_scope}}
    analysis_context = {"as_of_date": "2026-04-13", "source": "current_analysis"}

    carried = carry_forward_analysis_context(scenario_state, analysis_context)

    # Both fields must come from the explicit context, not from the remembered scope.
    assert carried["as_of_date"] == "2026-04-13"
    assert carried["source"] == "current_analysis"
def test_carry_forward_analysis_context_fills_missing_anchor() -> None:
    """An empty analysis context is filled with the date held in scenario memory."""
    scenario_state = {"semantic_memory": {"date_scope": {"as_of_date": "2020-03-31"}}}

    carried = carry_forward_analysis_context(scenario_state, {})

    # The carried-over anchor is tagged with its own source marker.
    expected_fields = (
        ("as_of_date", "2020-03-31"),
        ("source", "scenario_state_carryover"),
    )
    for field_name, expected_value in expected_fields:
        assert carried[field_name] == expected_value
def test_merge_scenario_date_scope_preserves_historical_anchor_on_followup() -> None:
    """A dependent follow-up keeps the earlier as_of_date while the source is the current one."""
    historical_scope = {"as_of_date": "2020-03-31", "source": "exact_anchor"}
    followup_scope = {"as_of_date": "2026-04-13", "source": "current_analysis"}

    merged = merge_scenario_date_scope(
        historical_scope,
        followup_scope,
        depends_on=["step_01_anchor"],
    )

    assert merged["as_of_date"] == "2020-03-31"
    assert merged["source"] == "current_analysis"
def test_load_shared_local_llm_defaults_uses_ui_selected_local_model(tmp_path) -> None:
    """With a local provider, the defaults mirror the connection block of the shared config."""
    connection = {
        "llmProvider": "local",
        "model": "unsloth/qwen3-30b-a3b-instruct-2507",
        "baseUrl": "http://127.0.0.1:1234/v1",
        "temperature": 0.2,
        "maxOutputTokens": 1200,
    }
    shared_config = {
        "schema_version": "shared_llm_connection_v1",
        "updated_at": "2026-04-15T06:00:00Z",
        "connection": connection,
    }
    config_path = tmp_path / "shared_llm_connection.json"
    serialized = json.dumps(shared_config, ensure_ascii=False, indent=2) + "\n"
    config_path.write_text(serialized, encoding="utf-8")

    defaults = load_shared_local_llm_defaults(config_path)

    assert defaults["llm_provider"] == "local"
    assert defaults["llm_model"] == "unsloth/qwen3-30b-a3b-instruct-2507"
    assert defaults["llm_base_url"] == "http://127.0.0.1:1234/v1"
    assert defaults["temperature"] == 0.2
    assert defaults["max_output_tokens"] == 1200
def test_load_shared_local_llm_defaults_ignores_non_local_provider(tmp_path) -> None:
    """A non-local provider in the shared config does not leak into the local defaults."""
    connection = {
        "llmProvider": "openai",
        "model": "gpt-4o-mini",
        "baseUrl": "https://api.openai.com/v1",
        "temperature": 0,
        "maxOutputTokens": 700,
    }
    shared_config = {
        "schema_version": "shared_llm_connection_v1",
        "updated_at": "2026-04-15T06:00:00Z",
        "connection": connection,
    }
    config_path = tmp_path / "shared_llm_connection.json"
    serialized = json.dumps(shared_config, ensure_ascii=False, indent=2) + "\n"
    config_path.write_text(serialized, encoding="utf-8")

    defaults = load_shared_local_llm_defaults(config_path)

    # The loader is expected to report its local provider and model, not the openai ones.
    assert defaults["llm_provider"] == "local"
    assert defaults["llm_model"] == "qwen2.5-14b-instruct-1m"
def test_load_scenario_pack_accepts_active_domain_contract(tmp_path) -> None:
    """An active_domain_contract_v1 manifest loads as a domain_scenario_pack_v1 pack with bindings."""
    observed_anchors = {
        "warehouse": "Основной склад",
        "organization": "ООО \\Альтернатива Плюс\\",
        "historical_as_of_date": "2019-03-31",
        "current_as_of_date_example": "2021-09-30",
        "focus_item_historical": "Столешница 600*3050*26 дуб ниагара",
    }
    questions = [
        {"question_id": "Q01", "node_id": "N01_stock_snapshot", "text": "Q1"},
        {"question_id": "Q19", "node_id": "N03_selected_item_supplier", "text": "Q19"},
    ]
    critical_edges = [
        {
            "edge_id": "E01_snapshot_to_selected_item_supplier",
            "from_node": "N01_stock_snapshot",
            "to_node": "N03_selected_item_supplier",
            "primary_user_path": True,
        }
    ]
    steps = [
        {
            "step_id": "step_01_snapshot",
            "question_id": "Q01",
            "node_id": "N01_stock_snapshot",
            "question": "Какие товары сейчас лежат на складе",
        },
        {
            "step_id": "step_02_supplier",
            "question_id": "Q19",
            "node_id": "N03_selected_item_supplier",
            "question": "По выбранному объекту \"Столешница 600*3050*26 дуб ниагара\": кто это поставил нам",
        },
    ]
    scenario = {
        "scenario_id": "inventory_selected_item_provenance",
        "title": "Selected item provenance",
        "question_ids": ["Q01", "Q19"],
        "steps": steps,
    }
    contract = {
        "schema_version": "active_domain_contract_v1",
        "status": "active",
        "domain_id": "inventory_stock_supplier_provenance",
        "runtime_domain": "inventory_stock",
        "title": "Warehouse domain",
        "default_analysis_context": {"as_of_date": "2021-09-30"},
        "observed_anchors": observed_anchors,
        "question_pool": {"questions": questions},
        "scenario_tree": {"critical_edges": critical_edges},
        "orchestration_pack": {
            "pack_id": "inventory_active_contract_smoke",
            "scenarios": [scenario],
        },
    }
    manifest_path = tmp_path / "active_domain_contract.json"
    manifest_path.write_text(
        json.dumps(contract, ensure_ascii=False, indent=2) + "\n",
        encoding="utf-8",
    )

    pack = load_scenario_pack(manifest_path)

    # Schema identity: the pack reports its own version and remembers the source contract version.
    assert pack["schema_version"] == "domain_scenario_pack_v1"
    assert pack["source_schema_version"] == "active_domain_contract_v1"
    assert pack["domain"] == "inventory_stock"
    # Observed anchors surface as bindings.
    assert pack["bindings"]["observed_warehouse"] == "Основной склад"
    assert pack["bindings"]["focus_item_historical"] == "Столешница 600*3050*26 дуб ниагара"
    # Scenario and step question ids survive the conversion.
    assert pack["scenarios"][0]["question_ids"] == ["Q01", "Q19"]
    assert pack["scenarios"][0]["steps"][1]["question_id"] == "Q19"
def test_load_scenario_pack_enriches_step_with_node_contract_defaults(tmp_path) -> None:
    """A step without its own expectations inherits them from the matching critical node."""
    critical_node = {
        "node_id": "N03_selected_item_supplier",
        "expected_intents": ["inventory_purchase_provenance_for_item"],
        "expected_answer_shape": "direct_supplier_answer_first_then_evidence",
        "required_carryover_invariants": ["focus_object", "date_scope"],
    }
    supplier_step = {
        "step_id": "step_02_supplier",
        "question_id": "Q19",
        "node_id": "N03_selected_item_supplier",
        "question": "По выбранному объекту \"...\": кто это поставил нам",
    }
    contract = {
        "schema_version": "active_domain_contract_v1",
        "status": "active",
        "domain_id": "inventory_stock_supplier_provenance",
        "runtime_domain": "inventory_stock",
        "title": "Warehouse domain",
        "question_pool": {
            "questions": [
                {"question_id": "Q19", "node_id": "N03_selected_item_supplier", "text": "Q19"},
            ]
        },
        "scenario_tree": {"critical_nodes": [critical_node]},
        "orchestration_pack": {
            "pack_id": "inventory_active_contract_smoke",
            "scenarios": [
                {
                    "scenario_id": "inventory_selected_item_provenance",
                    "title": "Selected item provenance",
                    "steps": [supplier_step],
                }
            ],
        },
    }
    manifest_path = tmp_path / "active_domain_contract.json"
    manifest_path.write_text(
        json.dumps(contract, ensure_ascii=False, indent=2) + "\n",
        encoding="utf-8",
    )

    pack = load_scenario_pack(manifest_path)
    step = pack["scenarios"][0]["steps"][0]

    assert step["expected_intents"] == ["inventory_purchase_provenance_for_item"]
    # The node's expected_answer_shape is exposed on the step as required_answer_shape.
    assert step["required_answer_shape"] == "direct_supplier_answer_first_then_evidence"
    assert "focus_object" in step["required_carryover_invariants"]
    assert "date_scope" in step["required_carryover_invariants"]
def test_build_scenario_acceptance_matrix_marks_green_edge_when_covering_scenario_is_accepted() -> None:
    """An accepted scenario that walks an edge end-to-end turns the edge and its path green."""
    scenario_tree = {
        "critical_nodes": [
            {
                "node_id": "N03_selected_item_supplier",
                "covers_question_ids": ["Q19"],
                "required_wording_families": ["canonical"],
            }
        ],
        "critical_edges": [
            {
                "edge_id": "E01_snapshot_to_selected_item_supplier",
                "from_node": "N01_stock_snapshot",
                "to_node": "N03_selected_item_supplier",
                "primary_user_path": True,
            }
        ],
        "primary_user_paths": [
            {"path_id": "P01_snapshot_to_supplier", "nodes": ["N01_stock_snapshot", "N03_selected_item_supplier"]}
        ],
    }
    covering_scenario = {
        "scenario_id": "inventory_selected_item_provenance",
        "question_ids": ["Q01", "Q19"],
        "steps": [
            {
                "step_id": "step_01_snapshot",
                "question_id": "Q01",
                "node_id": "N01_stock_snapshot",
                "paraphrase_family": "canonical",
            },
            {
                "step_id": "step_02_supplier",
                "question_id": "Q19",
                "node_id": "N03_selected_item_supplier",
                "paraphrase_family": "canonical",
            },
        ],
    }
    pack = {
        "pack_id": "inventory_active_contract_smoke",
        "domain": "inventory_stock",
        "source_contract": {"domain_id": "inventory_stock_supplier_provenance", "title": "Warehouse domain"},
        "question_pool": {
            "questions": [
                {"question_id": "Q01", "node_id": "N01_stock_snapshot"},
                {"question_id": "Q19", "node_id": "N03_selected_item_supplier"},
            ]
        },
        "scenario_tree": scenario_tree,
        "scenarios": [covering_scenario],
    }
    scenario_results = [
        {
            "scenario_id": "inventory_selected_item_provenance",
            "final_status": "accepted",
            "session_id": "asst-demo",
            "artifact_dir": "artifacts/domain_runs/demo",
        }
    ]

    matrix = build_scenario_acceptance_matrix(pack, scenario_results)

    # The matrix is checked as text: the edge id plus the green rows for edge and path.
    expected_fragments = (
        "E01_snapshot_to_selected_item_supplier",
        "| E01_snapshot_to_selected_item_supplier | green |",
        "| P01_snapshot_to_supplier | green |",
    )
    for fragment in expected_fragments:
        assert fragment in matrix
def test_build_scenario_acceptance_matrix_marks_partial_when_wording_family_is_missing() -> None:
    """A node requiring two wording families is only partial when one family was exercised."""
    critical_node = {
        "node_id": "N03_selected_item_supplier",
        "covers_question_ids": ["Q19"],
        "required_wording_families": ["canonical", "ui_selected_object_colloquial"],
    }
    canonical_only_scenario = {
        "scenario_id": "inventory_selected_item_provenance",
        "question_ids": ["Q19"],
        "steps": [
            {
                "step_id": "step_01_supplier",
                "question_id": "Q19",
                "node_id": "N03_selected_item_supplier",
                "paraphrase_family": "canonical",
            }
        ],
    }
    pack = {
        "pack_id": "inventory_active_contract_smoke",
        "domain": "inventory_stock",
        "source_contract": {"domain_id": "inventory_stock_supplier_provenance", "title": "Warehouse domain"},
        "question_pool": {
            "questions": [
                {"question_id": "Q19", "node_id": "N03_selected_item_supplier"},
            ]
        },
        "scenario_tree": {"critical_nodes": [critical_node]},
        "scenarios": [canonical_only_scenario],
    }
    scenario_results = [
        {
            "scenario_id": "inventory_selected_item_provenance",
            "final_status": "accepted",
            "session_id": "asst-demo",
            "artifact_dir": "artifacts/domain_runs/demo",
        }
    ]

    matrix = build_scenario_acceptance_matrix(pack, scenario_results)

    # The partial row must name the family that was never exercised.
    for fragment in (
        "| N03_selected_item_supplier | partial |",
        "missing_wording_families",
        "ui_selected_object_colloquial",
    ):
        assert fragment in matrix
def test_derive_pack_final_status_downgrades_accepted_when_matrix_contains_partial_coverage() -> None:
    """An accepted scenario still yields a partial pack when a required wording family is uncovered."""
    canonical_only_scenario = {
        "scenario_id": "inventory_selected_item_provenance",
        "question_ids": ["Q19"],
        "steps": [
            {
                "step_id": "step_01_supplier",
                "question_id": "Q19",
                "node_id": "N03_selected_item_supplier",
                "paraphrase_family": "canonical",
            }
        ],
    }
    critical_node = {
        "node_id": "N03_selected_item_supplier",
        "covers_question_ids": ["Q19"],
        "required_wording_families": ["canonical", "ui_selected_object_colloquial"],
    }
    pack = {
        "pack_id": "inventory_active_contract_smoke",
        "domain": "inventory_stock",
        "scenarios": [
            canonical_only_scenario,
        ],
        "scenario_tree": {"critical_nodes": [critical_node]},
        "question_pool": {
            "questions": [
                {"question_id": "Q19", "node_id": "N03_selected_item_supplier"},
            ]
        },
    }
    scenario_results = [
        {
            "scenario_id": "inventory_selected_item_provenance",
            "final_status": "accepted",
            "session_id": "asst-demo",
            "artifact_dir": "artifacts/domain_runs/demo",
        }
    ]

    final_status = derive_pack_final_status(pack, scenario_results)

    assert final_status == "partial"
def test_evaluate_analyst_gate_requires_temporal_honesty_field_truth_and_layering() -> None:
    """A high-scoring verdict is not accepted when its temporal_honesty_ok flag is False."""
    verdict = {
        "quality_score": 91,
        "unresolved_p0_count": 0,
        "regression_detected": False,
        "direct_answer_ok": True,
        "business_usefulness_ok": True,
        "temporal_honesty_ok": False,  # the only failing quality flag
        "field_truth_ok": True,
        "answer_layering_ok": True,
        "loop_decision": "accepted",
        "requires_user_decision": False,
        "user_decision_type": "none",
        "user_decision_prompt": None,
    }

    (
        accepted,
        loop_decision,
        needs_user_decision,
        decision_type,
        decision_prompt,
    ) = evaluate_analyst_gate(verdict, target_score=80)

    # The gate itself rejects, while the verdict's own fields are passed through unchanged.
    assert accepted is False
    assert loop_decision == "accepted"
    assert needs_user_decision is False
    assert decision_type == "none"
    assert decision_prompt is None
def test_validate_step_contract_rejects_wrong_month_filter_even_when_execution_is_exact() -> None:
    """An exact run that answers for December 2016 instead of May 2016 is a hard fail."""
    wrong_period_answer = "На 31.12.2016 на складе подтверждено 4 позиций."
    step_state = {
        "execution_status": "exact",
        "status": "exact",
        "node_role": "root",
        "analysis_context": {"as_of_date": "2016-05-31"},
        "expected_intents": ["inventory_on_hand_as_of_date"],
        "detected_intent": "inventory_on_hand_as_of_date",
        "expected_capability": "confirmed_inventory_on_hand_as_of_date",
        "capability_id": "confirmed_inventory_on_hand_as_of_date",
        "expected_recipe": "address_inventory_on_hand_as_of_date_v1",
        "selected_recipe": "address_inventory_on_hand_as_of_date_v1",
        "expected_result_mode": "confirmed_balance",
        "result_mode": "confirmed_balance",
        "required_filters": {
            "as_of_date": "2016-05-31",
            "period_from": "2016-05-01",
            "period_to": "2016-05-31",
        },
        "required_answer_shape": "item_list_with_quantity_cost_warehouse_organization",
        "required_carryover_invariants": [],
        "required_state_objects": [],
        "forbidden_capabilities": [],
        "forbidden_recipes": [],
        "actual_direct_answer": wrong_period_answer,
        "top_non_empty_lines": [wrong_period_answer],
        "extracted_filters": {
            "as_of_date": "2016-12-31",
            "period_from": "2016-01-01",
            "period_to": "2016-12-31",
        },
        "date_scope": {"as_of_date": "2016-12-31"},
        "focus_object": None,
    }

    validated = validate_step_contract(step_state)

    assert validated["acceptance_status"] == "rejected"
    # Each mismatching filter is reported as its own invariant violation.
    for invariant in ("wrong_as_of_date", "wrong_period_from", "wrong_period_to"):
        assert invariant in validated["violated_invariants"]
    assert validated["hard_fail"] is True
def test_validate_step_contract_rejects_selected_object_followup_without_focus_object_and_with_wrong_route() -> None:
    """A supplier follow-up routed to the stock snapshot without a focus object is rejected."""
    snapshot_answer = "На 31.03.2019 на складе подтверждено 16 позиций."
    step_state = {
        "execution_status": "exact",
        "status": "exact",
        "node_role": "critical_child",
        "analysis_context": {"as_of_date": "2019-03-31"},
        "expected_intents": ["inventory_purchase_provenance_for_item"],
        "detected_intent": "inventory_on_hand_as_of_date",
        "expected_capability": "inventory_purchase_provenance_for_item",
        "capability_id": "confirmed_inventory_on_hand_as_of_date",
        "expected_result_mode": "confirmed_balance",
        "result_mode": "confirmed_balance",
        "required_filters": {"as_of_date": "2019-03-31"},
        "required_answer_shape": "direct_supplier_answer_first_then_evidence",
        "required_carryover_invariants": ["focus_object", "date_scope"],
        "required_state_objects": [],
        "forbidden_capabilities": ["confirmed_inventory_on_hand_as_of_date"],
        "forbidden_recipes": ["address_inventory_on_hand_as_of_date_v1"],
        "selected_recipe": "address_inventory_on_hand_as_of_date_v1",
        "actual_direct_answer": snapshot_answer,
        "top_non_empty_lines": [snapshot_answer],
        "extracted_filters": {"as_of_date": "2019-03-31"},
        "date_scope": {"as_of_date": "2019-03-31"},
        "focus_object": None,
    }

    validated = validate_step_contract(step_state)

    assert validated["acceptance_status"] == "rejected"
    expected_violations = (
        "wrong_intent",
        "wrong_followup_action",
        "forbidden_capability_selected",
        "forbidden_recipe_selected",
        "focus_object_missing",
    )
    for invariant in expected_violations:
        assert invariant in validated["violated_invariants"]
def test_validate_step_contract_rejects_top_level_noise_as_direct_answer() -> None:
    """A status line placed ahead of the supplier line does not count as a direct answer."""
    status_line = "Статус результата: подтверждено."
    step_state = {
        "execution_status": "exact",
        "status": "exact",
        "node_role": "critical_child",
        "analysis_context": {"as_of_date": "2019-03-31"},
        "expected_intents": ["inventory_purchase_provenance_for_item"],
        "detected_intent": "inventory_purchase_provenance_for_item",
        "expected_capability": "inventory_purchase_provenance_for_item",
        "capability_id": "inventory_inventory_purchase_provenance_for_item",
        "expected_result_mode": "confirmed_balance",
        "result_mode": "confirmed_balance",
        "required_filters": {"as_of_date": "2019-03-31"},
        "required_answer_shape": "direct_supplier_answer_first_then_evidence",
        "required_carryover_invariants": [],
        "required_state_objects": [],
        "forbidden_capabilities": [],
        "forbidden_recipes": [],
        "selected_recipe": "address_inventory_purchase_provenance_for_item_v1",
        "actual_direct_answer": status_line,
        "top_non_empty_lines": [
            status_line,
            "Поставщик: Торговый дом \\Союз\\.",
        ],
        "extracted_filters": {"as_of_date": "2019-03-31"},
        "date_scope": {"as_of_date": "2019-03-31"},
        "focus_object": {"object_id": "item:1", "label": "Столешница"},
    }

    validated = validate_step_contract(step_state)

    assert validated["acceptance_status"] == "rejected"
    assert "direct_answer_missing" in validated["violated_invariants"]
    assert "top_level_noise_present" in validated["violated_invariants"]
def test_build_deterministic_repair_targets_marks_followup_router_gap_as_p0() -> None:
    """A hard-failed follow-up with a wrong route becomes a single P0 follow-up resolution target."""
    pack_summary = {"pack_id": "demo_pack", "domain": "inventory_stock", "final_status": "partial"}
    rejected_step = {
        "step_id": "step_02_supplier",
        "question_resolved": 'По выбранному объекту "Столешница": кто поставил',
        "execution_status": "exact",
        "acceptance_status": "rejected",
        "reply_type": "factual",
        "selected_recipe": "address_inventory_on_hand_as_of_date_v1",
        "capability_id": "confirmed_inventory_on_hand_as_of_date",
        "violated_invariants": [
            "wrong_followup_action",
            "focus_object_missing",
            "forbidden_capability_selected",
        ],
        "warnings": [],
        "hard_fail": True,
    }
    scenario_result = {
        "scenario_id": "inventory_selected_item_provenance",
        "title": "Selected item provenance",
        "artifact_dir": "artifacts/domain_runs/demo/scenarios/inventory_selected_item_provenance",
        "scenario_state": {"step_outputs": {"step_02_supplier": rejected_step}},
    }

    repair_targets = build_deterministic_repair_targets(pack_summary, [scenario_result])

    assert repair_targets["target_count"] == 1
    target = repair_targets["targets"][0]
    assert target["severity"] == "P0"
    assert target["problem_type"] == "followup_action_resolution_gap"
    # The missing focus object adds an object-memory layer next to the routing one.
    assert "followup_action_resolution_gap" in target["root_cause_layers"]
    assert "object_memory_gap" in target["root_cause_layers"]
    assert "addressIntentResolver.ts" in " ".join(target["candidate_files"])
def test_build_deterministic_repair_targets_marks_anchor_gap_as_p1() -> None:
    """A partial step whose MCP call did not match the anchor becomes a P1 domain anchor gap."""
    pack_summary = {"pack_id": "demo_pack", "domain": "inventory_stock", "final_status": "partial"}
    unanchored_step = {
        "step_id": "step_02_selected_item_buyer_ui",
        "question_resolved": 'По выбранному объекту "Шкаф": кому был продан товар',
        "execution_status": "partial",
        "acceptance_status": "rejected",
        "reply_type": "partial_coverage",
        "fallback_type": "partial",
        "mcp_call_status": "materialized_but_not_anchor_matched",
        "selected_recipe": "address_inventory_sale_trace_for_item_v1",
        "capability_id": "inventory_inventory_sale_trace_for_item",
        "violated_invariants": [],
        "warnings": [],
        "hard_fail": False,
    }
    scenario_result = {
        "scenario_id": "inventory_sale_trace",
        "title": "Sale trace",
        "artifact_dir": "artifacts/domain_runs/demo/scenarios/inventory_sale_trace",
        "scenario_state": {
            "step_outputs": {"step_02_selected_item_buyer_ui": unanchored_step}
        },
    }

    repair_targets = build_deterministic_repair_targets(pack_summary, [scenario_result])

    assert repair_targets["target_count"] == 1
    target = repair_targets["targets"][0]
    assert target["severity"] == "P1"
    assert target["problem_type"] == "domain_anchor_gap"
    assert target["root_cause_layers"] == ["domain_anchor_gap"]
    assert "addressQueryService.ts" in " ".join(target["candidate_files"])
def test_build_deterministic_repair_targets_prioritizes_high_leverage_focus() -> None:
    """The temporal honesty gap is expected to rank first among the three rejected steps."""

    def rejected_exact_step(step_id, question, recipe, capability, invariants):
        # All three fixture steps share this shape: exact execution, rejected, hard fail.
        return {
            "step_id": step_id,
            "question_resolved": question,
            "execution_status": "exact",
            "acceptance_status": "rejected",
            "reply_type": "factual",
            "selected_recipe": recipe,
            "capability_id": capability,
            "violated_invariants": invariants,
            "warnings": [],
            "hard_fail": True,
        }

    aging_scenario = {
        "scenario_id": "inventory_aging_and_unresolved",
        "title": "Aging and unresolved",
        "artifact_dir": "artifacts/domain_runs/demo/scenarios/inventory_aging_and_unresolved",
        "scenario_state": {
            "step_outputs": {
                "step_05_unresolved_supplier_link": rejected_exact_step(
                    "step_05_unresolved_supplier_link",
                    "Какие товары сейчас висят в остатке без понятной привязки к поставщику",
                    "address_inventory_supplier_stock_overlap_as_of_date_v1",
                    "inventory_inventory_supplier_stock_overlap_as_of_date",
                    [
                        "wrong_as_of_date",
                        "missing_required_filter",
                        "wrong_date_scope_state",
                    ],
                )
            }
        },
    }
    snapshot_scenario = {
        "scenario_id": "inventory_snapshot_roots",
        "title": "Root stock snapshots",
        "artifact_dir": "artifacts/domain_runs/demo/scenarios/inventory_snapshot_roots",
        "scenario_state": {
            "step_outputs": {
                "step_01_stock_now": rejected_exact_step(
                    "step_01_stock_now",
                    "Какие товары сейчас лежат на складе",
                    "address_inventory_on_hand_as_of_date_v1",
                    "confirmed_inventory_on_hand_as_of_date",
                    [
                        "wrong_as_of_date",
                        "missing_required_filter",
                    ],
                ),
                "step_02_stock_on_historical_date": rejected_exact_step(
                    "step_02_stock_on_historical_date",
                    "Покажи остатки на складе на март 2019",
                    "address_inventory_on_hand_as_of_date_v1",
                    "confirmed_inventory_on_hand_as_of_date",
                    [
                        "wrong_as_of_date",
                        "wrong_period_from",
                        "wrong_period_to",
                    ],
                ),
            }
        },
    }
    pack_summary = {"pack_id": "demo_pack", "domain": "inventory_stock", "final_status": "partial"}

    repair_targets = build_deterministic_repair_targets(
        pack_summary,
        [aging_scenario, snapshot_scenario],
    )

    assert repair_targets["target_count"] == 3
    # The leading focus groups two of the three targets.
    assert repair_targets["priority_foci"][0]["problem_type"] == "temporal_honesty_gap"
    assert repair_targets["priority_foci"][0]["target_count"] == 2
    assert repair_targets["targets"][0]["problem_type"] == "temporal_honesty_gap"
    assert repair_targets["targets"][0]["repair_focus_rank"] == 1
def test_build_coder_loop_prompt_demands_high_leverage_focus_first(tmp_path) -> None:
    """The coder prompt carries the focus-first instructions and the assigned focus id."""
    loop_dir = tmp_path / "loop"
    iteration_dir = loop_dir / "iterations" / "iteration_00"
    pack_dir = iteration_dir / "pack_output" / "pack_run"
    assigned_focus = {
        "focus_id": "temporal_honesty_gap|addressFilterExtractor.ts",
        "problem_type": "temporal_honesty_gap",
    }

    prompt = build_coder_loop_prompt(
        loop_dir=loop_dir,
        iteration_dir=iteration_dir,
        pack_dir=pack_dir,
        repair_targets_path=pack_dir / "repair_targets.json",
        repair_targets_json='{"priority_foci":[{"focus_rank":1,"problem_type":"temporal_honesty_gap","target_count":4}]}',
        assigned_focus=assigned_focus,
        analyst_verdict_path=iteration_dir / "analyst_verdict.json",
        analyst_verdict_json='{"quality_score":56}',
    )

    required_phrases = (
        "highest-leverage repair focus first",
        "patch the narrowest shared layer",
        "single-line collapses",
        "mandatory for this iteration",
        "temporal_honesty_gap|addressFilterExtractor.ts",
    )
    for phrase in required_phrases:
        assert phrase in prompt
def test_select_primary_repair_focus_returns_top_priority_focus() -> None:
    """The rank-1 entry of priority_foci is returned as the primary focus."""
    repair_targets = {
        "priority_foci": [
            {"focus_id": "focus-1", "focus_rank": 1},
            {"focus_id": "focus-2", "focus_rank": 2},
        ]
    }

    focus = select_primary_repair_focus(repair_targets)

    assert focus == {"focus_id": "focus-1", "focus_rank": 1}
def test_build_coder_snapshot_paths_collects_candidate_files_once(tmp_path) -> None:
    """Candidate files are returned once each, and the '../outside.ts' entry is left out."""
    repo_root = tmp_path
    extractor_rel = "llm_normalizer/backend/src/services/addressFilterExtractor.ts"
    decompose_rel = "llm_normalizer/backend/src/services/address_runtime/decomposeStage.ts"
    file_a = repo_root / extractor_rel
    file_b = repo_root / decompose_rel
    for candidate in (file_a, file_b):
        candidate.parent.mkdir(parents=True, exist_ok=True)
        candidate.write_text("line1\nline2\n", encoding="utf-8")
    repair_targets = {
        "priority_foci": [
            {
                "candidate_files": [
                    extractor_rel,
                    decompose_rel,
                ]
            }
        ],
        "targets": [
            {
                "candidate_files": [
                    extractor_rel,  # duplicate of the first focus entry
                    "../outside.ts",
                ]
            }
        ],
    }

    # Point the module under test at the temporary tree, restoring REPO_ROOT afterwards.
    loop_module = sys.modules["scripts.domain_case_loop"]
    original_repo_root = loop_module.REPO_ROOT
    loop_module.REPO_ROOT = repo_root
    try:
        paths = build_coder_snapshot_paths(repair_targets)
    finally:
        loop_module.REPO_ROOT = original_repo_root

    assert paths == [file_a, file_b]
def test_restore_line_collapsed_files_from_snapshot_recovers_original_text(tmp_path) -> None:
    """A file rewritten with only its newlines removed is restored from the snapshot."""
    original = "const a = 1;\nconst b = 2;\n"
    collapsed = "const a = 1;const b = 2;"
    sample = tmp_path / "sample.ts"
    sample.write_text(original, encoding="utf-8")
    snapshots = snapshot_coder_candidate_files([sample])
    sample.write_text(collapsed, encoding="utf-8")

    # REPO_ROOT is swapped to tmp_path for the call, then put back.
    loop_module = sys.modules["scripts.domain_case_loop"]
    original_repo_root = loop_module.REPO_ROOT
    loop_module.REPO_ROOT = tmp_path
    try:
        restored = restore_line_collapsed_files_from_snapshot(snapshots)
    finally:
        loop_module.REPO_ROOT = original_repo_root

    assert restored == ["sample.ts"]
    assert sample.read_text(encoding="utf-8") == original
def test_restore_line_collapsed_files_from_snapshot_keeps_semantic_changes(tmp_path) -> None:
    """A collapsed file whose content also changed (2 -> 3) is not restored."""
    original = "const a = 1;\nconst b = 2;\n"
    edited_and_collapsed = "const a = 1;const b = 3;"
    sample = tmp_path / "sample.ts"
    sample.write_text(original, encoding="utf-8")
    snapshots = snapshot_coder_candidate_files([sample])
    sample.write_text(edited_and_collapsed, encoding="utf-8")

    # REPO_ROOT is swapped to tmp_path for the call, then put back.
    loop_module = sys.modules["scripts.domain_case_loop"]
    original_repo_root = loop_module.REPO_ROOT
    loop_module.REPO_ROOT = tmp_path
    try:
        restored = restore_line_collapsed_files_from_snapshot(snapshots)
    finally:
        loop_module.REPO_ROOT = original_repo_root

    assert restored == []
    assert sample.read_text(encoding="utf-8") == "const a = 1;const b = 3;"
def test_evaluate_deterministic_loop_gate_rejects_partial_pack_even_without_targets() -> None:
    """A partial pack fails the gate even when no P0/P1 targets remain."""
    pack_summary = {"final_status": "partial"}
    repair_targets = {"severity_counts": {"P0": 0, "P1": 0}}

    gate_ok, reason = evaluate_deterministic_loop_gate(pack_summary, repair_targets)

    assert gate_ok is False
    assert reason == "pack_final_status=partial"
def test_evaluate_deterministic_loop_gate_rejects_remaining_p1_targets() -> None:
    """An accepted pack still fails the gate while P1 targets remain."""
    pack_summary = {"final_status": "accepted"}
    repair_targets = {"severity_counts": {"P0": 0, "P1": 2}}

    gate_ok, reason = evaluate_deterministic_loop_gate(pack_summary, repair_targets)

    assert gate_ok is False
    assert reason == "repair_targets_remaining=P0:0,P1:2"
def test_evaluate_deterministic_loop_gate_accepts_clean_pack_without_remaining_p0_p1() -> None:
    """An accepted pack with zero P0/P1 targets passes; a leftover warning does not block it."""
    pack_summary = {"final_status": "accepted"}
    repair_targets = {"severity_counts": {"P0": 0, "P1": 0, "warning": 1}}

    gate_ok, reason = evaluate_deterministic_loop_gate(pack_summary, repair_targets)

    assert gate_ok is True
    assert reason == "deterministic_gate_passed"
def test_build_scenario_step_state_uses_effective_analysis_context_from_turn_artifact() -> None:
    """A March 2019 run is rejected when the effective analysis context is 2021-09-30."""
    question = "Из каких товаров состоит остаток по 41 счету"
    step = {
        "step_id": "step_03_account_41_now",
        "title": "Account 41 current composition",
        "depends_on": [],
        "question_template": question,
        "analysis_context": {},  # the step carries no context of its own
        "expected_intents": ["inventory_on_hand_as_of_date"],
        "expected_capability": "confirmed_inventory_on_hand_as_of_date",
        "expected_recipe": None,
        "expected_result_mode": "confirmed_balance",
        "required_filters": {
            "period_from": "2021-09-01",
            "period_to": "2021-09-30",
        },
        "forbidden_capabilities": [],
        "forbidden_recipes": [],
        "required_state_objects": [],
        "required_answer_shape": "item_list_with_account_41_scope",
        "forbidden_answer_patterns": [],
        "required_carryover_invariants": [],
        "invariant_severity": {},
    }
    debug_payload = {
        "detected_mode": "address_query",
        "detected_intent": "inventory_on_hand_as_of_date",
        "selected_recipe": "address_inventory_on_hand_as_of_date_v1",
        "capability_id": "confirmed_inventory_on_hand_as_of_date",
        "capability_route_mode": "exact",
        "route_expectation_status": "matched",
        "result_mode": "confirmed_balance",
        "response_type": "FACTUAL_LIST",
        "extracted_filters": {
            "as_of_date": "2019-03-31",
            "period_from": "2019-03-01",
            "period_to": "2019-03-31",
        },
        "fallback_type": "none",
        "mcp_call_status": "matched_non_empty",
        "balance_confirmed": True,
    }
    turn_artifact = {
        "scenario": {
            "analysis_context": {
                "as_of_date": "2021-09-30",
                "source": "scenario_manifest",
            }
        },
        "assistant_message": {
            "reply_type": "factual",
            "text": "На 31.03.2019 на складе подтверждено 16 позиций.",
        },
        "technical_debug_payload": debug_payload,
        "session_summary": {
            "address_navigation_state": {
                "session_context": {
                    "date_scope": {
                        "as_of_date": "2019-03-31",
                        "period_from": "2019-03-01",
                        "period_to": "2019-03-31",
                    }
                }
            }
        },
    }

    step_state = build_scenario_step_state(
        scenario_id="inventory_snapshot_roots",
        domain="inventory_stock",
        step=step,
        step_index=3,
        question_resolved=question,
        analysis_context={"as_of_date": "2021-09-30", "source": "scenario_manifest"},
        turn_artifact=turn_artifact,
        entries=[],
    )

    assert step_state["analysis_context"]["as_of_date"] == "2021-09-30"
    # The 2019 filters extracted at runtime disagree with the 2021 context and required period.
    for invariant in ("wrong_as_of_date", "wrong_period_from", "wrong_period_to"):
        assert invariant in step_state["violated_invariants"]
    assert step_state["acceptance_status"] == "rejected"