From c998664869c7b48a51086c940807ffb3e70a57df Mon Sep 17 00:00:00 2001 From: dctouch Date: Sun, 24 May 2026 15:54:34 +0300 Subject: [PATCH] =?UTF-8?q?=D0=A3=D1=81=D0=B8=D0=BB=D0=B8=D1=82=D1=8C=20re?= =?UTF-8?q?liability=20pack=20=D0=BC=D0=B0=D1=80=D0=B6=D0=B8=D0=BD=D0=B0?= =?UTF-8?q?=D0=BB=D1=8C=D0=BD=D0=BE=D1=81=D1=82=D0=B8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ...in_profitability_reliability_20260524.json | 130 ++++++++++- .../business_answer_contract.schema.json | 81 +++++++ .../schemas/domain_scenario_pack.schema.json | 110 +++++++++ .../agent_reliability_contract_healthcheck.py | 213 +++++++++++++++++- scripts/domain_case_loop.py | 31 ++- ..._agent_reliability_contract_healthcheck.py | 127 +++++++++++ 6 files changed, 682 insertions(+), 10 deletions(-) create mode 100644 docs/orchestration/schemas/business_answer_contract.schema.json create mode 100644 docs/orchestration/schemas/domain_scenario_pack.schema.json diff --git a/docs/orchestration/agent_margin_profitability_reliability_20260524.json b/docs/orchestration/agent_margin_profitability_reliability_20260524.json index 630ff04..e3391d4 100644 --- a/docs/orchestration/agent_margin_profitability_reliability_20260524.json +++ b/docs/orchestration/agent_margin_profitability_reliability_20260524.json @@ -3,8 +3,28 @@ "pack_id": "agent_margin_profitability_reliability_20260524", "domain": "margin_profitability", "title": "AGENT | margin profitability wrong-domain traps", - "description": "Минимальный reliability pack для проверки, что вопросы про маржинальность номенклатуры не утекают в ОС, амортизацию, банк, оплаты или взаиморасчёты.", + "description": "Минимальный reliability pack для проверки, что вопросы про маржинальность номенклатуры не утекают в ОС, амортизацию, банк, оплаты или взаиморасчеты.", "source_contract_id": "margin_profitability_v1", + "issue_codes_under_test": [ + "margin_domain_leak_accounting_route", + "business_next_step_missing", + "technical_garbage_in_answer" + ], + "detectors_under_test": [ + "margin_domain_leak_accounting_route", + "margin_required_fields_missing", + "margin_next_action_missing", + "margin_payment_document_false_source", + "margin_os_amortization_leak", + "runtime_tokens_in_user_answer", + "capability_ids_in_user_answer" + ], + "rerun_matrix": [ + "failed_margin_scenario", + "margin_neighbor_pack", + "wrong_domain_trap_pack", + "accepted_smoke_pack" + ], "bindings": { "period": "2020 год", "item": "товар" @@ -18,6 +38,24 @@ "wrong_domain_traps" ] }, + "acceptance": { + "min_score": 80, + "max_unresolved_p0": 0, + "require_all_critical_steps_pass": true, + "must_have": [ + "direct_answer_first", + "period_or_honest_period_clarification", + "revenue_cogs_gross_profit_margin_or_honest_unknown", + "next_action_if_limited", + "detector_results_fail_or_review_on_real_defect" + ], + "must_not_have": [ + "fixed_assets_leak", + "amortization_leak", + "payment_document_as_margin_source", + "route_or_capability_ids_in_user_answer" + ] + }, "scenarios": [ { "scenario_id": "margin_root_wrong_domain_trap", @@ -27,7 +65,16 @@ "step_id": "step_01", "title": "Маржинальность номенклатуры", "question": "Какая номенклатура была самой маржинальной за {{bindings.period}}?", - "semantic_tags": ["margin_profitability", "inventory", "wrong_domain_trap"], + "semantic_tags": [ + "margin_profitability", + "inventory", + "wrong_domain_trap" + ], + "expected_intents": [ + "inventory_margin_ranking_for_nomenclature" + ], + "expected_capability": "inventory_inventory_margin_ranking_for_nomenclature", + "expected_recipe": "address_inventory_margin_ranking_for_nomenclature_v1", "expected_result_mode": "ranking_or_limited_accounting_answer", "expected_business_answer_contract": "margin_profitability_v1", "required_answer_shape": "direct_answer_first", @@ -37,7 +84,7 @@ "forbidden_answer_patterns": [ "(?i)(амортизац|основн(ые|ых)? средств|объект ОС|оплат[аы]|банк|settlement|payment_document)" ], - "notes": "Если точного расчёта нет, допустим честный limited answer, но не уход в ОС/банк/оплаты." + "notes": "Если точного расчета нет, допустим честный limited answer, но не уход в ОС/банк/оплаты." } ] }, @@ -49,10 +96,21 @@ "step_id": "step_01", "title": "Запрос маржинальности", "question": "Покажи топ товаров по марже за {{bindings.period}}.", - "semantic_tags": ["margin_profitability", "inventory"], + "semantic_tags": [ + "margin_profitability", + "inventory" + ], + "expected_intents": [ + "inventory_margin_ranking_for_nomenclature" + ], + "expected_capability": "inventory_inventory_margin_ranking_for_nomenclature", + "expected_recipe": "address_inventory_margin_ranking_for_nomenclature_v1", "expected_result_mode": "ranking_or_limited_accounting_answer", "expected_business_answer_contract": "margin_profitability_v1", "required_answer_shape": "direct_answer_first", + "required_answer_patterns_any": [ + "(?i)(марж|прибыл|выруч|себестоим|не могу подтвердить|не хватает)" + ], "forbidden_answer_patterns": [ "(?i)(амортизац|объект ОС|payment_document|settlement)" ] @@ -61,8 +119,14 @@ "step_id": "step_02", "title": "Почему именно так", "question": "А из чего ты это посчитал и чего не хватает для точной маржи?", - "depends_on": ["step_01"], - "semantic_tags": ["margin_profitability", "evidence", "scope_guard"], + "depends_on": [ + "step_01" + ], + "semantic_tags": [ + "margin_profitability", + "evidence", + "scope_guard" + ], "expected_result_mode": "evidence_or_honest_boundary", "expected_business_answer_contract": "margin_profitability_v1", "required_answer_shape": "direct_answer_first", @@ -74,6 +138,60 @@ ] } ] + }, + { + "scenario_id": "margin_false_source_traps", + "title": "Margin must reject payment and fixed-assets false sources", + "steps": [ + { + "step_id": "step_01_payment_source_trap", + "title": "Оплаты не являются источником маржи", + "question": "Можно быстро понять маржинальность товаров за {{bindings.period}} по оплатам и банку?", + "semantic_tags": [ + "margin_profitability", + "wrong_domain_trap", + "payment_false_source" + ], + "expected_result_mode": "honest_boundary_with_next_action", + "expected_business_answer_contract": "margin_profitability_v1", + "required_answer_shape": "direct_answer_first", + "required_answer_patterns_any": [ + "(?i)(оплат|банк).{0,120}(не источник|нельзя|недостаточ|не подтвержд)", + "(?i)(выруч|себестоим|валов|марж)" + ], + "forbidden_answer_patterns": [ + "(?i)(оплат[аы]|банк|payment_document).{0,80}(источник|достаточ|посчитал|марж[ау])", + "(?i)(route_id|capability_id|runtime_|debug)" + ], + "notes": "Платежи могут помогать сверить денежный поток, но не являются достаточной базой для валовой маржи по номенклатуре." + }, + { + "step_id": "step_02_fixed_asset_source_trap", + "title": "ОС и амортизация не должны попасть в товарную маржу", + "question": "Посчитай маржинальность товарной номенклатуры за {{bindings.period}}, не ОС и не амортизацию.", + "semantic_tags": [ + "margin_profitability", + "wrong_domain_trap", + "fixed_asset_false_source" + ], + "expected_intents": [ + "inventory_margin_ranking_for_nomenclature" + ], + "expected_capability": "inventory_inventory_margin_ranking_for_nomenclature", + "expected_recipe": "address_inventory_margin_ranking_for_nomenclature_v1", + "expected_result_mode": "ranking_or_limited_accounting_answer", + "expected_business_answer_contract": "margin_profitability_v1", + "required_answer_shape": "direct_answer_first", + "required_answer_patterns_any": [ + "(?i)(марж|выруч|себестоим|валов|не могу подтвердить|не хватает)" + ], + "forbidden_answer_patterns": [ + "(?i)(амортизац|объект ОС|основн(ые|ых)? средств).{0,80}(марж|себестоим|валов)", + "(?i)(route_id|capability_id|runtime_|debug)" + ], + "notes": "Даже если пользователь сам сказал `не ОС`, runtime должен удержать товарный контур и не строить ответ вокруг амортизации." + } + ] } ] } diff --git a/docs/orchestration/schemas/business_answer_contract.schema.json b/docs/orchestration/schemas/business_answer_contract.schema.json new file mode 100644 index 0000000..59a27ca --- /dev/null +++ b/docs/orchestration/schemas/business_answer_contract.schema.json @@ -0,0 +1,81 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Business Answer Contract", + "type": "object", + "additionalProperties": true, + "required": ["schema_version", "contract_id", "domain", "answer_surface", "detectors"], + "properties": { + "schema_version": { + "const": "business_answer_contract_v1" + }, + "contract_id": { + "type": "string", + "minLength": 1 + }, + "domain": { + "type": "string", + "minLength": 1 + }, + "title": { + "type": "string" + }, + "purpose": { + "type": "string" + }, + "answer_surface": { + "type": "object", + "additionalProperties": true, + "required": ["required_fields"], + "properties": { + "must_start_with": { + "type": "string" + }, + "required_fields": { + "type": "array", + "minItems": 1, + "items": { + "type": "object", + "additionalProperties": true, + "required": ["field", "meaning"], + "properties": { + "field": { + "type": "string", + "minLength": 1 + }, + "meaning": { + "type": "string", + "minLength": 1 + } + } + } + }, + "must_not_contain": { + "type": "array", + "items": { + "type": "string" + } + }, + "limited_answer_rule": { + "type": "string" + } + } + }, + "root_layers": { + "type": "array", + "items": { + "type": "string" + } + }, + "detectors": { + "type": "array", + "items": { + "type": "string" + }, + "minItems": 1 + }, + "acceptance": { + "type": "object", + "additionalProperties": true + } + } +} diff --git a/docs/orchestration/schemas/domain_scenario_pack.schema.json b/docs/orchestration/schemas/domain_scenario_pack.schema.json new file mode 100644 index 0000000..4f331dd --- /dev/null +++ b/docs/orchestration/schemas/domain_scenario_pack.schema.json @@ -0,0 +1,110 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Domain Scenario Pack", + "type": "object", + "additionalProperties": true, + "required": ["schema_version", "pack_id", "domain", "scenarios"], + "properties": { + "schema_version": { + "const": "domain_scenario_pack_v1" + }, + "pack_id": { + "type": "string", + "minLength": 1 + }, + "domain": { + "type": "string", + "minLength": 1 + }, + "title": { + "type": "string" + }, + "description": { + "type": "string" + }, + "source_contract_id": { + "type": "string" + }, + "issue_codes_under_test": { + "type": "array", + "items": { + "type": "string" + } + }, + "detectors_under_test": { + "type": "array", + "items": { + "type": "string" + } + }, + "rerun_matrix": { + "type": "array", + "items": { + "type": "string" + } + }, + "analysis_context": { + "type": "object", + "additionalProperties": true + }, + "acceptance": { + "type": "object", + "additionalProperties": true + }, + "scenarios": { + "type": "array", + "minItems": 1, + "items": { + "type": "object", + "additionalProperties": true, + "required": ["scenario_id", "steps"], + "properties": { + "scenario_id": { + "type": "string", + "minLength": 1 + }, + "title": { + "type": "string" + }, + "steps": { + "type": "array", + "minItems": 1, + "items": { + "type": "object", + "additionalProperties": true, + "required": ["step_id", "question"], + "properties": { + "step_id": { + "type": "string", + "minLength": 1 + }, + "title": { + "type": "string" + }, + "question": { + "type": "string", + "minLength": 1 + }, + "expected_business_answer_contract": { + "type": "string" + }, + "semantic_tags": { + "type": "array", + "items": { + "type": "string" + } + }, + "forbidden_answer_patterns": { + "type": "array", + "items": { + "type": "string" + } + } + } + } + } + } + } + } + } +} diff --git a/scripts/agent_reliability_contract_healthcheck.py b/scripts/agent_reliability_contract_healthcheck.py index 024f98f..e9da10e 100644 --- a/scripts/agent_reliability_contract_healthcheck.py +++ b/scripts/agent_reliability_contract_healthcheck.py @@ -9,16 +9,21 @@ from typing import Any REPO_ROOT = Path(__file__).resolve().parent.parent +ORCHESTRATION_DIR = REPO_ROOT / "docs" / "orchestration" SCHEMA_DIR = REPO_ROOT / "docs" / "orchestration" / "schemas" ISSUE_CATALOG_PATH = REPO_ROOT / "docs" / "orchestration" / "issue_catalog.json" DETECTOR_REGISTRY_PATH = REPO_ROOT / "docs" / "orchestration" / "detector_registry.json" CONTRACTS_DIR = REPO_ROOT / "docs" / "orchestration" / "contracts" +BUSINESS_ANSWER_CONTRACT_SCHEMA_VERSION = "business_answer_contract_v1" +DOMAIN_SCENARIO_PACK_SCHEMA_VERSION = "domain_scenario_pack_v1" EXPECTED_SCHEMA_FILES = { "agent_issue_catalog.schema.json": "Agent Issue Catalog", "agent_detector_registry.schema.json": "Agent Detector Registry", "agent_detector_results.schema.json": "Agent Detector Results", "auto_coder_gate.schema.json": "Auto-Coder Gate", + "business_answer_contract.schema.json": "Business Answer Contract", "business_audit_contract.schema.json": "Business Audit Contract", + "domain_scenario_pack.schema.json": "Domain Scenario Pack", "domain_loop_lead_coder_handoff.schema.json": "Domain Loop Lead Coder Handoff", } AUTO_CODER_ALLOWED_ISSUE_CODES = { @@ -109,6 +114,72 @@ def collect_contract_detector_refs(contracts_dir: Path) -> tuple[dict[str, list[ return refs, warnings +def check_answer_contracts(contracts_dir: Path) -> tuple[dict[str, Any], list[str], list[str], set[str]]: + failures: list[str] = [] + warnings: list[str] = [] + contract_ids: set[str] = set() + contract_paths: list[str] = [] + if not contracts_dir.exists(): + return {"path": display_path(contracts_dir), "exists": False}, ["missing_answer_contracts_dir"], warnings, contract_ids + + for path in sorted(contracts_dir.glob("*.json")): + display = display_path(path) + contract_paths.append(display) + try: + payload = read_json(path) + except json.JSONDecodeError as error: + failures.append(f"invalid_answer_contract_json:{display}:{error.msg}") + continue + if not isinstance(payload, dict): + failures.append(f"answer_contract_not_object:{display}") + continue + + schema_version = str(payload.get("schema_version") or "").strip() + contract_id = str(payload.get("contract_id") or "").strip() + if schema_version != BUSINESS_ANSWER_CONTRACT_SCHEMA_VERSION: + failures.append(f"answer_contract_schema_version_mismatch:{display}:{schema_version or 'empty'}") + if not contract_id: + failures.append(f"answer_contract_missing_contract_id:{display}") + else: + if contract_id in contract_ids: + failures.append(f"answer_contract_duplicate_contract_id:{contract_id}") + contract_ids.add(contract_id) + if path.stem != contract_id: + warnings.append(f"answer_contract_filename_mismatch:{display}:{contract_id}") + if not str(payload.get("domain") or "").strip(): + failures.append(f"answer_contract_missing_domain:{display}") + + answer_surface = payload.get("answer_surface") if isinstance(payload.get("answer_surface"), dict) else {} + if not answer_surface: + failures.append(f"answer_contract_missing_answer_surface:{display}") + continue + required_fields = answer_surface.get("required_fields") + if not isinstance(required_fields, list) or not required_fields: + failures.append(f"answer_contract_missing_required_fields:{display}") + else: + for index, field in enumerate(required_fields): + if not isinstance(field, dict): + failures.append(f"answer_contract_required_field_not_object:{display}:{index}") + continue + if not str(field.get("field") or "").strip(): + failures.append(f"answer_contract_required_field_missing_name:{display}:{index}") + if not str(field.get("meaning") or "").strip(): + failures.append(f"answer_contract_required_field_missing_meaning:{display}:{index}") + if not normalize_string_list(payload.get("detectors")): + failures.append(f"answer_contract_missing_detectors:{display}") + + summary = { + "path": display_path(contracts_dir), + "exists": True, + "contract_count": len(contract_ids), + "contract_ids": sorted(contract_ids), + "contract_paths": contract_paths, + } + if not contract_ids: + failures.append("answer_contracts_empty") + return summary, failures, warnings, contract_ids + + def is_broad_patch_target(value: str) -> bool: normalized = value.strip().replace("\\", "/").lower() broad_targets = { @@ -307,23 +378,161 @@ def check_detector_registry( return summary, failures, warnings +def scenario_pack_paths(orchestration_dir: Path) -> tuple[list[tuple[Path, dict[str, Any]]], list[str]]: + warnings: list[str] = [] + packs: list[tuple[Path, dict[str, Any]]] = [] + if not orchestration_dir.exists(): + return packs, ["domain_scenario_pack_dir_missing"] + for path in sorted(orchestration_dir.glob("*.json")): + try: + payload = read_json(path) + except json.JSONDecodeError as error: + warnings.append(f"domain_scenario_pack_scan_invalid_json:{display_path(path)}:{error.msg}") + continue + if isinstance(payload, dict) and payload.get("schema_version") == DOMAIN_SCENARIO_PACK_SCHEMA_VERSION: + packs.append((path, payload)) + return packs, warnings + + +def check_domain_scenario_packs( + orchestration_dir: Path, + known_contract_ids: set[str], + detector_registry: dict[str, Any] | None = None, +) -> tuple[dict[str, Any], list[str], list[str]]: + failures: list[str] = [] + warnings: list[str] = [] + packs, scan_warnings = scenario_pack_paths(orchestration_dir) + warnings.extend(scan_warnings) + known_detectors = set() + if isinstance(detector_registry, dict): + detectors = detector_registry.get("detectors") if isinstance(detector_registry.get("detectors"), dict) else {} + known_detectors = set(str(name) for name in detectors) + + pack_ids: list[str] = [] + contract_bound_pack_count = 0 + step_count = 0 + contract_bound_step_count = 0 + wrong_domain_trap_step_count = 0 + for path, pack in packs: + display = display_path(path) + pack_id = str(pack.get("pack_id") or "").strip() + if not pack_id: + failures.append(f"domain_scenario_pack_missing_pack_id:{display}") + else: + pack_ids.append(pack_id) + if not str(pack.get("domain") or "").strip(): + failures.append(f"domain_scenario_pack_missing_domain:{display}") + scenarios = pack.get("scenarios") + if not isinstance(scenarios, list) or not scenarios: + failures.append(f"domain_scenario_pack_missing_scenarios:{display}") + continue + + source_contract_id = str(pack.get("source_contract_id") or "").strip() + if source_contract_id: + contract_bound_pack_count += 1 + if source_contract_id not in known_contract_ids: + failures.append(f"domain_scenario_pack_unknown_source_contract:{display}:{source_contract_id}") + if not isinstance(pack.get("acceptance"), dict) or not pack.get("acceptance"): + failures.append(f"domain_scenario_pack_missing_acceptance:{display}:{source_contract_id}") + if not normalize_string_list(pack.get("detectors_under_test")): + failures.append(f"domain_scenario_pack_missing_detectors_under_test:{display}:{source_contract_id}") + + analysis_context = pack.get("analysis_context") if isinstance(pack.get("analysis_context"), dict) else {} + expected_contract = str(analysis_context.get("expected_business_answer_contract") or "").strip() + if expected_contract and expected_contract not in known_contract_ids: + failures.append(f"domain_scenario_pack_unknown_analysis_contract:{display}:{expected_contract}") + + for detector_name in normalize_string_list(pack.get("detectors_under_test")): + if known_detectors and detector_name not in known_detectors: + failures.append(f"domain_scenario_pack_unknown_detector:{display}:{detector_name}") + + for scenario in scenarios: + if not isinstance(scenario, dict): + failures.append(f"domain_scenario_pack_scenario_not_object:{display}") + continue + scenario_id = str(scenario.get("scenario_id") or "").strip() + if not scenario_id: + failures.append(f"domain_scenario_pack_scenario_missing_id:{display}") + steps = scenario.get("steps") + if not isinstance(steps, list) or not steps: + failures.append(f"domain_scenario_pack_scenario_missing_steps:{display}:{scenario_id or 'unknown'}") + continue + for step in steps: + step_count += 1 + if not isinstance(step, dict): + failures.append(f"domain_scenario_pack_step_not_object:{display}:{scenario_id or 'unknown'}") + continue + step_id = str(step.get("step_id") or "").strip() + if not step_id: + failures.append(f"domain_scenario_pack_step_missing_id:{display}:{scenario_id or 'unknown'}") + if not str(step.get("question") or "").strip(): + failures.append(f"domain_scenario_pack_step_missing_question:{display}:{scenario_id or 'unknown'}:{step_id or 'unknown'}") + + step_contract = str( + step.get("expected_business_answer_contract") or step.get("required_answer_contract") or "" + ).strip() + if source_contract_id: + if not step_contract: + failures.append( + f"domain_scenario_pack_step_missing_expected_contract:{display}:{scenario_id or 'unknown'}:{step_id or 'unknown'}" + ) + else: + contract_bound_step_count += 1 + if step_contract and step_contract not in known_contract_ids: + failures.append( + f"domain_scenario_pack_step_unknown_contract:{display}:{scenario_id or 'unknown'}:{step_id or 'unknown'}:{step_contract}" + ) + + tags = normalize_string_list(step.get("semantic_tags")) + if "wrong_domain_trap" in tags: + wrong_domain_trap_step_count += 1 + if not normalize_string_list(step.get("forbidden_answer_patterns")): + failures.append( + f"domain_scenario_pack_wrong_domain_trap_missing_forbidden_patterns:{display}:{scenario_id or 'unknown'}:{step_id or 'unknown'}" + ) + + return ( + { + "path": display_path(orchestration_dir), + "exists": orchestration_dir.exists(), + "pack_count": len(packs), + "pack_ids": sorted(pack_ids), + "contract_bound_pack_count": contract_bound_pack_count, + "step_count": step_count, + "contract_bound_step_count": contract_bound_step_count, + "wrong_domain_trap_step_count": wrong_domain_trap_step_count, + }, + failures, + warnings, + ) + + def build_healthcheck() -> dict[str, Any]: schema_files, schema_failures = check_schema_files(SCHEMA_DIR) + answer_contracts, contract_failures, contract_warnings, contract_ids = check_answer_contracts(CONTRACTS_DIR) issue_catalog, catalog_failures, catalog_warnings = check_issue_catalog(ISSUE_CATALOG_PATH) issue_catalog_payload = read_json_object_or_empty(ISSUE_CATALOG_PATH) + detector_registry_payload = read_json_object_or_empty(DETECTOR_REGISTRY_PATH) detector_registry, detector_failures, detector_warnings = check_detector_registry( DETECTOR_REGISTRY_PATH, issue_catalog_payload, ) - failures = schema_failures + catalog_failures + detector_failures - warnings = catalog_warnings + detector_warnings + domain_packs, domain_pack_failures, domain_pack_warnings = check_domain_scenario_packs( + ORCHESTRATION_DIR, + contract_ids, + detector_registry_payload, + ) + failures = schema_failures + contract_failures + catalog_failures + detector_failures + domain_pack_failures + warnings = contract_warnings + catalog_warnings + detector_warnings + domain_pack_warnings return { "schema_version": "agent_reliability_contract_health_v1", "status": "pass" if not failures else "fail", "checked_at": datetime.now(timezone.utc).replace(microsecond=0).isoformat(), "schema_files": schema_files, + "answer_contracts": answer_contracts, "issue_catalog": issue_catalog, "detector_registry": detector_registry, + "domain_scenario_packs": domain_packs, "failures": failures, "warnings": warnings, } diff --git a/scripts/domain_case_loop.py b/scripts/domain_case_loop.py index 387a380..bb432e7 100644 --- a/scripts/domain_case_loop.py +++ b/scripts/domain_case_loop.py @@ -14,8 +14,19 @@ from typing import Any from urllib.error import HTTPError, URLError from urllib.request import Request, urlopen -import agent_runtime_manifest as runtime_manifest -import agent_detector_runner +try: + import agent_runtime_manifest as runtime_manifest +except ModuleNotFoundError as error: + if error.name != "agent_runtime_manifest": + raise + from scripts import agent_runtime_manifest as runtime_manifest + +try: + import agent_detector_runner +except ModuleNotFoundError as error: + if error.name != "agent_detector_runner": + raise + from scripts import agent_detector_runner REPO_ROOT = Path(__file__).resolve().parent.parent DEFAULT_ARTIFACTS_ROOT = REPO_ROOT / "artifacts" / "domain_runs" @@ -651,6 +662,22 @@ def merge_scenario_date_scope( current = current_date_scope if isinstance(current_date_scope, dict) else None if not current: return previous or current_date_scope + if previous and depends_on: + previous_source = str(previous.get("source") or "").strip() + current_source = str(current.get("source") or "").strip() + weak_current_sources = { + "current_analysis", + "active_domain_contract_default", + "domain_default", + "scenario_manifest_default", + } + stale_previous_sources = {"scenario_state_carryover"} + if current_source in weak_current_sources and previous_source not in stale_previous_sources: + merged = dict(current) + for key in ("as_of_date", "period_from", "period_to"): + if previous.get(key): + merged[key] = previous.get(key) + return merged return current diff --git a/scripts/test_agent_reliability_contract_healthcheck.py b/scripts/test_agent_reliability_contract_healthcheck.py index 4b791ca..3e291ea 100644 --- a/scripts/test_agent_reliability_contract_healthcheck.py +++ b/scripts/test_agent_reliability_contract_healthcheck.py @@ -18,6 +18,11 @@ class AgentReliabilityContractHealthcheckTests(unittest.TestCase): self.assertEqual(result["status"], "pass") self.assertEqual(result["failures"], []) + self.assertIn("margin_profitability_v1", result["answer_contracts"]["contract_ids"]) + self.assertIn( + "agent_margin_profitability_reliability_20260524", + result["domain_scenario_packs"]["pack_ids"], + ) def test_issue_catalog_healthcheck_blocks_auto_coder_issue_without_contract(self) -> None: with tempfile.TemporaryDirectory() as tmp: @@ -216,6 +221,128 @@ class AgentReliabilityContractHealthcheckTests(unittest.TestCase): failures, ) + def test_answer_contract_healthcheck_blocks_missing_required_fields(self) -> None: + with tempfile.TemporaryDirectory() as tmp: + contracts_dir = Path(tmp) / "contracts" + contracts_dir.mkdir() + (contracts_dir / "demo_contract.json").write_text( + json.dumps( + { + "schema_version": "business_answer_contract_v1", + "contract_id": "demo_contract", + "domain": "demo", + "answer_surface": {"required_fields": []}, + "detectors": ["demo_detector"], + } + ), + encoding="utf-8", + ) + + _, failures, _, contract_ids = health.check_answer_contracts(contracts_dir) + + self.assertEqual(contract_ids, {"demo_contract"}) + self.assertTrue( + any(failure.endswith("contracts\\demo_contract.json") for failure in failures), + failures, + ) + + def test_domain_scenario_pack_blocks_unknown_source_contract(self) -> None: + with tempfile.TemporaryDirectory() as tmp: + orchestration_dir = Path(tmp) + (orchestration_dir / "demo_pack.json").write_text( + json.dumps( + { + "schema_version": "domain_scenario_pack_v1", + "pack_id": "demo_pack", + "domain": "demo", + "source_contract_id": "missing_contract", + "detectors_under_test": ["demo_detector"], + "acceptance": {"min_score": 80}, + "scenarios": [ + { + "scenario_id": "demo_scenario", + "steps": [ + { + "step_id": "step_01", + "question": "Question?", + "expected_business_answer_contract": "missing_contract", + } + ], + } + ], + } + ), + encoding="utf-8", + ) + + _, failures, _ = health.check_domain_scenario_packs( + orchestration_dir, + {"known_contract"}, + {"detectors": {"demo_detector": {}}}, + ) + + self.assertTrue( + any( + failure.startswith("domain_scenario_pack_unknown_source_contract:") + and failure.endswith("demo_pack.json:missing_contract") + for failure in failures + ), + failures, + ) + self.assertTrue( + any( + failure.startswith("domain_scenario_pack_step_unknown_contract:") + and failure.endswith("demo_pack.json:demo_scenario:step_01:missing_contract") + for failure in failures + ), + failures, + ) + + def test_domain_scenario_pack_blocks_wrong_domain_trap_without_forbidden_patterns(self) -> None: + with tempfile.TemporaryDirectory() as tmp: + orchestration_dir = Path(tmp) + (orchestration_dir / "demo_pack.json").write_text( + json.dumps( + { + "schema_version": "domain_scenario_pack_v1", + "pack_id": "demo_pack", + "domain": "demo", + "source_contract_id": "demo_contract", + "detectors_under_test": ["demo_detector"], + "acceptance": {"min_score": 80}, + "scenarios": [ + { + "scenario_id": "demo_scenario", + "steps": [ + { + "step_id": "step_01", + "question": "Question?", + "expected_business_answer_contract": "demo_contract", + "semantic_tags": ["wrong_domain_trap"], + } + ], + } + ], + } + ), + encoding="utf-8", + ) + + _, failures, _ = health.check_domain_scenario_packs( + orchestration_dir, + {"demo_contract"}, + {"detectors": {"demo_detector": {}}}, + ) + + self.assertTrue( + any( + failure.startswith("domain_scenario_pack_wrong_domain_trap_missing_forbidden_patterns:") + and failure.endswith("demo_pack.json:demo_scenario:step_01") + for failure in failures + ), + failures, + ) + if __name__ == "__main__": unittest.main()