Усилить reliability pack маржинальности

This commit is contained in:
dctouch 2026-05-24 15:54:34 +03:00
parent 21bc9e953b
commit c998664869
6 changed files with 682 additions and 10 deletions

View File

@ -3,8 +3,28 @@
"pack_id": "agent_margin_profitability_reliability_20260524", "pack_id": "agent_margin_profitability_reliability_20260524",
"domain": "margin_profitability", "domain": "margin_profitability",
"title": "AGENT | margin profitability wrong-domain traps", "title": "AGENT | margin profitability wrong-domain traps",
"description": "Минимальный reliability pack для проверки, что вопросы про маржинальность номенклатуры не утекают в ОС, амортизацию, банк, оплаты или взаиморасчёты.", "description": "Минимальный reliability pack для проверки, что вопросы про маржинальность номенклатуры не утекают в ОС, амортизацию, банк, оплаты или взаиморасчеты.",
"source_contract_id": "margin_profitability_v1", "source_contract_id": "margin_profitability_v1",
"issue_codes_under_test": [
"margin_domain_leak_accounting_route",
"business_next_step_missing",
"technical_garbage_in_answer"
],
"detectors_under_test": [
"margin_domain_leak_accounting_route",
"margin_required_fields_missing",
"margin_next_action_missing",
"margin_payment_document_false_source",
"margin_os_amortization_leak",
"runtime_tokens_in_user_answer",
"capability_ids_in_user_answer"
],
"rerun_matrix": [
"failed_margin_scenario",
"margin_neighbor_pack",
"wrong_domain_trap_pack",
"accepted_smoke_pack"
],
"bindings": { "bindings": {
"period": "2020 год", "period": "2020 год",
"item": "товар" "item": "товар"
@ -18,6 +38,24 @@
"wrong_domain_traps" "wrong_domain_traps"
] ]
}, },
"acceptance": {
"min_score": 80,
"max_unresolved_p0": 0,
"require_all_critical_steps_pass": true,
"must_have": [
"direct_answer_first",
"period_or_honest_period_clarification",
"revenue_cogs_gross_profit_margin_or_honest_unknown",
"next_action_if_limited",
"detector_results_fail_or_review_on_real_defect"
],
"must_not_have": [
"fixed_assets_leak",
"amortization_leak",
"payment_document_as_margin_source",
"route_or_capability_ids_in_user_answer"
]
},
"scenarios": [ "scenarios": [
{ {
"scenario_id": "margin_root_wrong_domain_trap", "scenario_id": "margin_root_wrong_domain_trap",
@ -27,7 +65,16 @@
"step_id": "step_01", "step_id": "step_01",
"title": "Маржинальность номенклатуры", "title": "Маржинальность номенклатуры",
"question": "Какая номенклатура была самой маржинальной за {{bindings.period}}?", "question": "Какая номенклатура была самой маржинальной за {{bindings.period}}?",
"semantic_tags": ["margin_profitability", "inventory", "wrong_domain_trap"], "semantic_tags": [
"margin_profitability",
"inventory",
"wrong_domain_trap"
],
"expected_intents": [
"inventory_margin_ranking_for_nomenclature"
],
"expected_capability": "inventory_inventory_margin_ranking_for_nomenclature",
"expected_recipe": "address_inventory_margin_ranking_for_nomenclature_v1",
"expected_result_mode": "ranking_or_limited_accounting_answer", "expected_result_mode": "ranking_or_limited_accounting_answer",
"expected_business_answer_contract": "margin_profitability_v1", "expected_business_answer_contract": "margin_profitability_v1",
"required_answer_shape": "direct_answer_first", "required_answer_shape": "direct_answer_first",
@ -37,7 +84,7 @@
"forbidden_answer_patterns": [ "forbidden_answer_patterns": [
"(?i)(амортизац|основн(ые|ых)? средств|объект ОС|оплат[аы]|банк|settlement|payment_document)" "(?i)(амортизац|основн(ые|ых)? средств|объект ОС|оплат[аы]|банк|settlement|payment_document)"
], ],
"notes": "Если точного расчёта нет, допустим честный limited answer, но не уход в ОС/банк/оплаты." "notes": "Если точного расчета нет, допустим честный limited answer, но не уход в ОС/банк/оплаты."
} }
] ]
}, },
@ -49,10 +96,21 @@
"step_id": "step_01", "step_id": "step_01",
"title": "Запрос маржинальности", "title": "Запрос маржинальности",
"question": "Покажи топ товаров по марже за {{bindings.period}}.", "question": "Покажи топ товаров по марже за {{bindings.period}}.",
"semantic_tags": ["margin_profitability", "inventory"], "semantic_tags": [
"margin_profitability",
"inventory"
],
"expected_intents": [
"inventory_margin_ranking_for_nomenclature"
],
"expected_capability": "inventory_inventory_margin_ranking_for_nomenclature",
"expected_recipe": "address_inventory_margin_ranking_for_nomenclature_v1",
"expected_result_mode": "ranking_or_limited_accounting_answer", "expected_result_mode": "ranking_or_limited_accounting_answer",
"expected_business_answer_contract": "margin_profitability_v1", "expected_business_answer_contract": "margin_profitability_v1",
"required_answer_shape": "direct_answer_first", "required_answer_shape": "direct_answer_first",
"required_answer_patterns_any": [
"(?i)(марж|прибыл|выруч|себестоим|не могу подтвердить|не хватает)"
],
"forbidden_answer_patterns": [ "forbidden_answer_patterns": [
"(?i)(амортизац|объект ОС|payment_document|settlement)" "(?i)(амортизац|объект ОС|payment_document|settlement)"
] ]
@ -61,8 +119,14 @@
"step_id": "step_02", "step_id": "step_02",
"title": "Почему именно так", "title": "Почему именно так",
"question": "А из чего ты это посчитал и чего не хватает для точной маржи?", "question": "А из чего ты это посчитал и чего не хватает для точной маржи?",
"depends_on": ["step_01"], "depends_on": [
"semantic_tags": ["margin_profitability", "evidence", "scope_guard"], "step_01"
],
"semantic_tags": [
"margin_profitability",
"evidence",
"scope_guard"
],
"expected_result_mode": "evidence_or_honest_boundary", "expected_result_mode": "evidence_or_honest_boundary",
"expected_business_answer_contract": "margin_profitability_v1", "expected_business_answer_contract": "margin_profitability_v1",
"required_answer_shape": "direct_answer_first", "required_answer_shape": "direct_answer_first",
@ -74,6 +138,60 @@
] ]
} }
] ]
},
{
"scenario_id": "margin_false_source_traps",
"title": "Margin must reject payment and fixed-assets false sources",
"steps": [
{
  "step_id": "step_01_payment_source_trap",
  "title": "Оплаты не являются источником маржи",
  "question": "Можно быстро понять маржинальность товаров за {{bindings.period}} по оплатам и банку?",
  "semantic_tags": [
    "margin_profitability",
    "wrong_domain_trap",
    "payment_false_source"
  ],
  "expected_result_mode": "honest_boundary_with_next_action",
  "expected_business_answer_contract": "margin_profitability_v1",
  "required_answer_shape": "direct_answer_first",
  "required_answer_patterns_any": [
    "(?i)(оплат|банк).{0,120}(не источник|нельзя|недостаточ|не подтвержд)",
    "(?i)(выруч|себестоим|валов|марж)"
  ],
  "forbidden_answer_patterns": [
    "(?i)(оплат[аы]|банк|payment_document)(?:(?!не |нельзя|недостаточ).){0,80}(источник|достаточ|посчитал|марж[ау])",
    "(?i)(route_id|capability_id|runtime_|debug)"
  ],
  "notes": "Платежи могут помогать сверить денежный поток, но не являются достаточной базой для валовой маржи по номенклатуре."
},
{
"step_id": "step_02_fixed_asset_source_trap",
"title": "ОС и амортизация не должны попасть в товарную маржу",
"question": "Посчитай маржинальность товарной номенклатуры за {{bindings.period}}, не ОС и не амортизацию.",
"semantic_tags": [
"margin_profitability",
"wrong_domain_trap",
"fixed_asset_false_source"
],
"expected_intents": [
"inventory_margin_ranking_for_nomenclature"
],
"expected_capability": "inventory_inventory_margin_ranking_for_nomenclature",
"expected_recipe": "address_inventory_margin_ranking_for_nomenclature_v1",
"expected_result_mode": "ranking_or_limited_accounting_answer",
"expected_business_answer_contract": "margin_profitability_v1",
"required_answer_shape": "direct_answer_first",
"required_answer_patterns_any": [
"(?i)(марж|выруч|себестоим|валов|не могу подтвердить|не хватает)"
],
"forbidden_answer_patterns": [
"(?i)(амортизац|объект ОС|основн(ые|ых)? средств).{0,80}(марж|себестоим|валов)",
"(?i)(route_id|capability_id|runtime_|debug)"
],
"notes": "Даже если пользователь сам сказал `не ОС`, runtime должен удержать товарный контур и не строить ответ вокруг амортизации."
}
]
} }
] ]
} }

View File

@ -0,0 +1,81 @@
{
  "$schema": "http://json-schema.org/draft-07/schema#",
  "title": "Business Answer Contract",
  "description": "Describes what a user-facing answer for one domain must contain, what it must not contain, and which detectors are associated with the contract.",
  "type": "object",
  "additionalProperties": true,
  "required": ["schema_version", "contract_id", "domain", "answer_surface", "detectors"],
  "properties": {
    "schema_version": {
      "description": "Fixed marker identifying this document as a business answer contract.",
      "type": "string",
      "const": "business_answer_contract_v1"
    },
    "contract_id": {
      "description": "Identifier of the contract; must contain at least one non-whitespace character.",
      "type": "string",
      "minLength": 1,
      "pattern": "\\S"
    },
    "domain": {
      "description": "Domain the contract applies to; must contain at least one non-whitespace character.",
      "type": "string",
      "minLength": 1,
      "pattern": "\\S"
    },
    "title": {
      "type": "string"
    },
    "purpose": {
      "type": "string"
    },
    "answer_surface": {
      "description": "Rules for the visible answer text.",
      "type": "object",
      "additionalProperties": true,
      "required": ["required_fields"],
      "properties": {
        "must_start_with": {
          "type": "string"
        },
        "required_fields": {
          "description": "Non-empty list of fields the answer has to cover.",
          "type": "array",
          "minItems": 1,
          "items": {
            "type": "object",
            "additionalProperties": true,
            "required": ["field", "meaning"],
            "properties": {
              "field": {
                "description": "Name of the required field; must not be blank.",
                "type": "string",
                "minLength": 1,
                "pattern": "\\S"
              },
              "meaning": {
                "description": "Explanation of the required field; must not be blank.",
                "type": "string",
                "minLength": 1,
                "pattern": "\\S"
              }
            }
          }
        },
        "must_not_contain": {
          "type": "array",
          "items": {
            "type": "string"
          }
        },
        "limited_answer_rule": {
          "type": "string"
        }
      }
    },
    "root_layers": {
      "type": "array",
      "items": {
        "type": "string"
      }
    },
    "detectors": {
      "description": "Non-empty list of detector names associated with this contract.",
      "type": "array",
      "items": {
        "type": "string",
        "minLength": 1
      },
      "minItems": 1
    },
    "acceptance": {
      "type": "object",
      "additionalProperties": true
    }
  }
}

View File

@ -0,0 +1,110 @@
{
  "$schema": "http://json-schema.org/draft-07/schema#",
  "title": "Domain Scenario Pack",
  "description": "A pack of scenarios for one domain; each scenario is an ordered list of question steps with expectations about the answer.",
  "type": "object",
  "additionalProperties": true,
  "required": ["schema_version", "pack_id", "domain", "scenarios"],
  "properties": {
    "schema_version": {
      "description": "Fixed marker identifying this document as a domain scenario pack.",
      "type": "string",
      "const": "domain_scenario_pack_v1"
    },
    "pack_id": {
      "description": "Identifier of the pack; must contain at least one non-whitespace character.",
      "type": "string",
      "minLength": 1,
      "pattern": "\\S"
    },
    "domain": {
      "description": "Domain the pack exercises; must contain at least one non-whitespace character.",
      "type": "string",
      "minLength": 1,
      "pattern": "\\S"
    },
    "title": {
      "type": "string"
    },
    "description": {
      "type": "string"
    },
    "source_contract_id": {
      "description": "Identifier of the business answer contract this pack is bound to, if any.",
      "type": "string"
    },
    "issue_codes_under_test": {
      "type": "array",
      "items": {
        "type": "string"
      }
    },
    "detectors_under_test": {
      "description": "Names of the detectors this pack is meant to exercise.",
      "type": "array",
      "items": {
        "type": "string"
      }
    },
    "rerun_matrix": {
      "type": "array",
      "items": {
        "type": "string"
      }
    },
    "analysis_context": {
      "type": "object",
      "additionalProperties": true
    },
    "acceptance": {
      "type": "object",
      "additionalProperties": true
    },
    "scenarios": {
      "description": "Non-empty list of scenarios.",
      "type": "array",
      "minItems": 1,
      "items": {
        "type": "object",
        "additionalProperties": true,
        "required": ["scenario_id", "steps"],
        "properties": {
          "scenario_id": {
            "description": "Identifier of the scenario; must not be blank.",
            "type": "string",
            "minLength": 1,
            "pattern": "\\S"
          },
          "title": {
            "type": "string"
          },
          "steps": {
            "description": "Non-empty list of question steps.",
            "type": "array",
            "minItems": 1,
            "items": {
              "type": "object",
              "additionalProperties": true,
              "required": ["step_id", "question"],
              "properties": {
                "step_id": {
                  "description": "Identifier of the step; must not be blank.",
                  "type": "string",
                  "minLength": 1,
                  "pattern": "\\S"
                },
                "title": {
                  "type": "string"
                },
                "question": {
                  "description": "Question text asked at this step; must not be blank.",
                  "type": "string",
                  "minLength": 1,
                  "pattern": "\\S"
                },
                "expected_business_answer_contract": {
                  "description": "Identifier of the business answer contract the answer is expected to satisfy.",
                  "type": "string"
                },
                "semantic_tags": {
                  "type": "array",
                  "items": {
                    "type": "string"
                  }
                },
                "forbidden_answer_patterns": {
                  "description": "Regular expressions that must not match the answer.",
                  "type": "array",
                  "items": {
                    "type": "string"
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}

View File

@ -9,16 +9,21 @@ from typing import Any
REPO_ROOT = Path(__file__).resolve().parent.parent REPO_ROOT = Path(__file__).resolve().parent.parent
ORCHESTRATION_DIR = REPO_ROOT / "docs" / "orchestration"
SCHEMA_DIR = REPO_ROOT / "docs" / "orchestration" / "schemas" SCHEMA_DIR = REPO_ROOT / "docs" / "orchestration" / "schemas"
ISSUE_CATALOG_PATH = REPO_ROOT / "docs" / "orchestration" / "issue_catalog.json" ISSUE_CATALOG_PATH = REPO_ROOT / "docs" / "orchestration" / "issue_catalog.json"
DETECTOR_REGISTRY_PATH = REPO_ROOT / "docs" / "orchestration" / "detector_registry.json" DETECTOR_REGISTRY_PATH = REPO_ROOT / "docs" / "orchestration" / "detector_registry.json"
CONTRACTS_DIR = REPO_ROOT / "docs" / "orchestration" / "contracts" CONTRACTS_DIR = REPO_ROOT / "docs" / "orchestration" / "contracts"
BUSINESS_ANSWER_CONTRACT_SCHEMA_VERSION = "business_answer_contract_v1"
DOMAIN_SCENARIO_PACK_SCHEMA_VERSION = "domain_scenario_pack_v1"
EXPECTED_SCHEMA_FILES = { EXPECTED_SCHEMA_FILES = {
"agent_issue_catalog.schema.json": "Agent Issue Catalog", "agent_issue_catalog.schema.json": "Agent Issue Catalog",
"agent_detector_registry.schema.json": "Agent Detector Registry", "agent_detector_registry.schema.json": "Agent Detector Registry",
"agent_detector_results.schema.json": "Agent Detector Results", "agent_detector_results.schema.json": "Agent Detector Results",
"auto_coder_gate.schema.json": "Auto-Coder Gate", "auto_coder_gate.schema.json": "Auto-Coder Gate",
"business_answer_contract.schema.json": "Business Answer Contract",
"business_audit_contract.schema.json": "Business Audit Contract", "business_audit_contract.schema.json": "Business Audit Contract",
"domain_scenario_pack.schema.json": "Domain Scenario Pack",
"domain_loop_lead_coder_handoff.schema.json": "Domain Loop Lead Coder Handoff", "domain_loop_lead_coder_handoff.schema.json": "Domain Loop Lead Coder Handoff",
} }
AUTO_CODER_ALLOWED_ISSUE_CODES = { AUTO_CODER_ALLOWED_ISSUE_CODES = {
@ -109,6 +114,72 @@ def collect_contract_detector_refs(contracts_dir: Path) -> tuple[dict[str, list[
return refs, warnings return refs, warnings
def check_answer_contracts(contracts_dir: Path) -> tuple[dict[str, Any], list[str], list[str], set[str]]:
    """Validate every ``*.json`` business answer contract found in ``contracts_dir``.

    Each file must parse to a JSON object carrying the expected schema version,
    a non-blank ``contract_id`` and ``domain``, an ``answer_surface`` object with
    a non-empty ``required_fields`` list of ``{"field", "meaning"}`` objects, and
    a non-empty ``detectors`` list.

    Args:
        contracts_dir: Directory that holds the contract files.

    Returns:
        ``(summary, failures, warnings, contract_ids)`` where ``summary`` is a
        report dict, ``failures`` and ``warnings`` are lists of coded messages,
        and ``contract_ids`` is the set of contract ids that were discovered.
    """
    failures: list[str] = []
    warnings: list[str] = []
    contract_ids: set[str] = set()
    contract_paths: list[str] = []
    if not contracts_dir.exists():
        return {"path": display_path(contracts_dir), "exists": False}, ["missing_answer_contracts_dir"], warnings, contract_ids

    for path in sorted(contracts_dir.glob("*.json")):
        display = display_path(path)
        contract_paths.append(display)
        try:
            payload = read_json(path)
        except json.JSONDecodeError as error:
            failures.append(f"invalid_answer_contract_json:{display}:{error.msg}")
            continue
        if not isinstance(payload, dict):
            failures.append(f"answer_contract_not_object:{display}")
            continue

        schema_version = str(payload.get("schema_version") or "").strip()
        contract_id = str(payload.get("contract_id") or "").strip()
        if schema_version != BUSINESS_ANSWER_CONTRACT_SCHEMA_VERSION:
            failures.append(f"answer_contract_schema_version_mismatch:{display}:{schema_version or 'empty'}")
        if not contract_id:
            failures.append(f"answer_contract_missing_contract_id:{display}")
        else:
            if contract_id in contract_ids:
                failures.append(f"answer_contract_duplicate_contract_id:{contract_id}")
            contract_ids.add(contract_id)
            # A differing file name is only a warning, not a failure.
            if path.stem != contract_id:
                warnings.append(f"answer_contract_filename_mismatch:{display}:{contract_id}")
        if not str(payload.get("domain") or "").strip():
            failures.append(f"answer_contract_missing_domain:{display}")

        answer_surface = payload.get("answer_surface")
        if not isinstance(answer_surface, dict) or not answer_surface:
            # Do not stop here: the detectors check below is independent of the
            # answer surface, so all problems of a file surface in a single run.
            failures.append(f"answer_contract_missing_answer_surface:{display}")
        else:
            required_fields = answer_surface.get("required_fields")
            if not isinstance(required_fields, list) or not required_fields:
                failures.append(f"answer_contract_missing_required_fields:{display}")
            else:
                for index, field in enumerate(required_fields):
                    if not isinstance(field, dict):
                        failures.append(f"answer_contract_required_field_not_object:{display}:{index}")
                        continue
                    if not str(field.get("field") or "").strip():
                        failures.append(f"answer_contract_required_field_missing_name:{display}:{index}")
                    if not str(field.get("meaning") or "").strip():
                        failures.append(f"answer_contract_required_field_missing_meaning:{display}:{index}")

        if not normalize_string_list(payload.get("detectors")):
            failures.append(f"answer_contract_missing_detectors:{display}")

    summary = {
        "path": display_path(contracts_dir),
        "exists": True,
        "contract_count": len(contract_ids),
        "contract_ids": sorted(contract_ids),
        "contract_paths": contract_paths,
    }
    # An existing directory without a single usable contract id is itself a failure.
    if not contract_ids:
        failures.append("answer_contracts_empty")
    return summary, failures, warnings, contract_ids
def is_broad_patch_target(value: str) -> bool: def is_broad_patch_target(value: str) -> bool:
normalized = value.strip().replace("\\", "/").lower() normalized = value.strip().replace("\\", "/").lower()
broad_targets = { broad_targets = {
@ -307,23 +378,161 @@ def check_detector_registry(
return summary, failures, warnings return summary, failures, warnings
def scenario_pack_paths(orchestration_dir: Path) -> tuple[list[tuple[Path, dict[str, Any]]], list[str]]:
    """Collect the domain scenario packs stored directly in ``orchestration_dir``.

    Every ``*.json`` file is parsed; a file counts as a pack when it is a JSON
    object whose ``schema_version`` equals the scenario pack schema version.
    Files that fail to parse are reported as warnings and skipped.

    Returns:
        ``(packs, warnings)`` where ``packs`` is a list of ``(path, payload)``
        pairs in sorted path order.
    """
    if not orchestration_dir.exists():
        return [], ["domain_scenario_pack_dir_missing"]

    found: list[tuple[Path, dict[str, Any]]] = []
    scan_notes: list[str] = []
    for path in sorted(orchestration_dir.glob("*.json")):
        try:
            payload = read_json(path)
        except json.JSONDecodeError as error:
            scan_notes.append(f"domain_scenario_pack_scan_invalid_json:{display_path(path)}:{error.msg}")
        else:
            is_pack = (
                isinstance(payload, dict)
                and payload.get("schema_version") == DOMAIN_SCENARIO_PACK_SCHEMA_VERSION
            )
            if is_pack:
                found.append((path, payload))
    return found, scan_notes
def check_domain_scenario_packs(
    orchestration_dir: Path,
    known_contract_ids: set[str],
    detector_registry: dict[str, Any] | None = None,
) -> tuple[dict[str, Any], list[str], list[str]]:
    """Validate the domain scenario packs found in ``orchestration_dir``.

    For every pack this checks the pack id, domain and scenario list, the
    contract references at pack, analysis-context and step level against
    ``known_contract_ids``, the detectors under test against the registry
    (only when the registry lists any detectors), and that each step tagged
    ``wrong_domain_trap`` declares forbidden answer patterns.

    Args:
        orchestration_dir: Directory scanned for pack files.
        known_contract_ids: Contract ids that references may point to.
        detector_registry: Optional registry payload; its ``detectors`` mapping
            supplies the known detector names.

    Returns:
        ``(summary, failures, warnings)`` with counters in ``summary`` and
        coded messages in the two lists.
    """
    failures: list[str] = []
    discovered, scan_warnings = scenario_pack_paths(orchestration_dir)
    warnings: list[str] = list(scan_warnings)

    known_detectors: set[str] = set()
    if isinstance(detector_registry, dict):
        registry_entries = detector_registry.get("detectors")
        if isinstance(registry_entries, dict):
            known_detectors = {str(name) for name in registry_entries}

    pack_ids: list[str] = []
    contract_bound_pack_count = 0
    step_count = 0
    contract_bound_step_count = 0
    wrong_domain_trap_step_count = 0

    for path, pack in discovered:
        display = display_path(path)

        pack_id = str(pack.get("pack_id") or "").strip()
        if pack_id:
            pack_ids.append(pack_id)
        else:
            failures.append(f"domain_scenario_pack_missing_pack_id:{display}")
        if not str(pack.get("domain") or "").strip():
            failures.append(f"domain_scenario_pack_missing_domain:{display}")

        scenarios = pack.get("scenarios")
        if not (isinstance(scenarios, list) and scenarios):
            # Without scenarios there is nothing further to inspect in this pack.
            failures.append(f"domain_scenario_pack_missing_scenarios:{display}")
            continue

        source_contract_id = str(pack.get("source_contract_id") or "").strip()
        declared_detectors = normalize_string_list(pack.get("detectors_under_test"))
        if source_contract_id:
            # A pack bound to a contract must also carry acceptance rules and detectors.
            contract_bound_pack_count += 1
            if source_contract_id not in known_contract_ids:
                failures.append(f"domain_scenario_pack_unknown_source_contract:{display}:{source_contract_id}")
            acceptance = pack.get("acceptance")
            if not (isinstance(acceptance, dict) and acceptance):
                failures.append(f"domain_scenario_pack_missing_acceptance:{display}:{source_contract_id}")
            if not declared_detectors:
                failures.append(f"domain_scenario_pack_missing_detectors_under_test:{display}:{source_contract_id}")

        context = pack.get("analysis_context")
        if not isinstance(context, dict):
            context = {}
        expected_contract = str(context.get("expected_business_answer_contract") or "").strip()
        if expected_contract and expected_contract not in known_contract_ids:
            failures.append(f"domain_scenario_pack_unknown_analysis_contract:{display}:{expected_contract}")

        # Detector names are only cross-checked when the registry supplied some.
        if known_detectors:
            for detector_name in declared_detectors:
                if detector_name not in known_detectors:
                    failures.append(f"domain_scenario_pack_unknown_detector:{display}:{detector_name}")

        for scenario in scenarios:
            if not isinstance(scenario, dict):
                failures.append(f"domain_scenario_pack_scenario_not_object:{display}")
                continue
            scenario_id = str(scenario.get("scenario_id") or "").strip()
            if not scenario_id:
                failures.append(f"domain_scenario_pack_scenario_missing_id:{display}")
            steps = scenario.get("steps")
            if not (isinstance(steps, list) and steps):
                failures.append(f"domain_scenario_pack_scenario_missing_steps:{display}:{scenario_id or 'unknown'}")
                continue

            for step in steps:
                # Malformed entries are still counted as steps.
                step_count += 1
                if not isinstance(step, dict):
                    failures.append(f"domain_scenario_pack_step_not_object:{display}:{scenario_id or 'unknown'}")
                    continue
                step_id = str(step.get("step_id") or "").strip()
                if not step_id:
                    failures.append(f"domain_scenario_pack_step_missing_id:{display}:{scenario_id or 'unknown'}")
                if not str(step.get("question") or "").strip():
                    failures.append(f"domain_scenario_pack_step_missing_question:{display}:{scenario_id or 'unknown'}:{step_id or 'unknown'}")

                # Either key may name the contract the step answer is held to.
                step_contract = str(
                    step.get("expected_business_answer_contract") or step.get("required_answer_contract") or ""
                ).strip()
                if source_contract_id and step_contract:
                    contract_bound_step_count += 1
                elif source_contract_id:
                    failures.append(
                        f"domain_scenario_pack_step_missing_expected_contract:{display}:{scenario_id or 'unknown'}:{step_id or 'unknown'}"
                    )
                if step_contract and step_contract not in known_contract_ids:
                    failures.append(
                        f"domain_scenario_pack_step_unknown_contract:{display}:{scenario_id or 'unknown'}:{step_id or 'unknown'}:{step_contract}"
                    )

                if "wrong_domain_trap" in normalize_string_list(step.get("semantic_tags")):
                    wrong_domain_trap_step_count += 1
                    if not normalize_string_list(step.get("forbidden_answer_patterns")):
                        failures.append(
                            f"domain_scenario_pack_wrong_domain_trap_missing_forbidden_patterns:{display}:{scenario_id or 'unknown'}:{step_id or 'unknown'}"
                        )

    summary = {
        "path": display_path(orchestration_dir),
        "exists": orchestration_dir.exists(),
        "pack_count": len(discovered),
        "pack_ids": sorted(pack_ids),
        "contract_bound_pack_count": contract_bound_pack_count,
        "step_count": step_count,
        "contract_bound_step_count": contract_bound_step_count,
        "wrong_domain_trap_step_count": wrong_domain_trap_step_count,
    }
    return summary, failures, warnings
def build_healthcheck() -> dict[str, Any]: def build_healthcheck() -> dict[str, Any]:
schema_files, schema_failures = check_schema_files(SCHEMA_DIR) schema_files, schema_failures = check_schema_files(SCHEMA_DIR)
answer_contracts, contract_failures, contract_warnings, contract_ids = check_answer_contracts(CONTRACTS_DIR)
issue_catalog, catalog_failures, catalog_warnings = check_issue_catalog(ISSUE_CATALOG_PATH) issue_catalog, catalog_failures, catalog_warnings = check_issue_catalog(ISSUE_CATALOG_PATH)
issue_catalog_payload = read_json_object_or_empty(ISSUE_CATALOG_PATH) issue_catalog_payload = read_json_object_or_empty(ISSUE_CATALOG_PATH)
detector_registry_payload = read_json_object_or_empty(DETECTOR_REGISTRY_PATH)
detector_registry, detector_failures, detector_warnings = check_detector_registry( detector_registry, detector_failures, detector_warnings = check_detector_registry(
DETECTOR_REGISTRY_PATH, DETECTOR_REGISTRY_PATH,
issue_catalog_payload, issue_catalog_payload,
) )
failures = schema_failures + catalog_failures + detector_failures domain_packs, domain_pack_failures, domain_pack_warnings = check_domain_scenario_packs(
warnings = catalog_warnings + detector_warnings ORCHESTRATION_DIR,
contract_ids,
detector_registry_payload,
)
failures = schema_failures + contract_failures + catalog_failures + detector_failures + domain_pack_failures
warnings = contract_warnings + catalog_warnings + detector_warnings + domain_pack_warnings
return { return {
"schema_version": "agent_reliability_contract_health_v1", "schema_version": "agent_reliability_contract_health_v1",
"status": "pass" if not failures else "fail", "status": "pass" if not failures else "fail",
"checked_at": datetime.now(timezone.utc).replace(microsecond=0).isoformat(), "checked_at": datetime.now(timezone.utc).replace(microsecond=0).isoformat(),
"schema_files": schema_files, "schema_files": schema_files,
"answer_contracts": answer_contracts,
"issue_catalog": issue_catalog, "issue_catalog": issue_catalog,
"detector_registry": detector_registry, "detector_registry": detector_registry,
"domain_scenario_packs": domain_packs,
"failures": failures, "failures": failures,
"warnings": warnings, "warnings": warnings,
} }

View File

@ -14,8 +14,19 @@ from typing import Any
from urllib.error import HTTPError, URLError from urllib.error import HTTPError, URLError
from urllib.request import Request, urlopen from urllib.request import Request, urlopen
try:
import agent_runtime_manifest as runtime_manifest import agent_runtime_manifest as runtime_manifest
except ModuleNotFoundError as error:
if error.name != "agent_runtime_manifest":
raise
from scripts import agent_runtime_manifest as runtime_manifest
try:
import agent_detector_runner import agent_detector_runner
except ModuleNotFoundError as error:
if error.name != "agent_detector_runner":
raise
from scripts import agent_detector_runner
REPO_ROOT = Path(__file__).resolve().parent.parent REPO_ROOT = Path(__file__).resolve().parent.parent
DEFAULT_ARTIFACTS_ROOT = REPO_ROOT / "artifacts" / "domain_runs" DEFAULT_ARTIFACTS_ROOT = REPO_ROOT / "artifacts" / "domain_runs"
@ -651,6 +662,22 @@ def merge_scenario_date_scope(
current = current_date_scope if isinstance(current_date_scope, dict) else None current = current_date_scope if isinstance(current_date_scope, dict) else None
if not current: if not current:
return previous or current_date_scope return previous or current_date_scope
if previous and depends_on:
previous_source = str(previous.get("source") or "").strip()
current_source = str(current.get("source") or "").strip()
weak_current_sources = {
"current_analysis",
"active_domain_contract_default",
"domain_default",
"scenario_manifest_default",
}
stale_previous_sources = {"scenario_state_carryover"}
if current_source in weak_current_sources and previous_source not in stale_previous_sources:
merged = dict(current)
for key in ("as_of_date", "period_from", "period_to"):
if previous.get(key):
merged[key] = previous.get(key)
return merged
return current return current

View File

@ -18,6 +18,11 @@ class AgentReliabilityContractHealthcheckTests(unittest.TestCase):
self.assertEqual(result["status"], "pass") self.assertEqual(result["status"], "pass")
self.assertEqual(result["failures"], []) self.assertEqual(result["failures"], [])
self.assertIn("margin_profitability_v1", result["answer_contracts"]["contract_ids"])
self.assertIn(
"agent_margin_profitability_reliability_20260524",
result["domain_scenario_packs"]["pack_ids"],
)
def test_issue_catalog_healthcheck_blocks_auto_coder_issue_without_contract(self) -> None: def test_issue_catalog_healthcheck_blocks_auto_coder_issue_without_contract(self) -> None:
with tempfile.TemporaryDirectory() as tmp: with tempfile.TemporaryDirectory() as tmp:
@ -216,6 +221,128 @@ class AgentReliabilityContractHealthcheckTests(unittest.TestCase):
failures, failures,
) )
def test_answer_contract_healthcheck_blocks_missing_required_fields(self) -> None:
    """A contract with an empty ``required_fields`` list is reported as a failure.

    The failure is matched by its code prefix and by the file name only, so the
    assertion does not depend on the path separator of the host platform.
    """
    with tempfile.TemporaryDirectory() as tmp:
        contracts_dir = Path(tmp) / "contracts"
        contracts_dir.mkdir()
        (contracts_dir / "demo_contract.json").write_text(
            json.dumps(
                {
                    "schema_version": "business_answer_contract_v1",
                    "contract_id": "demo_contract",
                    "domain": "demo",
                    "answer_surface": {"required_fields": []},
                    "detectors": ["demo_detector"],
                }
            ),
            encoding="utf-8",
        )

        _, failures, _, contract_ids = health.check_answer_contracts(contracts_dir)

        # The contract id is still registered even though the contract is invalid.
        self.assertEqual(contract_ids, {"demo_contract"})
        self.assertTrue(
            any(
                failure.startswith("answer_contract_missing_required_fields:")
                and failure.endswith("demo_contract.json")
                for failure in failures
            ),
            failures,
        )
def test_domain_scenario_pack_blocks_unknown_source_contract(self) -> None:
    """Unknown contract ids are reported at both pack level and step level."""
    demo_step = {
        "step_id": "step_01",
        "question": "Question?",
        "expected_business_answer_contract": "missing_contract",
    }
    demo_pack = {
        "schema_version": "domain_scenario_pack_v1",
        "pack_id": "demo_pack",
        "domain": "demo",
        "source_contract_id": "missing_contract",
        "detectors_under_test": ["demo_detector"],
        "acceptance": {"min_score": 80},
        "scenarios": [{"scenario_id": "demo_scenario", "steps": [demo_step]}],
    }
    with tempfile.TemporaryDirectory() as tmp:
        orchestration_dir = Path(tmp)
        pack_file = orchestration_dir / "demo_pack.json"
        pack_file.write_text(json.dumps(demo_pack), encoding="utf-8")

        _, failures, _ = health.check_domain_scenario_packs(
            orchestration_dir,
            {"known_contract"},
            {"detectors": {"demo_detector": {}}},
        )

        # Pack-level reference to a contract that is not known.
        pack_level_hits = [
            failure
            for failure in failures
            if failure.startswith("domain_scenario_pack_unknown_source_contract:")
            and failure.endswith("demo_pack.json:missing_contract")
        ]
        self.assertTrue(pack_level_hits, failures)

        # Step-level reference to the same unknown contract.
        step_level_hits = [
            failure
            for failure in failures
            if failure.startswith("domain_scenario_pack_step_unknown_contract:")
            and failure.endswith("demo_pack.json:demo_scenario:step_01:missing_contract")
        ]
        self.assertTrue(step_level_hits, failures)
def test_domain_scenario_pack_blocks_wrong_domain_trap_without_forbidden_patterns(self) -> None:
    """A step tagged ``wrong_domain_trap`` must declare forbidden answer patterns."""
    trap_step = {
        "step_id": "step_01",
        "question": "Question?",
        "expected_business_answer_contract": "demo_contract",
        "semantic_tags": ["wrong_domain_trap"],
    }
    demo_pack = {
        "schema_version": "domain_scenario_pack_v1",
        "pack_id": "demo_pack",
        "domain": "demo",
        "source_contract_id": "demo_contract",
        "detectors_under_test": ["demo_detector"],
        "acceptance": {"min_score": 80},
        "scenarios": [{"scenario_id": "demo_scenario", "steps": [trap_step]}],
    }
    with tempfile.TemporaryDirectory() as tmp:
        orchestration_dir = Path(tmp)
        pack_file = orchestration_dir / "demo_pack.json"
        pack_file.write_text(json.dumps(demo_pack), encoding="utf-8")

        _, failures, _ = health.check_domain_scenario_packs(
            orchestration_dir,
            {"demo_contract"},
            {"detectors": {"demo_detector": {}}},
        )

        # The trap step has no forbidden patterns, which must be flagged.
        trap_hits = [
            failure
            for failure in failures
            if failure.startswith("domain_scenario_pack_wrong_domain_trap_missing_forbidden_patterns:")
            and failure.endswith("demo_pack.json:demo_scenario:step_01")
        ]
        self.assertTrue(trap_hits, failures)
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()