Усилить reliability pack маржинальности
This commit is contained in:
parent
21bc9e953b
commit
c998664869
|
|
@ -3,8 +3,28 @@
|
||||||
"pack_id": "agent_margin_profitability_reliability_20260524",
|
"pack_id": "agent_margin_profitability_reliability_20260524",
|
||||||
"domain": "margin_profitability",
|
"domain": "margin_profitability",
|
||||||
"title": "AGENT | margin profitability wrong-domain traps",
|
"title": "AGENT | margin profitability wrong-domain traps",
|
||||||
"description": "Минимальный reliability pack для проверки, что вопросы про маржинальность номенклатуры не утекают в ОС, амортизацию, банк, оплаты или взаиморасчёты.",
|
"description": "Минимальный reliability pack для проверки, что вопросы про маржинальность номенклатуры не утекают в ОС, амортизацию, банк, оплаты или взаиморасчеты.",
|
||||||
"source_contract_id": "margin_profitability_v1",
|
"source_contract_id": "margin_profitability_v1",
|
||||||
|
"issue_codes_under_test": [
|
||||||
|
"margin_domain_leak_accounting_route",
|
||||||
|
"business_next_step_missing",
|
||||||
|
"technical_garbage_in_answer"
|
||||||
|
],
|
||||||
|
"detectors_under_test": [
|
||||||
|
"margin_domain_leak_accounting_route",
|
||||||
|
"margin_required_fields_missing",
|
||||||
|
"margin_next_action_missing",
|
||||||
|
"margin_payment_document_false_source",
|
||||||
|
"margin_os_amortization_leak",
|
||||||
|
"runtime_tokens_in_user_answer",
|
||||||
|
"capability_ids_in_user_answer"
|
||||||
|
],
|
||||||
|
"rerun_matrix": [
|
||||||
|
"failed_margin_scenario",
|
||||||
|
"margin_neighbor_pack",
|
||||||
|
"wrong_domain_trap_pack",
|
||||||
|
"accepted_smoke_pack"
|
||||||
|
],
|
||||||
"bindings": {
|
"bindings": {
|
||||||
"period": "2020 год",
|
"period": "2020 год",
|
||||||
"item": "товар"
|
"item": "товар"
|
||||||
|
|
@ -18,6 +38,24 @@
|
||||||
"wrong_domain_traps"
|
"wrong_domain_traps"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
"acceptance": {
|
||||||
|
"min_score": 80,
|
||||||
|
"max_unresolved_p0": 0,
|
||||||
|
"require_all_critical_steps_pass": true,
|
||||||
|
"must_have": [
|
||||||
|
"direct_answer_first",
|
||||||
|
"period_or_honest_period_clarification",
|
||||||
|
"revenue_cogs_gross_profit_margin_or_honest_unknown",
|
||||||
|
"next_action_if_limited",
|
||||||
|
"detector_results_fail_or_review_on_real_defect"
|
||||||
|
],
|
||||||
|
"must_not_have": [
|
||||||
|
"fixed_assets_leak",
|
||||||
|
"amortization_leak",
|
||||||
|
"payment_document_as_margin_source",
|
||||||
|
"route_or_capability_ids_in_user_answer"
|
||||||
|
]
|
||||||
|
},
|
||||||
"scenarios": [
|
"scenarios": [
|
||||||
{
|
{
|
||||||
"scenario_id": "margin_root_wrong_domain_trap",
|
"scenario_id": "margin_root_wrong_domain_trap",
|
||||||
|
|
@ -27,7 +65,16 @@
|
||||||
"step_id": "step_01",
|
"step_id": "step_01",
|
||||||
"title": "Маржинальность номенклатуры",
|
"title": "Маржинальность номенклатуры",
|
||||||
"question": "Какая номенклатура была самой маржинальной за {{bindings.period}}?",
|
"question": "Какая номенклатура была самой маржинальной за {{bindings.period}}?",
|
||||||
"semantic_tags": ["margin_profitability", "inventory", "wrong_domain_trap"],
|
"semantic_tags": [
|
||||||
|
"margin_profitability",
|
||||||
|
"inventory",
|
||||||
|
"wrong_domain_trap"
|
||||||
|
],
|
||||||
|
"expected_intents": [
|
||||||
|
"inventory_margin_ranking_for_nomenclature"
|
||||||
|
],
|
||||||
|
"expected_capability": "inventory_inventory_margin_ranking_for_nomenclature",
|
||||||
|
"expected_recipe": "address_inventory_margin_ranking_for_nomenclature_v1",
|
||||||
"expected_result_mode": "ranking_or_limited_accounting_answer",
|
"expected_result_mode": "ranking_or_limited_accounting_answer",
|
||||||
"expected_business_answer_contract": "margin_profitability_v1",
|
"expected_business_answer_contract": "margin_profitability_v1",
|
||||||
"required_answer_shape": "direct_answer_first",
|
"required_answer_shape": "direct_answer_first",
|
||||||
|
|
@ -37,7 +84,7 @@
|
||||||
"forbidden_answer_patterns": [
|
"forbidden_answer_patterns": [
|
||||||
"(?i)(амортизац|основн(ые|ых)? средств|объект ОС|оплат[аы]|банк|settlement|payment_document)"
|
"(?i)(амортизац|основн(ые|ых)? средств|объект ОС|оплат[аы]|банк|settlement|payment_document)"
|
||||||
],
|
],
|
||||||
"notes": "Если точного расчёта нет, допустим честный limited answer, но не уход в ОС/банк/оплаты."
|
"notes": "Если точного расчета нет, допустим честный limited answer, но не уход в ОС/банк/оплаты."
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
|
@ -49,10 +96,21 @@
|
||||||
"step_id": "step_01",
|
"step_id": "step_01",
|
||||||
"title": "Запрос маржинальности",
|
"title": "Запрос маржинальности",
|
||||||
"question": "Покажи топ товаров по марже за {{bindings.period}}.",
|
"question": "Покажи топ товаров по марже за {{bindings.period}}.",
|
||||||
"semantic_tags": ["margin_profitability", "inventory"],
|
"semantic_tags": [
|
||||||
|
"margin_profitability",
|
||||||
|
"inventory"
|
||||||
|
],
|
||||||
|
"expected_intents": [
|
||||||
|
"inventory_margin_ranking_for_nomenclature"
|
||||||
|
],
|
||||||
|
"expected_capability": "inventory_inventory_margin_ranking_for_nomenclature",
|
||||||
|
"expected_recipe": "address_inventory_margin_ranking_for_nomenclature_v1",
|
||||||
"expected_result_mode": "ranking_or_limited_accounting_answer",
|
"expected_result_mode": "ranking_or_limited_accounting_answer",
|
||||||
"expected_business_answer_contract": "margin_profitability_v1",
|
"expected_business_answer_contract": "margin_profitability_v1",
|
||||||
"required_answer_shape": "direct_answer_first",
|
"required_answer_shape": "direct_answer_first",
|
||||||
|
"required_answer_patterns_any": [
|
||||||
|
"(?i)(марж|прибыл|выруч|себестоим|не могу подтвердить|не хватает)"
|
||||||
|
],
|
||||||
"forbidden_answer_patterns": [
|
"forbidden_answer_patterns": [
|
||||||
"(?i)(амортизац|объект ОС|payment_document|settlement)"
|
"(?i)(амортизац|объект ОС|payment_document|settlement)"
|
||||||
]
|
]
|
||||||
|
|
@ -61,8 +119,14 @@
|
||||||
"step_id": "step_02",
|
"step_id": "step_02",
|
||||||
"title": "Почему именно так",
|
"title": "Почему именно так",
|
||||||
"question": "А из чего ты это посчитал и чего не хватает для точной маржи?",
|
"question": "А из чего ты это посчитал и чего не хватает для точной маржи?",
|
||||||
"depends_on": ["step_01"],
|
"depends_on": [
|
||||||
"semantic_tags": ["margin_profitability", "evidence", "scope_guard"],
|
"step_01"
|
||||||
|
],
|
||||||
|
"semantic_tags": [
|
||||||
|
"margin_profitability",
|
||||||
|
"evidence",
|
||||||
|
"scope_guard"
|
||||||
|
],
|
||||||
"expected_result_mode": "evidence_or_honest_boundary",
|
"expected_result_mode": "evidence_or_honest_boundary",
|
||||||
"expected_business_answer_contract": "margin_profitability_v1",
|
"expected_business_answer_contract": "margin_profitability_v1",
|
||||||
"required_answer_shape": "direct_answer_first",
|
"required_answer_shape": "direct_answer_first",
|
||||||
|
|
@ -74,6 +138,60 @@
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"scenario_id": "margin_false_source_traps",
|
||||||
|
"title": "Margin must reject payment and fixed-assets false sources",
|
||||||
|
"steps": [
|
||||||
|
{
|
||||||
|
"step_id": "step_01_payment_source_trap",
|
||||||
|
"title": "Оплаты не являются источником маржи",
|
||||||
|
"question": "Можно быстро понять маржинальность товаров за {{bindings.period}} по оплатам и банку?",
|
||||||
|
"semantic_tags": [
|
||||||
|
"margin_profitability",
|
||||||
|
"wrong_domain_trap",
|
||||||
|
"payment_false_source"
|
||||||
|
],
|
||||||
|
"expected_result_mode": "honest_boundary_with_next_action",
|
||||||
|
"expected_business_answer_contract": "margin_profitability_v1",
|
||||||
|
"required_answer_shape": "direct_answer_first",
|
||||||
|
"required_answer_patterns_any": [
|
||||||
|
"(?i)(оплат|банк).{0,120}(не источник|нельзя|недостаточ|не подтвержд)",
|
||||||
|
"(?i)(выруч|себестоим|валов|марж)"
|
||||||
|
],
|
||||||
|
"forbidden_answer_patterns": [
|
||||||
|
"(?i)(оплат[аы]|банк|payment_document).{0,80}(источник|достаточ|посчитал|марж[ау])",
|
||||||
|
"(?i)(route_id|capability_id|runtime_|debug)"
|
||||||
|
],
|
||||||
|
"notes": "Платежи могут помогать сверить денежный поток, но не являются достаточной базой для валовой маржи по номенклатуре."
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"step_id": "step_02_fixed_asset_source_trap",
|
||||||
|
"title": "ОС и амортизация не должны попасть в товарную маржу",
|
||||||
|
"question": "Посчитай маржинальность товарной номенклатуры за {{bindings.period}}, не ОС и не амортизацию.",
|
||||||
|
"semantic_tags": [
|
||||||
|
"margin_profitability",
|
||||||
|
"wrong_domain_trap",
|
||||||
|
"fixed_asset_false_source"
|
||||||
|
],
|
||||||
|
"expected_intents": [
|
||||||
|
"inventory_margin_ranking_for_nomenclature"
|
||||||
|
],
|
||||||
|
"expected_capability": "inventory_inventory_margin_ranking_for_nomenclature",
|
||||||
|
"expected_recipe": "address_inventory_margin_ranking_for_nomenclature_v1",
|
||||||
|
"expected_result_mode": "ranking_or_limited_accounting_answer",
|
||||||
|
"expected_business_answer_contract": "margin_profitability_v1",
|
||||||
|
"required_answer_shape": "direct_answer_first",
|
||||||
|
"required_answer_patterns_any": [
|
||||||
|
"(?i)(марж|выруч|себестоим|валов|не могу подтвердить|не хватает)"
|
||||||
|
],
|
||||||
|
"forbidden_answer_patterns": [
|
||||||
|
"(?i)(амортизац|объект ОС|основн(ые|ых)? средств).{0,80}(марж|себестоим|валов)",
|
||||||
|
"(?i)(route_id|capability_id|runtime_|debug)"
|
||||||
|
],
|
||||||
|
"notes": "Даже если пользователь сам сказал `не ОС`, runtime должен удержать товарный контур и не строить ответ вокруг амортизации."
|
||||||
|
}
|
||||||
|
]
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,81 @@
|
||||||
|
{
|
||||||
|
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||||
|
"title": "Business Answer Contract",
|
||||||
|
"type": "object",
|
||||||
|
"additionalProperties": true,
|
||||||
|
"required": ["schema_version", "contract_id", "domain", "answer_surface", "detectors"],
|
||||||
|
"properties": {
|
||||||
|
"schema_version": {
|
||||||
|
"const": "business_answer_contract_v1"
|
||||||
|
},
|
||||||
|
"contract_id": {
|
||||||
|
"type": "string",
|
||||||
|
"minLength": 1
|
||||||
|
},
|
||||||
|
"domain": {
|
||||||
|
"type": "string",
|
||||||
|
"minLength": 1
|
||||||
|
},
|
||||||
|
"title": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"purpose": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"answer_surface": {
|
||||||
|
"type": "object",
|
||||||
|
"additionalProperties": true,
|
||||||
|
"required": ["required_fields"],
|
||||||
|
"properties": {
|
||||||
|
"must_start_with": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"required_fields": {
|
||||||
|
"type": "array",
|
||||||
|
"minItems": 1,
|
||||||
|
"items": {
|
||||||
|
"type": "object",
|
||||||
|
"additionalProperties": true,
|
||||||
|
"required": ["field", "meaning"],
|
||||||
|
"properties": {
|
||||||
|
"field": {
|
||||||
|
"type": "string",
|
||||||
|
"minLength": 1
|
||||||
|
},
|
||||||
|
"meaning": {
|
||||||
|
"type": "string",
|
||||||
|
"minLength": 1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"must_not_contain": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"limited_answer_rule": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"root_layers": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"detectors": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"minItems": 1
|
||||||
|
},
|
||||||
|
"acceptance": {
|
||||||
|
"type": "object",
|
||||||
|
"additionalProperties": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,110 @@
|
||||||
|
{
|
||||||
|
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||||
|
"title": "Domain Scenario Pack",
|
||||||
|
"type": "object",
|
||||||
|
"additionalProperties": true,
|
||||||
|
"required": ["schema_version", "pack_id", "domain", "scenarios"],
|
||||||
|
"properties": {
|
||||||
|
"schema_version": {
|
||||||
|
"const": "domain_scenario_pack_v1"
|
||||||
|
},
|
||||||
|
"pack_id": {
|
||||||
|
"type": "string",
|
||||||
|
"minLength": 1
|
||||||
|
},
|
||||||
|
"domain": {
|
||||||
|
"type": "string",
|
||||||
|
"minLength": 1
|
||||||
|
},
|
||||||
|
"title": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"description": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"source_contract_id": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"issue_codes_under_test": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"detectors_under_test": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"rerun_matrix": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"analysis_context": {
|
||||||
|
"type": "object",
|
||||||
|
"additionalProperties": true
|
||||||
|
},
|
||||||
|
"acceptance": {
|
||||||
|
"type": "object",
|
||||||
|
"additionalProperties": true
|
||||||
|
},
|
||||||
|
"scenarios": {
|
||||||
|
"type": "array",
|
||||||
|
"minItems": 1,
|
||||||
|
"items": {
|
||||||
|
"type": "object",
|
||||||
|
"additionalProperties": true,
|
||||||
|
"required": ["scenario_id", "steps"],
|
||||||
|
"properties": {
|
||||||
|
"scenario_id": {
|
||||||
|
"type": "string",
|
||||||
|
"minLength": 1
|
||||||
|
},
|
||||||
|
"title": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"steps": {
|
||||||
|
"type": "array",
|
||||||
|
"minItems": 1,
|
||||||
|
"items": {
|
||||||
|
"type": "object",
|
||||||
|
"additionalProperties": true,
|
||||||
|
"required": ["step_id", "question"],
|
||||||
|
"properties": {
|
||||||
|
"step_id": {
|
||||||
|
"type": "string",
|
||||||
|
"minLength": 1
|
||||||
|
},
|
||||||
|
"title": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"question": {
|
||||||
|
"type": "string",
|
||||||
|
"minLength": 1
|
||||||
|
},
|
||||||
|
"expected_business_answer_contract": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"semantic_tags": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"forbidden_answer_patterns": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -9,16 +9,21 @@ from typing import Any
|
||||||
|
|
||||||
|
|
||||||
# Repository root: the parent of the directory that contains this script.
REPO_ROOT = Path(__file__).resolve().parent.parent

# Every orchestration artifact checked by this module lives below this folder.
ORCHESTRATION_DIR = REPO_ROOT.joinpath("docs", "orchestration")
SCHEMA_DIR = ORCHESTRATION_DIR / "schemas"
ISSUE_CATALOG_PATH = ORCHESTRATION_DIR / "issue_catalog.json"
DETECTOR_REGISTRY_PATH = ORCHESTRATION_DIR / "detector_registry.json"
CONTRACTS_DIR = ORCHESTRATION_DIR / "contracts"

# Values of the ``schema_version`` field that identify each document type.
BUSINESS_ANSWER_CONTRACT_SCHEMA_VERSION = "business_answer_contract_v1"
DOMAIN_SCENARIO_PACK_SCHEMA_VERSION = "domain_scenario_pack_v1"

# Schema file name -> human-readable schema name.
# NOTE(review): presumably compared against each schema's "title" — confirm in
# check_schema_files, which is not visible here.
EXPECTED_SCHEMA_FILES = dict(
    [
        ("agent_issue_catalog.schema.json", "Agent Issue Catalog"),
        ("agent_detector_registry.schema.json", "Agent Detector Registry"),
        ("agent_detector_results.schema.json", "Agent Detector Results"),
        ("auto_coder_gate.schema.json", "Auto-Coder Gate"),
        ("business_answer_contract.schema.json", "Business Answer Contract"),
        ("business_audit_contract.schema.json", "Business Audit Contract"),
        ("domain_scenario_pack.schema.json", "Domain Scenario Pack"),
        ("domain_loop_lead_coder_handoff.schema.json", "Domain Loop Lead Coder Handoff"),
    ]
)
|
||||||
AUTO_CODER_ALLOWED_ISSUE_CODES = {
|
AUTO_CODER_ALLOWED_ISSUE_CODES = {
|
||||||
|
|
@ -109,6 +114,72 @@ def collect_contract_detector_refs(contracts_dir: Path) -> tuple[dict[str, list[
|
||||||
return refs, warnings
|
return refs, warnings
|
||||||
|
|
||||||
|
|
||||||
|
def check_answer_contracts(contracts_dir: Path) -> tuple[dict[str, Any], list[str], list[str], set[str]]:
    """Validate every ``*.json`` business answer contract in ``contracts_dir``.

    Each file must be a JSON object that carries the expected
    ``schema_version``, a non-empty ``contract_id`` and ``domain``, an
    ``answer_surface`` object with a non-empty ``required_fields`` list whose
    items each have ``field`` and ``meaning``, and a non-empty ``detectors``
    list. All problems found in a file are reported in one pass.

    Args:
        contracts_dir: Directory that holds the contract JSON files.

    Returns:
        A ``(summary, failures, warnings, contract_ids)`` tuple. ``summary``
        describes the directory and the contracts found, ``failures`` and
        ``warnings`` are lists of ``code:detail`` strings, and ``contract_ids``
        is the set of non-empty contract ids that were read.
    """
    failures: list[str] = []
    warnings: list[str] = []
    contract_ids: set[str] = set()
    contract_paths: list[str] = []
    if not contracts_dir.exists():
        return (
            {"path": display_path(contracts_dir), "exists": False},
            ["missing_answer_contracts_dir"],
            warnings,
            contract_ids,
        )

    # Sorted so that the report is stable between runs.
    for path in sorted(contracts_dir.glob("*.json")):
        display = display_path(path)
        contract_paths.append(display)
        try:
            payload = read_json(path)
        except json.JSONDecodeError as error:
            failures.append(f"invalid_answer_contract_json:{display}:{error.msg}")
            continue
        if not isinstance(payload, dict):
            failures.append(f"answer_contract_not_object:{display}")
            continue

        schema_version = str(payload.get("schema_version") or "").strip()
        contract_id = str(payload.get("contract_id") or "").strip()
        if schema_version != BUSINESS_ANSWER_CONTRACT_SCHEMA_VERSION:
            failures.append(f"answer_contract_schema_version_mismatch:{display}:{schema_version or 'empty'}")
        if not contract_id:
            failures.append(f"answer_contract_missing_contract_id:{display}")
        else:
            if contract_id in contract_ids:
                failures.append(f"answer_contract_duplicate_contract_id:{contract_id}")
            contract_ids.add(contract_id)
            # A file name that differs from the id is only a warning.
            if path.stem != contract_id:
                warnings.append(f"answer_contract_filename_mismatch:{display}:{contract_id}")
        if not str(payload.get("domain") or "").strip():
            failures.append(f"answer_contract_missing_domain:{display}")

        answer_surface = payload.get("answer_surface")
        if not isinstance(answer_surface, dict) or not answer_surface:
            failures.append(f"answer_contract_missing_answer_surface:{display}")
        else:
            required_fields = answer_surface.get("required_fields")
            if not isinstance(required_fields, list) or not required_fields:
                failures.append(f"answer_contract_missing_required_fields:{display}")
            else:
                for index, field in enumerate(required_fields):
                    if not isinstance(field, dict):
                        failures.append(f"answer_contract_required_field_not_object:{display}:{index}")
                        continue
                    if not str(field.get("field") or "").strip():
                        failures.append(f"answer_contract_required_field_missing_name:{display}:{index}")
                    if not str(field.get("meaning") or "").strip():
                        failures.append(f"answer_contract_required_field_missing_meaning:{display}:{index}")

        # Checked even when answer_surface is missing: the two requirements are
        # independent, and skipping this one would hide a second defect until
        # the first is fixed.
        # NOTE(review): assumes normalize_string_list returns an empty list for
        # missing or non-list input — confirm against its definition.
        if not normalize_string_list(payload.get("detectors")):
            failures.append(f"answer_contract_missing_detectors:{display}")

    summary = {
        "path": display_path(contracts_dir),
        "exists": True,
        "contract_count": len(contract_ids),
        "contract_ids": sorted(contract_ids),
        "contract_paths": contract_paths,
    }
    # A directory with no usable contract id is itself a failure.
    if not contract_ids:
        failures.append("answer_contracts_empty")
    return summary, failures, warnings, contract_ids
|
||||||
|
|
||||||
|
|
||||||
def is_broad_patch_target(value: str) -> bool:
|
def is_broad_patch_target(value: str) -> bool:
|
||||||
normalized = value.strip().replace("\\", "/").lower()
|
normalized = value.strip().replace("\\", "/").lower()
|
||||||
broad_targets = {
|
broad_targets = {
|
||||||
|
|
@ -307,23 +378,161 @@ def check_detector_registry(
|
||||||
return summary, failures, warnings
|
return summary, failures, warnings
|
||||||
|
|
||||||
|
|
||||||
|
def scenario_pack_paths(orchestration_dir: Path) -> tuple[list[tuple[Path, dict[str, Any]]], list[str]]:
    """Find the domain scenario packs stored directly in ``orchestration_dir``.

    Every ``*.json`` file in the directory is parsed. A file counts as a
    scenario pack when its top level is an object whose ``schema_version``
    equals ``DOMAIN_SCENARIO_PACK_SCHEMA_VERSION``; other JSON files are
    ignored silently.

    Args:
        orchestration_dir: Directory to scan (not recursive).

    Returns:
        A ``(packs, warnings)`` tuple. ``packs`` is a list of
        ``(path, payload)`` pairs in sorted path order. ``warnings`` holds one
        entry per file that is not valid JSON, or a single entry when the
        directory does not exist.
    """
    if not orchestration_dir.exists():
        return [], ["domain_scenario_pack_dir_missing"]

    found: list[tuple[Path, dict[str, Any]]] = []
    scan_warnings: list[str] = []
    for candidate in sorted(orchestration_dir.glob("*.json")):
        try:
            document = read_json(candidate)
        except json.JSONDecodeError as error:
            # Unparseable files are reported but do not stop the scan.
            scan_warnings.append(f"domain_scenario_pack_scan_invalid_json:{display_path(candidate)}:{error.msg}")
        else:
            if not isinstance(document, dict):
                continue
            if document.get("schema_version") == DOMAIN_SCENARIO_PACK_SCHEMA_VERSION:
                found.append((candidate, document))
    return found, scan_warnings
|
||||||
|
|
||||||
|
|
||||||
|
def check_domain_scenario_packs(
    orchestration_dir: Path,
    known_contract_ids: set[str],
    detector_registry: dict[str, Any] | None = None,
) -> tuple[dict[str, Any], list[str], list[str]]:
    """Validate the domain scenario packs found in ``orchestration_dir``.

    Checks performed for every pack:

    * ``pack_id`` is present and unique across packs, ``domain`` is present,
      and ``scenarios`` is a non-empty list;
    * when ``source_contract_id`` is set, it names a known contract and the
      pack has a non-empty ``acceptance`` object and ``detectors_under_test``;
    * ``analysis_context.expected_business_answer_contract``, when set, names
      a known contract;
    * every detector under test exists in the registry (only when the registry
      supplies detector names);
    * every scenario has an id and a non-empty ``steps`` list, and every step
      has an id and a question;
    * in a contract-bound pack every step declares a contract, and any declared
      step contract is known;
    * every step tagged ``wrong_domain_trap`` has ``forbidden_answer_patterns``.

    Args:
        orchestration_dir: Directory scanned for scenario pack JSON files.
        known_contract_ids: Contract ids that packs and steps may reference.
        detector_registry: Parsed detector registry. Detector names are taken
            from the keys of its ``detectors`` mapping; when that is missing or
            not a dict, detector names are not validated.

    Returns:
        A ``(summary, failures, warnings)`` tuple. ``summary`` holds the pack
        ids and the pack/step counters; ``failures`` and ``warnings`` are
        lists of ``code:detail`` strings.
    """
    failures: list[str] = []
    warnings: list[str] = []
    packs, scan_warnings = scenario_pack_paths(orchestration_dir)
    warnings.extend(scan_warnings)

    known_detectors: set[str] = set()
    if isinstance(detector_registry, dict):
        detectors = detector_registry.get("detectors") if isinstance(detector_registry.get("detectors"), dict) else {}
        known_detectors = {str(name) for name in detectors}

    pack_ids: list[str] = []
    seen_pack_ids: set[str] = set()
    contract_bound_pack_count = 0
    step_count = 0
    contract_bound_step_count = 0
    wrong_domain_trap_step_count = 0
    for path, pack in packs:
        display = display_path(path)
        pack_id = str(pack.get("pack_id") or "").strip()
        if not pack_id:
            failures.append(f"domain_scenario_pack_missing_pack_id:{display}")
        else:
            # Two packs sharing an id cannot be told apart by anything that
            # refers to packs by id, so treat it as a failure (same rule as
            # duplicate contract ids).
            if pack_id in seen_pack_ids:
                failures.append(f"domain_scenario_pack_duplicate_pack_id:{display}:{pack_id}")
            seen_pack_ids.add(pack_id)
            pack_ids.append(pack_id)
        if not str(pack.get("domain") or "").strip():
            failures.append(f"domain_scenario_pack_missing_domain:{display}")
        scenarios = pack.get("scenarios")
        if not isinstance(scenarios, list) or not scenarios:
            failures.append(f"domain_scenario_pack_missing_scenarios:{display}")
            continue

        # NOTE(review): assumes normalize_string_list returns a list of strings
        # and an empty list for missing input — confirm against its definition.
        detectors_under_test = normalize_string_list(pack.get("detectors_under_test"))

        source_contract_id = str(pack.get("source_contract_id") or "").strip()
        if source_contract_id:
            # A pack bound to a contract must be complete enough to judge it.
            contract_bound_pack_count += 1
            if source_contract_id not in known_contract_ids:
                failures.append(f"domain_scenario_pack_unknown_source_contract:{display}:{source_contract_id}")
            if not isinstance(pack.get("acceptance"), dict) or not pack.get("acceptance"):
                failures.append(f"domain_scenario_pack_missing_acceptance:{display}:{source_contract_id}")
            if not detectors_under_test:
                failures.append(f"domain_scenario_pack_missing_detectors_under_test:{display}:{source_contract_id}")

        analysis_context = pack.get("analysis_context") if isinstance(pack.get("analysis_context"), dict) else {}
        expected_contract = str(analysis_context.get("expected_business_answer_contract") or "").strip()
        if expected_contract and expected_contract not in known_contract_ids:
            failures.append(f"domain_scenario_pack_unknown_analysis_contract:{display}:{expected_contract}")

        for detector_name in detectors_under_test:
            # Skipped entirely when the registry gave no detector names.
            if known_detectors and detector_name not in known_detectors:
                failures.append(f"domain_scenario_pack_unknown_detector:{display}:{detector_name}")

        for scenario in scenarios:
            if not isinstance(scenario, dict):
                failures.append(f"domain_scenario_pack_scenario_not_object:{display}")
                continue
            scenario_id = str(scenario.get("scenario_id") or "").strip()
            if not scenario_id:
                failures.append(f"domain_scenario_pack_scenario_missing_id:{display}")
            steps = scenario.get("steps")
            if not isinstance(steps, list) or not steps:
                failures.append(f"domain_scenario_pack_scenario_missing_steps:{display}:{scenario_id or 'unknown'}")
                continue
            for step in steps:
                # Counts every list entry, including malformed ones.
                step_count += 1
                if not isinstance(step, dict):
                    failures.append(f"domain_scenario_pack_step_not_object:{display}:{scenario_id or 'unknown'}")
                    continue
                step_id = str(step.get("step_id") or "").strip()
                if not step_id:
                    failures.append(f"domain_scenario_pack_step_missing_id:{display}:{scenario_id or 'unknown'}")
                if not str(step.get("question") or "").strip():
                    failures.append(f"domain_scenario_pack_step_missing_question:{display}:{scenario_id or 'unknown'}:{step_id or 'unknown'}")

                # Either key may carry the step's contract; the first non-empty wins.
                step_contract = str(
                    step.get("expected_business_answer_contract") or step.get("required_answer_contract") or ""
                ).strip()
                if source_contract_id:
                    if not step_contract:
                        failures.append(
                            f"domain_scenario_pack_step_missing_expected_contract:{display}:{scenario_id or 'unknown'}:{step_id or 'unknown'}"
                        )
                    else:
                        contract_bound_step_count += 1
                if step_contract and step_contract not in known_contract_ids:
                    failures.append(
                        f"domain_scenario_pack_step_unknown_contract:{display}:{scenario_id or 'unknown'}:{step_id or 'unknown'}:{step_contract}"
                    )

                tags = normalize_string_list(step.get("semantic_tags"))
                if "wrong_domain_trap" in tags:
                    wrong_domain_trap_step_count += 1
                    # A trap step without forbidden patterns cannot catch a leak.
                    if not normalize_string_list(step.get("forbidden_answer_patterns")):
                        failures.append(
                            f"domain_scenario_pack_wrong_domain_trap_missing_forbidden_patterns:{display}:{scenario_id or 'unknown'}:{step_id or 'unknown'}"
                        )

    return (
        {
            "path": display_path(orchestration_dir),
            "exists": orchestration_dir.exists(),
            "pack_count": len(packs),
            "pack_ids": sorted(pack_ids),
            "contract_bound_pack_count": contract_bound_pack_count,
            "step_count": step_count,
            "contract_bound_step_count": contract_bound_step_count,
            "wrong_domain_trap_step_count": wrong_domain_trap_step_count,
        },
        failures,
        warnings,
    )
|
||||||
|
|
||||||
|
|
||||||
def build_healthcheck() -> dict[str, Any]:
    """Run all contract health checks and combine them into one report.

    The checks run in a fixed order: schema files, answer contracts, issue
    catalog, detector registry, domain scenario packs. The contract ids
    collected by the answer-contract check and the raw detector registry
    payload are passed on to the scenario-pack check.

    Returns:
        A report dict with one summary per check, the concatenated
        ``failures`` and ``warnings`` lists, a UTC ``checked_at`` timestamp
        with second precision, and ``status`` set to ``"pass"`` only when no
        failure was recorded.
    """
    schema_summary, schema_failures = check_schema_files(SCHEMA_DIR)
    contracts_summary, contract_failures, contract_warnings, contract_ids = check_answer_contracts(CONTRACTS_DIR)
    catalog_summary, catalog_failures, catalog_warnings = check_issue_catalog(ISSUE_CATALOG_PATH)

    # Raw payloads are re-read for the checks that cross-reference them.
    catalog_payload = read_json_object_or_empty(ISSUE_CATALOG_PATH)
    registry_payload = read_json_object_or_empty(DETECTOR_REGISTRY_PATH)

    registry_summary, detector_failures, detector_warnings = check_detector_registry(
        DETECTOR_REGISTRY_PATH,
        catalog_payload,
    )
    packs_summary, domain_pack_failures, domain_pack_warnings = check_domain_scenario_packs(
        ORCHESTRATION_DIR,
        contract_ids,
        registry_payload,
    )

    all_failures = [
        *schema_failures,
        *contract_failures,
        *catalog_failures,
        *detector_failures,
        *domain_pack_failures,
    ]
    all_warnings = [
        *contract_warnings,
        *catalog_warnings,
        *detector_warnings,
        *domain_pack_warnings,
    ]

    # Warnings never affect the status; only failures do.
    if all_failures:
        status = "fail"
    else:
        status = "pass"

    report: dict[str, Any] = {
        "schema_version": "agent_reliability_contract_health_v1",
        "status": status,
        "checked_at": datetime.now(timezone.utc).replace(microsecond=0).isoformat(),
        "schema_files": schema_summary,
        "answer_contracts": contracts_summary,
        "issue_catalog": catalog_summary,
        "detector_registry": registry_summary,
        "domain_scenario_packs": packs_summary,
        "failures": all_failures,
        "warnings": all_warnings,
    }
    return report
|
||||||
|
|
|
||||||
|
|
@ -14,8 +14,19 @@ from typing import Any
|
||||||
from urllib.error import HTTPError, URLError
|
from urllib.error import HTTPError, URLError
|
||||||
from urllib.request import Request, urlopen
|
from urllib.request import Request, urlopen
|
||||||
|
|
||||||
import agent_runtime_manifest as runtime_manifest
|
try:
|
||||||
import agent_detector_runner
|
import agent_runtime_manifest as runtime_manifest
|
||||||
|
except ModuleNotFoundError as error:
|
||||||
|
if error.name != "agent_runtime_manifest":
|
||||||
|
raise
|
||||||
|
from scripts import agent_runtime_manifest as runtime_manifest
|
||||||
|
|
||||||
|
try:
|
||||||
|
import agent_detector_runner
|
||||||
|
except ModuleNotFoundError as error:
|
||||||
|
if error.name != "agent_detector_runner":
|
||||||
|
raise
|
||||||
|
from scripts import agent_detector_runner
|
||||||
|
|
||||||
REPO_ROOT = Path(__file__).resolve().parent.parent
|
REPO_ROOT = Path(__file__).resolve().parent.parent
|
||||||
DEFAULT_ARTIFACTS_ROOT = REPO_ROOT / "artifacts" / "domain_runs"
|
DEFAULT_ARTIFACTS_ROOT = REPO_ROOT / "artifacts" / "domain_runs"
|
||||||
|
|
@ -651,6 +662,22 @@ def merge_scenario_date_scope(
|
||||||
current = current_date_scope if isinstance(current_date_scope, dict) else None
|
current = current_date_scope if isinstance(current_date_scope, dict) else None
|
||||||
if not current:
|
if not current:
|
||||||
return previous or current_date_scope
|
return previous or current_date_scope
|
||||||
|
if previous and depends_on:
|
||||||
|
previous_source = str(previous.get("source") or "").strip()
|
||||||
|
current_source = str(current.get("source") or "").strip()
|
||||||
|
weak_current_sources = {
|
||||||
|
"current_analysis",
|
||||||
|
"active_domain_contract_default",
|
||||||
|
"domain_default",
|
||||||
|
"scenario_manifest_default",
|
||||||
|
}
|
||||||
|
stale_previous_sources = {"scenario_state_carryover"}
|
||||||
|
if current_source in weak_current_sources and previous_source not in stale_previous_sources:
|
||||||
|
merged = dict(current)
|
||||||
|
for key in ("as_of_date", "period_from", "period_to"):
|
||||||
|
if previous.get(key):
|
||||||
|
merged[key] = previous.get(key)
|
||||||
|
return merged
|
||||||
return current
|
return current
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -18,6 +18,11 @@ class AgentReliabilityContractHealthcheckTests(unittest.TestCase):
|
||||||
|
|
||||||
self.assertEqual(result["status"], "pass")
|
self.assertEqual(result["status"], "pass")
|
||||||
self.assertEqual(result["failures"], [])
|
self.assertEqual(result["failures"], [])
|
||||||
|
self.assertIn("margin_profitability_v1", result["answer_contracts"]["contract_ids"])
|
||||||
|
self.assertIn(
|
||||||
|
"agent_margin_profitability_reliability_20260524",
|
||||||
|
result["domain_scenario_packs"]["pack_ids"],
|
||||||
|
)
|
||||||
|
|
||||||
def test_issue_catalog_healthcheck_blocks_auto_coder_issue_without_contract(self) -> None:
|
def test_issue_catalog_healthcheck_blocks_auto_coder_issue_without_contract(self) -> None:
|
||||||
with tempfile.TemporaryDirectory() as tmp:
|
with tempfile.TemporaryDirectory() as tmp:
|
||||||
|
|
@ -216,6 +221,128 @@ class AgentReliabilityContractHealthcheckTests(unittest.TestCase):
|
||||||
failures,
|
failures,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def test_answer_contract_healthcheck_blocks_missing_required_fields(self) -> None:
    """Check that a contract with an empty ``required_fields`` list is reported.

    A single ``demo_contract.json`` whose ``answer_surface.required_fields``
    is ``[]`` is written into a temporary ``contracts`` directory and passed
    to ``health.check_answer_contracts``. The contract id must still be
    collected, and at least one failure must end with the contract file path.
    """
    with tempfile.TemporaryDirectory() as tmp:
        contracts_dir = Path(tmp) / "contracts"
        contracts_dir.mkdir()
        (contracts_dir / "demo_contract.json").write_text(
            json.dumps(
                {
                    "schema_version": "business_answer_contract_v1",
                    "contract_id": "demo_contract",
                    "domain": "demo",
                    "answer_surface": {"required_fields": []},
                    "detectors": ["demo_detector"],
                }
            ),
            encoding="utf-8",
        )

        _, failures, _, contract_ids = health.check_answer_contracts(contracts_dir)

        self.assertEqual(contract_ids, {"demo_contract"})
        # The failure text embeds a filesystem path; normalise the separator
        # so the suffix check holds on both Windows ("\\") and POSIX ("/")
        # instead of only matching the Windows rendering.
        self.assertTrue(
            any(
                failure.replace("\\", "/").endswith("contracts/demo_contract.json")
                for failure in failures
            ),
            failures,
        )
|
||||||
|
|
||||||
|
def test_domain_scenario_pack_blocks_unknown_source_contract(self) -> None:
    """Check that a pack referring to an unregistered contract is reported.

    The pack names ``missing_contract`` both as its ``source_contract_id``
    and as the step's expected contract, while only ``known_contract`` is
    passed as a known contract id. Two failures are expected: one for the
    pack-level reference and one for the step-level reference.
    """
    step = {
        "step_id": "step_01",
        "question": "Question?",
        "expected_business_answer_contract": "missing_contract",
    }
    pack = {
        "schema_version": "domain_scenario_pack_v1",
        "pack_id": "demo_pack",
        "domain": "demo",
        "source_contract_id": "missing_contract",
        "detectors_under_test": ["demo_detector"],
        "acceptance": {"min_score": 80},
        "scenarios": [{"scenario_id": "demo_scenario", "steps": [step]}],
    }
    known_contract_ids = {"known_contract"}
    registry = {"detectors": {"demo_detector": {}}}

    with tempfile.TemporaryDirectory() as tmp:
        orchestration_dir = Path(tmp)
        pack_path = orchestration_dir / "demo_pack.json"
        pack_path.write_text(json.dumps(pack), encoding="utf-8")

        _, failures, _ = health.check_domain_scenario_packs(
            orchestration_dir,
            known_contract_ids,
            registry,
        )

        def reported(prefix: str, suffix: str) -> bool:
            # True when some failure carries both the code prefix and the location suffix.
            for failure in failures:
                if failure.startswith(prefix) and failure.endswith(suffix):
                    return True
            return False

        self.assertTrue(
            reported(
                "domain_scenario_pack_unknown_source_contract:",
                "demo_pack.json:missing_contract",
            ),
            failures,
        )
        self.assertTrue(
            reported(
                "domain_scenario_pack_step_unknown_contract:",
                "demo_pack.json:demo_scenario:step_01:missing_contract",
            ),
            failures,
        )
|
||||||
|
|
||||||
|
def test_domain_scenario_pack_blocks_wrong_domain_trap_without_forbidden_patterns(self) -> None:
    """Check that a wrong-domain trap step lacking forbidden patterns is reported.

    The only step is tagged ``wrong_domain_trap`` and carries nothing beyond
    its id, question, expected contract and tags. Its contract and detector
    are both passed in as known, and the healthcheck must still emit a
    ``domain_scenario_pack_wrong_domain_trap_missing_forbidden_patterns``
    failure that points at that step.
    """
    trap_step = {
        "step_id": "step_01",
        "question": "Question?",
        "expected_business_answer_contract": "demo_contract",
        "semantic_tags": ["wrong_domain_trap"],
    }
    pack = {
        "schema_version": "domain_scenario_pack_v1",
        "pack_id": "demo_pack",
        "domain": "demo",
        "source_contract_id": "demo_contract",
        "detectors_under_test": ["demo_detector"],
        "acceptance": {"min_score": 80},
        "scenarios": [{"scenario_id": "demo_scenario", "steps": [trap_step]}],
    }
    expected_prefix = "domain_scenario_pack_wrong_domain_trap_missing_forbidden_patterns:"
    expected_suffix = "demo_pack.json:demo_scenario:step_01"

    with tempfile.TemporaryDirectory() as tmp:
        orchestration_dir = Path(tmp)
        pack_path = orchestration_dir / "demo_pack.json"
        pack_path.write_text(json.dumps(pack), encoding="utf-8")

        _, failures, _ = health.check_domain_scenario_packs(
            orchestration_dir,
            {"demo_contract"},
            {"detectors": {"demo_detector": {}}},
        )

        matching = [
            failure
            for failure in failures
            if failure.startswith(expected_prefix) and failure.endswith(expected_suffix)
        ]
        self.assertTrue(bool(matching), failures)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue