From 34e854f03070a5c2b487115637526ca564023ad5 Mon Sep 17 00:00:00 2001
From: dctouch <support@dctouch.ru>
Date: Sun, 24 May 2026 14:17:16 +0300
Subject: [PATCH] =?UTF-8?q?=D0=94=D0=BE=D0=B1=D0=B0=D0=B2=D0=B8=D1=82?=
 =?UTF-8?q?=D1=8C=20=D1=80=D0=B5=D0=B5=D1=81=D1=82=D1=80=20=D0=B4=D0=B5?=
 =?UTF-8?q?=D1=82=D0=B5=D0=BA=D1=82=D0=BE=D1=80=D0=BE=D0=B2=20=D0=BD=D0=B0?=
 =?UTF-8?q?=D0=B4=D0=B5=D0=B6=D0=BD=D0=BE=D1=81=D1=82=D0=B8=20=D0=B0=D0=B3?=
 =?UTF-8?q?=D0=B5=D0=BD=D1=82=D0=B0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 docs/orchestration/detector_registry.json     | 204 ++++++++++++++++++
 .../agent_detector_registry.schema.json       |  59 +++++
 .../agent_reliability_contract_healthcheck.py | 124 ++++++++++-
 ..._agent_reliability_contract_healthcheck.py |  86 ++++++++
 4 files changed, 471 insertions(+), 2 deletions(-)
 create mode 100644 docs/orchestration/detector_registry.json
 create mode 100644 docs/orchestration/schemas/agent_detector_registry.schema.json

diff --git a/docs/orchestration/detector_registry.json b/docs/orchestration/detector_registry.json
new file mode 100644
index 0000000..313435d
--- /dev/null
+++ b/docs/orchestration/detector_registry.json
@@ -0,0 +1,204 @@
+{
+  "schema_version": "agent_detector_registry_v1",
+  "updated_at": "2026-05-24",
+  "purpose": "Machine-readable registry for detector names referenced by issue_catalog and business answer contracts. This keeps manual semantic findings on the path to repeatable replay/eval checks.",
+  "detectors": {
+    "missing_effective_runtime_json": {
+      "kind": "artifact_presence",
+      "automation_level": "automatic",
+      "description": "Run artifact directory must contain effective_runtime.json before a replay can be accepted or saved as evidence.",
+      "issue_codes": ["runtime_manifest_missing"],
+      "inputs": ["artifacts/domain_runs/<run_id>/effective_runtime.json"],
+      "check": {
+        "required_files": ["effective_runtime.json"]
+      }
+    },
+    "default_prompt_version_missing_files": {
+      "kind": "prompt_registry_healthcheck",
+      "automation_level": "automatic",
+      "description": "DEFAULT_PROMPT_VERSION points to prompt files that are missing from the prompt registry.",
+      "issue_codes": ["prompt_registry_opaque"],
+      "inputs": ["llm_normalizer/backend/src/services/promptBuilder.ts", "llm_normalizer/data/presets/*.json"],
+      "check": {
+        "command": "python scripts/prompt_registry_healthcheck.py"
+      }
+    },
+    "silent_prompt_fallback": {
+      "kind": "prompt_registry_healthcheck",
+      "automation_level": "automatic",
+      "description": "Runtime used a fallback prompt without explicit source/hash metadata in artifacts.",
+      "issue_codes": ["prompt_registry_opaque"],
+      "inputs": ["effective_runtime.json", "prompt registry"],
+      "check": {
+        "manifest_fields": ["prompt_source", "prompt_hash"],
+        "forbidden_prompt_sources": ["fallback", "unknown"]
+      }
+    },
+    "preset_version_mismatch": {
+      "kind": "prompt_registry_healthcheck",
+      "automation_level": "automatic",
+      "description": "Active preset prompt version does not match the runtime prompt version used by the replay.",
+      "issue_codes": ["prompt_registry_opaque"],
+      "inputs": ["shared_llm_connection.json", "effective_runtime.json", "prompt presets"],
+      "check": {
+        "compare_fields": ["prompt_version"]
+      }
+    },
+    "forbidden_margin_terms": {
+      "kind": "answer_text_regex_forbidden",
+      "automation_level": "automatic",
+      "description": "Margin answer contains wrong-domain terms such as fixed assets, amortization, bank, payment, or settlement vocabulary.",
+      "issue_codes": ["margin_domain_leak_accounting_route"],
+      "inputs": ["steps/<step_id>/output.md", "steps/<step_id>/turn.json"],
+      "check": {
+        "forbidden_patterns": [
+          "(?i)(амортизац|объект\\s+ОС|основн(ые|ых)?\\s+средств|payment_document|settlement|банк|оплат[аы])"
+        ]
+      }
+    },
+    "missing_revenue_cogs_margin_fields": {
+      "kind": "answer_text_required_any",
+      "automation_level": "semi_automatic",
+      "description": "Margin answer does not mention revenue, COGS/cost, gross profit, margin, or an honest limitation around those fields.",
+      "issue_codes": ["margin_domain_leak_accounting_route"],
+      "inputs": ["steps/<step_id>/output.md"],
+      "check": {
+        "required_patterns_any": [
+          "(?i)(выруч|себестоим|валов|марж|не хватает|не могу подтвердить|не подтвержден|unknown)"
+        ]
+      }
+    },
+    "wrong_capability_family": {
+      "kind": "trace_value_guard",
+      "automation_level": "semi_automatic",
+      "description": "Trace/capability family for a margin question points to another accounting family instead of margin/profitability/inventory evidence.",
+      "issue_codes": ["margin_domain_leak_accounting_route"],
+      "inputs": ["steps/<step_id>/turn.json"],
+      "check": {
+        "forbidden_trace_markers": ["fixed_asset", "amortization", "payment_document", "settlement"]
+      }
+    },
+    "margin_domain_leak_accounting_route": {
+      "kind": "composite_detector",
+      "automation_level": "semi_automatic",
+      "description": "Composite margin-domain detector used by margin_profitability_v1 contract to group wrong-domain route leaks.",
+      "issue_codes": ["margin_domain_leak_accounting_route"],
+      "inputs": ["steps/<step_id>/output.md", "steps/<step_id>/turn.json"],
+      "check": {
+        "uses_detectors": ["forbidden_margin_terms", "wrong_capability_family"]
+      }
+    },
+    "margin_required_fields_missing": {
+      "kind": "contract_field_detector",
+      "automation_level": "semi_automatic",
+      "description": "Margin answer misses required revenue/COGS/gross profit/margin fields or honest unknowns.",
+      "issue_codes": ["margin_domain_leak_accounting_route"],
+      "inputs": ["steps/<step_id>/output.md", "docs/orchestration/contracts/margin_profitability_v1.json"],
+      "check": {
+        "uses_detectors": ["missing_revenue_cogs_margin_fields"]
+      }
+    },
+    "margin_next_action_missing": {
+      "kind": "limited_answer_next_action",
+      "automation_level": "semi_automatic",
+      "description": "Limited margin answer does not propose the next verifiable action.",
+      "issue_codes": ["business_next_step_missing", "margin_domain_leak_accounting_route"],
+      "inputs": ["steps/<step_id>/output.md"],
+      "check": {
+        "uses_detectors": ["limited_answer_without_next_action"]
+      }
+    },
+    "margin_payment_document_false_source": {
+      "kind": "answer_text_regex_forbidden",
+      "automation_level": "automatic",
+      "description": "Margin answer treats payment/bank documents as the source for margin calculation.",
+      "issue_codes": ["margin_domain_leak_accounting_route"],
+      "inputs": ["steps/<step_id>/output.md"],
+      "check": {
+        "forbidden_patterns": ["(?i)(payment_document|банковск|плат[её]ж|оплат[аы]).{0,80}(марж|себестоим|валов)"]
+      }
+    },
+    "margin_os_amortization_leak": {
+      "kind": "answer_text_regex_forbidden",
+      "automation_level": "automatic",
+      "description": "Margin answer leaks fixed-assets or amortization language.",
+      "issue_codes": ["margin_domain_leak_accounting_route"],
+      "inputs": ["steps/<step_id>/output.md"],
+      "check": {
+        "forbidden_patterns": ["(?i)(амортизац|объект\\s+ОС|основн(ые|ых)?\\s+средств)"]
+      }
+    },
+    "first_line_not_direct_answer": {
+      "kind": "answer_text_shape",
+      "automation_level": "semi_automatic",
+      "description": "The first meaningful line is not a direct business answer for a direct user question.",
+      "issue_codes": ["business_direct_answer_missing"],
+      "inputs": ["steps/<step_id>/output.md"],
+      "check": {
+        "first_line_should_be": "business_answer_or_honest_boundary"
+      }
+    },
+    "top_level_scaffold_before_answer": {
+      "kind": "answer_text_regex_forbidden_in_prefix",
+      "automation_level": "automatic",
+      "description": "Answer starts with scaffold/service narration before the user-facing business conclusion.",
+      "issue_codes": ["business_direct_answer_missing"],
+      "inputs": ["steps/<step_id>/output.md"],
+      "check": {
+        "prefix_line_count": 3,
+        "forbidden_patterns": ["(?i)(для ответа|сначала|я проверю|я посмотрю|route|debug|capability)"]
+      }
+    },
+    "runtime_tokens_in_user_answer": {
+      "kind": "answer_text_regex_forbidden",
+      "automation_level": "automatic",
+      "description": "Final user-facing answer contains runtime/debug/service tokens.",
+      "issue_codes": ["technical_garbage_in_answer"],
+      "inputs": ["steps/<step_id>/output.md"],
+      "check": {
+        "forbidden_patterns": ["(?i)(route_id|capability_id|runtime_|snapshot_items|debug|answer_object|selected_object)"]
+      }
+    },
+    "capability_ids_in_user_answer": {
+      "kind": "answer_text_regex_forbidden",
+      "automation_level": "automatic",
+      "description": "Final user-facing answer contains capability ids or route ids.",
+      "issue_codes": ["technical_garbage_in_answer"],
+      "inputs": ["steps/<step_id>/output.md"],
+      "check": {
+        "forbidden_patterns": ["(?i)(capability[_ -]?id|route[_ -]?id|address\\.[a-z0-9_\\.]+)"]
+      }
+    },
+    "required_contract_fields_missing": {
+      "kind": "contract_field_detector",
+      "automation_level": "semi_automatic",
+      "description": "Answer does not satisfy required fields from the expected business answer contract.",
+      "issue_codes": ["accounting_contract_missing"],
+      "inputs": ["steps/<step_id>/output.md", "docs/orchestration/contracts/<contract_id>.json"],
+      "check": {
+        "contract_field": "answer_surface.required_fields"
+      }
+    },
+    "limited_answer_without_next_action": {
+      "kind": "answer_text_required_when_limited",
+      "automation_level": "semi_automatic",
+      "description": "Answer states a limitation but gives no concrete next action for recovering or narrowing the answer.",
+      "issue_codes": ["business_next_step_missing"],
+      "inputs": ["steps/<step_id>/output.md"],
+      "check": {
+        "limited_patterns": ["(?i)(не могу|не хватает|не подтвержден|нет данных|недостаточно)"],
+        "required_next_action_patterns_any": ["(?i)(можно|следующ|уточн|перезапусти|проверь|выбери|нужен|добавь)"]
+      }
+    },
+    "route_candidate_needs_enablement": {
+      "kind": "stage_review_signal",
+      "automation_level": "semi_automatic",
+      "description": "Stage review produced a route candidate that still needs runtime capability enablement.",
+      "issue_codes": ["route_candidate_enablement_gap"],
+      "inputs": ["run_review.json", "repair_targets.json"],
+      "check": {
+        "target_status": "needs_route_enablement"
+      }
+    }
+  }
+}
diff --git a/docs/orchestration/schemas/agent_detector_registry.schema.json b/docs/orchestration/schemas/agent_detector_registry.schema.json
new file mode 100644
index 0000000..d13f129
--- /dev/null
+++ b/docs/orchestration/schemas/agent_detector_registry.schema.json
@@ -0,0 +1,59 @@
+{
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "title": "Agent Detector Registry",
+  "type": "object",
+  "additionalProperties": true,
+  "required": ["schema_version", "detectors"],
+  "properties": {
+    "schema_version": {
+      "const": "agent_detector_registry_v1"
+    },
+    "updated_at": {
+      "type": "string"
+    },
+    "purpose": {
+      "type": "string"
+    },
+    "detectors": {
+      "type": "object",
+      "minProperties": 1,
+      "additionalProperties": {
+        "type": "object",
+        "additionalProperties": true,
+        "required": ["kind", "automation_level", "description", "issue_codes", "inputs", "check"],
+        "properties": {
+          "kind": {
+            "type": "string",
+            "minLength": 1
+          },
+          "automation_level": {
+            "type": "string",
+            "enum": ["automatic", "semi_automatic", "manual_review"]
+          },
+          "description": {
+            "type": "string",
+            "minLength": 1
+          },
+          "issue_codes": {
+            "type": "array",
+            "items": {
+              "type": "string"
+            },
+            "minItems": 1
+          },
+          "inputs": {
+            "type": "array",
+            "items": {
+              "type": "string"
+            },
+            "minItems": 1
+          },
+          "check": {
+            "type": "object",
+            "additionalProperties": true
+          }
+        }
+      }
+    }
+  }
+}
diff --git a/scripts/agent_reliability_contract_healthcheck.py b/scripts/agent_reliability_contract_healthcheck.py
index 28b73b1..d432622 100644
--- a/scripts/agent_reliability_contract_healthcheck.py
+++ b/scripts/agent_reliability_contract_healthcheck.py
@@ -11,8 +11,11 @@ from typing import Any
 REPO_ROOT = Path(__file__).resolve().parent.parent
 SCHEMA_DIR = REPO_ROOT / "docs" / "orchestration" / "schemas"
 ISSUE_CATALOG_PATH = REPO_ROOT / "docs" / "orchestration" / "issue_catalog.json"
+DETECTOR_REGISTRY_PATH = REPO_ROOT / "docs" / "orchestration" / "detector_registry.json"
+CONTRACTS_DIR = REPO_ROOT / "docs" / "orchestration" / "contracts"
 EXPECTED_SCHEMA_FILES = {
     "agent_issue_catalog.schema.json": "Agent Issue Catalog",
+    "agent_detector_registry.schema.json": "Agent Detector Registry",
     "auto_coder_gate.schema.json": "Auto-Coder Gate",
     "business_audit_contract.schema.json": "Business Audit Contract",
     "domain_loop_lead_coder_handoff.schema.json": "Domain Loop Lead Coder Handoff",
@@ -68,6 +71,43 @@ def has_answer_contract(issue: dict[str, Any]) -> bool:
     return bool(normalize_string_list(acceptance.get("must_have")) or normalize_string_list(acceptance.get("must_not_have")))
 
 
+def read_json_object_or_empty(path: Path) -> dict[str, Any]:
+    try:  # best-effort read: callers receive {} instead of an exception
+        payload = read_json(path)
+    except (json.JSONDecodeError, OSError):  # malformed JSON or an OS-level read error
+        return {}
+    return payload if isinstance(payload, dict) else {}  # non-object JSON (list, string, ...) also yields {}
+
+
+def collect_issue_detector_links(issue_catalog: dict[str, Any]) -> dict[str, set[str]]:
+    """Map each detector name to the set of issue codes whose catalog entry lists it."""
+    raw_issues = issue_catalog.get("issues")
+    codes_by_detector: dict[str, set[str]] = {}
+    for code, entry in (raw_issues if isinstance(raw_issues, dict) else {}).items():
+        if isinstance(entry, dict):
+            for detector_name in normalize_string_list(entry.get("detectors")):
+                codes_by_detector.setdefault(detector_name, set()).add(str(code))
+    return codes_by_detector
+
+
+def collect_contract_detector_refs(contracts_dir: Path) -> tuple[dict[str, list[str]], list[str]]:
+    """Map detector names to the contract files in *contracts_dir* that list them, plus scan warnings."""
+    refs_by_detector: dict[str, list[str]] = {}
+    scan_warnings: list[str] = []
+    contract_files = sorted(contracts_dir.glob("*.json")) if contracts_dir.exists() else []
+    for contract_file in contract_files:
+        try:
+            contract = read_json(contract_file)
+        except json.JSONDecodeError as error:
+            scan_warnings.append(f"contract_detector_scan_invalid_json:{display_path(contract_file)}:{error.msg}")
+            continue
+        # Only the top-level "detectors" list of a contract object is scanned.
+        if isinstance(contract, dict):
+            for detector_name in normalize_string_list(contract.get("detectors")):
+                refs_by_detector.setdefault(detector_name, []).append(display_path(contract_file))
+    return refs_by_detector, scan_warnings
+
+
 def is_broad_patch_target(value: str) -> bool:
     normalized = value.strip().replace("\\", "/").lower()
     broad_targets = {
@@ -192,17 +232,97 @@ def check_issue_catalog(path: Path) -> tuple[dict[str, Any], list[str], list[str
     return summary, failures, warnings
 
 
+def check_detector_registry(
+    path: Path,
+    issue_catalog: dict[str, Any] | None = None,
+    *,
+    include_contracts: bool = True,
+) -> tuple[dict[str, Any], list[str], list[str]]:
+    """Validate the detector registry at *path* against the issue catalog and contract files.
+
+    Returns ``(summary, failures, warnings)``; contracts are scanned only when *include_contracts* is true.
+    """
+    failures: list[str] = []
+    warnings: list[str] = []
+    if not path.exists():
+        return {"path": display_path(path), "exists": False}, ["missing_detector_registry"], warnings
+    try:
+        payload = read_json(path)
+    except json.JSONDecodeError as error:
+        return {"path": display_path(path), "exists": True}, [f"invalid_detector_registry_json:{error.msg}"], warnings
+    payload = payload if isinstance(payload, dict) else {}  # non-object JSON must fail the checks, not crash
+    detectors = payload.get("detectors") if isinstance(payload.get("detectors"), dict) else {}
+    catalog = issue_catalog if isinstance(issue_catalog, dict) else {}
+    issues = catalog.get("issues") if isinstance(catalog.get("issues"), dict) else {}
+    known_issue_codes = {str(issue_code) for issue_code in issues}
+    detector_links = collect_issue_detector_links(catalog)
+    contract_refs, contract_warnings = collect_contract_detector_refs(CONTRACTS_DIR) if include_contracts else ({}, [])
+    warnings.extend(contract_warnings)
+    summary = {
+        "path": display_path(path),
+        "exists": True,
+        "schema_version": payload.get("schema_version"),
+        "detector_count": len(detectors),
+        "catalog_referenced_detector_count": len(detector_links),
+        "contract_referenced_detector_count": len(contract_refs),
+    }
+    if payload.get("schema_version") != "agent_detector_registry_v1":
+        failures.append("detector_registry_schema_version_mismatch")
+    if not detectors:
+        failures.append("detector_registry_empty")
+
+    for detector_name in sorted(detector_links.keys() - detectors.keys()):  # catalog detectors not registered
+        for issue_code in sorted(detector_links[detector_name]):
+            failures.append(f"detector_registry_missing_catalog_detector:{issue_code}:{detector_name}")
+    for detector_name in sorted(contract_refs.keys() - detectors.keys()):  # contract detectors not registered
+        for contract_path in contract_refs[detector_name]:
+            failures.append(f"detector_registry_missing_contract_detector:{contract_path}:{detector_name}")
+
+    for detector_name, detector in sorted(detectors.items()):
+        if not isinstance(detector, dict):
+            failures.append(f"detector_registry_detector_not_object:{detector_name}")
+            continue
+        for field_name in ("kind", "automation_level", "description"):
+            if not str(detector.get(field_name) or "").strip():
+                failures.append(f"detector_registry_missing_field:{detector_name}:{field_name}")
+        issue_codes = normalize_string_list(detector.get("issue_codes"))
+        inputs = normalize_string_list(detector.get("inputs"))
+        check = detector.get("check")
+        if not issue_codes:
+            failures.append(f"detector_registry_empty_issue_codes:{detector_name}")
+        if not inputs:
+            failures.append(f"detector_registry_empty_inputs:{detector_name}")
+        if not isinstance(check, dict) or not check:
+            failures.append(f"detector_registry_empty_check:{detector_name}")
+        for issue_code in issue_codes:
+            if known_issue_codes and issue_code not in known_issue_codes:  # skipped when no catalog issues exist
+                failures.append(f"detector_registry_unknown_issue_code:{detector_name}:{issue_code}")
+        for issue_code in sorted(detector_links.get(detector_name, set())):
+            if issue_code not in issue_codes:
+                failures.append(f"detector_registry_missing_issue_link:{detector_name}:{issue_code}")
+
+    if not issues:
+        warnings.append("detector_registry_issue_catalog_unavailable")
+    return summary, failures, warnings
+
+
 def build_healthcheck() -> dict[str, Any]:
     schema_files, schema_failures = check_schema_files(SCHEMA_DIR)
     issue_catalog, catalog_failures, catalog_warnings = check_issue_catalog(ISSUE_CATALOG_PATH)
-    failures = schema_failures + catalog_failures
-    warnings = catalog_warnings
+    issue_catalog_payload = read_json_object_or_empty(ISSUE_CATALOG_PATH)  # raw catalog for cross-checks
+    detector_registry, detector_failures, detector_warnings = check_detector_registry(
+        DETECTOR_REGISTRY_PATH,
+        issue_catalog_payload,
+    )
+    failures = schema_failures + catalog_failures + detector_failures  # any entry makes status "fail"
+    warnings = catalog_warnings + detector_warnings
     return {
         "schema_version": "agent_reliability_contract_health_v1",
         "status": "pass" if not failures else "fail",
         "checked_at": datetime.now(timezone.utc).replace(microsecond=0).isoformat(),
         "schema_files": schema_files,
         "issue_catalog": issue_catalog,
+        "detector_registry": detector_registry,
         "failures": failures,
         "warnings": warnings,
     }
diff --git a/scripts/test_agent_reliability_contract_healthcheck.py b/scripts/test_agent_reliability_contract_healthcheck.py
index 73dc212..4b791ca 100644
--- a/scripts/test_agent_reliability_contract_healthcheck.py
+++ b/scripts/test_agent_reliability_contract_healthcheck.py
@@ -130,6 +130,92 @@ class AgentReliabilityContractHealthcheckTests(unittest.TestCase):
             failures,
         )
 
+    def test_detector_registry_blocks_missing_catalog_detector(self) -> None:
+        """A detector named by the issue catalog but absent from the registry is a failure."""
+        registry = {
+            "schema_version": "agent_detector_registry_v1",
+            "detectors": {
+                "known_detector": {
+                    "kind": "answer_text_shape",
+                    "automation_level": "semi_automatic",
+                    "description": "Known detector.",
+                    "issue_codes": ["business_direct_answer_missing"],
+                    "inputs": ["output.md"],
+                    "check": {"first_line_should_be": "business_answer"},
+                }
+            },
+        }
+
+        # The catalog links the issue to a detector name the registry does not define.
+        issue_catalog = {
+            "schema_version": "agent_issue_catalog_v1",
+            "issues": {
+                "business_direct_answer_missing": {
+                    "detectors": ["missing_detector"],
+                }
+            },
+        }
+        expected_failure = (
+            "detector_registry_missing_catalog_detector:business_direct_answer_missing:missing_detector"
+        )
+
+        with tempfile.TemporaryDirectory() as tmp:
+            registry_path = Path(tmp) / "detector_registry.json"
+            registry_path.write_text(json.dumps(registry), encoding="utf-8")
+            # include_contracts=False skips the CONTRACTS_DIR scan, isolating the catalog check.
+            _, failures, _ = health.check_detector_registry(
+                registry_path,
+                issue_catalog,
+                include_contracts=False,
+            )
+
+        self.assertIn(expected_failure, failures)
+
+    def test_detector_registry_blocks_unknown_issue_link(self) -> None:
+        """Issue links must agree in both directions between registry and catalog.
+
+        An issue code unknown to the catalog and a catalog link missing from the registry both fail.
+        """
+        registry = {
+            "schema_version": "agent_detector_registry_v1",
+            "detectors": {
+                "first_line_not_direct_answer": {
+                    "kind": "answer_text_shape",
+                    "automation_level": "semi_automatic",
+                    "description": "Direct answer detector.",
+                    "issue_codes": ["unknown_issue_code"],
+                    "inputs": ["output.md"],
+                    "check": {"first_line_should_be": "business_answer"},
+                }
+            },
+        }
+
+        # The catalog links the detector to an issue the registry entry does not list.
+        issue_catalog = {
+            "schema_version": "agent_issue_catalog_v1",
+            "issues": {
+                "business_direct_answer_missing": {
+                    "detectors": ["first_line_not_direct_answer"],
+                }
+            },
+        }
+
+        with tempfile.TemporaryDirectory() as tmp:
+            registry_path = Path(tmp) / "detector_registry.json"
+            registry_path.write_text(json.dumps(registry), encoding="utf-8")
+            # include_contracts=False skips the CONTRACTS_DIR scan, isolating the catalog checks.
+            _, failures, _ = health.check_detector_registry(
+                registry_path,
+                issue_catalog,
+                include_contracts=False,
+            )
+
+        for expected_failure in (
+            "detector_registry_unknown_issue_code:first_line_not_direct_answer:unknown_issue_code",
+            "detector_registry_missing_issue_link:first_line_not_direct_answer:business_direct_answer_missing",
+        ):
+            self.assertIn(expected_failure, failures)
+
 
 if __name__ == "__main__":
     unittest.main()