# NODEDC_1C/canonical_layer/risk.py
#
# 308 lines
# 11 KiB
# Python

from __future__ import annotations
from collections import defaultdict
from dataclasses import dataclass
from typing import Any
from canonical_layer.store import CanonicalStore
from config.settings import OneCSettings, load_settings
# Anomaly signal type -> risk domain it contributes to.  Signal types that are
# not listed here are looked up with a "miscellaneous" fallback by the service.
SIGNAL_TO_DOMAIN = {
    # freshness of the refresh pipeline
    "stale_refresh": "operational_freshness",
    "missing_refresh_baseline": "operational_freshness",
    "no_canonical_data": "operational_freshness",
    # link graph
    "high_link_degree": "suspicious_link_hub",
    # entity counts
    "entity_count_drift": "structural_drift",
    # display fields
    "empty_display_share_high": "data_quality",
}

# Relative weight of each domain when domain scores are averaged into the
# global score.  The consumer divides by the sum of the weights actually
# present, so these values do not have to add up to 1.0.
DOMAIN_WEIGHTS = {
    "operational_freshness": 0.35,
    "suspicious_link_hub": 0.25,
    "structural_drift": 0.25,
    "data_quality": 0.15,
    "miscellaneous": 0.20,
}

# Risk domain -> pattern key written to the store.  Every key is the domain
# name followed by "_risk", so the table is derived from the known domains
# (same domains, same order as DOMAIN_WEIGHTS).
DOMAIN_PATTERN_KEY = {domain: f"{domain}_risk" for domain in DOMAIN_WEIGHTS}

# Baseline score contributed by an anomaly's severity label.
SEVERITY_BASE = {
    "low": 0.25,
    "medium": 0.50,
    "high": 0.80,
    "critical": 0.95,
}
@dataclass
class RiskEngineResult:
run_id: str
status: str
source_feature_run_id: str | None
active_anomalies_scanned: int
patterns_written: int
global_score: float
error_message: str | None = None
def to_dict(self) -> dict[str, Any]:
return {
"run_id": self.run_id,
"status": self.status,
"source_feature_run_id": self.source_feature_run_id,
"active_anomalies_scanned": self.active_anomalies_scanned,
"patterns_written": self.patterns_written,
"global_score": self.global_score,
"error_message": self.error_message,
}
class RiskService:
    """Build risk patterns from the anomaly signals of a feature run.

    A run groups the anomaly signals by risk domain, scores every domain,
    folds the domain scores into one global score, and records both the
    patterns and the run outcome through the canonical store.
    """

    def __init__(self, *, settings: OneCSettings, store: CanonicalStore) -> None:
        """Keep the collaborators and call ``store.ensure_created()``.

        Args:
            settings: Configuration; this class reads ``risk_high_threshold``,
                ``risk_medium_threshold`` and ``risk_anomaly_scan_limit``.
            store: Store used for every read and write of this service.
        """
        self.settings = settings
        self.store = store
        self.store.ensure_created()

    @classmethod
    def build(cls) -> "RiskService":
        """Create a service from ``load_settings()`` and a store opened on
        ``settings.canonical_db_url``."""
        settings = load_settings()
        return cls(settings=settings, store=CanonicalStore(settings.canonical_db_url))

    def _severity_from_score(self, score: float) -> str:
        """Translate a score into "high", "medium" or "low".

        Thresholds come from the settings and are inclusive.  This method
        never produces "critical".
        """
        if score >= self.settings.risk_high_threshold:
            return "high"
        if score >= self.settings.risk_medium_threshold:
            return "medium"
        return "low"

    @staticmethod
    def _coerce_score(value: Any, default: float = 0.0) -> float:
        """Convert ``value`` to ``float``, falling back to ``default``.

        ``dict.get(key, fallback)`` only applies its fallback when the key is
        absent, so a stored ``None`` (or a non-numeric value) used to reach
        ``float()`` and abort the whole run.  Such values now count as
        ``default`` instead.
        """
        if value is None:
            return default
        try:
            return float(value)
        except (TypeError, ValueError, OverflowError):
            return default

    @staticmethod
    def _normalize_score(raw_score: float) -> float:
        """Map a raw anomaly score into ``[0.0, 1.0]``.

        Negative values become 0.0, values up to 1.0 pass through, values
        between 1.0 and 3.0 are divided by 3.0, and values of 3.0 or more
        saturate at 1.0.
        """
        # NOTE(review): the mapping is not monotonic around 1.0 (1.0 -> 1.0 but
        # 1.5 -> 0.5).  Presumably scores above 1 live on a 0-3 scale while the
        # others are already normalized -- confirm with the signal producers.
        safe = max(0.0, raw_score)
        if safe <= 1.0:
            return safe
        if safe >= 3.0:
            return 1.0
        return safe / 3.0

    def _signal_score(self, anomaly: dict[str, Any]) -> float:
        """Score one anomaly: 45 % severity baseline plus 55 % normalized raw
        score, capped at 1.0.

        Unknown severity labels use a baseline of 0.50; a missing or unusable
        raw score counts as 0.0.
        """
        severity = str(anomaly.get("severity", "medium")).lower()
        base = SEVERITY_BASE.get(severity, 0.50)
        raw = self._normalize_score(self._coerce_score(anomaly.get("score")))
        return min(1.0, (0.45 * base) + (0.55 * raw))

    def _build_domain_pattern(self, domain: str, anomalies: list[dict[str, Any]]) -> dict[str, Any]:
        """Build the risk pattern for one domain.

        The domain score is 60 % of the strongest signal score plus 40 % of
        the average signal score, plus a density bonus of 0.03 per additional
        anomaly (at most 0.30), capped at 1.0.  Confidence starts at 0.55 and
        grows by 0.05 per anomaly up to 0.95.

        Args:
            domain: Risk domain name; also stored as ``scope_id``.
            anomalies: Anomaly signal dicts assigned to this domain.

        Returns:
            A pattern dict with ``pattern_key``, ``severity``, ``scope``,
            ``scope_id``, ``score``, ``confidence`` and ``details``.
        """
        count = len(anomalies)
        signal_scores = [self._signal_score(item) for item in anomalies]
        if signal_scores:
            max_score = max(signal_scores)
            average_score = sum(signal_scores) / len(signal_scores)
        else:
            max_score = average_score = 0.0

        density_bonus = min(0.30, max(0, count - 1) * 0.03)
        domain_score = min(1.0, (0.60 * max_score) + (0.40 * average_score) + density_bonus)
        confidence = min(1.0, 0.55 + min(0.40, count * 0.05))

        signal_counts: dict[str, int] = {}
        for item in anomalies:
            signal_type = str(item.get("signal_type", "unknown"))
            signal_counts[signal_type] = signal_counts.get(signal_type, 0) + 1

        # The five anomalies with the highest raw score (stable for ties) are
        # kept as illustrative examples.
        top_examples = sorted(
            anomalies,
            key=lambda item: self._coerce_score(item.get("score")),
            reverse=True,
        )[:5]

        return {
            "pattern_key": DOMAIN_PATTERN_KEY.get(domain, "miscellaneous_risk"),
            "severity": self._severity_from_score(domain_score),
            "scope": "domain",
            "scope_id": domain,
            "score": round(domain_score, 6),
            "confidence": round(confidence, 6),
            "details": {
                "anomalies_count": count,
                # Keys are unique, so sorting the items orders them by signal type.
                "signal_types": dict(sorted(signal_counts.items())),
                "top_examples": [
                    {
                        "signal_type": item.get("signal_type"),
                        "severity": item.get("severity"),
                        "scope": item.get("scope"),
                        "scope_id": item.get("scope_id"),
                        "score": item.get("score"),
                        "details": item.get("details", {}),
                    }
                    for item in top_examples
                ],
            },
        }

    def _compute_global_score(self, domain_patterns: list[dict[str, Any]]) -> float:
        """Return the weighted average of the domain scores, capped at 1.0.

        Weights come from ``DOMAIN_WEIGHTS`` (unknown domains use the
        "miscellaneous" weight).  With no patterns, or a non-positive total
        weight, a floor of 0.05 is returned.
        """
        if not domain_patterns:
            return 0.05
        weighted_sum = 0.0
        weight_total = 0.0
        for pattern in domain_patterns:
            domain = str(pattern.get("scope_id", "miscellaneous"))
            weight = DOMAIN_WEIGHTS.get(domain, DOMAIN_WEIGHTS["miscellaneous"])
            weighted_sum += weight * self._coerce_score(pattern.get("score"))
            weight_total += weight
        if weight_total <= 0.0:
            return 0.05
        return min(1.0, weighted_sum / weight_total)

    def _resolve_feature_run_id(self, source_feature_run_id: str | None) -> str | None:
        """Pick the feature run to analyse.

        An explicit, non-empty id wins.  Otherwise the id of the store's
        latest successful feature run is used; when there is none, the
        caller's (falsy) value is returned unchanged.
        """
        if source_feature_run_id:
            return source_feature_run_id
        latest = self.store.latest_successful_feature_run()
        latest_run_id = latest.get("run_id") if latest is not None else None
        if latest_run_id is None:
            # str(None) would give the truthy string "None" and be treated as
            # a real run id further down.
            return source_feature_run_id
        return str(latest_run_id)

    @staticmethod
    def _group_by_domain(anomalies: list[dict[str, Any]]) -> dict[str, list[dict[str, Any]]]:
        """Bucket anomalies by risk domain, keeping first-seen domain order."""
        grouped: dict[str, list[dict[str, Any]]] = defaultdict(list)
        for anomaly in anomalies:
            signal_type = str(anomaly.get("signal_type", "unknown"))
            grouped[SIGNAL_TO_DOMAIN.get(signal_type, "miscellaneous")].append(anomaly)
        return grouped

    @staticmethod
    def _missing_feature_run_pattern() -> dict[str, Any]:
        """Return the fixed high-severity pattern emitted when no feature run
        could be selected."""
        return {
            "pattern_key": DOMAIN_PATTERN_KEY["operational_freshness"],
            "severity": "high",
            "scope": "domain",
            "scope_id": "operational_freshness",
            "score": 0.90,
            "confidence": 0.95,
            "details": {
                "anomalies_count": 0,
                "signal_types": {},
                "top_examples": [],
                "reason": "No successful feature run found",
            },
        }

    def _build_global_pattern(
        self,
        *,
        source_feature_run_id: str | None,
        domain_patterns: list[dict[str, Any]],
        global_score: float,
        anomalies_scanned: int,
    ) -> dict[str, Any]:
        """Build the ``global_risk_summary`` pattern.

        Domain scores are listed from highest to lowest; confidence is
        0.60 plus 0.05 per domain pattern, capped at 1.0.
        """
        ranked = sorted(
            domain_patterns,
            key=lambda item: self._coerce_score(item.get("score")),
            reverse=True,
        )
        return {
            "pattern_key": "global_risk_summary",
            "severity": self._severity_from_score(global_score),
            "scope": "global",
            "scope_id": "",
            "score": round(global_score, 6),
            "confidence": round(min(1.0, 0.60 + 0.05 * len(domain_patterns)), 6),
            "details": {
                "source_feature_run_id": source_feature_run_id,
                "domain_scores": [
                    {
                        "domain": item.get("scope_id"),
                        "score": item.get("score"),
                        "severity": item.get("severity"),
                    }
                    for item in ranked
                ],
                "anomalies_scanned": anomalies_scanned,
            },
        }

    def run_risk_engine(
        self,
        *,
        source_feature_run_id: str | None = None,
        anomaly_limit: int | None = None,
    ) -> RiskEngineResult:
        """Run the risk engine once and record the outcome in the store.

        Args:
            source_feature_run_id: Feature run to analyse; when empty, the
                latest successful feature run known to the store is used.
            anomaly_limit: Maximum number of anomaly signals to read; when
                ``None`` or 0, ``settings.risk_anomaly_scan_limit`` applies.

        Returns:
            A ``RiskEngineResult`` with status "success", or "failed" with
            ``error_message`` set when anything raised after the run was
            started.  Errors raised while selecting the feature run or
            starting the risk run propagate to the caller.
        """
        selected_feature_run_id = self._resolve_feature_run_id(source_feature_run_id)
        run_id = self.store.start_risk_run(source_feature_run_id=selected_feature_run_id)
        # `or` (rather than an `is None` test) makes an explicit 0 fall back to
        # the configured limit as well.
        safe_limit = anomaly_limit or self.settings.risk_anomaly_scan_limit
        try:
            if selected_feature_run_id:
                # NOTE(review): signals are requested with active_only=False
                # although the result field is named active_anomalies_scanned
                # -- confirm which of the two is intended.
                anomalies = self.store.list_anomaly_signals(
                    limit=safe_limit,
                    active_only=False,
                    run_id=selected_feature_run_id,
                )
            else:
                anomalies = []

            domain_patterns = [
                self._build_domain_pattern(domain, domain_anomalies)
                for domain, domain_anomalies in self._group_by_domain(anomalies).items()
            ]
            if not selected_feature_run_id:
                domain_patterns.append(self._missing_feature_run_pattern())

            global_score = self._compute_global_score(domain_patterns)
            global_pattern = self._build_global_pattern(
                source_feature_run_id=selected_feature_run_id,
                domain_patterns=domain_patterns,
                global_score=global_score,
                anomalies_scanned=len(anomalies),
            )

            patterns_written = self.store.replace_risk_patterns(
                run_id=run_id,
                patterns=[global_pattern, *domain_patterns],
            )
            self.store.finish_risk_run(
                run_id=run_id,
                status="success",
                patterns_written=patterns_written,
                global_score=global_score,
                details={
                    "source_feature_run_id": selected_feature_run_id,
                    "anomalies_scanned": len(anomalies),
                    "domains_total": len(domain_patterns),
                },
            )
            return RiskEngineResult(
                run_id=run_id,
                status="success",
                source_feature_run_id=selected_feature_run_id,
                active_anomalies_scanned=len(anomalies),
                patterns_written=patterns_written,
                global_score=round(global_score, 6),
            )
        except Exception as exc:
            # Boundary handler: the failure is recorded on the run and reported
            # to the caller instead of being raised.
            message = str(exc)
            self.store.finish_risk_run(
                run_id=run_id,
                status="failed",
                patterns_written=0,
                global_score=0.0,
                details={},
                error_message=message,
            )
            return RiskEngineResult(
                run_id=run_id,
                status="failed",
                source_feature_run_id=selected_feature_run_id,
                active_anomalies_scanned=0,
                patterns_written=0,
                global_score=0.0,
                error_message=message,
            )

    def list_recent_runs(self, limit: int = 20) -> list[dict[str, Any]]:
        """Return ``store.list_recent_risk_runs(limit=limit)``."""
        return self.store.list_recent_risk_runs(limit=limit)

    def list_patterns(
        self,
        *,
        limit: int = 200,
        severity: str | None = None,
        active_only: bool = True,
        run_id: str | None = None,
        pattern_key: str | None = None,
        scope: str | None = None,
    ) -> list[dict[str, Any]]:
        """Return risk patterns from the store; every argument is forwarded
        unchanged to ``store.list_risk_patterns``."""
        return self.store.list_risk_patterns(
            limit=limit,
            severity=severity,
            active_only=active_only,
            run_id=run_id,
            pattern_key=pattern_key,
            scope=scope,
        )

    def stats(self) -> dict[str, Any]:
        """Return ``store.risk_store_stats()``."""
        return self.store.risk_store_stats()