"""Risk engine: folds anomaly signals into per-domain and global risk patterns."""

from __future__ import annotations

import math
from collections import defaultdict
from dataclasses import asdict, dataclass
from typing import Any

from canonical_layer.store import CanonicalStore
from config.settings import OneCSettings, load_settings

# Maps an anomaly ``signal_type`` to the risk domain it contributes to.
# Signal types not listed here are grouped under "miscellaneous".
SIGNAL_TO_DOMAIN = {
    "stale_refresh": "operational_freshness",
    "missing_refresh_baseline": "operational_freshness",
    "no_canonical_data": "operational_freshness",
    "high_link_degree": "suspicious_link_hub",
    "entity_count_drift": "structural_drift",
    "empty_display_share_high": "data_quality",
}

# Pattern key written for each domain-level risk pattern.
DOMAIN_PATTERN_KEY = {
    "operational_freshness": "operational_freshness_risk",
    "suspicious_link_hub": "suspicious_link_hub_risk",
    "structural_drift": "structural_drift_risk",
    "data_quality": "data_quality_risk",
    "miscellaneous": "miscellaneous_risk",
}

# Relative domain weights for the global score. They do not need to sum to 1:
# ``_compute_global_score`` divides by the total weight of the domains present.
DOMAIN_WEIGHTS = {
    "operational_freshness": 0.35,
    "suspicious_link_hub": 0.25,
    "structural_drift": 0.25,
    "data_quality": 0.15,
    "miscellaneous": 0.20,
}

# Base contribution of an anomaly's severity label to its signal score.
# Unknown labels fall back to 0.50 in ``_signal_score``.
SEVERITY_BASE = {
    "low": 0.25,
    "medium": 0.50,
    "high": 0.80,
    "critical": 0.95,
}


def _coerce_score(value: Any, default: float = 0.0) -> float:
    """Return *value* as a float, or *default* when it is unusable.

    ``None``, non-numeric values, integers too large for a float, and NaN all
    map to *default*, so one malformed anomaly row cannot abort a whole run
    or corrupt the ordering of a sort.
    """
    try:
        number = float(value)
    except (TypeError, ValueError, OverflowError):
        return default
    return default if math.isnan(number) else number


@dataclass
class RiskEngineResult:
    """Outcome of a single ``RiskService.run_risk_engine`` call."""

    run_id: str
    status: str  # "success" or "failed"
    source_feature_run_id: str | None
    active_anomalies_scanned: int
    patterns_written: int
    global_score: float
    error_message: str | None = None

    def to_dict(self) -> dict[str, Any]:
        """Return the result as a plain dict keyed by field name."""
        return asdict(self)


class RiskService:
    """Computes risk patterns from stored anomaly signals and persists them."""

    def __init__(self, *, settings: OneCSettings, store: CanonicalStore) -> None:
        """Keep the collaborators and call ``store.ensure_created()``."""
        self.settings = settings
        self.store = store
        self.store.ensure_created()

    @classmethod
    def build(cls) -> "RiskService":
        """Construct a service from ``load_settings()`` and a matching store."""
        settings = load_settings()
        store = CanonicalStore(settings.canonical_db_url)
        return cls(settings=settings, store=store)

    def _severity_from_score(self, score: float) -> str:
        """Map a score to "high", "medium" or "low" using configured thresholds."""
        if score >= self.settings.risk_high_threshold:
            return "high"
        if score >= self.settings.risk_medium_threshold:
            return "medium"
        return "low"

    @staticmethod
    def _normalize_score(raw_score: float) -> float:
        """Squash a raw anomaly score into the range 0..1.

        Negative values clamp to 0, values up to 1 pass through unchanged,
        values of 3 or more saturate at 1, and values strictly between 1 and 3
        are divided by 3.
        """
        safe = max(0.0, raw_score)
        if safe <= 1.0:
            return safe
        if safe >= 3.0:
            return 1.0
        # NOTE(review): this mapping is not monotonic at the boundary (1.0 -> 1.0
        # but 1.2 -> 0.4). Presumably scores above 1 are on a 0..3 scale; confirm
        # with the producers of the anomaly scores before changing.
        return safe / 3.0

    def _signal_score(self, anomaly: dict[str, Any]) -> float:
        """Blend an anomaly's severity base (45%) with its normalized score (55%)."""
        severity = str(anomaly.get("severity", "medium")).lower()
        base = SEVERITY_BASE.get(severity, 0.50)
        raw = self._normalize_score(_coerce_score(anomaly.get("score")))
        return min(1.0, (0.45 * base) + (0.55 * raw))

    def _build_domain_pattern(
        self,
        domain: str,
        anomalies: list[dict[str, Any]],
    ) -> dict[str, Any]:
        """Summarize the anomalies of one domain as a risk-pattern dict.

        The domain score is 60% of the strongest signal plus 40% of the
        average signal, plus a density bonus of 0.03 per additional anomaly
        (capped at 0.30); the total is capped at 1.0. Confidence starts at
        0.55 and grows by 0.05 per anomaly up to 0.95.
        """
        signal_scores = [self._signal_score(item) for item in anomalies]
        if signal_scores:
            average_score = sum(signal_scores) / len(signal_scores)
            max_score = max(signal_scores)
        else:
            average_score = 0.0
            max_score = 0.0

        density_bonus = min(0.30, max(0, len(anomalies) - 1) * 0.03)
        domain_score = min(
            1.0,
            (0.60 * max_score) + (0.40 * average_score) + density_bonus,
        )
        confidence = min(1.0, 0.55 + min(0.40, len(anomalies) * 0.05))
        severity = self._severity_from_score(domain_score)

        by_signal_type: dict[str, int] = defaultdict(int)
        for item in anomalies:
            by_signal_type[str(item.get("signal_type", "unknown"))] += 1

        # The five anomalies with the highest raw score serve as examples.
        top_examples = sorted(
            anomalies,
            key=lambda item: _coerce_score(item.get("score")),
            reverse=True,
        )[:5]
        examples_payload = [
            {
                "signal_type": item.get("signal_type"),
                "severity": item.get("severity"),
                "scope": item.get("scope"),
                "scope_id": item.get("scope_id"),
                "score": item.get("score"),
                "details": item.get("details", {}),
            }
            for item in top_examples
        ]

        return {
            "pattern_key": DOMAIN_PATTERN_KEY.get(domain, "miscellaneous_risk"),
            "severity": severity,
            "scope": "domain",
            "scope_id": domain,
            "score": round(domain_score, 6),
            "confidence": round(confidence, 6),
            "details": {
                "anomalies_count": len(anomalies),
                # Keys are unique strings, so sorting the items sorts by key.
                "signal_types": dict(sorted(by_signal_type.items())),
                "top_examples": examples_payload,
            },
        }

    def _compute_global_score(self, domain_patterns: list[dict[str, Any]]) -> float:
        """Return the weight-normalized average of the domain scores.

        Returns 0.05 when there are no domain patterns or the weights of the
        present domains add up to nothing; otherwise the result is capped
        at 1.0.
        """
        if not domain_patterns:
            return 0.05

        weighted_sum = 0.0
        weight_total = 0.0
        for item in domain_patterns:
            domain = str(item.get("scope_id", "miscellaneous"))
            weight = DOMAIN_WEIGHTS.get(domain, DOMAIN_WEIGHTS["miscellaneous"])
            weighted_sum += weight * _coerce_score(item.get("score"))
            weight_total += weight

        if weight_total <= 0.0:
            return 0.05
        return min(1.0, weighted_sum / weight_total)

    @staticmethod
    def _group_by_domain(
        anomalies: list[dict[str, Any]],
    ) -> dict[str, list[dict[str, Any]]]:
        """Bucket anomalies by the domain their ``signal_type`` maps to."""
        grouped: dict[str, list[dict[str, Any]]] = defaultdict(list)
        for anomaly in anomalies:
            signal_type = str(anomaly.get("signal_type", "unknown"))
            domain = SIGNAL_TO_DOMAIN.get(signal_type, "miscellaneous")
            grouped[domain].append(anomaly)
        return grouped

    @staticmethod
    def _missing_feature_run_pattern() -> dict[str, Any]:
        """Return the fixed high-severity pattern used when no feature run exists."""
        return {
            "pattern_key": DOMAIN_PATTERN_KEY["operational_freshness"],
            "severity": "high",
            "scope": "domain",
            "scope_id": "operational_freshness",
            "score": 0.90,
            "confidence": 0.95,
            "details": {
                "anomalies_count": 0,
                "signal_types": {},
                "top_examples": [],
                "reason": "No successful feature run found",
            },
        }

    def _build_global_pattern(
        self,
        *,
        domain_patterns: list[dict[str, Any]],
        global_score: float,
        source_feature_run_id: str | None,
        anomalies_scanned: int,
    ) -> dict[str, Any]:
        """Build the "global_risk_summary" pattern from the domain patterns."""
        ranked = sorted(
            domain_patterns,
            key=lambda item: _coerce_score(item.get("score")),
            reverse=True,
        )
        return {
            "pattern_key": "global_risk_summary",
            "severity": self._severity_from_score(global_score),
            "scope": "global",
            "scope_id": "",
            "score": round(global_score, 6),
            "confidence": round(min(1.0, 0.60 + 0.05 * len(domain_patterns)), 6),
            "details": {
                "source_feature_run_id": source_feature_run_id,
                "domain_scores": [
                    {
                        "domain": item.get("scope_id"),
                        "score": item.get("score"),
                        "severity": item.get("severity"),
                    }
                    for item in ranked
                ],
                "anomalies_scanned": anomalies_scanned,
            },
        }

    def run_risk_engine(
        self,
        *,
        source_feature_run_id: str | None = None,
        anomaly_limit: int | None = None,
    ) -> RiskEngineResult:
        """Run the risk engine once and persist the resulting patterns.

        Args:
            source_feature_run_id: Feature run whose anomaly signals are
                scored. When empty, the store's latest successful feature run
                is used; if there is none, a single high-severity
                "operational_freshness" pattern is emitted instead.
            anomaly_limit: Maximum number of anomaly signals to read. A falsy
                value (``None`` or ``0``) falls back to
                ``settings.risk_anomaly_scan_limit``.

        Returns:
            A ``RiskEngineResult`` with status "success", or "failed" with
            ``error_message`` set when any step after the run was started
            raised. Failures are recorded in the store, not re-raised.
        """
        selected_feature_run_id = source_feature_run_id
        if not selected_feature_run_id:
            latest = self.store.latest_successful_feature_run()
            if latest is not None:
                latest_run_id = latest.get("run_id")
                # Guard against str(None): the literal "None" would otherwise
                # be used as if it were a real feature run id.
                if latest_run_id is not None:
                    selected_feature_run_id = str(latest_run_id)

        run_id = self.store.start_risk_run(source_feature_run_id=selected_feature_run_id)
        safe_limit = anomaly_limit or self.settings.risk_anomaly_scan_limit

        try:
            if selected_feature_run_id:
                # NOTE(review): signals are fetched with active_only=False although
                # the result field is named active_anomalies_scanned; confirm intended.
                anomalies = self.store.list_anomaly_signals(
                    limit=safe_limit,
                    active_only=False,
                    run_id=selected_feature_run_id,
                )
            else:
                anomalies = []

            grouped = self._group_by_domain(anomalies)
            domain_patterns: list[dict[str, Any]] = [
                self._build_domain_pattern(domain, domain_anomalies)
                for domain, domain_anomalies in grouped.items()
                if domain_anomalies
            ]
            if not selected_feature_run_id:
                domain_patterns.append(self._missing_feature_run_pattern())

            global_score = self._compute_global_score(domain_patterns)
            global_pattern = self._build_global_pattern(
                domain_patterns=domain_patterns,
                global_score=global_score,
                source_feature_run_id=selected_feature_run_id,
                anomalies_scanned=len(anomalies),
            )

            all_patterns = [global_pattern] + domain_patterns
            patterns_written = self.store.replace_risk_patterns(
                run_id=run_id,
                patterns=all_patterns,
            )
            self.store.finish_risk_run(
                run_id=run_id,
                status="success",
                patterns_written=patterns_written,
                global_score=global_score,
                details={
                    "source_feature_run_id": selected_feature_run_id,
                    "anomalies_scanned": len(anomalies),
                    "domains_total": len(domain_patterns),
                },
            )
            return RiskEngineResult(
                run_id=run_id,
                status="success",
                source_feature_run_id=selected_feature_run_id,
                active_anomalies_scanned=len(anomalies),
                patterns_written=patterns_written,
                global_score=round(global_score, 6),
            )
        except Exception as exc:
            # Boundary handler: the failure is recorded on the run and reported
            # through the result object rather than propagated to the caller.
            self.store.finish_risk_run(
                run_id=run_id,
                status="failed",
                patterns_written=0,
                global_score=0.0,
                details={},
                error_message=str(exc),
            )
            return RiskEngineResult(
                run_id=run_id,
                status="failed",
                source_feature_run_id=selected_feature_run_id,
                active_anomalies_scanned=0,
                patterns_written=0,
                global_score=0.0,
                error_message=str(exc),
            )

    def list_recent_runs(self, limit: int = 20) -> list[dict[str, Any]]:
        """Return up to *limit* recent risk runs from the store."""
        return self.store.list_recent_risk_runs(limit=limit)

    def list_patterns(
        self,
        *,
        limit: int = 200,
        severity: str | None = None,
        active_only: bool = True,
        run_id: str | None = None,
        pattern_key: str | None = None,
        scope: str | None = None,
    ) -> list[dict[str, Any]]:
        """Return stored risk patterns, passing every filter through to the store."""
        return self.store.list_risk_patterns(
            limit=limit,
            severity=severity,
            active_only=active_only,
            run_id=run_id,
            pattern_key=pattern_key,
            scope=scope,
        )

    def stats(self) -> dict[str, Any]:
        """Return the store's risk statistics."""
        return self.store.risk_store_stats()