113 lines
3.5 KiB
Python
113 lines
3.5 KiB
Python
from __future__ import annotations
|
|
|
|
from pathlib import Path
|
|
|
|
from canonical_layer.features import FeatureService
|
|
from canonical_layer.models import CanonicalEntity, EntityLink
|
|
from canonical_layer.store import CanonicalStore
|
|
from config.settings import OneCSettings
|
|
|
|
|
|
def _build_settings(db_url: str) -> OneCSettings:
    """Return an ``OneCSettings`` fixture pointing at the given database.

    Only ``canonical_db_url`` varies between callers; every other field is a
    fixed literal so the tests in this module run against the same
    configuration.

    Args:
        db_url: Database URL stored in ``canonical_db_url``.

    Returns:
        A fully populated ``OneCSettings`` instance.
    """
    # Endpoint fields: local host, empty credentials, TLS verification off.
    connection = dict(
        base_url="http://localhost",
        infobase="buh_test",
        username="",
        password="",
        odata_path="/odata/standard.odata/",
        timeout=30,
        verify_tls=False,
    )
    probing = dict(
        probe_top=5,
        probe_entity_sets=(),
    )
    # The only caller-controlled value lives in this group.
    storage_and_refresh = dict(
        canonical_db_url=db_url,
        refresh_default_limit_per_set=50,
        refresh_default_entity_keywords=("document", "posting"),
    )
    feature_engine = dict(
        feature_default_baseline_window_hours=24,
        anomaly_stale_refresh_threshold_hours=6,
        feature_entity_scan_limit=200000,
    )
    risk = dict(
        risk_medium_threshold=0.45,
        risk_high_threshold=0.75,
        risk_anomaly_scan_limit=5000,
    )
    return OneCSettings(
        **connection,
        **probing,
        **storage_and_refresh,
        **feature_engine,
        **risk,
    )
|
|
|
|
|
|
def _seed_entities(store: CanonicalStore) -> None:
    """Insert two ``DocumentSales`` fixtures into *store* under run id ``"seed"``.

    ``doc-001`` carries twelve ``reference`` links to ``Counterparty`` targets
    (``cp-0`` through ``cp-11``); ``doc-002`` carries no links at all.

    Args:
        store: Canonical store that receives the entities via
            ``upsert_entities``.
    """
    counterparty_links = [
        EntityLink(
            relation="reference",
            target_entity="Counterparty",
            target_id=f"cp-{idx}",
            source_field="Counterparty_Key",
        )
        for idx in range(12)
    ]

    # Heavily linked document.
    linked_invoice = CanonicalEntity(
        source_entity="DocumentSales",
        source_id="doc-001",
        display_name="Invoice 001",
        attributes={"Amount": 1000},
        links=counterparty_links,
    )
    # Document without any outgoing links.
    unlinked_invoice = CanonicalEntity(
        source_entity="DocumentSales",
        source_id="doc-002",
        display_name="Invoice 002",
        attributes={"Amount": 500},
        links=[],
    )

    store.upsert_entities(
        run_id="seed",
        entities=[linked_invoice, unlinked_invoice],
    )
|
|
|
|
|
|
def test_feature_engine_generates_metrics_and_anomalies(tmp_path: Path) -> None:
    """Run the feature engine after a recorded refresh and check its output.

    Seeds the store, records one successful incremental refresh run, then
    expects the engine to report success, write at least one metric and one
    anomaly, and list an active ``high_link_degree`` anomaly.
    """
    db_file = tmp_path / "feature_engine.db"
    settings = _build_settings(f"sqlite:///{db_file.as_posix()}")

    store = CanonicalStore(settings.canonical_db_url)
    store.ensure_created()
    _seed_entities(store)

    # Record a completed refresh run before the engine executes.
    run_id = store.start_refresh_run(
        mode="incremental",
        requested_entity_sets=["DocumentSales"],
        date_from=None,
        date_to=None,
        limit_per_set=10,
    )
    store.finish_refresh_run(
        run_id=run_id,
        status="success",
        records_read=2,
        entities_written=2,
        links_written=12,
        checkpoints_updated=1,
        details={},
    )

    feature_service = FeatureService(settings=settings, store=store)
    outcome = feature_service.run_feature_engine(top_account_tokens=10)

    assert outcome.status == "success"
    assert outcome.metrics_written > 0
    assert outcome.anomalies_written > 0

    active_anomalies = feature_service.list_anomalies(limit=100, active_only=True)
    observed_signals = {row["signal_type"] for row in active_anomalies}
    assert "high_link_degree" in observed_signals
|
|
|
|
|
|
def test_feature_engine_detects_missing_refresh_baseline(tmp_path: Path) -> None:
    """Run the feature engine when no refresh run was ever recorded.

    The store is seeded with entities but ``start_refresh_run`` is never
    called; the engine must still report success and list an active
    ``missing_refresh_baseline`` anomaly.
    """
    db_file = tmp_path / "feature_engine_no_refresh.db"
    settings = _build_settings(f"sqlite:///{db_file.as_posix()}")

    store = CanonicalStore(settings.canonical_db_url)
    store.ensure_created()
    _seed_entities(store)

    feature_service = FeatureService(settings=settings, store=store)
    outcome = feature_service.run_feature_engine(top_account_tokens=10)

    assert outcome.status == "success"

    active_anomalies = feature_service.list_anomalies(limit=100, active_only=True)
    observed_signals = {row["signal_type"] for row in active_anomalies}
    assert "missing_refresh_baseline" in observed_signals
|