from __future__ import annotations

from pathlib import Path
import runpy

from canonical_layer.store import CanonicalStore


def _load_validation_module() -> dict[str, object]:
    """Execute the validation script and return its resulting globals.

    The script is looked up as ``scripts/run_validation_accounting_analytics.py``
    under the directory one level above the one containing this file. It is run
    with ``run_name="validation_mod"``.
    """
    base_dir = Path(__file__).resolve().parents[1]
    target = base_dir / "scripts" / "run_validation_accounting_analytics.py"
    return runpy.run_path(str(target), run_name="validation_mod")


def test_ingestion_handles_unknown_ids_and_deduplicates(tmp_path: Path) -> None:
    """Ingest three ``"unknown"``-id journal rows and check the reported counters.

    Two of the three rows carry identical ``attributes`` (they differ only in
    ``display_name``); the third has a different ``LineNumber``. The test
    expects two entities written, one duplicate skipped and three synthetic
    ids assigned, and the store itself to report two entities in total.
    """
    namespace = _load_validation_module()
    ingest_slice_to_store = namespace["ingest_slice_to_store"]

    db_file = tmp_path / "validation_ingest.db"
    store = CanonicalStore(f"sqlite:///{db_file.as_posix()}")
    store.ensure_created()

    def journal_row(label: str, line_no: int) -> dict[str, object]:
        # All rows share entity, id, recorder and period; only the display
        # name and the line number vary between them.
        return {
            "source_entity": "DocumentJournal_Test",
            "source_id": "unknown",
            "display_name": label,
            "attributes": {
                "Recorder": "rec-1",
                "LineNumber": line_no,
                "Period": "2020-06-01T00:00:00",
            },
            "links": [],
        }

    payload = {
        "selected_window_key": "2020-06",
        "records_exported_total": 3,
        "links_exported_total": 0,
        "records_per_entity_set": {"DocumentJournal_Test": 3},
        "items": [
            journal_row("A", 1),
            journal_row("A duplicate", 1),
            journal_row("B", 2),
        ],
    }

    result = ingest_slice_to_store(
        store=store,
        slice_payload=payload,
        slice_start="2020-06-01T00:00:00+00:00",
        slice_end_exclusive="2020-07-01T00:00:00+00:00",
    )

    # Counters reported by the ingestion call itself.
    assert result["entities_written"] == 2
    assert result["details"]["items_total_raw"] == 3
    assert result["details"]["items_after_dedupe"] == 2
    assert result["details"]["duplicate_rows_skipped"] == 1
    assert result["details"]["synthetic_ids_assigned"] == 3

    # Independent confirmation from the store.
    store_summary = store.store_stats()
    assert store_summary["entities_total"] == 2