"""Command-line entry point: run a canonical refresh and save a JSON run summary."""

from __future__ import annotations

import argparse
import json
import sys
from collections.abc import Sequence
from pathlib import Path

# The parent of this script's directory has to be on sys.path *before* the
# project imports below, otherwise they fail when the file is run directly.
PROJECT_ROOT = Path(__file__).resolve().parents[1]
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))

from canonical_layer.refresh import REFRESH_MODES, RefreshService  # noqa: E402
from config.settings import LOGS_DIR  # noqa: E402


def parse_args(argv: Sequence[str] | None = None) -> argparse.Namespace:
    """Parse the command-line options of the refresh runner.

    Args:
        argv: Argument strings to parse. ``None`` (the default) makes
            argparse read ``sys.argv[1:]``, which is what the script entry
            point relies on.

    Returns:
        Namespace with ``mode``, ``date_from``, ``date_to``, ``target_id``,
        ``limit_per_set``, ``entity_sets``, ``keywords``, ``output`` and
        ``strict``.
    """
    parser = argparse.ArgumentParser(
        description="Run canonical refresh into local/remote canonical store",
    )
    parser.add_argument(
        "--mode",
        choices=sorted(REFRESH_MODES),
        default="incremental",
        help="Refresh mode: historical, incremental, targeted",
    )
    parser.add_argument(
        "--from-date",
        dest="date_from",
        default=None,
        help="ISO date lower bound",
    )
    parser.add_argument(
        "--to-date",
        dest="date_to",
        default=None,
        help="ISO date upper bound",
    )
    parser.add_argument(
        "--target-id",
        default=None,
        help="Entity/document id fragment for targeted mode",
    )
    parser.add_argument(
        "--limit-per-set",
        type=int,
        default=None,
        help="How many source rows to read per entity set",
    )
    # The two "append" options stay None (not an empty list) when the flag is
    # never given; that None is forwarded to the service unchanged.
    parser.add_argument(
        "--entity-set",
        action="append",
        dest="entity_sets",
        default=None,
        help="Specific entity set to include (repeat flag for multiple values)",
    )
    parser.add_argument(
        "--keyword",
        action="append",
        dest="keywords",
        default=None,
        help="Entity-set matcher keyword (repeat flag for multiple values)",
    )
    parser.add_argument(
        "--output",
        default=str(LOGS_DIR / "refresh_last_run.json"),
        help="Where to write run summary json",
    )
    parser.add_argument(
        "--strict",
        action="store_true",
        help="Exit with code 1 when run status is failed or partial_success",
    )
    return parser.parse_args(argv)


def main(argv: Sequence[str] | None = None) -> int:
    """Run one refresh, persist its summary as JSON and echo it to stdout.

    Args:
        argv: Argument strings forwarded to :func:`parse_args`; ``None``
            means "use the process command line".

    Returns:
        Process exit code: ``1`` when ``--strict`` was given and the run
        status is anything other than ``"success"``, otherwise ``0``.
    """
    args = parse_args(argv)

    service = RefreshService.build()
    result = service.run_refresh(
        mode=args.mode,
        date_from=args.date_from,
        date_to=args.date_to,
        target_id=args.target_id,
        limit_per_set=args.limit_per_set,
        requested_entity_sets=args.entity_sets,
        entity_keywords=args.keywords,
    )

    payload = result.to_dict()
    payload["store_stats"] = service.store_stats()

    # Serialize once; the same text goes to the summary file and to stdout.
    rendered = json.dumps(payload, ensure_ascii=False, indent=2)

    output_path = Path(args.output)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text(rendered, encoding="utf-8")
    print(rendered)

    if args.strict and result.status != "success":
        return 1
    return 0


if __name__ == "__main__":
    sys.exit(main())