"""Probe a selection of OData entity sets and save a summary report.

The script reads the entity-set listing previously stored under ``LOGS_DIR``,
picks the entity sets to probe, fetches a few records from each one through
``ODataClient`` and writes the findings to ``LOGS_DIR / "probe_report.json"``.
"""

from __future__ import annotations

import json
from pathlib import Path
from typing import Any

from config.client import ODataClient, flatten_guid_like_fields, utc_now_iso
from config.settings import LOGS_DIR, load_settings

# Lower-case substrings; an entity set whose normalised name contains any of
# them is selected for probing when no explicit targets are configured.
TARGET_KEYWORDS = [
    "документ",
    "контрагент",
    "договор",
    "организац",
    "счет",
    "плансчетов",
    "регистр",
    "хозрасчет",
    "document",
    "counterparty",
    "contract",
    "account",
    "posting",
]


def _read_entity_sets() -> list[dict[str, Any]]:
    """Load the entity-set listing from the logs directory.

    ``entity_sets_annotated.json`` is preferred; ``entity_sets.json`` is used
    only when the annotated file does not exist. The first file that exists
    decides the result, even if its ``"entity_sets"`` list is empty.

    Returns:
        The value stored under ``"entity_sets"`` in the chosen file, or an
        empty list when neither file exists or the key is missing.
    """
    for file_name in ("entity_sets_annotated.json", "entity_sets.json"):
        source_file = LOGS_DIR / file_name
        if source_file.exists():
            payload = json.loads(source_file.read_text(encoding="utf-8"))
            return payload.get("entity_sets", [])
    return []


def _select_targets(
    entity_sets: list[dict[str, Any]],
    explicit_targets: tuple[str, ...],
) -> list[str]:
    """Choose the entity-set names to probe.

    Args:
        entity_sets: Entity-set descriptors; each one is read via ``"name"``.
        explicit_targets: Names configured by the user. When non-empty they
            are used as-is (duplicates removed, order kept).

    Returns:
        The explicit targets if given; otherwise up to 20 distinct names that
        contain one of ``TARGET_KEYWORDS``; otherwise the named entries among
        the first 10 entity sets.
    """
    if explicit_targets:
        return list(dict.fromkeys(explicit_targets))

    ranked: list[str] = []
    for item in entity_sets:
        name = str(item.get("name", ""))
        # Underscores are dropped so that e.g. "План_Счетов" matches "плансчетов".
        lowered = name.lower().replace("_", "")
        if any(keyword in lowered for keyword in TARGET_KEYWORDS):
            ranked.append(name)

    if ranked:
        # De-duplicate BEFORE truncating: slicing first would let repeated
        # names eat into the 20-item budget and return fewer distinct targets
        # than are actually available.
        return list(dict.fromkeys(ranked))[:20]

    return [str(item.get("name", "")) for item in entity_sets[:10] if item.get("name")]


def _guess_link_fields(records: list[dict[str, Any]]) -> list[str]:
    """Return the sorted, distinct GUID-like field names found in *records*.

    Each record is passed to ``flatten_guid_like_fields``; the names it yields
    are merged across all records.
    """
    fields: set[str] = set()
    for record in records:
        fields.update(flatten_guid_like_fields(record))
    return sorted(fields)


def _collect_sample_ids(records: list[dict[str, Any]], limit: int = 10) -> list[str]:
    """Return up to *limit* identifier values, at most one per record.

    For every record the first of ``Ref_Key``, ``ID``, ``Id``, ``id`` that
    holds a non-empty string is taken; records without such a key are skipped.
    """
    sample_ids: list[str] = []
    for row in records:
        for key in ("Ref_Key", "ID", "Id", "id"):
            value = row.get(key)
            if isinstance(value, str) and value:
                sample_ids.append(value)
                break
    return sample_ids[:limit]


def main() -> int:
    """Probe the selected entity sets and write ``probe_report.json``.

    Returns:
        ``1`` when there are no entity sets in the logs or no targets to
        probe; ``0`` once the report has been written (individual probe
        failures are recorded in the report and do not change the exit code).
    """
    settings = load_settings()
    entity_sets = _read_entity_sets()
    if not entity_sets:
        print(
            "[error] no entity sets found in logs. "
            "Run fetch_metadata and list_entity_sets first."
        )
        return 1

    targets = _select_targets(entity_sets, settings.probe_entity_sets)
    if not targets:
        print("[error] no target entities to probe")
        return 1

    client = ODataClient(settings)
    report_items: list[dict[str, Any]] = []
    success = 0

    for entity_set in targets:
        # Deliberately broad: one failing entity set must not abort the whole
        # probe run, so any error is recorded in the report and the loop goes on.
        try:
            records = client.read_entity_set_records(entity_set, top=settings.probe_top)
            field_names = sorted({field for row in records for field in row.keys()})
            link_fields = _guess_link_fields(records)
            sample_ids = _collect_sample_ids(records)
            report_items.append(
                {
                    "entity_set": entity_set,
                    "status": "ok",
                    "records_fetched": len(records),
                    "field_count": len(field_names),
                    "fields": field_names,
                    "suspected_link_fields": link_fields,
                    "sample_ids": sample_ids,
                }
            )
            success += 1
            print(f"[ok] {entity_set}: {len(records)} records, {len(link_fields)} link fields")
        except Exception as exc:
            report_items.append(
                {
                    "entity_set": entity_set,
                    "status": "error",
                    "error": str(exc),
                }
            )
            print(f"[warn] {entity_set}: {exc}")

    report = {
        "generated_at": utc_now_iso(),
        "service_root": settings.service_root,
        "probe_top": settings.probe_top,
        "targets_total": len(targets),
        "targets_success": success,
        "targets_failed": len(targets) - success,
        "entities": report_items,
    }

    output_file = LOGS_DIR / "probe_report.json"
    output_file.write_text(
        json.dumps(report, ensure_ascii=False, indent=2),
        encoding="utf-8",
    )
    print(f"[ok] saved report: {output_file}")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())