# NODEDC_1C/odata_probe/probe_entities.py

from __future__ import annotations

import json
from pathlib import Path
from typing import Any

from config.client import ODataClient, flatten_guid_like_fields, utc_now_iso
from config.settings import LOGS_DIR, load_settings


# Substrings used by _select_targets to pick entity sets worth probing when
# no explicit targets are configured. Each entry is tested with ``in`` against
# the entity set name after it has been lower-cased and stripped of
# underscores, so entries must themselves be lower-case and underscore-free.
TARGET_KEYWORDS = [
    # Russian stems (truncated forms match several grammatical endings).
    "документ",  # document
    "контрагент",  # counterparty
    "договор",  # contract
    "организац",  # organization (stem)
    "счет",  # account / invoice
    "плансчетов",  # chart of accounts
    "регистр",  # register
    "хозрасчет",  # NOTE(review): presumably 1C's "Хозрасчетный" accounting chart - confirm
    # English counterparts.
    "document",
    "counterparty",
    "contract",
    "account",
    "posting",
]


def _read_entity_sets() -> list[dict[str, Any]]:
    """Load the entity-set list from the first available JSON log file.

    ``entity_sets_annotated.json`` is preferred; ``entity_sets.json`` is
    consulted only when the annotated file does not exist. Both are looked
    up inside ``LOGS_DIR`` and decoded as UTF-8.

    Returns:
        The value stored under the ``"entity_sets"`` key of the first file
        that exists (an empty list when that key is absent), or an empty
        list when neither file exists.
    """
    for file_name in ("entity_sets_annotated.json", "entity_sets.json"):
        candidate = LOGS_DIR / file_name
        if not candidate.exists():
            continue
        # The first existing file wins, even if its list turns out empty.
        document = json.loads(candidate.read_text(encoding="utf-8"))
        return document.get("entity_sets", [])
    return []


def _select_targets(entity_sets: list[dict[str, Any]], explicit_targets: tuple[str, ...]) -> list[str]:
    if explicit_targets:
        return list(dict.fromkeys(explicit_targets))

    ranked: list[str] = []
    for item in entity_sets:
        name = str(item.get("name", ""))
        lowered = name.lower().replace("_", "")
        if any(keyword in lowered for keyword in TARGET_KEYWORDS):
            ranked.append(name)

    if ranked:
        return list(dict.fromkeys(ranked[:20]))

    return [str(item.get("name", "")) for item in entity_sets[:10] if item.get("name")]


def _guess_link_fields(records: list[dict[str, Any]]) -> list[str]:
    fields: list[str] = []
    for record in records:
        fields.extend(flatten_guid_like_fields(record))
    return sorted(set(fields))


def _first_string_id(row: dict[str, Any]) -> str:
    """Return the first non-empty string identifier of *row*, or ``""``.

    The keys ``Ref_Key``, ``ID``, ``Id`` and ``id`` are tried in that order.
    """
    for id_key in ("Ref_Key", "ID", "Id", "id"):
        candidate = row.get(id_key)
        if isinstance(candidate, str) and candidate:
            return candidate
    return ""


def _summarize_records(entity_set: str, records: list[dict[str, Any]]) -> dict[str, Any]:
    """Build the ``"ok"`` report entry for one successfully read entity set."""
    seen_fields: set[str] = set()
    for row in records:
        seen_fields.update(row.keys())
    field_names = sorted(seen_fields)

    link_fields = _guess_link_fields(records)
    # At most one identifier per record; records without one are skipped.
    sample_ids = [found for found in map(_first_string_id, records) if found]

    return {
        "entity_set": entity_set,
        "status": "ok",
        "records_fetched": len(records),
        "field_count": len(field_names),
        "fields": field_names,
        "suspected_link_fields": link_fields,
        "sample_ids": sample_ids[:10],
    }


def main() -> int:
    """Probe the selected entity sets and write ``probe_report.json``.

    Reads the entity-set list from the log files, selects targets (explicit
    ones from settings, otherwise keyword matches), fetches up to
    ``settings.probe_top`` records from each through ``ODataClient`` and
    stores a per-entity summary in ``LOGS_DIR / "probe_report.json"``.

    A failure on one entity set is recorded in the report and does not stop
    the run.

    Returns:
        ``1`` when there are no entity sets or no targets to probe,
        otherwise ``0`` (regardless of how many individual probes failed).
    """
    settings = load_settings()

    entity_sets = _read_entity_sets()
    if not entity_sets:
        print(
            "[error] no entity sets found in logs. "
            "Run fetch_metadata and list_entity_sets first."
        )
        return 1

    targets = _select_targets(entity_sets, settings.probe_entity_sets)
    if not targets:
        print("[error] no target entities to probe")
        return 1

    client = ODataClient(settings)
    entries: list[dict[str, Any]] = []
    ok_count = 0

    for entity_set in targets:
        try:
            records = client.read_entity_set_records(entity_set, top=settings.probe_top)
            entry = _summarize_records(entity_set, records)
            link_total = len(entry["suspected_link_fields"])
            entries.append(entry)
            ok_count += 1
            print(f"[ok] {entity_set}: {len(records)} records, {link_total} link fields")
        except Exception as exc:
            # Best-effort probing: note the failure and move to the next target.
            failure = {
                "entity_set": entity_set,
                "status": "error",
                "error": str(exc),
            }
            entries.append(failure)
            print(f"[warn] {entity_set}: {exc}")

    report = {
        "generated_at": utc_now_iso(),
        "service_root": settings.service_root,
        "probe_top": settings.probe_top,
        "targets_total": len(targets),
        "targets_success": ok_count,
        "targets_failed": len(targets) - ok_count,
        "entities": entries,
    }

    output_file = LOGS_DIR / "probe_report.json"
    serialized = json.dumps(report, ensure_ascii=False, indent=2)
    output_file.write_text(serialized, encoding="utf-8")
    print(f"[ok] saved report: {output_file}")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())