"""Annotate entity sets extracted from the saved metadata XML.

The script reads ``metadata.xml`` from ``LOGS_DIR``, flags every entity set
whose name contains one of ``IMPORTANT_KEYWORDS``, and stores the annotated
list in ``LOGS_DIR`` as ``entity_sets_annotated.json`` and
``entity_sets_annotated.txt``.
"""

from __future__ import annotations

import json
from pathlib import Path

from config.client import extract_entity_sets, utc_now_iso
from config.settings import LOGS_DIR

# Lower-case substrings (Russian and English) that mark an entity set as a
# priority one. They contain no spaces or underscores because
# ``mark_priority`` strips both from the name before searching.
IMPORTANT_KEYWORDS = [
    "документ",
    "справочник",
    "регистр",
    "плансчетов",
    "хозрасчетный",
    "контрагенты",
    "договоры",
    "организации",
    "банковскиесчета",
    "document",
    "counterparty",
    "account",
    "posting",
    "contract",
]


def mark_priority(name: str) -> tuple[bool, list[str]]:
    """Report which important keywords occur in an entity set name.

    The name is lower-cased and stripped of underscores and spaces before
    the substring search, so ``"Bank_Account"`` matches ``"account"``.

    Args:
        name: Entity set name to inspect.

    Returns:
        A pair ``(is_priority, hits)``: ``hits`` lists the matching keywords
        in ``IMPORTANT_KEYWORDS`` order and ``is_priority`` is ``True`` when
        that list is not empty.
    """
    normalized = name.lower()
    for separator in ("_", " "):
        normalized = normalized.replace(separator, "")

    hits = [keyword for keyword in IMPORTANT_KEYWORDS if keyword in normalized]
    return len(hits) > 0, hits


def _annotate(entry):
    """Return a copy of *entry* extended with its priority annotation."""
    is_priority, keywords = mark_priority(entry["name"])
    # The annotation keys come last so they override same-named keys that
    # the source entry might already carry.
    return {
        **entry,
        "priority": is_priority,
        "matched_keywords": keywords,
    }


def _format_line(record) -> str:
    """Render one annotated record as a line of the plain-text report."""
    flag = "*" if record["priority"] else " "
    if record["matched_keywords"]:
        matched = ", ".join(record["matched_keywords"])
    else:
        matched = "-"
    return f"{flag} {record['name']} | {record['entity_type']} | matched: {matched}"


def main() -> int:
    """Build the annotated entity set reports from the saved metadata.

    Returns:
        ``1`` when ``metadata.xml`` is missing from ``LOGS_DIR`` (nothing is
        written in that case), ``0`` after both report files were saved.
    """
    source_xml = LOGS_DIR / "metadata.xml"
    if not source_xml.exists():
        print(
            "[error] logs/metadata.xml not found. "
            "Run `python -m odata_probe.fetch_metadata` first."
        )
        return 1

    entity_sets = extract_entity_sets(source_xml.read_text(encoding="utf-8"))
    annotated: list[dict[str, object]] = [_annotate(entry) for entry in entity_sets]

    report = {
        "generated_at": utc_now_iso(),
        "total": len(annotated),
        "priority_total": len([record for record in annotated if record["priority"]]),
        "entity_sets": annotated,
    }

    # The JSON report is written before the text lines are rendered, so it
    # is already on disk if rendering a text line fails.
    json_target = LOGS_DIR / "entity_sets_annotated.json"
    serialized = json.dumps(report, ensure_ascii=False, indent=2)
    json_target.write_text(serialized, encoding="utf-8")

    txt_target = LOGS_DIR / "entity_sets_annotated.txt"
    rendered = [_format_line(record) for record in annotated]
    txt_target.write_text("\n".join(rendered), encoding="utf-8")

    print(f"[ok] total entity sets: {report['total']}")
    print(f"[ok] priority entity sets: {report['priority_total']}")
    print(f"[ok] saved: {json_target}")
    print(f"[ok] saved: {txt_target}")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())