// NODEDC_1C/llm_normalizer/backend/src/services/assistantMcpDiscoveryPolicy.ts

/** Schema version tag written into every discovery plan contract built by this module. */
export const ASSISTANT_MCP_DISCOVERY_PLAN_SCHEMA_VERSION = "assistant_mcp_discovery_plan_v1" as const;
/** Schema version tag written into every discovery evidence contract built by this module. */
export const ASSISTANT_MCP_DISCOVERY_EVIDENCE_SCHEMA_VERSION = "assistant_mcp_discovery_evidence_v1" as const;

/**
 * Registry of discovery primitive identifiers the runtime accepts.
 *
 * Proposed primitives are matched against this list by exact string equality;
 * anything else ends up in `rejected_primitives` of the plan.
 */
export const ASSISTANT_MCP_DISCOVERY_PRIMITIVES = [
  "search_business_entity",
  "inspect_1c_metadata",
  "resolve_entity_reference",
  "query_movements",
  "query_documents",
  "aggregate_by_axis",
  "drilldown_related_objects",
  "probe_coverage",
  "explain_evidence_basis"
] as const;

/** Union of the literal identifiers listed in `ASSISTANT_MCP_DISCOVERY_PRIMITIVES`. */
export type AssistantMcpDiscoveryPrimitive = (typeof ASSISTANT_MCP_DISCOVERY_PRIMITIVES)[number];

/**
 * Outcome of plan building: "blocked" when primitives were proposed but none is registered,
 * "needs_clarification" when the semantic need, turn meaning, grounding axis or primitives
 * are missing, "allowed" otherwise.
 */
export type AssistantMcpDiscoveryPlanStatus = "allowed" | "needs_clarification" | "blocked";
/** Coverage reported with the evidence: "full" for confirmed, "partial" for inferred-only, "blocked" otherwise. */
export type AssistantMcpDiscoveryCoverageStatus = "full" | "partial" | "blocked";
/** Classification of the evidence produced by the executed probes. */
export type AssistantMcpDiscoveryEvidenceStatus = "confirmed" | "inferred_only" | "insufficient" | "blocked";
/** What kind of answer the evidence gate permits; "checked_sources_only" is the default when nothing better is earned. */
export type AssistantMcpDiscoveryAnswerPermission = "confirmed_answer" | "bounded_inference" | "checked_sources_only";

/**
 * Description of how the current user turn was understood, attached to a discovery plan.
 *
 * Every field is optional. Only the handling visible in this module is documented here;
 * the precise vocabulary of each field is owned by the producer — NOTE(review): confirm with the caller.
 */
export interface AssistantMcpDiscoveryTurnMeaningRef {
  /** Counts as a grounding axis when non-empty. */
  asked_domain_family?: string | null;
  /** Counts as a grounding axis when non-empty. */
  asked_action_family?: string | null;
  asked_aggregation_axis?: string | null;
  seeded_ranking_need?: string | null;
  /** Counts as a grounding axis when it holds at least one entry. */
  explicit_entity_candidates?: string[];
  metadata_ambiguity_entity_sets?: string[];
  metadata_scope_hint?: string | null;
  /** Counts as a grounding axis when non-empty. */
  explicit_organization_scope?: string | null;
  /** Counts as a grounding axis when non-empty. */
  explicit_date_scope?: string | null;
  subject_resolution_optional?: boolean | null;
  /** Clamped into [0, 1] during normalization; non-finite values are dropped. */
  meaning_confidence?: number | null;
  unsupported_but_understood_family?: string | null;
  /** Coerced to a boolean during normalization when neither null nor undefined. */
  stale_replay_forbidden?: boolean | null;
}

/** Limits attached to a discovery plan; both values are clamped integers of at least 1. */
export interface AssistantMcpDiscoveryExecutionBudget {
  /** Probe-count limit recorded in the plan. */
  max_probe_count: number;
  /** Row limit per probe recorded in the plan. */
  max_rows_per_probe: number;
}

/** Result of `buildAssistantMcpDiscoveryPlan`: the runtime-filtered discovery plan. */
export interface AssistantMcpDiscoveryPlanContract {
  schema_version: typeof ASSISTANT_MCP_DISCOVERY_PLAN_SCHEMA_VERSION;
  policy_owner: "assistantMcpDiscoveryPolicy";
  plan_status: AssistantMcpDiscoveryPlanStatus;
  /** Trimmed description of the data need, or null when missing or blank. */
  semantic_data_need: string | null;
  /** Normalized turn meaning, or null when nothing usable was supplied. */
  turn_meaning_ref: AssistantMcpDiscoveryTurnMeaningRef | null;
  /** Proposed primitives that are present in the registry. */
  allowed_primitives: AssistantMcpDiscoveryPrimitive[];
  /** Proposed primitives that are not present in the registry. */
  rejected_primitives: string[];
  /** Trimmed, de-duplicated axes taken from the input. */
  required_axes: string[];
  /** Trimmed, de-duplicated clarification gaps taken from the input. */
  clarification_gaps: string[];
  execution_budget: AssistantMcpDiscoveryExecutionBudget;
  /** Always true: the literal type forbids any other value. */
  requires_evidence_gate: true;
  /** Always false: the literal type forbids any other value. */
  answer_may_use_raw_model_claims: false;
  /** Normalized codes explaining how the plan status was reached. */
  reason_codes: string[];
}

/** Input of `buildAssistantMcpDiscoveryPlan`; every field may be omitted or null. */
export interface BuildAssistantMcpDiscoveryPlanInput {
  /** Free-text description of the data need; blank values are treated as missing. */
  semanticDataNeed?: string | null;
  turnMeaning?: AssistantMcpDiscoveryTurnMeaningRef | null;
  /** Primitive identifiers to be checked against the registry. */
  proposedPrimitives?: string[] | null;
  /** Any non-empty entry here satisfies the grounding-axis requirement. */
  requiredAxes?: string[] | null;
  clarificationGaps?: string[] | null;
  /** Requested probe budget; falls back to the default when not a finite number. */
  maxProbeCount?: number | null;
  /** Requested per-probe row budget; falls back to the default when not a finite number. */
  maxRowsPerProbe?: number | null;
}

/** Outcome of one executed probe, as reported to the evidence gate. */
export interface AssistantMcpDiscoveryProbeResult {
  /** Identifier of the primitive that was run; must be in the plan's allowed list or the gate blocks. */
  primitive_id: string;
  status: "ok" | "error" | "skipped";
  /** Rows returned by the probe; only finite positive values contribute to totals. */
  rows_received?: number | null;
  /** Rows matching the request; only finite positive values contribute to totals. */
  rows_matched?: number | null;
  /** Optional limitation note, merged into the evidence contract's `query_limitations`. */
  limitation?: string | null;
}

/** Input of `resolveAssistantMcpDiscoveryEvidence`. */
export interface ResolveAssistantMcpDiscoveryEvidenceInput {
  /** Plan the probes were supposed to follow; returned unchanged as `query_plan`. */
  plan: AssistantMcpDiscoveryPlanContract;
  /** Probe outcomes; a non-array value is treated as an empty list. */
  probeResults?: AssistantMcpDiscoveryProbeResult[] | null;
  confirmedFacts?: string[] | null;
  inferredFacts?: string[] | null;
  unknownFacts?: string[] | null;
  /** Summary of the source rows; required (together with matched rows) for a "confirmed" verdict. */
  sourceRowsSummary?: string | null;
  queryLimitations?: string[] | null;
  recommendedNextProbe?: string | null;
}

/** Result of `resolveAssistantMcpDiscoveryEvidence`: the gated evidence verdict. */
export interface AssistantMcpDiscoveryEvidenceContract {
  schema_version: typeof ASSISTANT_MCP_DISCOVERY_EVIDENCE_SCHEMA_VERSION;
  policy_owner: "assistantMcpDiscoveryPolicy";
  evidence_status: AssistantMcpDiscoveryEvidenceStatus;
  coverage_status: AssistantMcpDiscoveryCoverageStatus;
  answer_permission: AssistantMcpDiscoveryAnswerPermission;
  /** Trimmed, de-duplicated facts from the input; listed regardless of the verdict. */
  confirmed_facts: string[];
  /** Trimmed, de-duplicated facts from the input; listed regardless of the verdict. */
  inferred_facts: string[];
  /** Trimmed, de-duplicated facts from the input; listed regardless of the verdict. */
  unknown_facts: string[];
  /** Trimmed summary text, or null when missing or blank. */
  source_rows_summary: string | null;
  /** The plan object passed in, by reference. */
  query_plan: AssistantMcpDiscoveryPlanContract;
  /** Input limitations followed by per-probe limitations, without duplicates. */
  query_limitations: string[];
  /** Fixed explanation string derived from `evidence_status`. */
  confidence_reason: string;
  recommended_next_probe: string | null;
  /** Plan reason codes followed by the codes added by the evidence gate. */
  reason_codes: string[];
}

/** Budget values used when the caller supplies no finite number for the corresponding limit. */
const DEFAULT_DISCOVERY_BUDGET: AssistantMcpDiscoveryExecutionBudget = {
  max_probe_count: 3,
  max_rows_per_probe: 100
};

/** Upper clamp applied to a caller-supplied probe count. */
const MAX_PROBE_COUNT = 36;
/** Upper clamp applied to a caller-supplied rows-per-probe limit. */
const MAX_ROWS_PER_PROBE = 500;

/** Set view of the primitive registry, used for membership checks. */
const ALLOWED_PRIMITIVE_SET = new Set<string>(ASSISTANT_MCP_DISCOVERY_PRIMITIVES);

/**
 * Converts a loosely typed value into trimmed, non-empty text.
 *
 * Only primitive values (string, number, boolean, bigint) are converted. Objects, arrays,
 * functions and symbols yield null: stringifying them would produce placeholders such as
 * "[object Object]" that look like real content, and `String()` throws for objects
 * without a prototype.
 *
 * @param value - Any value, typically taken from unvalidated input.
 * @returns The trimmed text, or null when the value is nullish, non-primitive or blank.
 */
function toNonEmptyString(value: unknown): string | null {
  if (value === null || value === undefined) {
    return null;
  }
  const kind = typeof value;
  if (kind === "object" || kind === "function" || kind === "symbol") {
    // Not textual data; refusing it keeps junk out of fact lists and reason codes.
    return null;
  }
  const text = String(value).trim();
  return text.length > 0 ? text : null;
}

/**
 * Turns an arbitrary value into a list of distinct, non-empty strings.
 *
 * @param value - Expected to be an array; anything else produces an empty list.
 * @returns Entries converted with `toNonEmptyString`, unusable ones dropped,
 *          duplicates removed, first-occurrence order kept.
 */
function toStringList(value: unknown): string[] {
  if (!Array.isArray(value)) {
    return [];
  }
  // A Set keeps insertion order, so the first occurrence of each text wins.
  const unique = new Set<string>();
  for (const entry of value) {
    const text = toNonEmptyString(entry);
    if (text !== null) {
      unique.add(text);
    }
  }
  return Array.from(unique);
}

/**
 * Canonicalizes a reason code.
 *
 * The text is trimmed, every run of characters other than letters, digits, `_`, `.`, `:`
 * and `-` is replaced by a single underscore, leading and trailing underscores are removed,
 * the result is lower-cased and finally cut to 120 characters.
 *
 * @param value - Raw reason text.
 * @returns The canonical code, or null when nothing remains.
 */
function normalizeReasonCode(value: string): string | null {
  const collapsed = value.trim().replace(/[^\p{L}\p{N}_.:-]+/gu, "_");
  const withoutEdges = collapsed.replace(/^_+|_+$/g, "");
  const code = withoutEdges.toLowerCase();
  if (code.length === 0) {
    return null;
  }
  return code.slice(0, 120);
}

/**
 * Appends a reason code to `target` in canonical form.
 *
 * Nothing is appended when the code normalizes to nothing or is already present.
 *
 * @param target - List of reason codes; mutated in place.
 * @param value - Raw reason text to normalize and add.
 */
function pushReason(target: string[], value: string): void {
  const code = normalizeReasonCode(value);
  if (code === null || target.includes(code)) {
    return;
  }
  target.push(code);
}

/**
 * Truncates a number toward zero and confines it to the range between `min` and `max`.
 *
 * @param value - Requested value; may be absent.
 * @param fallback - Returned as-is when `value` is not a finite number.
 * @param min - Lower bound applied after truncation.
 * @param max - Upper bound applied last.
 * @returns The truncated, clamped value, or `fallback`.
 */
function clampInteger(value: number | null | undefined, fallback: number, min: number, max: number): number {
  if (typeof value !== "number" || !Number.isFinite(value)) {
    return fallback;
  }
  const whole = Math.trunc(value);
  const raised = Math.max(min, whole);
  return Math.min(max, raised);
}

/**
 * Type guard: true when `value` exactly matches an identifier in the primitive registry.
 */
function isAllowedPrimitive(value: string): value is AssistantMcpDiscoveryPrimitive {
  return ALLOWED_PRIMITIVE_SET.has(value);
}

/**
 * Produces a cleaned copy of a turn-meaning reference.
 *
 * Text fields are trimmed and left out when blank, list fields are trimmed and
 * de-duplicated and left out when empty, `meaning_confidence` is clamped into [0, 1]
 * when finite, and boolean flags are coerced when they are neither null nor undefined.
 * Every field declared on the interface is carried over, including
 * `metadata_scope_hint` and `subject_resolution_optional`.
 *
 * @param value - Raw turn meaning; may be absent.
 * @returns The cleaned object, or null when the input is absent or no field survives.
 */
function normalizeTurnMeaning(
  value: AssistantMcpDiscoveryTurnMeaningRef | null | undefined
): AssistantMcpDiscoveryTurnMeaningRef | null {
  if (!value) {
    return null;
  }
  const result: AssistantMcpDiscoveryTurnMeaningRef = {};
  const domain = toNonEmptyString(value.asked_domain_family);
  const action = toNonEmptyString(value.asked_action_family);
  const aggregationAxis = toNonEmptyString(value.asked_aggregation_axis);
  const seededRankingNeed = toNonEmptyString(value.seeded_ranking_need);
  const metadataScopeHint = toNonEmptyString(value.metadata_scope_hint);
  const organization = toNonEmptyString(value.explicit_organization_scope);
  const dateScope = toNonEmptyString(value.explicit_date_scope);
  const unsupported = toNonEmptyString(value.unsupported_but_understood_family);
  const entities = toStringList(value.explicit_entity_candidates);
  const metadataAmbiguityEntitySets = toStringList(value.metadata_ambiguity_entity_sets);
  // Keys are assigned in interface declaration order so serialized output stays stable.
  if (domain) {
    result.asked_domain_family = domain;
  }
  if (action) {
    result.asked_action_family = action;
  }
  if (aggregationAxis) {
    result.asked_aggregation_axis = aggregationAxis;
  }
  if (seededRankingNeed) {
    result.seeded_ranking_need = seededRankingNeed;
  }
  if (entities.length > 0) {
    result.explicit_entity_candidates = entities;
  }
  if (metadataAmbiguityEntitySets.length > 0) {
    result.metadata_ambiguity_entity_sets = metadataAmbiguityEntitySets;
  }
  if (metadataScopeHint) {
    result.metadata_scope_hint = metadataScopeHint;
  }
  if (organization) {
    result.explicit_organization_scope = organization;
  }
  if (dateScope) {
    result.explicit_date_scope = dateScope;
  }
  if (value.subject_resolution_optional !== null && value.subject_resolution_optional !== undefined) {
    result.subject_resolution_optional = Boolean(value.subject_resolution_optional);
  }
  if (Number.isFinite(value.meaning_confidence)) {
    result.meaning_confidence = Math.max(0, Math.min(1, Number(value.meaning_confidence)));
  }
  if (unsupported) {
    result.unsupported_but_understood_family = unsupported;
  }
  if (value.stale_replay_forbidden !== null && value.stale_replay_forbidden !== undefined) {
    result.stale_replay_forbidden = Boolean(value.stale_replay_forbidden);
  }
  return Object.keys(result).length > 0 ? result : null;
}

/**
 * Tells whether a plan has at least one anchor to ground the discovery on.
 *
 * An anchor is any required axis, or — on the turn meaning — a domain family,
 * an action family, an organization scope, a date scope, or at least one
 * explicit entity candidate.
 *
 * @param input - Normalized turn meaning (possibly null) and the required axes.
 * @returns True when at least one anchor is present.
 */
function hasGroundingAxis(input: {
  turnMeaning: AssistantMcpDiscoveryTurnMeaningRef | null;
  requiredAxes: string[];
}): boolean {
  const { turnMeaning, requiredAxes } = input;
  if (requiredAxes.length > 0) {
    return true;
  }
  if (!turnMeaning) {
    return false;
  }
  const textAnchors = [
    turnMeaning.asked_domain_family,
    turnMeaning.asked_action_family,
    turnMeaning.explicit_organization_scope,
    turnMeaning.explicit_date_scope
  ];
  if (textAnchors.some((anchor) => Boolean(anchor))) {
    return true;
  }
  const entityCount = turnMeaning.explicit_entity_candidates?.length ?? 0;
  return entityCount > 0;
}

/**
 * Public type guard for discovery primitive identifiers.
 *
 * @param value - Candidate identifier.
 * @returns True when `value` is one of the registered discovery primitives.
 */
export function isAssistantMcpDiscoveryPrimitive(value: string): value is AssistantMcpDiscoveryPrimitive {
  return isAllowedPrimitive(value);
}

/**
 * Builds the runtime-guarded discovery plan from a proposal.
 *
 * Proposed primitives are split into registered and unregistered ones, the inputs are
 * checked for a semantic data need, a usable turn meaning and a grounding axis, and the
 * plan status is derived:
 * - "blocked" when primitives were proposed but none is registered;
 * - "needs_clarification" when the data need, turn meaning, grounding axis or
 *   primitives are missing;
 * - "allowed" otherwise.
 *
 * @param input - Proposal to validate; every field may be omitted.
 * @returns The plan contract, including normalized reason codes and a clamped execution budget.
 */
export function buildAssistantMcpDiscoveryPlan(
  input: BuildAssistantMcpDiscoveryPlanInput
): AssistantMcpDiscoveryPlanContract {
  const semanticDataNeed = toNonEmptyString(input.semanticDataNeed);
  const turnMeaning = normalizeTurnMeaning(input.turnMeaning);
  const requiredAxes = toStringList(input.requiredAxes);
  const clarificationGaps = toStringList(input.clarificationGaps);
  const proposed = toStringList(input.proposedPrimitives);

  // `proposed` is already free of duplicates, so a straight partition is sufficient.
  const allowedPrimitives: AssistantMcpDiscoveryPrimitive[] = [];
  const rejectedPrimitives: string[] = [];
  proposed.forEach((candidate) => {
    if (isAllowedPrimitive(candidate)) {
      allowedPrimitives.push(candidate);
    } else {
      rejectedPrimitives.push(candidate);
    }
  });

  const grounded = hasGroundingAxis({ turnMeaning, requiredAxes });
  const anyProposed = proposed.length > 0;
  const noneAllowed = allowedPrimitives.length === 0;

  // Diagnostic codes, emitted in this fixed order whenever their condition holds.
  const diagnostics: Array<[boolean, string]> = [
    [rejectedPrimitives.length > 0, "model_proposed_unregistered_mcp_primitive"],
    [!semanticDataNeed, "semantic_data_need_missing"],
    [!turnMeaning, "turn_meaning_ref_missing"],
    [!grounded, "grounding_axis_missing"],
    [noneAllowed && anyProposed, "no_allowed_mcp_primitives_after_runtime_filter"],
    [noneAllowed && !anyProposed, "mcp_primitives_not_proposed"]
  ];
  const reasonCodes: string[] = [];
  for (const [applies, code] of diagnostics) {
    if (applies) {
      pushReason(reasonCodes, code);
    }
  }

  let planStatus: AssistantMcpDiscoveryPlanStatus;
  if (noneAllowed && anyProposed) {
    planStatus = "blocked";
  } else if (noneAllowed || !semanticDataNeed || !turnMeaning || !grounded) {
    planStatus = "needs_clarification";
  } else {
    planStatus = "allowed";
  }

  const statusReasons: Record<AssistantMcpDiscoveryPlanStatus, string> = {
    allowed: "guarded_mcp_discovery_plan_allowed",
    blocked: "guarded_mcp_discovery_plan_blocked",
    needs_clarification: "guarded_mcp_discovery_plan_needs_clarification"
  };
  pushReason(reasonCodes, statusReasons[planStatus]);

  const executionBudget: AssistantMcpDiscoveryExecutionBudget = {
    max_probe_count: clampInteger(input.maxProbeCount, DEFAULT_DISCOVERY_BUDGET.max_probe_count, 1, MAX_PROBE_COUNT),
    max_rows_per_probe: clampInteger(
      input.maxRowsPerProbe,
      DEFAULT_DISCOVERY_BUDGET.max_rows_per_probe,
      1,
      MAX_ROWS_PER_PROBE
    )
  };

  return {
    schema_version: ASSISTANT_MCP_DISCOVERY_PLAN_SCHEMA_VERSION,
    policy_owner: "assistantMcpDiscoveryPolicy",
    plan_status: planStatus,
    semantic_data_need: semanticDataNeed,
    turn_meaning_ref: turnMeaning,
    allowed_primitives: allowedPrimitives,
    rejected_primitives: rejectedPrimitives,
    required_axes: requiredAxes,
    clarification_gaps: clarificationGaps,
    execution_budget: executionBudget,
    requires_evidence_gate: true,
    answer_may_use_raw_model_claims: false,
    reason_codes: reasonCodes
  };
}

/**
 * Gathers the distinct limitation notes reported by the probes.
 *
 * @param probeResults - Probe outcomes to scan.
 * @returns Trimmed, non-empty limitation texts in first-occurrence order, without duplicates.
 */
function collectProbeLimitations(probeResults: AssistantMcpDiscoveryProbeResult[]): string[] {
  const notes = new Set<string>();
  probeResults.forEach((probe) => {
    const note = toNonEmptyString(probe.limitation);
    if (note !== null) {
      notes.add(note);
    }
  });
  return Array.from(notes);
}

/**
 * Totals `rows_matched` across all probes.
 *
 * @param probeResults - Probe outcomes to total.
 * @returns Sum of the finite, strictly positive `rows_matched` values; missing,
 *          non-finite, zero and negative values contribute nothing.
 */
function probeRowsMatched(probeResults: AssistantMcpDiscoveryProbeResult[]): number {
  let total = 0;
  for (const probe of probeResults) {
    const count = Number(probe.rows_matched ?? 0);
    if (Number.isFinite(count) && count > 0) {
      total += count;
    }
  }
  return total;
}

/**
 * Totals `rows_received` across all probes.
 *
 * @param probeResults - Probe outcomes to total.
 * @returns Sum of the finite, strictly positive `rows_received` values; missing,
 *          non-finite, zero and negative values contribute nothing.
 */
function probeRowsReceived(probeResults: AssistantMcpDiscoveryProbeResult[]): number {
  let total = 0;
  for (const probe of probeResults) {
    const count = Number(probe.rows_received ?? 0);
    if (Number.isFinite(count) && count > 0) {
      total += count;
    }
  }
  return total;
}

/**
 * Detects probe results produced by a primitive the plan did not allow.
 *
 * @param plan - Plan whose `allowed_primitives` define what may have been executed.
 * @param probeResults - Probe outcomes to check.
 * @returns True when at least one result names a primitive outside the plan.
 */
function hasProbeBypass(plan: AssistantMcpDiscoveryPlanContract, probeResults: AssistantMcpDiscoveryProbeResult[]): boolean {
  const permitted = new Set<string>(plan.allowed_primitives);
  for (const probe of probeResults) {
    if (!permitted.has(probe.primitive_id)) {
      return true;
    }
  }
  return false;
}

/**
 * Maps an evidence status to its fixed explanation string.
 *
 * @param status - Evidence status decided by the gate.
 * @returns The explanation for the status; any status other than "confirmed",
 *          "inferred_only" and "blocked" gets the insufficient-evidence text.
 */
function confidenceReasonFor(status: AssistantMcpDiscoveryEvidenceStatus): string {
  switch (status) {
    case "confirmed":
      return "confirmed_facts_backed_by_allowed_mcp_probe_rows";
    case "inferred_only":
      return "only_inferred_facts_available_from_allowed_mcp_probe_rows";
    case "blocked":
      return "runtime_evidence_gate_blocked_discovery_answer";
    default:
      return "allowed_mcp_probes_did_not_produce_sufficient_evidence";
  }
}

/**
 * Runs the evidence gate over the results of executed probes.
 *
 * Verdicts, in priority order:
 * - "blocked" when a probe used a primitive outside the plan or the plan is not "allowed";
 * - "confirmed" (full coverage, confirmed answer) when there are confirmed facts,
 *   matched rows and a source rows summary;
 * - "inferred_only" (partial coverage, bounded inference) when there are inferred facts
 *   and received rows;
 * - "insufficient" otherwise.
 * Blocked and insufficient verdicts keep coverage "blocked" and permission
 * "checked_sources_only".
 *
 * @param input - The plan, probe outcomes and the facts claimed from them.
 * @returns The evidence contract; its reason codes start with the plan's codes.
 */
export function resolveAssistantMcpDiscoveryEvidence(
  input: ResolveAssistantMcpDiscoveryEvidenceInput
): AssistantMcpDiscoveryEvidenceContract {
  const { plan } = input;
  const probeResults = Array.isArray(input.probeResults) ? input.probeResults : [];
  const confirmedFacts = toStringList(input.confirmedFacts);
  const inferredFacts = toStringList(input.inferredFacts);
  const unknownFacts = toStringList(input.unknownFacts);
  const sourceRowsSummary = toNonEmptyString(input.sourceRowsSummary);

  // Caller-supplied limitations come first, probe-reported ones after; duplicates collapse.
  const queryLimitations = Array.from(
    new Set<string>([...toStringList(input.queryLimitations), ...collectProbeLimitations(probeResults)])
  );

  const reasonCodes: string[] = [...plan.reason_codes];
  const rowsMatched = probeRowsMatched(probeResults);
  const rowsReceived = probeRowsReceived(probeResults);
  const bypassDetected = hasProbeBypass(plan, probeResults);
  const planAllowed = plan.plan_status === "allowed";
  const hasConfirmedFacts = confirmedFacts.length > 0;

  // Diagnostics are recorded regardless of the final verdict.
  if (bypassDetected) {
    pushReason(reasonCodes, "probe_result_used_primitive_outside_runtime_plan");
  }
  if (!planAllowed) {
    pushReason(reasonCodes, "plan_not_allowed_by_runtime");
  }
  if (hasConfirmedFacts && rowsMatched <= 0) {
    pushReason(reasonCodes, "confirmed_facts_without_matched_probe_rows");
  }
  if (!sourceRowsSummary && rowsReceived > 0) {
    pushReason(reasonCodes, "source_rows_summary_missing");
  }

  // Start from the most restrictive verdict and relax it only when the evidence earns it.
  let evidenceStatus: AssistantMcpDiscoveryEvidenceStatus = "blocked";
  let coverageStatus: AssistantMcpDiscoveryCoverageStatus = "blocked";
  let answerPermission: AssistantMcpDiscoveryAnswerPermission = "checked_sources_only";

  if (!bypassDetected && planAllowed) {
    if (hasConfirmedFacts && rowsMatched > 0 && sourceRowsSummary) {
      evidenceStatus = "confirmed";
      coverageStatus = "full";
      answerPermission = "confirmed_answer";
      pushReason(reasonCodes, "confirmed_facts_with_allowed_mcp_evidence");
    } else if (inferredFacts.length > 0 && rowsReceived > 0) {
      evidenceStatus = "inferred_only";
      coverageStatus = "partial";
      answerPermission = "bounded_inference";
      pushReason(reasonCodes, "inferred_facts_require_bounded_answer");
    } else {
      evidenceStatus = "insufficient";
      pushReason(reasonCodes, "mcp_discovery_evidence_insufficient");
    }
  }

  return {
    schema_version: ASSISTANT_MCP_DISCOVERY_EVIDENCE_SCHEMA_VERSION,
    policy_owner: "assistantMcpDiscoveryPolicy",
    evidence_status: evidenceStatus,
    coverage_status: coverageStatus,
    answer_permission: answerPermission,
    confirmed_facts: confirmedFacts,
    inferred_facts: inferredFacts,
    unknown_facts: unknownFacts,
    source_rows_summary: sourceRowsSummary,
    query_plan: plan,
    query_limitations: queryLimitations,
    confidence_reason: confidenceReasonFor(evidenceStatus),
    recommended_next_probe: toNonEmptyString(input.recommendedNextProbe),
    reason_codes: reasonCodes
  };
}