import type { AssistantMcpDiscoveryTurnMeaningRef } from "./assistantMcpDiscoveryPolicy";

/** Schema version stamped onto every data-need graph produced by this module. */
export const ASSISTANT_MCP_DISCOVERY_DATA_NEED_GRAPH_SCHEMA_VERSION = "assistant_data_need_graph_v1" as const;

/** Proof expectation values that `proofExpectationFor` can assign to a graph. */
export type AssistantMcpDiscoveryDataNeedProofExpectation =
  | "schema_surface"
  | "entity_grounding"
  | "coverage_checked_fact"
  | "bounded_inference"
  | "clarification_required";

/** Shape of the graph returned by {@link buildAssistantMcpDiscoveryDataNeedGraph}. */
export interface AssistantMcpDiscoveryDataNeedGraphContract {
  schema_version: typeof ASSISTANT_MCP_DISCOVERY_DATA_NEED_GRAPH_SCHEMA_VERSION;
  policy_owner: "assistantMcpDiscoveryDataNeedGraph";
  subject_candidates: string[];
  business_fact_family: string | null;
  action_family: string | null;
  aggregation_need: string | null;
  time_scope_need: string | null;
  comparison_need: string | null;
  ranking_need: string | null;
  proof_expectation: AssistantMcpDiscoveryDataNeedProofExpectation;
  clarification_gaps: string[];
  decomposition_candidates: string[];
  forbidden_overclaim_flags: string[];
  reason_codes: string[];
}

/** Input accepted by {@link buildAssistantMcpDiscoveryDataNeedGraph}; every field is optional. */
export interface BuildAssistantMcpDiscoveryDataNeedGraphInput {
  semanticDataNeed?: string | null;
  rawUtterance?: string | null;
  turnMeaning?: AssistantMcpDiscoveryTurnMeaningRef | null;
}

/** Stringifies and trims `value`; returns null for null/undefined or a blank result. */
function toNonEmptyString(value: unknown): string | null {
  if (value === null || value === undefined) {
    return null;
  }
  const text = String(value).trim();
  return text.length > 0 ? text : null;
}

/** Stringifies `value` (null/undefined become ""), trims it and lower-cases it. */
function lower(value: unknown): string {
  return String(value ?? "").trim().toLowerCase();
}

/**
 * Normalizes a reason code: runs of characters other than letters, digits,
 * `_`, `.`, `:` and `-` collapse to a single `_`, leading/trailing underscores
 * are stripped, the result is lower-cased and capped at 120 characters.
 * Returns null when nothing is left.
 */
function normalizeReasonCode(value: string): string | null {
  const normalized = value
    .trim()
    .replace(/[^\p{L}\p{N}_.:-]+/gu, "_")
    .replace(/^_+|_+$/g, "")
    .toLowerCase();
  return normalized.length > 0 ? normalized.slice(0, 120) : null;
}

/** Appends the normalized form of `value` to `target` unless it is empty or already present. */
function pushReason(target: string[], value: string): void {
  const normalized = normalizeReasonCode(value);
  if (normalized && !target.includes(normalized)) {
    target.push(normalized);
  }
}

/** Appends the trimmed `value` to `target` unless it is blank or already present. */
function pushUnique(target: string[], value: string | null | undefined): void {
  const text = toNonEmptyString(value);
  if (text && !target.includes(text)) {
    target.push(text);
  }
}

/**
 * Classifies the business fact family by substring checks over the
 * space-joined semantic data need, domain, action and unsupported family.
 * The checks are ordered; the first hit wins. Returns null when none hit.
 */
function businessFactFamilyFor(input: {
  semanticDataNeed: string;
  domain: string;
  action: string;
  unsupported: string;
}): string | null {
  const combined = `${input.semanticDataNeed} ${input.domain} ${input.action} ${input.unsupported}`.trim();
  // "metadata" also covers the longer "metadata lane clarification" phrase.
  if (combined.includes("metadata")) {
    return "schema_surface";
  }
  if (combined.includes("entity discovery") || combined.includes("entity_resolution")) {
    return "entity_grounding";
  }
  if (combined.includes("lifecycle") || combined.includes("activity")) {
    return "activity_lifecycle";
  }
  if (combined.includes("movement")) {
    return "movement_evidence";
  }
  if (combined.includes("document")) {
    return "document_evidence";
  }
  // NOTE(review): "net" is a bare substring check and also hits unrelated
  // words that contain it; left unchanged to keep classification stable.
  if (
    combined.includes("value-flow") ||
    combined.includes("turnover") ||
    combined.includes("payout") ||
    combined.includes("net")
  ) {
    return "value_flow";
  }
  return null;
}

/** Maps an aggregation axis to `by_<axis>`; an empty axis yields null. */
function aggregationNeedFor(axis: string): string | null {
  return axis ? `by_${axis}` : null;
}

/** True when the utterance contains a Russian or English "all time / entire period" phrase. */
function hasAllTimeScopeHint(rawUtterance: string): boolean {
  if (!rawUtterance) {
    return false;
  }
  return /(?:\u0437\u0430\s+\u0432\u0441[\u0435\u0451]\s+\u0432\u0440\u0435\u043c\u044f|\u0437\u0430\s+\u0432\u0435\u0441\u044c\s+\u043f\u0435\u0440\u0438\u043e\u0434|\u0437\u0430\s+\u0432\u0441\u044e\s+\u0438\u0441\u0442\u043e\u0440\u0438(?:\u044e|\u0438)|\u0437\u0430\s+\u043b\u044e\u0431\u043e\u0439\s+\u043f\u0435\u0440\u0438\u043e\u0434|for\s+all\s+time|all\s+time|entire\s+period|full\s+history|any\s+period)/iu.test(
    rawUtterance
  );
}

/** Families for which `timeScopeNeedFor` yields a period-based time scope. */
function isPeriodScopedFamily(family: string | null): boolean {
  return family === "value_flow" || family === "movement_evidence" || family === "document_evidence";
}

/**
 * Derives the time-scope need. An explicit date scope always wins; otherwise
 * period-scoped families get "all_time_scope" (when hinted) or
 * "period_required", and "activity_lifecycle" gets "open_activity_window".
 */
function timeScopeNeedFor(input: {
  family: string | null;
  explicitDateScope: string | null;
  allTimeScopeHint: boolean;
}): string | null {
  if (input.explicitDateScope) {
    return "explicit_period";
  }
  if (isPeriodScopedFamily(input.family)) {
    return input.allTimeScopeHint ? "all_time_scope" : "period_required";
  }
  if (input.family === "activity_lifecycle") {
    return "open_activity_window";
  }
  return null;
}

/** Only the "net_value_flow" action implies a comparison need. */
function comparisonNeedFor(action: string): string | null {
  return action === "net_value_flow" ? "incoming_vs_outgoing" : null;
}

/**
 * Legacy entry point kept under its original name; not called from the code
 * in this file. Its previous patterns wrapped Cyrillic words in `\b`, which
 * JavaScript evaluates against ASCII word characters only, so the leading
 * "сколько ..." alternative could not match ordinary text. It now lower-cases
 * the utterance (the old patterns were case-insensitive) and delegates to
 * {@link hasOpenScopeOneSidedValueTotalHintUtf8Safe}.
 *
 * @deprecated Call `hasOpenScopeOneSidedValueTotalHintUtf8Safe` directly.
 */
function hasOpenScopeOneSidedValueTotalHint(rawUtterance: string, action: string): boolean {
  if (!rawUtterance) {
    return false;
  }
  return hasOpenScopeOneSidedValueTotalHintUtf8Safe(rawUtterance.toLowerCase(), action);
}

/**
 * Detects one-sided total questions in Russian: received-amount phrasing for
 * the "turnover" action, paid-amount phrasing for "payout". Any other action
 * returns false. The patterns carry no `i` flag, so the utterance must already
 * be lower-cased.
 */
function hasOpenScopeOneSidedValueTotalHintUtf8Safe(rawUtterance: string, action: string): boolean {
  if (!rawUtterance) {
    return false;
  }
  if (action === "turnover") {
    return /(?:\u0441\u043a\u043e\u043b\u044c\u043a\u043e\s+(?:(?:\u0432\u043e\u043e\u0431\u0449\u0435|\u0432\u0441\u0435\u0433\u043e|\u0440\u0435\u0430\u043b\u044c\u043d\u043e)\s+){0,2}(?:\u043c\u044b\s+)?(?:\u043f\u043e\u043b\u0443\u0447\u0438\u043b\u0438|\u043f\u043e\u043b\u0443\u0447\u0435\u043d\u043e|\u0432\u0445\u043e\u0434\u044f\u0449\u0438\u0445(?:\s+\u0434\u0435\u043d\u0435\u0433)?(?:\s+\u0431\u044b\u043b\u043e)?|\u043f\u043e\u0441\u0442\u0443\u043f\u043b\u0435\u043d\u0438\u0439|\u0434\u0435\u043d\u0435\u0433\s+\u043f\u0440\u0438\u0448\u043b\u043e)|(?:\u0441\u0443\u043c\u043c\u0430|\u043e\u0431\u044a\u0435\u043c)\s+(?:\u0432\u0445\u043e\u0434\u044f\u0449\u0438\u0445|\u043f\u043e\u0441\u0442\u0443\u043f\u043b\u0435\u043d\u0438\u0439)|\u043f\u043e\u0441\u0442\u0443\u043f\u043b\u0435\u043d\u0438\u0439\s+\u0437\u0430)/u.test(
      rawUtterance
    );
  }
  if (action === "payout") {
    return /(?:\u0441\u043a\u043e\u043b\u044c\u043a\u043e\s+(?:(?:\u0432\u043e\u043e\u0431\u0449\u0435|\u0432\u0441\u0435\u0433\u043e|\u0440\u0435\u0430\u043b\u044c\u043d\u043e)\s+){0,2}(?:\u043c\u044b\s+)?(?:\u0437\u0430\u043f\u043b\u0430\u0442\u0438\u043b\u0438|\u0432\u044b\u043f\u043b\u0430\u0442\u0438\u043b\u0438|\u043f\u043e\u0442\u0440\u0430\u0442\u0438\u043b\u0438|\u0438\u0441\u0445\u043e\u0434\u044f\u0449\u0438\u0445(?:\s+\u0434\u0435\u043d\u0435\u0433)?(?:\s+\u0431\u044b\u043b\u043e)?|\u043f\u043b\u0430\u0442\u0435\u0436\u0435\u0439(?:\s+\u0431\u044b\u043b\u043e)?|\u0441\u043f\u0438\u0441\u0430\u043d\u0438\u0439(?:\s+\u0431\u044b\u043b\u043e)?)|(?:\u0441\u0443\u043c\u043c\u0430|\u043e\u0431\u044a\u0435\u043c)\s+(?:\u0438\u0441\u0445\u043e\u0434\u044f\u0449\u0438\u0445|\u043f\u043b\u0430\u0442\u0435\u0436\u0435\u0439|\u0441\u043f\u0438\u0441\u0430\u043d\u0438\u0439)|(?:\u043f\u043b\u0430\u0442\u0435\u0436\u0435\u0439|\u0441\u043f\u0438\u0441\u0430\u043d\u0438\u0439)\s+\u0437\u0430)/u.test(
      rawUtterance
    );
  }
  return false;
}

/** Actions for which an organization-scoped open total is supported. */
function supportsOrganizationScopedOpenTotal(action: string): boolean {
  return action === "turnover" || action === "payout";
}

/**
 * Decides whether a value-flow request may proceed without a subject entity:
 * always for ranking or incoming-vs-outgoing comparison requests, otherwise
 * only for turnover/payout with an organization scope or a one-sided total hint.
 */
function allowsOpenScopeWithoutSubject(input: {
  family: string | null;
  action: string;
  organizationScope: string | null;
  comparisonNeed: string | null;
  rankingNeed: string | null;
  oneSidedOpenScopeTotalHint: boolean;
}): boolean {
  if (input.family !== "value_flow") {
    return false;
  }
  if (input.rankingNeed || input.comparisonNeed === "incoming_vs_outgoing") {
    return true;
  }
  return Boolean(
    supportsOrganizationScopedOpenTotal(input.action) &&
      (input.organizationScope || input.oneSidedOpenScopeTotalHint)
  );
}

// `\b` is evaluated against ASCII word characters even under the `u` flag, so
// it never fires between a Cyrillic letter and whitespace or end of input.
// Cyrillic tokens therefore use explicit Unicode-aware lookarounds. The leading
// boundary on the bare "топ" keeps words such as "стоп" from matching.
const TOP_RANKING_HINT =
  /(?:\btop[-\s]?\d+\b|\btop\b|топ[-\s]?\d+|(?<![\p{L}\p{N}_])топ(?![\p{L}\p{N}_])|(?<![\p{L}\p{N}_])сам(?:ый|ая|ое|ые)(?![\p{L}\p{N}_])|больше\s+всего|наибол[её]е|highest|largest|most)/iu;

// The earlier `наимен[ьш]е` alternative could not match "наименее" or the
// "наименьш..." forms; this superset accepts them plus everything it matched before.
const BOTTOM_RANKING_HINT = /(?:меньше\s+всего|наимен(?:[еёьш]е|ьш)|lowest|smallest|least)/iu;

/**
 * Infers a ranking need from the utterance: "top_desc" for top/most/largest
 * style wording, "bottom_asc" for least/lowest style wording, null otherwise.
 * Top hints are tested first, so an utterance containing both kinds yields
 * "top_desc".
 */
function rankingNeedFromRawUtterance(value: string): string | null {
  const text = lower(value);
  if (!text) {
    return null;
  }
  if (TOP_RANKING_HINT.test(text)) {
    return "top_desc";
  }
  if (BOTTOM_RANKING_HINT.test(text)) {
    return "bottom_asc";
  }
  return null;
}

/** Any clarification gap forces "clarification_required"; otherwise the family decides. */
function proofExpectationFor(input: {
  family: string | null;
  clarificationGaps: string[];
}): AssistantMcpDiscoveryDataNeedProofExpectation {
  if (input.clarificationGaps.length > 0) {
    return "clarification_required";
  }
  switch (input.family) {
    case "schema_surface":
      return "schema_surface";
    case "entity_grounding":
      return "entity_grounding";
    case "activity_lifecycle":
      return "bounded_inference";
    default:
      return "coverage_checked_fact";
  }
}

/** Picks the aggregation step used by value-flow decompositions. */
function aggregationStepFor(aggregationNeed: string | null): string {
  return aggregationNeed === "by_month" ? "aggregate_by_month" : "aggregate_checked_amounts";
}

/** Decomposition steps for the "value_flow" family. */
function valueFlowDecompositionFor(input: {
  action: string;
  aggregationNeed: string | null;
  comparisonNeed: string | null;
  rankingNeed: string | null;
  openScopeWithoutSubject: boolean;
}): string[] {
  if (input.openScopeWithoutSubject) {
    if (input.rankingNeed) {
      return ["collect_scoped_movements", "aggregate_ranked_axis_values", "probe_coverage"];
    }
    if (input.comparisonNeed === "incoming_vs_outgoing") {
      const steps = ["collect_incoming_movements", "collect_outgoing_movements"];
      // The comparison path only aggregates when a monthly breakdown was requested.
      if (input.aggregationNeed === "by_month") {
        steps.push("aggregate_by_month");
      }
      steps.push("probe_coverage");
      return steps;
    }
    return ["collect_scoped_movements", aggregationStepFor(input.aggregationNeed), "probe_coverage"];
  }

  const steps = ["resolve_entity_reference"];
  if (input.action === "net_value_flow") {
    steps.push("collect_incoming_movements", "collect_outgoing_movements");
  } else {
    steps.push("collect_scoped_movements");
  }
  steps.push(aggregationStepFor(input.aggregationNeed), "probe_coverage");
  return steps;
}

/** Returns the ordered decomposition steps for a family; unknown families yield an empty list. */
function decompositionCandidatesFor(input: {
  family: string | null;
  action: string;
  aggregationNeed: string | null;
  comparisonNeed: string | null;
  rankingNeed: string | null;
  openScopeWithoutSubject: boolean;
}): string[] {
  switch (input.family) {
    case "schema_surface":
      return ["inspect_metadata_surface"];
    case "entity_grounding":
      return ["search_business_entity", "resolve_entity_reference", "probe_coverage"];
    case "value_flow":
      return valueFlowDecompositionFor(input);
    case "movement_evidence":
      return ["resolve_entity_reference", "fetch_scoped_movements", "probe_coverage"];
    case "document_evidence":
      return ["resolve_entity_reference", "fetch_scoped_documents", "probe_coverage"];
    case "activity_lifecycle":
      return [
        "resolve_entity_reference",
        "fetch_supporting_documents",
        "probe_coverage",
        "explain_evidence_basis"
      ];
    default:
      return [];
  }
}

/** Overclaim flags: always "no_raw_model_claims", plus one family-specific flag where defined. */
function forbiddenOverclaimFlagsFor(family: string | null): string[] {
  const result: string[] = ["no_raw_model_claims"];
  if (family === "schema_surface") {
    pushUnique(result, "no_fake_schema_surface");
  }
  if (family === "entity_grounding") {
    pushUnique(result, "no_unresolved_entity_claim");
  }
  if (family === "activity_lifecycle") {
    pushUnique(result, "no_legal_age_claim_without_evidence");
  }
  if (isPeriodScopedFamily(family)) {
    pushUnique(result, "no_unchecked_fact_totals");
  }
  return result;
}

/**
 * Builds the data-need graph for one assistant turn.
 *
 * All textual inputs are trimmed and lower-cased before matching. The function
 * classifies the business fact family, derives aggregation / comparison /
 * ranking / time-scope needs, records which clarifications are still missing
 * (lane choice, organization, subject, period), and lists decomposition steps,
 * overclaim flags and reason codes that describe the decisions taken.
 *
 * @param input Semantic data need, raw utterance and turn meaning; each may be
 *   omitted or null.
 * @returns A freshly built graph object.
 */
export function buildAssistantMcpDiscoveryDataNeedGraph(
  input: BuildAssistantMcpDiscoveryDataNeedGraphInput
): AssistantMcpDiscoveryDataNeedGraphContract {
  const semanticDataNeed = lower(input.semanticDataNeed);
  const turnMeaning = input.turnMeaning ?? null;
  const domain = lower(turnMeaning?.asked_domain_family);
  const action = lower(turnMeaning?.asked_action_family);
  const unsupported = lower(turnMeaning?.unsupported_but_understood_family);
  const rawUtterance = lower(input.rawUtterance);
  const aggregationAxis = lower(turnMeaning?.asked_aggregation_axis);
  const seededRankingNeed = toNonEmptyString(turnMeaning?.seeded_ranking_need);
  const explicitDateScope = toNonEmptyString(turnMeaning?.explicit_date_scope);
  const explicitOrganizationScope = toNonEmptyString(turnMeaning?.explicit_organization_scope);
  const subjectCandidates = (turnMeaning?.explicit_entity_candidates ?? [])
    .map((item) => toNonEmptyString(item))
    .filter((item): item is string => Boolean(item));

  const businessFactFamily = businessFactFamilyFor({ semanticDataNeed, domain, action, unsupported });
  const aggregationNeed = aggregationNeedFor(aggregationAxis);
  const comparisonNeed = comparisonNeedFor(action);
  // A ranking hint found in the utterance takes precedence over the seeded one.
  const rankingNeed = rankingNeedFromRawUtterance(rawUtterance) ?? seededRankingNeed;
  const allTimeScopeHint = hasAllTimeScopeHint(rawUtterance);
  const oneSidedOpenScopeTotalHint = hasOpenScopeOneSidedValueTotalHintUtf8Safe(rawUtterance, action);
  const openScopeWithoutSubject =
    subjectCandidates.length === 0 &&
    allowsOpenScopeWithoutSubject({
      family: businessFactFamily,
      action,
      organizationScope: explicitOrganizationScope,
      comparisonNeed,
      rankingNeed,
      oneSidedOpenScopeTotalHint
    });

  const clarificationGaps: string[] = [];
  if (unsupported === "metadata_lane_choice_clarification" || action === "resolve_next_lane") {
    pushUnique(clarificationGaps, "lane_family_choice");
  }
  // openScopeWithoutSubject already implies "no subject" and "value_flow".
  if (openScopeWithoutSubject && !explicitOrganizationScope) {
    pushUnique(clarificationGaps, "organization");
  } else if (
    subjectCandidates.length === 0 &&
    businessFactFamily !== "schema_surface" &&
    !openScopeWithoutSubject
  ) {
    pushUnique(clarificationGaps, "subject");
  }

  const timeScopeNeed = timeScopeNeedFor({ family: businessFactFamily, explicitDateScope, allTimeScopeHint });
  // "period_required" is only produced when no explicit date scope exists.
  if (timeScopeNeed === "period_required") {
    pushUnique(clarificationGaps, "period");
  }

  const decompositionCandidates = decompositionCandidatesFor({
    family: businessFactFamily,
    action,
    aggregationNeed,
    comparisonNeed,
    rankingNeed,
    openScopeWithoutSubject
  });

  const reasonCodes: string[] = [];
  pushReason(reasonCodes, "data_need_graph_built");
  pushReason(
    reasonCodes,
    businessFactFamily ? `data_need_graph_family_${businessFactFamily}` : "data_need_graph_family_unknown"
  );
  if (aggregationNeed) {
    pushReason(reasonCodes, `data_need_graph_aggregation_${aggregationNeed}`);
  }
  if (rankingNeed) {
    pushReason(reasonCodes, `data_need_graph_ranking_${rankingNeed}`);
  }
  if (comparisonNeed) {
    pushReason(reasonCodes, `data_need_graph_comparison_${comparisonNeed}`);
  }
  if (openScopeWithoutSubject && !rankingNeed && !comparisonNeed) {
    pushReason(reasonCodes, "data_need_graph_open_scope_total_without_subject");
  }
  if (allTimeScopeHint) {
    pushReason(reasonCodes, "data_need_graph_all_time_scope_hint");
  }
  if (clarificationGaps.includes("organization")) {
    pushReason(reasonCodes, "data_need_graph_open_scope_total_needs_organization");
  }
  if (clarificationGaps.length > 0) {
    pushReason(reasonCodes, "data_need_graph_has_clarification_gaps");
  }

  return {
    schema_version: ASSISTANT_MCP_DISCOVERY_DATA_NEED_GRAPH_SCHEMA_VERSION,
    policy_owner: "assistantMcpDiscoveryDataNeedGraph",
    subject_candidates: subjectCandidates,
    business_fact_family: businessFactFamily,
    action_family: toNonEmptyString(turnMeaning?.asked_action_family),
    aggregation_need: aggregationNeed,
    time_scope_need: timeScopeNeed,
    comparison_need: comparisonNeed,
    ranking_need: rankingNeed,
    proof_expectation: proofExpectationFor({ family: businessFactFamily, clarificationGaps }),
    clarification_gaps: clarificationGaps,
    decomposition_candidates: decompositionCandidates,
    forbidden_overclaim_flags: forbiddenOverclaimFlagsFor(businessFactFamily),
    reason_codes: reasonCodes
  };
}