NODEDC_1C/llm_normalizer/backend/src/services/assistantMcpDiscoveryDataNe...

701 lines
30 KiB
TypeScript
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import type { AssistantMcpDiscoveryTurnMeaningRef } from "./assistantMcpDiscoveryPolicy";
/**
 * Schema version tag written into the `schema_version` field of every
 * data-need graph contract built by this module.
 */
export const ASSISTANT_MCP_DISCOVERY_DATA_NEED_GRAPH_SCHEMA_VERSION =
"assistant_data_need_graph_v1" as const;
/**
 * Proof expectation attached to a data-need graph.
 *
 * As assigned by `proofExpectationFor` in this file:
 * - `clarification_required` whenever at least one clarification gap exists;
 * - `schema_surface` / `entity_grounding` for the families of the same name;
 * - `bounded_inference` for `activity_lifecycle` and `business_overview`;
 * - `coverage_checked_fact` for every other family (including unknown).
 */
export type AssistantMcpDiscoveryDataNeedProofExpectation =
| "schema_surface"
| "entity_grounding"
| "coverage_checked_fact"
| "bounded_inference"
| "clarification_required";
/**
 * Result shape produced by `buildAssistantMcpDiscoveryDataNeedGraph`.
 *
 * Field notes below describe how the builder in this file populates each
 * field; consumers outside this file are not visible from here.
 */
export interface AssistantMcpDiscoveryDataNeedGraphContract {
/** Always `ASSISTANT_MCP_DISCOVERY_DATA_NEED_GRAPH_SCHEMA_VERSION`. */
schema_version: typeof ASSISTANT_MCP_DISCOVERY_DATA_NEED_GRAPH_SCHEMA_VERSION;
/** Fixed literal identifying this module as the producer. */
policy_owner: "assistantMcpDiscoveryDataNeedGraph";
/** Trimmed, non-empty entries of `turnMeaning.explicit_entity_candidates`. */
subject_candidates: string[];
/** Trimmed `turnMeaning.metadata_scope_hint`, or `null` when absent/blank. */
metadata_scope_hint?: string | null;
/** `true` only when `turnMeaning.subject_resolution_optional === true`; otherwise left `undefined`. */
subject_resolution_optional?: boolean;
/** Family chosen by keyword matching in `businessFactFamilyFor`; `null` when nothing matched. */
business_fact_family: string | null;
/** Trimmed `turnMeaning.asked_action_family` (original casing), or `null`. */
action_family: string | null;
/** `by_<axis>` derived from the lowercased aggregation axis, or `null` when no axis was given. */
aggregation_need: string | null;
/** Time-scope requirement computed by `timeScopeNeedFor` (e.g. `explicit_period`, `period_required`), or `null`. */
time_scope_need: string | null;
/** `incoming_vs_outgoing` when the action is `net_value_flow`, otherwise `null`. */
comparison_need: string | null;
/** Ranking detected in the utterance (`top_desc` / `bottom_asc`), else the seeded value, else `null`. */
ranking_need: string | null;
/** See `AssistantMcpDiscoveryDataNeedProofExpectation`. */
proof_expectation: AssistantMcpDiscoveryDataNeedProofExpectation;
/** Missing inputs found by the builder: `lane_family_choice`, `organization`, `subject`, `period`, `as_of_date`. */
clarification_gaps: string[];
/** Ordered step names returned by `decompositionCandidatesFor`. */
decomposition_candidates: string[];
/** Flags returned by `forbiddenOverclaimFlagsFor`; always starts with `no_raw_model_claims`. */
forbidden_overclaim_flags: string[];
/** Normalized, de-duplicated reason codes recorded while building the graph. */
reason_codes: string[];
}
/**
 * Input accepted by `buildAssistantMcpDiscoveryDataNeedGraph`.
 * Every field is optional; missing values are treated as empty text / no turn meaning.
 */
export interface BuildAssistantMcpDiscoveryDataNeedGraphInput {
/** Free-text data need; lowercased and keyword-matched when choosing the business fact family. */
semanticDataNeed?: string | null;
/** User utterance; lowercased and scanned for all-time, one-sided-total and ranking hints. */
rawUtterance?: string | null;
/** Turn-meaning reference whose fields (domain, action, scopes, entity candidates, ...) drive the graph. */
turnMeaning?: AssistantMcpDiscoveryTurnMeaningRef | null;
}
/**
 * Converts an arbitrary value to trimmed text.
 *
 * @param value Any value; non-strings are converted with `String(...)`.
 * @returns The trimmed text, or `null` when the value is `null`/`undefined`
 *          or nothing remains after trimming.
 */
function toNonEmptyString(value: unknown): string | null {
  if (value == null) {
    return null;
  }
  const trimmed = String(value).trim();
  return trimmed === "" ? null : trimmed;
}
/**
 * Normalizes a value for keyword matching: `null`/`undefined` become an empty
 * string, everything else is stringified, trimmed and lowercased.
 */
function lower(value: unknown): string {
  const text = value === null || value === undefined ? "" : String(value);
  return text.trim().toLowerCase();
}
/**
 * Turns free text into a reason-code token.
 *
 * Steps: trim, replace every run of characters other than letters, digits,
 * `_`, `.`, `:` and `-` with a single `_`, strip leading/trailing underscores,
 * lowercase, and cap the result at 120 characters.
 *
 * @returns The normalized code, or `null` when nothing is left.
 */
function normalizeReasonCode(value: string): string | null {
  const collapsed = value.trim().replace(/[^\p{L}\p{N}_.:-]+/gu, "_");
  const stripped = collapsed.replace(/^_+|_+$/g, "");
  const code = stripped.toLowerCase();
  if (code.length === 0) {
    return null;
  }
  return code.slice(0, 120);
}
/**
 * Appends the normalized form of `value` to `target` unless it normalizes to
 * nothing or is already present. Mutates `target` in place.
 */
function pushReason(target: string[], value: string): void {
  const code = normalizeReasonCode(value);
  if (!code || target.includes(code)) {
    return;
  }
  target.push(code);
}
/**
 * Appends the trimmed `value` to `target` when it is non-empty and not yet
 * in the list. Mutates `target` in place.
 */
function pushUnique(target: string[], value: string | null | undefined): void {
  const entry = toNonEmptyString(value);
  if (!entry || target.includes(entry)) {
    return;
  }
  target.push(entry);
}
/**
 * Picks the business fact family by substring matching.
 *
 * The four inputs are joined with single spaces and searched for keywords;
 * callers are expected to pass lowercased text because matching is
 * case-sensitive. The first matching rule wins, in this order: metadata,
 * entity grounding, business overview, inventory (with sub-families),
 * activity lifecycle, movement, document, value flow.
 *
 * @returns The family name, or `null` when no keyword matched.
 */
function businessFactFamilyFor(input: {
  semanticDataNeed: string;
  domain: string;
  action: string;
  unsupported: string;
}): string | null {
  const haystack = `${input.semanticDataNeed} ${input.domain} ${input.action} ${input.unsupported}`.trim();
  const mentions = (...needles: string[]): boolean => needles.some((needle) => haystack.includes(needle));

  // "metadata lane clarification" is covered by the plain "metadata" keyword.
  if (mentions("metadata")) {
    return "schema_surface";
  }
  if (mentions("entity discovery", "entity_resolution")) {
    return "entity_grounding";
  }
  if (
    mentions(
      "broad_business_evaluation",
      "broad_evaluation",
      "business overview",
      "business_overview",
      "company analysis",
      "business audit"
    )
  ) {
    return "business_overview";
  }

  // Inventory questions are refined into sub-families; the plain stock
  // snapshot is the fallback when no refinement keyword is present.
  if (mentions("inventory", "stock", "warehouse", "item provenance", "purchase provenance")) {
    if (mentions("sale_trace", "sale trace", "buyer", "purchase_to_sale", "purchase-to-sale")) {
      return "inventory_sale_trace";
    }
    if (mentions("supplier_overlap", "supplier overlap", "supplier stock", "stock by supplier")) {
      return "inventory_supplier_overlap";
    }
    if (mentions("purchase_provenance", "purchase provenance", "purchase document", "supplier provenance")) {
      return "inventory_purchase_provenance";
    }
    return "inventory_stock_snapshot";
  }

  if (mentions("lifecycle", "activity")) {
    return "activity_lifecycle";
  }
  if (mentions("movement")) {
    return "movement_evidence";
  }
  if (mentions("document")) {
    return "document_evidence";
  }
  if (mentions("value-flow", "turnover", "payout", "net")) {
    return "value_flow";
  }
  return null;
}
/**
 * Maps an aggregation axis to its aggregation-need token.
 *
 * @param axis Axis name (e.g. `"month"`); empty means no aggregation.
 * @returns `by_<axis>`, or `null` for an empty axis.
 */
function aggregationNeedFor(axis: string): string | null {
  return axis ? `by_${axis}` : null;
}
/**
 * Detects wording that asks for the whole history rather than a specific
 * period, in Russian or English (e.g. "for all time", "entire period",
 * "full history", "any period"). Matching is case-insensitive.
 *
 * @returns `false` for empty input or when no such phrase is present.
 */
function hasAllTimeScopeHint(rawUtterance: string): boolean {
  const allTimePhrase =
    /(?:\u0437\u0430\s+\u0432\u0441[\u0435\u0451]\s+\u0432\u0440\u0435\u043c\u044f|\u0437\u0430\s+\u0432\u0435\u0441\u044c\s+\u043f\u0435\u0440\u0438\u043e\u0434|\u0437\u0430\s+\u0432\u0441\u044e\s+\u0438\u0441\u0442\u043e\u0440\u0438(?:\u044e|\u0438)|\u0437\u0430\s+\u043b\u044e\u0431\u043e\u0439\s+\u043f\u0435\u0440\u0438\u043e\u0434|for\s+all\s+time|all\s+time|entire\s+period|full\s+history|any\s+period)/iu;
  if (rawUtterance) {
    return allTimePhrase.test(rawUtterance);
  }
  return false;
}
/**
 * Decides whether a business-overview question should be flagged as asking
 * for a direct money answer.
 *
 * Only applies when `family` is `"business_overview"` and the utterance is
 * non-empty. It then returns `true` when any of the following holds:
 * - a ranking need is already known;
 * - the text pairs a "money" word with plus/minus/net/received/paid wording
 *   (either order, within 80 characters);
 * - the text contains an explicit "not an overview" / "just money" /
 *   "without tops" / incoming-outgoing-net phrase;
 * - the text asks "how much" about earnings/revenue/money, or asks which
 *   year was the top one by revenue/turnover.
 *
 * All patterns are case-insensitive.
 *
 * @param input.family Business fact family of the turn.
 * @param input.rawUtterance Utterance text to scan.
 * @param input.rankingNeed Ranking need resolved for the turn, if any.
 */
function hasBusinessOverviewDirectMoneyAnswerHint(input: {
  family: string | null;
  rawUtterance: string;
  rankingNeed: string | null;
}): boolean {
  if (input.family !== "business_overview" || !input.rawUtterance) {
    return false;
  }
  if (input.rankingNeed) {
    return true;
  }
  const text = input.rawUtterance;

  // Money word near plus/minus/net/received/paid wording, in either order.
  if (
    /(?:\u043f\u043e\s+\u0434\u0435\u043d\p{L}*|\u0434\u0435\u043d\p{L}*)[\s\S]{0,80}(?:\u043f\u043b\u044e\u0441|\u043c\u0438\u043d\u0443\u0441|\u043d\u0435\u0442\u0442\u043e|\u043f\u0440\u0438\u0448\p{L}*|\u0443\u0448\p{L}*|\u043f\u043e\u043b\u0443\u0447\p{L}*|\u0437\u0430\u043f\u043b\u0430\u0442\p{L}*)|(?:\u043f\u043b\u044e\u0441|\u043c\u0438\u043d\u0443\u0441|\u043d\u0435\u0442\u0442\u043e)[\s\S]{0,80}(?:\u043f\u043e\s+\u0434\u0435\u043d\p{L}*|\u0434\u0435\u043d\p{L}*)/iu.test(
      text
    )
  ) {
    return true;
  }

  // Explicit "not an overview" / "just money" / "without tops" / in-out-net phrasing.
  // The end of the "without tops" alternative uses a Unicode-aware lookahead:
  // JavaScript's `\b` is defined over ASCII word characters only, so it never
  // holds between a Cyrillic letter and whitespace or end of text.
  if (
    /(?:\u043d\u0435\s+\u043e\u0431\u0437\u043e\u0440|\u043f\u0440\u043e\u0441\u0442\u043e\s+\u0434\u0435\u043d\p{L}+|\u0431\u0435\u0437\s+\u0442\u043e\u043f(?:\u043e\u0432|\u0430)?(?![\p{L}\p{N}_])|\u0434\u0435\u043d\p{L}{0,20}\s+\u043d\u0435\u0442\u0442\u043e|\u043f\u0440\u0438\u0448\u043b\p{L}*[\s\S]{0,80}\u0443\u0448\u043b\p{L}*[\s\S]{0,80}\u043d\u0435\u0442\u0442\u043e|\u0432\u0445\u043e\u0434\u044f\u0449\p{L}*[\s\S]{0,80}\u0438\u0441\u0445\u043e\u0434\u044f\u0449\p{L}*[\s\S]{0,80}\u043d\u0435\u0442\u0442\u043e)/iu.test(text)
  ) {
    return true;
  }

  // "How much ... earned/revenue/money", "earned ... in total/<year>", or
  // "which ... top ... revenue ... year".
  return /(?:\u0441\u043a\u043e\u043b\u044c\u043a\u043e|\u0441\u043a\u043e\u043a\w*|how\s+much)[\s\S]{0,120}(?:\u0437\u0430\u0440\u0430\u0431\u043e\u0442|\u0432\u044b\u0440\u0443\u0447|\u0434\u0435\u043d\p{L}*|\u043f\u043e\u043b\u0443\u0447|\u043f\u043e\u0441\u0442\u0443\u043f\p{L}*)|(?:\u0437\u0430\u0440\u0430\u0431\u043e\u0442|\u0432\u044b\u0440\u0443\u0447)[\s\S]{0,120}(?:\u0441\u043a\u043e\u043b\u044c\u043a\u043e|\u0441\u043a\u043e\u043a\w*|\u0432\u0441\u0435\u0433\u043e|\u0432\u043e\u043e\u0431\u0449\u0435|(?:19|20)\d{2}|all\s+time)|(?:\u043a\u0430\u043a\u043e\u0439|\u043a\u0430\u043a\u0430\u044f|\u043a\u0430\u043a\u0438\u0435|which|what)[\s\S]{0,80}(?:\u0441\u0430\u043c\p{L}*|top|best|most)[\s\S]{0,80}(?:\u0434\u043e\u0445\u043e\u0434\u043d|\u0432\u044b\u0440\u0443\u0447|\u043e\u0431\u043e\u0440\u043e\u0442|revenue|turnover)[\s\S]{0,40}(?:\u0433\u043e\u0434|year)/iu.test(
    text
  );
}
/**
 * Derives the time-scope requirement for a business fact family.
 *
 * An explicit date scope always yields `"explicit_period"`. Otherwise:
 * - value flow / movement / document families: `"all_time_scope"` when an
 *   all-time hint or the subject-scoped bidirectional default applies,
 *   else `"period_required"`;
 * - `activity_lifecycle`: `"open_activity_window"`;
 * - `business_overview`: `"all_time_scope"`;
 * - stock snapshot / supplier overlap: `"as_of_date_required"`;
 * - anything else (including purchase provenance and sale trace): `null`.
 */
function timeScopeNeedFor(input: {
  family: string | null;
  explicitDateScope: string | null;
  allTimeScopeHint: boolean;
  subjectScopedBidirectionalAllTime: boolean;
}): string | null {
  if (input.explicitDateScope) {
    return "explicit_period";
  }
  switch (input.family) {
    case "value_flow":
    case "movement_evidence":
    case "document_evidence":
      return input.allTimeScopeHint || input.subjectScopedBidirectionalAllTime
        ? "all_time_scope"
        : "period_required";
    case "activity_lifecycle":
      return "open_activity_window";
    case "business_overview":
      return "all_time_scope";
    case "inventory_stock_snapshot":
    case "inventory_supplier_overlap":
      return "as_of_date_required";
    default:
      return null;
  }
}
/**
 * Returns `"incoming_vs_outgoing"` for the `net_value_flow` action and
 * `null` for every other action.
 */
function comparisonNeedFor(action: string): string | null {
  return action === "net_value_flow" ? "incoming_vs_outgoing" : null;
}
/**
 * Detects Russian wording that asks for a one-sided money total without
 * naming a subject: incoming totals for the `turnover` action ("how much did
 * we receive", "sum of incoming", "receipts for ...") and outgoing totals for
 * the `payout` action ("how much did we pay", "sum of payments", ...).
 * Matching is case-insensitive.
 *
 * Word edges are checked with Unicode-aware lookarounds. JavaScript's `\b`
 * is defined over ASCII word characters only, so placing it next to Cyrillic
 * letters made the main alternatives impossible to match in normal text.
 *
 * @param rawUtterance Utterance text to scan.
 * @param action Action family; only `"turnover"` and `"payout"` are handled.
 * @returns `true` when a matching phrase is found, `false` otherwise
 *          (including empty text and any other action).
 */
function hasOpenScopeOneSidedValueTotalHint(rawUtterance: string, action: string): boolean {
  if (!rawUtterance) {
    return false;
  }
  if (action === "turnover") {
    return /(?:(?<![\p{L}\p{N}_])сколько\s+(?:(?:вообще|всего|реально)\s+){0,2}(?:мы\s+)?(?:получили|получено|входящих(?:\s+денег)?(?:\s+было)?|поступлений|денег\s+пришло)(?![\p{L}\p{N}_])|(?:сумма|объем)\s+(?:входящих|поступлений)|поступлений\s+за(?![\p{L}\p{N}_]))/iu.test(
      rawUtterance
    );
  }
  if (action === "payout") {
    return /(?:(?<![\p{L}\p{N}_])сколько\s+(?:(?:вообще|всего|реально)\s+){0,2}(?:мы\s+)?(?:заплатили|выплатили|потратили|исходящих(?:\s+денег)?(?:\s+было)?|платежей(?:\s+было)?|списаний(?:\s+было)?)(?![\p{L}\p{N}_])|(?:сумма|объем)\s+(?:исходящих|платежей|списаний)|(?:платежей|списаний)\s+за(?![\p{L}\p{N}_]))/iu.test(
      rawUtterance
    );
  }
  return false;
}
/**
 * Escape-sequence variant of the one-sided total detector.
 *
 * Looks for Russian "how much did we receive / sum of incoming / receipts
 * for" wording when `action` is `"turnover"`, and "how much did we pay / sum
 * of payments / write-offs for" wording when `action` is `"payout"`.
 * The patterns are case-sensitive and written in lowercase, so the caller
 * must pass lowercased text.
 *
 * @returns `false` for empty text or any other action.
 */
function hasOpenScopeOneSidedValueTotalHintUtf8Safe(rawUtterance: string, action: string): boolean {
  if (!rawUtterance) {
    return false;
  }
  let phrase: RegExp;
  switch (action) {
    case "turnover":
      phrase =
        /(?:\u0441\u043a\u043e\u043b\u044c\u043a\u043e\s+(?:(?:\u0432\u043e\u043e\u0431\u0449\u0435|\u0432\u0441\u0435\u0433\u043e|\u0440\u0435\u0430\u043b\u044c\u043d\u043e)\s+){0,2}(?:\u043c\u044b\s+)?(?:\u043f\u043e\u043b\u0443\u0447\u0438\u043b\u0438|\u043f\u043e\u043b\u0443\u0447\u0435\u043d\u043e|\u0432\u0445\u043e\u0434\u044f\u0449\u0438\u0445(?:\s+\u0434\u0435\u043d\u0435\u0433)?(?:\s+\u0431\u044b\u043b\u043e)?|\u043f\u043e\u0441\u0442\u0443\u043f\u043b\u0435\u043d\u0438\u0439|\u0434\u0435\u043d\u0435\u0433\s+\u043f\u0440\u0438\u0448\u043b\u043e)|(?:\u0441\u0443\u043c\u043c\u0430|\u043e\u0431\u044a\u0435\u043c)\s+(?:\u0432\u0445\u043e\u0434\u044f\u0449\u0438\u0445|\u043f\u043e\u0441\u0442\u0443\u043f\u043b\u0435\u043d\u0438\u0439)|\u043f\u043e\u0441\u0442\u0443\u043f\u043b\u0435\u043d\u0438\u0439\s+\u0437\u0430)/u;
      break;
    case "payout":
      phrase =
        /(?:\u0441\u043a\u043e\u043b\u044c\u043a\u043e\s+(?:(?:\u0432\u043e\u043e\u0431\u0449\u0435|\u0432\u0441\u0435\u0433\u043e|\u0440\u0435\u0430\u043b\u044c\u043d\u043e)\s+){0,2}(?:\u043c\u044b\s+)?(?:\u0437\u0430\u043f\u043b\u0430\u0442\u0438\u043b\u0438|\u0432\u044b\u043f\u043b\u0430\u0442\u0438\u043b\u0438|\u043f\u043e\u0442\u0440\u0430\u0442\u0438\u043b\u0438|\u0438\u0441\u0445\u043e\u0434\u044f\u0449\u0438\u0445(?:\s+\u0434\u0435\u043d\u0435\u0433)?(?:\s+\u0431\u044b\u043b\u043e)?|\u043f\u043b\u0430\u0442\u0435\u0436\u0435\u0439(?:\s+\u0431\u044b\u043b\u043e)?|\u0441\u043f\u0438\u0441\u0430\u043d\u0438\u0439(?:\s+\u0431\u044b\u043b\u043e)?)|(?:\u0441\u0443\u043c\u043c\u0430|\u043e\u0431\u044a\u0435\u043c)\s+(?:\u0438\u0441\u0445\u043e\u0434\u044f\u0449\u0438\u0445|\u043f\u043b\u0430\u0442\u0435\u0436\u0435\u0439|\u0441\u043f\u0438\u0441\u0430\u043d\u0438\u0439)|(?:\u043f\u043b\u0430\u0442\u0435\u0436\u0435\u0439|\u0441\u043f\u0438\u0441\u0430\u043d\u0438\u0439)\s+\u0437\u0430)/u;
      break;
    default:
      return false;
  }
  return phrase.test(rawUtterance);
}
/**
 * Tells whether the action is one of the one-sided totals (`turnover` or
 * `payout`) that may be answered without a resolved subject.
 */
function supportsOrganizationScopedOpenTotal(action: string): boolean {
  return ["turnover", "payout"].includes(action);
}
/**
 * Decides whether a value-flow question may proceed without a subject.
 *
 * Only the `value_flow` family qualifies. Within it, a ranking need or an
 * incoming-vs-outgoing comparison is sufficient on its own; otherwise the
 * action must be a one-sided total (`turnover` / `payout`) and either an
 * organization scope or a one-sided open-scope wording hint must be present.
 */
function allowsOpenScopeWithoutSubject(input: {
  family: string | null;
  action: string;
  organizationScope: string | null;
  comparisonNeed: string | null;
  rankingNeed: string | null;
  oneSidedOpenScopeTotalHint: boolean;
}): boolean {
  const { family, action, organizationScope, comparisonNeed, rankingNeed, oneSidedOpenScopeTotalHint } = input;
  if (family !== "value_flow") {
    return false;
  }
  const rankedOrCompared = Boolean(rankingNeed) || comparisonNeed === "incoming_vs_outgoing";
  if (rankedOrCompared) {
    return true;
  }
  if (!supportsOrganizationScopedOpenTotal(action)) {
    return false;
  }
  return Boolean(organizationScope) || oneSidedOpenScopeTotalHint;
}
/**
 * Returns `true` when subject resolution is marked optional and the family
 * is `movement_evidence` or `document_evidence`; `false` in every other case.
 */
function allowsMetadataScopedOpenLaneWithoutSubject(input: {
  family: string | null;
  subjectResolutionOptional: boolean;
}): boolean {
  if (!input.subjectResolutionOptional) {
    return false;
  }
  return input.family === "movement_evidence" || input.family === "document_evidence";
}
/**
 * Detects a ranking request in free text (Russian or English).
 *
 * The text is lowercased first. Checks run in this order, first hit wins:
 * 1. explicit negation ("excluding top", "without tops", "not top",
 *    "without rating") -> `null`;
 * 2. Russian "top / most / largest / best" wording -> `"top_desc"`;
 * 3. Russian "least / minimum / worst" wording -> `"bottom_asc"`;
 * 4. English "top / highest / largest / most / best" -> `"top_desc"`;
 * 5. English "lowest / smallest / least / worst" -> `"bottom_asc"`.
 *
 * The negation check uses Unicode-aware lookarounds instead of `\b`:
 * JavaScript's `\b` is defined over ASCII word characters only, so after a
 * Cyrillic word it never holds before whitespace or end of text. With `\b`
 * the negation was missed and step 2 reported `"top_desc"` for "without tops".
 *
 * NOTE(review): step 2 matches the Russian "top" stem as a bare substring, so
 * longer words that merely contain it also count as a ranking request. Kept
 * as-is to preserve recall; tighten if that produces false positives.
 *
 * @param value Utterance text; may be empty.
 * @returns `"top_desc"`, `"bottom_asc"`, or `null` when no ranking is requested.
 */
function rankingNeedFromRawUtterance(value: string): string | null {
  const text = lower(value);
  if (!text) {
    return null;
  }

  // 1. Explicit negation of a ranking request.
  const negatedRanking =
    /(?:\u0438\u0441\u043a\u043b\u044e\u0447[\p{L}\p{N}_]*\s+\u0442\u043e\u043f|(?<![\p{L}\p{N}_])(?:\u0431\u0435\u0437\s+\u0442\u043e\u043f(?:\u043e\u0432|\u0430)?|\u043d\u0435\s+\u0442\u043e\u043f|\u0431\u0435\u0437\s+\u0440\u0435\u0439\u0442\u0438\u043d\u0433\u0430)(?![\p{L}\p{N}_]))/iu;
  if (negatedRanking.test(text)) {
    return null;
  }

  // 2. Russian "top" wording.
  const russianTop =
    /(?:\u0442\u043e\u043f[-\s]?\d*|\u0441\u0430\u043c(?:\u044b\u0439|\u0430\u044f|\u043e\u0435|\u044b\u0435)|\u0431\u043e\u043b\u044c\u0448\u0435\s+\u0432\u0441\u0435\u0433\u043e|\u043d\u0430\u0438\u0431\u043e\u043b[\u0435\u0451]\u0435|\u043c\u0430\u043a\u0441\u0438\u043c\u0430\u043b\u044c\u043d|\u043c\u0430\u043a\u0441\u0438\u043c\u0443\u043c|\u043a\u0440\u0443\u043f\u043d\u0435\u0439\u0448|\u043b\u0443\u0447\u0448\u0438\u0439)/iu;
  if (russianTop.test(text)) {
    return "top_desc";
  }

  // 3. Russian "bottom" wording.
  const russianBottom =
    /(?:\u043c\u0435\u043d\u044c\u0448\u0435\s+\u0432\u0441\u0435\u0433\u043e|\u043d\u0430\u0438\u043c\u0435\u043d[\u044c\u0448]\u0435|\u043c\u0438\u043d\u0438\u043c\u0430\u043b\u044c\u043d|\u043c\u0438\u043d\u0438\u043c\u0443\u043c|\u0445\u0443\u0434\u0448\u0438\u0439)/iu;
  if (russianBottom.test(text)) {
    return "bottom_asc";
  }

  // 4. English "top" wording (ASCII, so `\b` is reliable here).
  if (/(?:\btop[-\s]?\d+\b|\btop\b|highest|largest|most|best)/iu.test(text)) {
    return "top_desc";
  }

  // 5. English "bottom" wording.
  if (/(?:lowest|smallest|least|worst)/iu.test(text)) {
    return "bottom_asc";
  }
  return null;
}
/**
 * Tells whether the utterance explicitly asks NOT to get a ranking or
 * overview: "without tops", "not top", "not an overview", "just money",
 * "excluding top", "without rating" (Russian wording, case-insensitive).
 *
 * Word edges use Unicode-aware lookarounds rather than `\b`, because
 * JavaScript's `\b` is defined over ASCII word characters only and never
 * holds between a Cyrillic letter and whitespace or end of text. The leading
 * lookbehind on the short negations keeps them from matching inside a longer
 * word that merely ends with the same letters.
 *
 * @param value Utterance text; may be empty.
 * @returns `true` when a suppression phrase is present, `false` otherwise.
 */
function suppressRankingNeedFromRawUtterance(value: string): boolean {
  const text = lower(value);
  if (!text) {
    return false;
  }
  return /(?:(?<![\p{L}\p{N}_])(?:\u0431\u0435\u0437\s+\u0442\u043e\u043f(?:\u043e\u0432|\u0430)?|\u043d\u0435\s+\u0442\u043e\u043f|\u043d\u0435\s+\u043e\u0431\u0437\u043e\u0440|\u0431\u0435\u0437\s+\u0440\u0435\u0439\u0442\u0438\u043d\u0433\u0430)(?![\p{L}\p{N}_])|\u043f\u0440\u043e\u0441\u0442\u043e\s+\u0434\u0435\u043d\p{L}+|\u0438\u0441\u043a\u043b\u044e\u0447[\p{L}\p{N}_]*\s+\u0442\u043e\u043f)/iu.test(
    text
  );
}
/**
 * Chooses the proof expectation for a graph.
 *
 * Any clarification gap forces `"clarification_required"`. Otherwise the
 * family decides: schema and entity families map to themselves, activity
 * lifecycle and business overview are `"bounded_inference"`, and everything
 * else (including an unknown family) is `"coverage_checked_fact"`.
 */
function proofExpectationFor(input: {
  family: string | null;
  clarificationGaps: string[];
}): AssistantMcpDiscoveryDataNeedProofExpectation {
  if (input.clarificationGaps.length > 0) {
    return "clarification_required";
  }
  switch (input.family) {
    case "schema_surface":
      return "schema_surface";
    case "entity_grounding":
      return "entity_grounding";
    case "activity_lifecycle":
    case "business_overview":
      return "bounded_inference";
    default:
      return "coverage_checked_fact";
  }
}
/**
 * Lists the ordered decomposition step names for a business fact family.
 *
 * Every call returns a fresh array. Value-flow questions branch on ranking,
 * incoming-vs-outgoing comparison and open scope; movement/document evidence
 * skips entity resolution when the metadata-scoped open lane applies.
 * Families not handled here (including `null`) yield an empty list.
 */
function decompositionCandidatesFor(input: {
  family: string | null;
  action: string;
  aggregationNeed: string | null;
  comparisonNeed: string | null;
  rankingNeed: string | null;
  openScopeWithoutSubject: boolean;
  metadataScopedOpenLaneWithoutSubject: boolean;
}): string[] {
  const monthly = input.aggregationNeed === "by_month";
  const aggregateStep = monthly ? "aggregate_by_month" : "aggregate_checked_amounts";
  // Entity resolution is skipped only for the metadata-scoped open lane.
  const resolveUnlessOpenLane = input.metadataScopedOpenLaneWithoutSubject ? [] : ["resolve_entity_reference"];

  switch (input.family) {
    case "schema_surface":
      return ["inspect_metadata_surface"];

    case "entity_grounding":
      return ["search_business_entity", "resolve_entity_reference", "probe_coverage"];

    case "value_flow": {
      if (input.openScopeWithoutSubject) {
        if (input.rankingNeed) {
          return ["collect_scoped_movements", "aggregate_ranked_axis_values", "probe_coverage"];
        }
        if (input.comparisonNeed === "incoming_vs_outgoing") {
          // The comparison branch aggregates only when a monthly axis was requested.
          return [
            "collect_incoming_movements",
            "collect_outgoing_movements",
            ...(monthly ? ["aggregate_by_month"] : []),
            "probe_coverage"
          ];
        }
        return ["collect_scoped_movements", aggregateStep, "probe_coverage"];
      }
      const collectSteps =
        input.action === "net_value_flow"
          ? ["collect_incoming_movements", "collect_outgoing_movements"]
          : ["collect_scoped_movements"];
      return ["resolve_entity_reference", ...collectSteps, aggregateStep, "probe_coverage"];
    }

    case "movement_evidence":
      return [...resolveUnlessOpenLane, "fetch_scoped_movements", "probe_coverage"];

    case "document_evidence":
      return [...resolveUnlessOpenLane, "fetch_scoped_documents", "probe_coverage"];

    case "activity_lifecycle":
      return [
        "resolve_entity_reference",
        "fetch_supporting_documents",
        "probe_coverage",
        "explain_evidence_basis"
      ];

    case "business_overview":
      return [
        "collect_scoped_movements",
        "aggregate_checked_amounts",
        "aggregate_ranked_axis_values",
        "fetch_supporting_documents",
        "probe_coverage",
        "explain_evidence_basis"
      ];

    case "inventory_stock_snapshot":
      return ["fetch_scoped_movements", "aggregate_checked_amounts", "probe_coverage", "explain_evidence_basis"];

    case "inventory_supplier_overlap":
      return [
        "resolve_entity_reference",
        "fetch_scoped_movements",
        "fetch_scoped_documents",
        "aggregate_checked_amounts",
        "probe_coverage",
        "explain_evidence_basis"
      ];

    case "inventory_purchase_provenance":
    case "inventory_sale_trace":
      return [
        "resolve_entity_reference",
        "fetch_scoped_documents",
        "drilldown_related_objects",
        "probe_coverage",
        "explain_evidence_basis"
      ];

    default:
      return [];
  }
}
/**
 * Lists the overclaim guard flags for a business fact family.
 *
 * The list always starts with `"no_raw_model_claims"`; family-specific flags
 * follow. Unknown families (and `null`) get only the base flag.
 */
function forbiddenOverclaimFlagsFor(family: string | null): string[] {
  const flags: string[] = ["no_raw_model_claims"];
  switch (family) {
    case "schema_surface":
      flags.push("no_fake_schema_surface");
      break;
    case "entity_grounding":
      flags.push("no_unresolved_entity_claim");
      break;
    case "activity_lifecycle":
      flags.push("no_legal_age_claim_without_evidence");
      break;
    case "business_overview":
      flags.push(
        "no_unchecked_fact_totals",
        "no_unchecked_business_health_claim",
        "no_profit_or_margin_claim_without_evidence"
      );
      break;
    case "value_flow":
    case "movement_evidence":
    case "document_evidence":
      flags.push("no_unchecked_fact_totals");
      break;
    case "inventory_stock_snapshot":
      flags.push("no_unchecked_stock_snapshot");
      break;
    case "inventory_purchase_provenance":
    case "inventory_supplier_overlap":
      flags.push("no_unproven_supplier_attribution");
      break;
    case "inventory_sale_trace":
      flags.push("no_unproven_buyer_or_sale_trace");
      break;
    default:
      break;
  }
  return flags;
}
/**
 * Builds the data-need graph contract for one assistant turn.
 *
 * The builder is pure: it lowercases the text inputs, derives the business
 * fact family, aggregation / comparison / ranking / time-scope needs, works
 * out which clarifications are still missing, and records reason codes for
 * each decision.
 *
 * Ranking resolution: an explicit "no ranking" phrase suppresses ranking
 * entirely; otherwise ranking detected in the utterance wins over the seeded
 * value from `turnMeaning`.
 *
 * Clarification gaps (at most one of organization/subject, plus period or
 * as-of date, plus lane choice):
 * - `lane_family_choice` when the turn asks to resolve the next lane;
 * - `organization` when an open-scope value flow, a metadata-scoped open
 *   lane, or a business overview has no subject and no organization scope;
 * - `subject` when no subject is given and none of the subject-free modes apply;
 * - `period` / `as_of_date` when the time scope requires one and no explicit
 *   date scope was given.
 *
 * @param input Semantic data need, raw utterance and optional turn meaning.
 * @returns A fully populated `AssistantMcpDiscoveryDataNeedGraphContract`.
 */
export function buildAssistantMcpDiscoveryDataNeedGraph(
  input: BuildAssistantMcpDiscoveryDataNeedGraphInput
): AssistantMcpDiscoveryDataNeedGraphContract {
  // --- Normalize inputs -----------------------------------------------------
  const semanticDataNeed = lower(input.semanticDataNeed);
  const turnMeaning = input.turnMeaning ?? null;
  const domain = lower(turnMeaning?.asked_domain_family);
  const action = lower(turnMeaning?.asked_action_family);
  const unsupported = lower(turnMeaning?.unsupported_but_understood_family);
  const rawUtterance = lower(input.rawUtterance);
  // Ranking and direct-answer hints are searched across every available message form.
  const rawQuestionSignal = lower([input.rawUtterance, turnMeaning?.raw_message, turnMeaning?.effective_message].join(" "));
  const aggregationAxis = lower(turnMeaning?.asked_aggregation_axis);
  const seededRankingNeed = toNonEmptyString(turnMeaning?.seeded_ranking_need);
  const explicitDateScope = toNonEmptyString(turnMeaning?.explicit_date_scope);
  const explicitOrganizationScope = toNonEmptyString(turnMeaning?.explicit_organization_scope);
  const metadataScopeHint = toNonEmptyString(turnMeaning?.metadata_scope_hint);
  const subjectResolutionOptional = turnMeaning?.subject_resolution_optional === true;
  const subjectCandidates = (turnMeaning?.explicit_entity_candidates ?? [])
    .map((item) => toNonEmptyString(item))
    .filter((item): item is string => Boolean(item));

  // --- Derive needs ---------------------------------------------------------
  const businessFactFamily = businessFactFamilyFor({
    semanticDataNeed,
    domain,
    action,
    unsupported
  });
  const aggregationNeed = aggregationNeedFor(aggregationAxis);
  const comparisonNeed = comparisonNeedFor(action);
  const allTimeScopeHint = hasAllTimeScopeHint(rawUtterance);
  // A subject-scoped incoming-vs-outgoing question without dates defaults to all time.
  const subjectScopedBidirectionalAllTime =
    businessFactFamily === "value_flow" &&
    comparisonNeed === "incoming_vs_outgoing" &&
    subjectCandidates.length > 0 &&
    !explicitDateScope;

  // Explicit "no ranking / not an overview" wording. The short negations
  // ("without ...", "not ...") require a Unicode-aware word start so that a
  // longer word ending in the same letters directly before "top"/"overview"
  // does not suppress a genuine ranking request. The "excluding" stem accepts
  // any Unicode word ending (`\w` would only consume ASCII).
  const suppressRankingNeed =
    suppressRankingNeedFromRawUtterance(rawQuestionSignal) ||
    /(?:(?<![\p{L}\p{N}_])(?:\u0431\u0435\u0437\s+\u0442\u043e\u043f(?:\u043e\u0432|\u0430)?|\u043d\u0435\s+\u0442\u043e\u043f|\u043d\u0435\s+\u043e\u0431\u0437\u043e\u0440|\u0431\u0435\u0437\s+\u0440\u0435\u0439\u0442\u0438\u043d\u0433\u0430)|\u043f\u0440\u043e\u0441\u0442\u043e\s+\u0434\u0435\u043d\p{L}+|\u0438\u0441\u043a\u043b\u044e\u0447[\p{L}\p{N}_]*\s+\u0442\u043e\u043f)/iu.test(
      rawQuestionSignal
    );
  const rawRankingNeed = rankingNeedFromRawUtterance(rawQuestionSignal);
  const rankingNeed = suppressRankingNeed ? null : rawRankingNeed ?? seededRankingNeed;
  const directBusinessOverviewMoneyAnswerHint = hasBusinessOverviewDirectMoneyAnswerHint({
    family: businessFactFamily,
    rawUtterance: rawQuestionSignal,
    rankingNeed
  });
  const oneSidedOpenScopeTotalHint = hasOpenScopeOneSidedValueTotalHintUtf8Safe(rawUtterance, action);

  // --- Subject-free modes ---------------------------------------------------
  const openScopeWithoutSubject =
    subjectCandidates.length === 0 &&
    allowsOpenScopeWithoutSubject({
      family: businessFactFamily,
      action,
      organizationScope: explicitOrganizationScope,
      comparisonNeed,
      rankingNeed,
      oneSidedOpenScopeTotalHint
    });
  const metadataScopedOpenLaneWithoutSubject =
    subjectCandidates.length === 0 &&
    allowsMetadataScopedOpenLaneWithoutSubject({
      family: businessFactFamily,
      subjectResolutionOptional
    });
  const inventoryStockSnapshotWithoutSubject =
    subjectCandidates.length === 0 &&
    businessFactFamily === "inventory_stock_snapshot";

  // --- Clarification gaps ---------------------------------------------------
  const clarificationGaps: string[] = [];
  if (unsupported === "metadata_lane_choice_clarification" || action === "resolve_next_lane") {
    pushUnique(clarificationGaps, "lane_family_choice");
  }
  if (
    subjectCandidates.length === 0 &&
    businessFactFamily === "value_flow" &&
    openScopeWithoutSubject &&
    !explicitOrganizationScope
  ) {
    pushUnique(clarificationGaps, "organization");
  } else if (
    subjectCandidates.length === 0 &&
    metadataScopedOpenLaneWithoutSubject &&
    !explicitOrganizationScope
  ) {
    pushUnique(clarificationGaps, "organization");
  } else if (
    subjectCandidates.length === 0 &&
    businessFactFamily === "business_overview" &&
    !explicitOrganizationScope
  ) {
    pushUnique(clarificationGaps, "organization");
  } else if (
    subjectCandidates.length === 0 &&
    businessFactFamily !== "schema_surface" &&
    businessFactFamily !== "business_overview" &&
    !openScopeWithoutSubject &&
    !metadataScopedOpenLaneWithoutSubject &&
    !inventoryStockSnapshotWithoutSubject
  ) {
    pushUnique(clarificationGaps, "subject");
  }
  const timeScopeNeed = timeScopeNeedFor({
    family: businessFactFamily,
    explicitDateScope,
    allTimeScopeHint,
    subjectScopedBidirectionalAllTime
  });
  if (timeScopeNeed === "period_required" && !explicitDateScope) {
    pushUnique(clarificationGaps, "period");
  }
  if (timeScopeNeed === "as_of_date_required" && !explicitDateScope) {
    pushUnique(clarificationGaps, "as_of_date");
  }

  // --- Decomposition --------------------------------------------------------
  const decompositionCandidates = decompositionCandidatesFor({
    family: businessFactFamily,
    action,
    aggregationNeed,
    comparisonNeed,
    rankingNeed,
    openScopeWithoutSubject,
    metadataScopedOpenLaneWithoutSubject
  });

  // --- Reason codes ---------------------------------------------------------
  const reasonCodes: string[] = [];
  pushReason(reasonCodes, "data_need_graph_built");
  if (businessFactFamily) {
    pushReason(reasonCodes, `data_need_graph_family_${businessFactFamily}`);
  } else {
    pushReason(reasonCodes, "data_need_graph_family_unknown");
  }
  if (aggregationNeed) {
    pushReason(reasonCodes, `data_need_graph_aggregation_${aggregationNeed}`);
  }
  if (rankingNeed) {
    pushReason(reasonCodes, `data_need_graph_ranking_${rankingNeed}`);
  }
  if (comparisonNeed) {
    pushReason(reasonCodes, `data_need_graph_comparison_${comparisonNeed}`);
  }
  if (openScopeWithoutSubject && !rankingNeed && !comparisonNeed) {
    pushReason(reasonCodes, "data_need_graph_open_scope_total_without_subject");
  }
  if (metadataScopedOpenLaneWithoutSubject) {
    pushReason(reasonCodes, "data_need_graph_metadata_scoped_open_lane_without_subject");
  }
  if (allTimeScopeHint) {
    pushReason(reasonCodes, "data_need_graph_all_time_scope_hint");
  }
  if (subjectScopedBidirectionalAllTime) {
    pushReason(reasonCodes, "data_need_graph_subject_bidirectional_value_flow_defaults_to_all_time_scope");
  }
  if (businessFactFamily === "business_overview" && !explicitDateScope) {
    pushReason(reasonCodes, "data_need_graph_business_overview_defaults_to_all_time_scope");
  }
  if (directBusinessOverviewMoneyAnswerHint) {
    pushReason(reasonCodes, "data_need_graph_business_overview_direct_money_answer");
  }
  if (clarificationGaps.includes("organization")) {
    pushReason(reasonCodes, "data_need_graph_open_scope_total_needs_organization");
  }
  if (clarificationGaps.length > 0) {
    pushReason(reasonCodes, "data_need_graph_has_clarification_gaps");
  }

  return {
    schema_version: ASSISTANT_MCP_DISCOVERY_DATA_NEED_GRAPH_SCHEMA_VERSION,
    policy_owner: "assistantMcpDiscoveryDataNeedGraph",
    subject_candidates: subjectCandidates,
    metadata_scope_hint: metadataScopeHint,
    // Emitted only when true so the optional field stays absent otherwise.
    subject_resolution_optional: subjectResolutionOptional || undefined,
    business_fact_family: businessFactFamily,
    action_family: toNonEmptyString(turnMeaning?.asked_action_family),
    aggregation_need: aggregationNeed,
    time_scope_need: timeScopeNeed,
    comparison_need: comparisonNeed,
    ranking_need: rankingNeed,
    proof_expectation: proofExpectationFor({
      family: businessFactFamily,
      clarificationGaps
    }),
    clarification_gaps: clarificationGaps,
    decomposition_candidates: decompositionCandidates,
    forbidden_overclaim_flags: forbiddenOverclaimFlagsFor(businessFactFamily),
    reason_codes: reasonCodes
  };
}