NODEDC_1C/llm_normalizer/backend/src/services/assistantMcpDiscoveryTurnIn...

279 lines
9.8 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import type { AssistantMcpDiscoveryTurnMeaningRef } from "./assistantMcpDiscoveryPolicy";
/** Schema version tag written into the `schema_version` field of every contract built by this adapter. */
export const ASSISTANT_MCP_DISCOVERY_TURN_INPUT_SCHEMA_VERSION =
"assistant_mcp_discovery_turn_input_v1" as const;
/**
 * Adapter outcome for a single turn, as assigned by `buildAssistantMcpDiscoveryTurnInput`:
 * - `"ready"`: discovery should run and at least one turn-meaning field was populated;
 * - `"needs_more_context"`: discovery should run but no turn-meaning field was populated;
 * - `"not_applicable"`: discovery should not run for this turn.
 */
export type AssistantMcpDiscoveryTurnInputStatus = "ready" | "needs_more_context" | "not_applicable";
/**
 * Which input the adapter reports as the origin of its signal. The builder picks the first
 * that applies, in this order: a turn-meaning object was supplied, a predecompose contract
 * object was supplied, a lifecycle phrase was found in the message text, otherwise `"none"`.
 */
export type AssistantMcpDiscoveryTurnInputSource =
| "assistant_turn_meaning"
| "predecompose_contract"
| "raw_text"
| "none";
/** Loosely typed inputs accepted by `buildAssistantMcpDiscoveryTurnInput`; every field is optional. */
export interface BuildAssistantMcpDiscoveryTurnInputAdapterInput {
// Read for: asked_domain_family, asked_action_family, unsupported_but_understood_family,
// explicit_intent_candidate, explicit_entity_candidates, stale_replay_forbidden.
assistantTurnMeaning?: Record<string, unknown> | null;
// Read for: entities.counterparty, entities.organization,
// period.as_of_date, period.period_from, period.period_to.
predecomposeContract?: Record<string, unknown> | null;
// Concatenated with effectiveMessage and scanned for lifecycle phrases.
userMessage?: string | null;
// Concatenated with userMessage and scanned for lifecycle phrases.
effectiveMessage?: string | null;
}
/** Result returned by `buildAssistantMcpDiscoveryTurnInput`. */
export interface AssistantMcpDiscoveryTurnInputContract {
// Always the module-level schema version constant.
schema_version: typeof ASSISTANT_MCP_DISCOVERY_TURN_INPUT_SCHEMA_VERSION;
// Fixed literal identifying this adapter as the producer.
policy_owner: "assistantMcpDiscoveryTurnInputAdapter";
// "not_applicable" when discovery should not run; otherwise "ready" or "needs_more_context".
adapter_status: AssistantMcpDiscoveryTurnInputStatus;
// True when the builder decided discovery should run for this turn.
should_run_discovery: boolean;
// Label of the evidence kind needed; always null when discovery does not run.
semantic_data_need: string | null;
// Populated turn-meaning fields; null unless discovery runs and at least one field is set.
turn_meaning_ref: AssistantMcpDiscoveryTurnMeaningRef | null;
// Which input the signal is attributed to.
source_signal: AssistantMcpDiscoveryTurnInputSource;
// Normalized, de-duplicated reason codes in the order they were recorded.
reason_codes: string[];
}
/**
 * Narrows an arbitrary value to a string-keyed record.
 *
 * @param value - Any value.
 * @returns The same reference, typed as a record, when `value` is a non-null object that is
 *   not an array; `null` for everything else (primitives, functions, arrays, null, undefined).
 */
function toRecordObject(value: unknown): Record<string, unknown> | null {
  const isObjectLike = typeof value === "object" && value !== null;
  if (isObjectLike && !Array.isArray(value)) {
    return value as Record<string, unknown>;
  }
  return null;
}
/**
 * Converts a loosely typed value to a trimmed, non-empty string.
 *
 * @param value - Any value; primitives are stringified with `String()`.
 * @returns The trimmed text, or `null` when the value is null/undefined, trims to an empty
 *   string, or is an object whose only string form is the generic "[object Object]" placeholder.
 */
function toNonEmptyString(value: unknown): string | null {
  if (value === null || value === undefined) {
    return null;
  }
  const text = String(value).trim();
  // A plain object stringifies to "[object Object]", which carries no information and would
  // otherwise be reported as a real domain/entity/date value. Treat it as "no value".
  // A genuine string input with that exact text is left alone (typeof is "string").
  if (typeof value === "object" && text === "[object Object]") {
    return null;
  }
  return text.length > 0 ? text : null;
}
/**
 * Canonicalizes a free-form reason into a compact code.
 *
 * Steps: trim, replace every run of characters other than letters, digits, `_`, `.`, `:`
 * and `-` with a single underscore, strip leading/trailing underscores, lowercase, and cap
 * the result at 120 UTF-16 code units.
 *
 * @param value - Raw reason text.
 * @returns The normalized code, or `null` when nothing remains after normalization.
 */
function normalizeReasonCode(value: string): string | null {
  const collapsed = value.trim().replace(/[^\p{L}\p{N}_.:-]+/gu, "_");
  const stripped = collapsed.replace(/^_+|_+$/g, "");
  const code = stripped.toLowerCase();
  if (code.length === 0) {
    return null;
  }
  return code.slice(0, 120);
}
/**
 * Appends a reason code to `target` after normalizing it with `normalizeReasonCode`.
 * Nothing is appended when normalization yields no code or the code is already present.
 *
 * @param target - Array mutated in place.
 * @param value - Raw reason text.
 */
function pushReason(target: string[], value: string): void {
  const code = normalizeReasonCode(value);
  if (!code || target.includes(code)) {
    return;
  }
  target.push(code);
}
/**
 * Appends the trimmed string form of `value` to `target` unless it is empty or already listed.
 *
 * @param target - Array mutated in place.
 * @param value - Any value; converted with `toNonEmptyString`.
 */
function pushUnique(target: string[], value: unknown): void {
  const entry = toNonEmptyString(value);
  if (!entry || target.includes(entry)) {
    return;
  }
  target.push(entry);
}
/**
 * Produces a lowercase, whitespace-compacted string form of any value.
 *
 * @param value - Any value; null and undefined become the empty string.
 * @returns Lowercased text with every whitespace run collapsed to one space and the ends trimmed.
 */
function compactLower(value: unknown): string {
  const text = value === null || value === undefined ? "" : String(value);
  const lowered = text.toLowerCase();
  return lowered.replace(/\s+/g, " ").trim();
}
/**
 * Extracts a display string from an entity candidate.
 *
 * The candidate's own string form is used when it is non-empty and not the generic
 * "[object Object]" placeholder. Otherwise, if the candidate is a record, the fields
 * `value`, `name`, `ref` and `text` are tried in that order.
 *
 * @param value - Candidate of unknown shape.
 * @returns The first usable string, or `null` when none is found.
 */
function candidateValue(value: unknown): string | null {
  const asText = toNonEmptyString(value);
  if (asText && asText !== "[object Object]") {
    return asText;
  }

  const record = toRecordObject(value);
  if (record === null) {
    return null;
  }

  // Field order is the lookup priority.
  for (const key of ["value", "name", "ref", "text"]) {
    const found = toNonEmptyString(record[key]);
    if (found !== null) {
      return found;
    }
  }
  return null;
}
/**
 * Builds a de-duplicated list of entity candidate strings.
 *
 * @param value - Either an array of candidates or a single candidate.
 * @returns Usable candidate strings in input order; empty when nothing usable is found.
 */
function collectEntityCandidates(value: unknown): string[] {
  const items: unknown[] = Array.isArray(value) ? value : [value];
  const collected: string[] = [];
  for (const item of items) {
    pushUnique(collected, candidateValue(item));
  }
  return collected;
}
/**
 * Reads the counterparty and organization names from the `entities` section of a
 * predecompose contract.
 *
 * @param predecompose - The contract record, or `null`.
 * @returns Both names as trimmed strings; each is `null` when absent or empty.
 */
function collectPredecomposeEntities(predecompose: Record<string, unknown> | null): {
  counterparty: string | null;
  organization: string | null;
} {
  const entityBag = toRecordObject(predecompose?.entities);
  if (entityBag === null) {
    return { counterparty: null, organization: null };
  }
  const counterparty = toNonEmptyString(entityBag.counterparty);
  const organization = toNonEmptyString(entityBag.organization);
  return { counterparty, organization };
}
/**
 * Derives a single date-scope string from the `period` section of a predecompose contract.
 *
 * Resolution order:
 * 1. `as_of_date`, when present;
 * 2. the four-digit year, when the period runs from `YYYY-01-01` to `YYYY-12-31` of the same year;
 * 3. `"<from>..<to>"`, when both bounds are present;
 * 4. whichever single bound is present;
 * 5. `null`.
 *
 * @param predecompose - The contract record, or `null`.
 * @returns The date scope, or `null` when the period carries no usable value.
 */
function collectDateScope(predecompose: Record<string, unknown> | null): string | null {
  const period = toRecordObject(predecompose?.period);
  const asOf = toNonEmptyString(period?.as_of_date);
  const from = toNonEmptyString(period?.period_from);
  const to = toNonEmptyString(period?.period_to);

  if (asOf) {
    return asOf;
  }

  // A period covering exactly one calendar year is reported as that year alone.
  const fromYear = from === null ? null : /^(\d{4})-01-01$/.exec(from);
  const toYear = to === null ? null : /^(\d{4})-12-31$/.exec(to);
  if (fromYear && toYear && fromYear[1] === toYear[1]) {
    return fromYear[1];
  }

  if (from && to) {
    return `${from}..${to}`;
  }
  return from !== null ? from : to;
}
/**
 * Reports whether the text contains a phrase asking about how long something has existed
 * or been active.
 *
 * The pattern covers Russian phrasings (roughly: "how many years", "how long ago", "age",
 * "first activity", "when started", "when appeared") and the English terms `lifecycle`,
 * `activity duration`, `business age` and `how long`. Matching is case-insensitive.
 *
 * @param text - Text to scan.
 * @returns `true` when any of the phrases occurs anywhere in the text.
 */
function hasLifecycleSignal(text: string): boolean {
  const lifecyclePhrases =
    /(?:сколько\s+лет|как\s+давно|давно\s+ли|возраст|перв(?:ая|ый)\s+актив|когда\s+начал|когда\s+появ|lifecycle|activity\s+duration|business\s+age|how\s+long)/iu;
  return lifecyclePhrases.test(text);
}
/**
 * Maps the turn's domain/action/unsupported family labels to the kind of evidence that
 * discovery should look for.
 *
 * The three labels are joined and lowercased, then matched against keyword groups in
 * priority order: lifecycle, value-flow, document, metadata.
 *
 * @param input.domain - Asked domain family, or `null`.
 * @param input.action - Asked action family, or `null`.
 * @param input.unsupported - Unsupported-but-understood family, or `null`.
 * @param input.lifecycleSignal - When `true`, the lifecycle need is returned unconditionally.
 * @returns The evidence label, or `null` when no keyword group matches.
 */
function semanticNeedFor(input: {
  domain: string | null;
  action: string | null;
  unsupported: string | null;
  lifecycleSignal: boolean;
}): string | null {
  const combined = [input.domain, input.action, input.unsupported]
    .map((part) => part ?? "")
    .join(" ")
    .toLowerCase();

  // `age` must be a standalone token (not flanked by letters): a bare substring match would
  // classify unrelated labels such as "wages", "storage", "message" or "agent" as lifecycle.
  // Underscores and spaces still count as separators, so "business_age" keeps matching.
  if (
    input.lifecycleSignal ||
    /(?:lifecycle|activity|duration|(?<!\p{L})age(?!\p{L}))/iu.test(combined)
  ) {
    return "counterparty lifecycle evidence";
  }
  if (/(?:turnover|revenue|payment|payout|value)/iu.test(combined)) {
    return "counterparty value-flow evidence";
  }
  if (/(?:document|documents|list_documents)/iu.test(combined)) {
    return "document evidence";
  }
  if (/(?:metadata|schema|catalog)/iu.test(combined)) {
    return "1C metadata evidence";
  }
  return null;
}
/**
 * Decides whether MCP discovery should run for the turn.
 *
 * Discovery runs when a lifecycle signal was detected or the turn has an
 * unsupported-but-understood family. It also runs when a semantic data need was derived
 * and no explicit intent candidate is present.
 *
 * @param input.unsupported - Unsupported-but-understood family, or `null`.
 * @param input.lifecycleSignal - Whether a lifecycle phrase was detected.
 * @param input.semanticDataNeed - Derived evidence label, or `null`.
 * @param input.explicitIntentCandidate - Explicit intent candidate, or `null`.
 * @returns `true` when discovery should run.
 */
function shouldRunDiscovery(input: {
  unsupported: string | null;
  lifecycleSignal: boolean;
  semanticDataNeed: string | null;
  explicitIntentCandidate: string | null;
}): boolean {
  const forcedBySignal = input.lifecycleSignal || Boolean(input.unsupported);
  const needWithoutIntent = !input.explicitIntentCandidate && Boolean(input.semanticDataNeed);
  return forcedBySignal || needWithoutIntent;
}
/**
 * Builds the MCP discovery turn-input contract from the loosely typed signals of one turn.
 *
 * The function:
 * 1. scans the combined user/effective message text for a lifecycle phrase;
 * 2. reads domain, action, unsupported family and explicit intent from the turn meaning;
 * 3. gathers entity candidates from the turn meaning plus the predecompose counterparty;
 * 4. assembles a turn-meaning reference containing only the populated fields (a lifecycle
 *    signal overrides domain and action with the lifecycle families);
 * 5. decides whether discovery should run and records reason codes explaining the decision.
 *
 * @param input - Turn meaning, predecompose contract and message texts; all optional.
 * @returns The contract. `semantic_data_need` and `turn_meaning_ref` are `null` whenever
 *   discovery should not run.
 */
export function buildAssistantMcpDiscoveryTurnInput(
  input: BuildAssistantMcpDiscoveryTurnInputAdapterInput
): AssistantMcpDiscoveryTurnInputContract {
  const meaning = toRecordObject(input.assistantTurnMeaning);
  const predecompose = toRecordObject(input.predecomposeContract);
  const { counterparty, organization } = collectPredecomposeEntities(predecompose);

  const messageText = compactLower(`${input.userMessage ?? ""} ${input.effectiveMessage ?? ""}`);
  const lifecycleSignal = hasLifecycleSignal(messageText);

  const domainFamily = toNonEmptyString(meaning?.asked_domain_family);
  const actionFamily = toNonEmptyString(meaning?.asked_action_family);
  const unsupported = toNonEmptyString(meaning?.unsupported_but_understood_family);
  const explicitIntentCandidate = toNonEmptyString(meaning?.explicit_intent_candidate);
  const semanticDataNeed = semanticNeedFor({
    domain: domainFamily,
    action: actionFamily,
    unsupported,
    lifecycleSignal
  });

  const entityCandidates = collectEntityCandidates(meaning?.explicit_entity_candidates);
  pushUnique(entityCandidates, counterparty);

  // A lifecycle signal in the message text takes precedence over the reported families.
  const effectiveDomain = lifecycleSignal ? "counterparty_lifecycle" : domainFamily;
  const effectiveAction = lifecycleSignal ? "activity_duration" : actionFamily;
  const dateScope = collectDateScope(predecompose);
  const effectiveUnsupported = unsupported ?? (lifecycleSignal ? "counterparty_lifecycle" : null);
  const staleReplayForbidden = Boolean(
    meaning?.stale_replay_forbidden || unsupported || lifecycleSignal
  );

  // Only populated fields are copied, so an empty reference means "no turn meaning".
  // Insertion order is kept stable for serialized output.
  const turnMeaningRef: AssistantMcpDiscoveryTurnMeaningRef = {};
  if (toNonEmptyString(effectiveDomain)) {
    turnMeaningRef.asked_domain_family = effectiveDomain;
  }
  if (toNonEmptyString(effectiveAction)) {
    turnMeaningRef.asked_action_family = effectiveAction;
  }
  if (entityCandidates.length > 0) {
    turnMeaningRef.explicit_entity_candidates = entityCandidates;
  }
  if (toNonEmptyString(organization)) {
    turnMeaningRef.explicit_organization_scope = organization;
  }
  if (toNonEmptyString(dateScope)) {
    turnMeaningRef.explicit_date_scope = dateScope;
  }
  if (toNonEmptyString(effectiveUnsupported)) {
    turnMeaningRef.unsupported_but_understood_family = effectiveUnsupported;
  }
  if (staleReplayForbidden) {
    turnMeaningRef.stale_replay_forbidden = true;
  }

  const runDiscovery = shouldRunDiscovery({
    unsupported,
    lifecycleSignal,
    semanticDataNeed,
    explicitIntentCandidate
  });
  const hasTurnMeaning = Object.keys(turnMeaningRef).length > 0;

  let sourceSignal: AssistantMcpDiscoveryTurnInputSource = "none";
  if (meaning) {
    sourceSignal = "assistant_turn_meaning";
  } else if (predecompose) {
    sourceSignal = "predecompose_contract";
  } else if (lifecycleSignal) {
    sourceSignal = "raw_text";
  }

  // Reason codes are recorded in this fixed order; each applies only when its flag is true.
  const reasonTable: Array<[boolean, string]> = [
    [lifecycleSignal, "mcp_discovery_lifecycle_signal_detected"],
    [Boolean(unsupported), "mcp_discovery_unsupported_but_understood_turn"],
    [Boolean(counterparty), "mcp_discovery_counterparty_from_predecompose"],
    [entityCandidates.length > 0, "mcp_discovery_entity_scope_available"],
    [!runDiscovery, "mcp_discovery_not_applicable_for_supported_exact_turn"],
    [runDiscovery && !hasTurnMeaning, "mcp_discovery_turn_meaning_missing"]
  ];
  const reasonCodes: string[] = [];
  for (const [applies, code] of reasonTable) {
    if (applies) {
      pushReason(reasonCodes, code);
    }
  }

  let adapterStatus: AssistantMcpDiscoveryTurnInputStatus;
  if (!runDiscovery) {
    adapterStatus = "not_applicable";
  } else if (hasTurnMeaning) {
    adapterStatus = "ready";
  } else {
    adapterStatus = "needs_more_context";
  }

  return {
    schema_version: ASSISTANT_MCP_DISCOVERY_TURN_INPUT_SCHEMA_VERSION,
    policy_owner: "assistantMcpDiscoveryTurnInputAdapter",
    adapter_status: adapterStatus,
    should_run_discovery: runDiscovery,
    semantic_data_need: runDiscovery ? semanticDataNeed : null,
    turn_meaning_ref: runDiscovery && hasTurnMeaning ? turnMeaningRef : null,
    source_signal: sourceSignal,
    reason_codes: reasonCodes
  };
}