NODEDC_1C/llm_normalizer/backend/src/services/assistantMcpDiscoveryDataNe...

451 lines
17 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import type { AssistantMcpDiscoveryTurnMeaningRef } from "./assistantMcpDiscoveryPolicy";
/**
 * Schema version tag for the data-need graph contract.
 * `buildAssistantMcpDiscoveryDataNeedGraph` writes this value into the
 * `schema_version` field of every contract it returns.
 */
export const ASSISTANT_MCP_DISCOVERY_DATA_NEED_GRAPH_SCHEMA_VERSION =
"assistant_data_need_graph_v1" as const;
/**
 * Kind of proof a data need is expected to be backed by.
 *
 * As assigned by `proofExpectationFor` in this module:
 * - `clarification_required` whenever at least one clarification gap exists;
 * - `schema_surface` / `entity_grounding` for the families of the same name;
 * - `bounded_inference` for the `activity_lifecycle` family;
 * - `coverage_checked_fact` for every other case.
 */
export type AssistantMcpDiscoveryDataNeedProofExpectation =
| "schema_surface"
| "entity_grounding"
| "coverage_checked_fact"
| "bounded_inference"
| "clarification_required";
/**
 * Result contract produced by `buildAssistantMcpDiscoveryDataNeedGraph`.
 * Field notes below describe how that builder populates each field.
 */
export interface AssistantMcpDiscoveryDataNeedGraphContract {
// Always the module's schema version constant.
schema_version: typeof ASSISTANT_MCP_DISCOVERY_DATA_NEED_GRAPH_SCHEMA_VERSION;
// Fixed owner tag.
policy_owner: "assistantMcpDiscoveryDataNeedGraph";
// Trimmed, non-empty entries of `turnMeaning.explicit_entity_candidates`.
subject_candidates: string[];
// Keyword-derived family (e.g. "value_flow", "schema_surface"), or null when no keyword matched.
business_fact_family: string | null;
// Trimmed `turnMeaning.asked_action_family` (original casing), or null when empty.
action_family: string | null;
// `by_<axis>` built from the lowercased aggregation axis, or null when no axis was given.
aggregation_need: string | null;
// One of "explicit_period", "all_time_scope", "period_required", "open_activity_window", or null.
time_scope_need: string | null;
// "incoming_vs_outgoing" when the action is "net_value_flow", otherwise null.
comparison_need: string | null;
// "top_desc" / "bottom_asc" detected in the utterance, else the seeded ranking need, else null.
ranking_need: string | null;
// Proof level derived from the family and from whether clarification gaps exist.
proof_expectation: AssistantMcpDiscoveryDataNeedProofExpectation;
// Missing inputs; the builder emits "lane_family_choice", "organization", "subject" and/or "period".
clarification_gaps: string[];
// Ordered step names suggested for satisfying the data need.
decomposition_candidates: string[];
// Overclaim guards; always contains "no_raw_model_claims" plus a family-specific flag when applicable.
forbidden_overclaim_flags: string[];
// Normalized, de-duplicated reason codes describing how the graph was derived.
reason_codes: string[];
}
/**
 * Input accepted by `buildAssistantMcpDiscoveryDataNeedGraph`.
 * Every field is optional; missing values are treated as empty by the builder.
 */
export interface BuildAssistantMcpDiscoveryDataNeedGraphInput {
// Free-text description of the data need; lowercased and scanned for family keywords.
semanticDataNeed?: string | null;
// User utterance; lowercased and scanned for ranking, all-time and one-sided total hints.
rawUtterance?: string | null;
// Structured turn meaning (domain/action families, aggregation axis, explicit scopes, entity candidates).
turnMeaning?: AssistantMcpDiscoveryTurnMeaningRef | null;
}
/**
 * Converts an arbitrary value to its trimmed string form.
 *
 * @param value - Any value; `null` and `undefined` are treated as absent.
 * @returns The trimmed string, or `null` when the value is absent or trims to an empty string.
 */
function toNonEmptyString(value: unknown): string | null {
  // `== null` deliberately covers both null and undefined.
  if (value == null) {
    return null;
  }
  const trimmed = String(value).trim();
  return trimmed === "" ? null : trimmed;
}
/**
 * Normalizes a value for case-insensitive keyword matching.
 *
 * @param value - Any value; `null` and `undefined` become the empty string.
 * @returns The string form of the value, trimmed and lowercased.
 */
function lower(value: unknown): string {
  const text = value === null || value === undefined ? "" : String(value);
  return text.trim().toLowerCase();
}
/**
 * Normalizes free text into a reason-code token.
 *
 * Steps, in order: trim, collapse every run of characters other than letters,
 * digits, `_`, `.`, `:` and `-` into a single underscore, strip leading and
 * trailing underscores, lowercase, and cap the length at 120 characters.
 *
 * @param value - Raw reason text.
 * @returns The normalized code, or `null` when nothing remains after normalization.
 */
function normalizeReasonCode(value: string): string | null {
  const maxLength = 120;
  const collapsed = value.trim().replace(/[^\p{L}\p{N}_.:-]+/gu, "_");
  const withoutEdgeUnderscores = collapsed.replace(/^_+|_+$/g, "");
  const code = withoutEdgeUnderscores.toLowerCase();
  if (code.length === 0) {
    return null;
  }
  return code.slice(0, maxLength);
}
/**
 * Appends a reason code to `target` after normalizing it.
 * Values that normalize to nothing, or that are already present, are ignored.
 *
 * @param target - Array mutated in place.
 * @param value - Raw reason text to normalize and append.
 */
function pushReason(target: string[], value: string): void {
  const code = normalizeReasonCode(value);
  if (!code || target.includes(code)) {
    return;
  }
  target.push(code);
}
/**
 * Appends the trimmed form of `value` to `target` unless it is empty or already present.
 *
 * @param target - Array mutated in place.
 * @param value - Candidate entry; `null`, `undefined` and blank strings are ignored.
 */
function pushUnique(target: string[], value: string | null | undefined): void {
  const entry = toNonEmptyString(value);
  if (!entry || target.includes(entry)) {
    return;
  }
  target.push(entry);
}
/**
 * Derives the business fact family from keyword hits in the combined input text.
 *
 * The four inputs are joined with single spaces and searched by substring.
 * Rules are evaluated in order and the first rule with any matching keyword wins.
 *
 * @param input - Text signals; the builder in this module passes them already lowercased.
 * @returns The matched family name, or `null` when no keyword is found.
 */
function businessFactFamilyFor(input: {
  semanticDataNeed: string;
  domain: string;
  action: string;
  unsupported: string;
}): string | null {
  const haystack = `${input.semanticDataNeed} ${input.domain} ${input.action} ${input.unsupported}`.trim();

  // Ordered (family, keywords) rules; earlier rules take precedence.
  const rules: ReadonlyArray<readonly [string, readonly string[]]> = [
    ["schema_surface", ["metadata lane clarification", "metadata"]],
    ["entity_grounding", ["entity discovery", "entity_resolution"]],
    ["activity_lifecycle", ["lifecycle", "activity"]],
    ["movement_evidence", ["movement"]],
    ["document_evidence", ["document"]],
    ["value_flow", ["value-flow", "turnover", "payout", "net"]]
  ];

  for (const [family, keywords] of rules) {
    if (keywords.some((keyword) => haystack.includes(keyword))) {
      return family;
    }
  }
  return null;
}
/**
 * Maps an aggregation axis to its aggregation-need token.
 *
 * @param axis - Axis name (for example `"month"`); empty means no aggregation was requested.
 * @returns `by_<axis>`, or `null` for an empty axis.
 */
function aggregationNeedFor(axis: string): string | null {
  return axis ? `by_${axis}` : null;
}
/**
 * Detects phrases asking for the whole available history, in Russian or English.
 *
 * The Russian alternatives are written as `\u` escapes and decode to
 * "за всё/все время", "за весь период", "за всю историю/истории" and "за любой период".
 * Matching is case-insensitive.
 *
 * @param rawUtterance - User utterance text.
 * @returns `true` when an all-time phrase is present; `false` otherwise or for empty input.
 */
function hasAllTimeScopeHint(rawUtterance: string): boolean {
  const allTimePattern =
    /(?:\u0437\u0430\s+\u0432\u0441[\u0435\u0451]\s+\u0432\u0440\u0435\u043c\u044f|\u0437\u0430\s+\u0432\u0435\u0441\u044c\s+\u043f\u0435\u0440\u0438\u043e\u0434|\u0437\u0430\s+\u0432\u0441\u044e\s+\u0438\u0441\u0442\u043e\u0440\u0438(?:\u044e|\u0438)|\u0437\u0430\s+\u043b\u044e\u0431\u043e\u0439\s+\u043f\u0435\u0440\u0438\u043e\u0434|for\s+all\s+time|all\s+time|entire\s+period|full\s+history|any\s+period)/iu;
  return Boolean(rawUtterance) && allTimePattern.test(rawUtterance);
}
/**
 * Decides what kind of time scope the data need requires.
 *
 * @param input.family - Business fact family, or `null` when unknown.
 * @param input.explicitDateScope - Explicit date scope, if one was given.
 * @param input.allTimeScopeHint - Whether the utterance asked for the whole history.
 * @returns `"explicit_period"` when a date scope is given. For the value-flow, movement and
 *   document families: `"all_time_scope"` with an all-time hint, otherwise `"period_required"`.
 *   `"open_activity_window"` for `activity_lifecycle`. `null` in every other case.
 */
function timeScopeNeedFor(input: {
  family: string | null;
  explicitDateScope: string | null;
  allTimeScopeHint: boolean;
}): string | null {
  const { family, explicitDateScope, allTimeScopeHint } = input;
  if (explicitDateScope) {
    return "explicit_period";
  }

  // Families whose facts are only meaningful within some period.
  const isPeriodBound =
    family === "value_flow" || family === "movement_evidence" || family === "document_evidence";
  if (isPeriodBound) {
    return allTimeScopeHint ? "all_time_scope" : "period_required";
  }

  return family === "activity_lifecycle" ? "open_activity_window" : null;
}
/**
 * Determines whether the action implies comparing two flows.
 *
 * @param action - Action family name.
 * @returns `"incoming_vs_outgoing"` for `"net_value_flow"`, otherwise `null`.
 */
function comparisonNeedFor(action: string): string | null {
  return action === "net_value_flow" ? "incoming_vs_outgoing" : null;
}
/**
 * Detects a Russian "how much did we receive / pay in total" phrasing, i.e. a one-sided
 * value total asked without naming a subject.
 *
 * Fix: the previous patterns used `\b` around Cyrillic words. In JavaScript `\b` is defined
 * in terms of ASCII `\w` even with the `u` flag, so a boundary between a Cyrillic letter and
 * whitespace or the string edge is never recognised, and the "сколько …" and "… за"
 * alternatives could practically never match. Boundaries are now expressed with
 * Unicode-aware lookarounds.
 *
 * NOTE(review): no call site for this function is visible in this file; the builder calls
 * `hasOpenScopeOneSidedValueTotalHintUtf8Safe`, which uses no word boundaries at all.
 *
 * @param rawUtterance - User utterance text; matching is case-insensitive.
 * @param action - Action family; only `"turnover"` (incoming) and `"payout"` (outgoing) are supported.
 * @returns `true` when the utterance contains a matching phrase for the given action.
 */
function hasOpenScopeOneSidedValueTotalHint(rawUtterance: string, action: string): boolean {
  if (!rawUtterance) {
    return false;
  }
  // (?<![\p{L}\p{N}_]) / (?![\p{L}\p{N}_]) act as Unicode-aware replacements for `\b`.
  if (action === "turnover") {
    return /(?:(?<![\p{L}\p{N}_])сколько\s+(?:(?:вообще|всего|реально)\s+){0,2}(?:мы\s+)?(?:получили|получено|входящих(?:\s+денег)?(?:\s+было)?|поступлений|денег\s+пришло)(?![\p{L}\p{N}_])|(?:сумма|объем)\s+(?:входящих|поступлений)|поступлений\s+за(?![\p{L}\p{N}_]))/iu.test(
      rawUtterance
    );
  }
  if (action === "payout") {
    return /(?:(?<![\p{L}\p{N}_])сколько\s+(?:(?:вообще|всего|реально)\s+){0,2}(?:мы\s+)?(?:заплатили|выплатили|потратили|исходящих(?:\s+денег)?(?:\s+было)?|платежей(?:\s+было)?|списаний(?:\s+было)?)(?![\p{L}\p{N}_])|(?:сумма|объем)\s+(?:исходящих|платежей|списаний)|(?:платежей|списаний)\s+за(?![\p{L}\p{N}_]))/iu.test(
      rawUtterance
    );
  }
  return false;
}
/**
 * Detects a Russian "how much did we receive / pay in total" phrasing, using patterns
 * written purely with `\u` escapes so the source stays ASCII.
 *
 * The patterns are case-sensitive and contain lowercase letters only; the builder in this
 * module passes an already lowercased utterance. No word boundaries are applied.
 *
 * @param rawUtterance - User utterance text.
 * @param action - Action family; `"turnover"` selects the incoming pattern, `"payout"` the outgoing one.
 * @returns `true` when the pattern for the action matches; `false` for empty input or any other action.
 */
function hasOpenScopeOneSidedValueTotalHintUtf8Safe(rawUtterance: string, action: string): boolean {
  if (!rawUtterance) {
    return false;
  }
  switch (action) {
    case "turnover": {
      // Decodes to: "сколько [вообще|всего|реально] [мы] получили|получено|входящих…|поступлений|денег пришло",
      // "сумма|объем входящих|поступлений", "поступлений за".
      const incomingTotalPattern =
        /(?:\u0441\u043a\u043e\u043b\u044c\u043a\u043e\s+(?:(?:\u0432\u043e\u043e\u0431\u0449\u0435|\u0432\u0441\u0435\u0433\u043e|\u0440\u0435\u0430\u043b\u044c\u043d\u043e)\s+){0,2}(?:\u043c\u044b\s+)?(?:\u043f\u043e\u043b\u0443\u0447\u0438\u043b\u0438|\u043f\u043e\u043b\u0443\u0447\u0435\u043d\u043e|\u0432\u0445\u043e\u0434\u044f\u0449\u0438\u0445(?:\s+\u0434\u0435\u043d\u0435\u0433)?(?:\s+\u0431\u044b\u043b\u043e)?|\u043f\u043e\u0441\u0442\u0443\u043f\u043b\u0435\u043d\u0438\u0439|\u0434\u0435\u043d\u0435\u0433\s+\u043f\u0440\u0438\u0448\u043b\u043e)|(?:\u0441\u0443\u043c\u043c\u0430|\u043e\u0431\u044a\u0435\u043c)\s+(?:\u0432\u0445\u043e\u0434\u044f\u0449\u0438\u0445|\u043f\u043e\u0441\u0442\u0443\u043f\u043b\u0435\u043d\u0438\u0439)|\u043f\u043e\u0441\u0442\u0443\u043f\u043b\u0435\u043d\u0438\u0439\s+\u0437\u0430)/u;
      return incomingTotalPattern.test(rawUtterance);
    }
    case "payout": {
      // Decodes to: "сколько [вообще|всего|реально] [мы] заплатили|выплатили|потратили|исходящих…|платежей…|списаний…",
      // "сумма|объем исходящих|платежей|списаний", "платежей|списаний за".
      const outgoingTotalPattern =
        /(?:\u0441\u043a\u043e\u043b\u044c\u043a\u043e\s+(?:(?:\u0432\u043e\u043e\u0431\u0449\u0435|\u0432\u0441\u0435\u0433\u043e|\u0440\u0435\u0430\u043b\u044c\u043d\u043e)\s+){0,2}(?:\u043c\u044b\s+)?(?:\u0437\u0430\u043f\u043b\u0430\u0442\u0438\u043b\u0438|\u0432\u044b\u043f\u043b\u0430\u0442\u0438\u043b\u0438|\u043f\u043e\u0442\u0440\u0430\u0442\u0438\u043b\u0438|\u0438\u0441\u0445\u043e\u0434\u044f\u0449\u0438\u0445(?:\s+\u0434\u0435\u043d\u0435\u0433)?(?:\s+\u0431\u044b\u043b\u043e)?|\u043f\u043b\u0430\u0442\u0435\u0436\u0435\u0439(?:\s+\u0431\u044b\u043b\u043e)?|\u0441\u043f\u0438\u0441\u0430\u043d\u0438\u0439(?:\s+\u0431\u044b\u043b\u043e)?)|(?:\u0441\u0443\u043c\u043c\u0430|\u043e\u0431\u044a\u0435\u043c)\s+(?:\u0438\u0441\u0445\u043e\u0434\u044f\u0449\u0438\u0445|\u043f\u043b\u0430\u0442\u0435\u0436\u0435\u0439|\u0441\u043f\u0438\u0441\u0430\u043d\u0438\u0439)|(?:\u043f\u043b\u0430\u0442\u0435\u0436\u0435\u0439|\u0441\u043f\u0438\u0441\u0430\u043d\u0438\u0439)\s+\u0437\u0430)/u;
      return outgoingTotalPattern.test(rawUtterance);
    }
    default:
      return false;
  }
}
/**
 * Tells whether an action can be answered as an open total without a named subject.
 *
 * @param action - Action family name.
 * @returns `true` only for `"turnover"` and `"payout"`.
 */
function supportsOrganizationScopedOpenTotal(action: string): boolean {
  const openTotalActions = ["turnover", "payout"];
  return openTotalActions.includes(action);
}
/**
 * Decides whether a value-flow question may proceed without a named subject.
 *
 * @param input.family - Business fact family; anything other than `"value_flow"` is rejected.
 * @param input.action - Action family name.
 * @param input.organizationScope - Explicit organization scope, if any.
 * @param input.comparisonNeed - Comparison need, if any.
 * @param input.rankingNeed - Ranking need, if any.
 * @param input.oneSidedOpenScopeTotalHint - Whether the utterance asked for a one-sided total.
 * @returns `true` for value-flow needs that are ranked or compare incoming vs outgoing, or whose
 *   action supports open totals and that carry either an organization scope or a one-sided total hint.
 */
function allowsOpenScopeWithoutSubject(input: {
  family: string | null;
  action: string;
  organizationScope: string | null;
  comparisonNeed: string | null;
  rankingNeed: string | null;
  oneSidedOpenScopeTotalHint: boolean;
}): boolean {
  const { family, action, organizationScope, comparisonNeed, rankingNeed, oneSidedOpenScopeTotalHint } = input;
  if (family !== "value_flow") {
    return false;
  }
  // Rankings and incoming-vs-outgoing comparisons are inherently open-scope.
  if (rankingNeed || comparisonNeed === "incoming_vs_outgoing") {
    return true;
  }
  if (!supportsOrganizationScopedOpenTotal(action)) {
    return false;
  }
  return Boolean(organizationScope || oneSidedOpenScopeTotalHint);
}
/**
 * Detects a ranking request ("top N", "largest", "least", …) in a Russian or English utterance.
 *
 * Fixes relative to the previous implementation:
 * - `\b` is ASCII-only in JavaScript even with the `u` flag, so `топ\b` and
 *   `сам(?:ый|ая|ое|ые)\b` could not match ordinary text such as "топ клиентов" or
 *   "самый крупный". Cyrillic boundaries now use Unicode-aware lookarounds.
 * - `топ` is anchored at a word start so it no longer fires inside words such as "стоп".
 * - `наимен[ьш]е` matched neither "наименее" nor "наименьш…"; it is now `наимен(?:ее|ьш)`.
 *
 * NOTE(review): any "самый/самая/самое/самые" superlative is classified as `top_desc`, which
 * follows the original pattern's intent; phrases like "самый маленький" are therefore top-ranked.
 *
 * @param value - Raw utterance; it is trimmed and lowercased before matching.
 * @returns `"top_desc"`, `"bottom_asc"`, or `null` when no ranking phrase is found.
 *   The top patterns are checked first.
 */
function rankingNeedFromRawUtterance(value: string): string | null {
  // Same normalisation as the module's `lower` helper, inlined to keep this block self-contained.
  const text = String(value ?? "").trim().toLowerCase();
  if (!text) {
    return null;
  }
  // (?<![\p{L}\p{N}_]) / (?![\p{L}\p{N}_]) are Unicode-aware stand-ins for `\b`.
  const topPattern =
    /(?:\btop[-\s]?\d+\b|\btop\b|(?<![\p{L}\p{N}_])топ[-\s]?\d+|(?<![\p{L}\p{N}_])топ(?![\p{L}\p{N}_])|(?<![\p{L}\p{N}_])сам(?:ый|ая|ое|ые)(?![\p{L}\p{N}_])|больше\s+всего|наибол[её]е|highest|largest|most)/iu;
  if (topPattern.test(text)) {
    return "top_desc";
  }
  const bottomPattern = /(?:меньше\s+всего|наимен(?:ее|ьш)|lowest|smallest|least)/iu;
  if (bottomPattern.test(text)) {
    return "bottom_asc";
  }
  return null;
}
/**
 * Chooses the proof expectation for a data need.
 *
 * @param input.family - Business fact family, or `null` when unknown.
 * @param input.clarificationGaps - Outstanding clarification gaps.
 * @returns `"clarification_required"` when any gap exists; otherwise the expectation mapped
 *   from the family, defaulting to `"coverage_checked_fact"`.
 */
function proofExpectationFor(input: {
  family: string | null;
  clarificationGaps: string[];
}): AssistantMcpDiscoveryDataNeedProofExpectation {
  // Open questions to the user take precedence over any family-based expectation.
  if (input.clarificationGaps.length > 0) {
    return "clarification_required";
  }
  switch (input.family) {
    case "schema_surface":
      return "schema_surface";
    case "entity_grounding":
      return "entity_grounding";
    case "activity_lifecycle":
      return "bounded_inference";
    default:
      return "coverage_checked_fact";
  }
}
/**
 * Lists the ordered decomposition steps suggested for a data need.
 *
 * @param input.family - Business fact family; unknown families yield an empty list.
 * @param input.action - Action family name; `"net_value_flow"` splits collection into incoming and outgoing.
 * @param input.aggregationNeed - Aggregation need; only `"by_month"` changes the aggregation step.
 * @param input.comparisonNeed - Comparison need, if any.
 * @param input.rankingNeed - Ranking need, if any.
 * @param input.openScopeWithoutSubject - Whether the need may be answered without a named subject.
 * @returns A new array of step names in execution order.
 */
function decompositionCandidatesFor(input: {
  family: string | null;
  action: string;
  aggregationNeed: string | null;
  comparisonNeed: string | null;
  rankingNeed: string | null;
  openScopeWithoutSubject: boolean;
}): string[] {
  const monthly = input.aggregationNeed === "by_month";
  const aggregateStep = monthly ? "aggregate_by_month" : "aggregate_checked_amounts";

  switch (input.family) {
    case "schema_surface":
      return ["inspect_metadata_surface"];

    case "entity_grounding":
      return ["search_business_entity", "resolve_entity_reference", "probe_coverage"];

    case "value_flow": {
      if (input.openScopeWithoutSubject) {
        // Open-scope variants skip entity resolution entirely.
        if (input.rankingNeed) {
          return ["collect_scoped_movements", "aggregate_ranked_axis_values", "probe_coverage"];
        }
        if (input.comparisonNeed === "incoming_vs_outgoing") {
          const steps = ["collect_incoming_movements", "collect_outgoing_movements"];
          if (monthly) {
            steps.push("aggregate_by_month");
          }
          steps.push("probe_coverage");
          return steps;
        }
        return ["collect_scoped_movements", aggregateStep, "probe_coverage"];
      }
      // Subject-bound value flow: resolve the entity first, then collect and aggregate.
      const collectSteps =
        input.action === "net_value_flow"
          ? ["collect_incoming_movements", "collect_outgoing_movements"]
          : ["collect_scoped_movements"];
      return ["resolve_entity_reference", ...collectSteps, aggregateStep, "probe_coverage"];
    }

    case "movement_evidence":
      return ["resolve_entity_reference", "fetch_scoped_movements", "probe_coverage"];

    case "document_evidence":
      return ["resolve_entity_reference", "fetch_scoped_documents", "probe_coverage"];

    case "activity_lifecycle":
      return [
        "resolve_entity_reference",
        "fetch_supporting_documents",
        "probe_coverage",
        "explain_evidence_basis"
      ];

    default:
      return [];
  }
}
/**
 * Lists the overclaim guards that apply to a business fact family.
 *
 * @param family - Business fact family, or `null` when unknown.
 * @returns A new array that always starts with `"no_raw_model_claims"`, followed by the
 *   family-specific guard when the family has one.
 */
function forbiddenOverclaimFlagsFor(family: string | null): string[] {
  const flags = ["no_raw_model_claims"];
  switch (family) {
    case "schema_surface":
      flags.push("no_fake_schema_surface");
      break;
    case "entity_grounding":
      flags.push("no_unresolved_entity_claim");
      break;
    case "activity_lifecycle":
      flags.push("no_legal_age_claim_without_evidence");
      break;
    case "value_flow":
    case "movement_evidence":
    case "document_evidence":
      flags.push("no_unchecked_fact_totals");
      break;
    default:
      break;
  }
  return flags;
}
/**
 * Builds the data-need graph contract for one assistant turn.
 *
 * The builder normalizes the text signals, derives the business fact family and the
 * aggregation / comparison / ranking / time-scope needs, records which inputs still need
 * clarification, and emits decomposition steps, overclaim guards and reason codes.
 *
 * @param input - Semantic data need, raw utterance and structured turn meaning; all optional.
 * @returns A fully populated contract. Clarification gaps are ordered lane choice,
 *   then organization or subject, then period.
 */
export function buildAssistantMcpDiscoveryDataNeedGraph(
  input: BuildAssistantMcpDiscoveryDataNeedGraphInput
): AssistantMcpDiscoveryDataNeedGraphContract {
  const meaning = input.turnMeaning ?? null;

  // Trimmed + lowercased text signals used for keyword and equality matching.
  const needText = lower(input.semanticDataNeed);
  const utterance = lower(input.rawUtterance);
  const domainFamily = lower(meaning?.asked_domain_family);
  const actionFamily = lower(meaning?.asked_action_family);
  const unsupportedFamily = lower(meaning?.unsupported_but_understood_family);
  const axis = lower(meaning?.asked_aggregation_axis);

  // Explicit scopes are only checked for presence, so their casing is left alone.
  const dateScope = toNonEmptyString(meaning?.explicit_date_scope);
  const organizationScope = toNonEmptyString(meaning?.explicit_organization_scope);
  const seededRanking = toNonEmptyString(meaning?.seeded_ranking_need);

  const subjects = (meaning?.explicit_entity_candidates ?? [])
    .map((candidate) => toNonEmptyString(candidate))
    .filter((candidate): candidate is string => Boolean(candidate));
  const hasNoSubject = subjects.length === 0;

  const family = businessFactFamilyFor({
    semanticDataNeed: needText,
    domain: domainFamily,
    action: actionFamily,
    unsupported: unsupportedFamily
  });
  const aggregationNeed = aggregationNeedFor(axis);
  const comparisonNeed = comparisonNeedFor(actionFamily);
  // A ranking phrase in the utterance wins over the seeded ranking need.
  const rankingNeed = rankingNeedFromRawUtterance(utterance) ?? seededRanking;
  const allTimeHint = hasAllTimeScopeHint(utterance);
  const oneSidedTotalHint = hasOpenScopeOneSidedValueTotalHintUtf8Safe(utterance, actionFamily);

  const openScope =
    hasNoSubject &&
    allowsOpenScopeWithoutSubject({
      family,
      action: actionFamily,
      organizationScope,
      comparisonNeed,
      rankingNeed,
      oneSidedOpenScopeTotalHint: oneSidedTotalHint
    });

  const gaps: string[] = [];
  const needsLaneChoice =
    unsupportedFamily === "metadata_lane_choice_clarification" || actionFamily === "resolve_next_lane";
  if (needsLaneChoice) {
    pushUnique(gaps, "lane_family_choice");
  }
  if (hasNoSubject) {
    if (family === "value_flow" && openScope && !organizationScope) {
      // An open-scope total still has to be pinned to an organization.
      pushUnique(gaps, "organization");
    } else if (family !== "schema_surface" && !openScope) {
      pushUnique(gaps, "subject");
    }
  }

  const timeScopeNeed = timeScopeNeedFor({
    family,
    explicitDateScope: dateScope,
    allTimeScopeHint: allTimeHint
  });
  if (timeScopeNeed === "period_required" && !dateScope) {
    pushUnique(gaps, "period");
  }

  const steps = decompositionCandidatesFor({
    family,
    action: actionFamily,
    aggregationNeed,
    comparisonNeed,
    rankingNeed,
    openScopeWithoutSubject: openScope
  });

  // Candidate reason codes in emission order; `null` marks a condition that did not apply.
  const reasonCandidates: Array<string | null> = [
    "data_need_graph_built",
    family ? `data_need_graph_family_${family}` : "data_need_graph_family_unknown",
    aggregationNeed ? `data_need_graph_aggregation_${aggregationNeed}` : null,
    rankingNeed ? `data_need_graph_ranking_${rankingNeed}` : null,
    comparisonNeed ? `data_need_graph_comparison_${comparisonNeed}` : null,
    openScope && !rankingNeed && !comparisonNeed ? "data_need_graph_open_scope_total_without_subject" : null,
    allTimeHint ? "data_need_graph_all_time_scope_hint" : null,
    gaps.includes("organization") ? "data_need_graph_open_scope_total_needs_organization" : null,
    gaps.length > 0 ? "data_need_graph_has_clarification_gaps" : null
  ];
  const reasons: string[] = [];
  for (const candidate of reasonCandidates) {
    if (candidate !== null) {
      pushReason(reasons, candidate);
    }
  }

  return {
    schema_version: ASSISTANT_MCP_DISCOVERY_DATA_NEED_GRAPH_SCHEMA_VERSION,
    policy_owner: "assistantMcpDiscoveryDataNeedGraph",
    subject_candidates: subjects,
    business_fact_family: family,
    action_family: toNonEmptyString(meaning?.asked_action_family),
    aggregation_need: aggregationNeed,
    time_scope_need: timeScopeNeed,
    comparison_need: comparisonNeed,
    ranking_need: rankingNeed,
    proof_expectation: proofExpectationFor({
      family,
      clarificationGaps: gaps
    }),
    clarification_gaps: gaps,
    decomposition_candidates: steps,
    forbidden_overclaim_flags: forbiddenOverclaimFlagsFor(family),
    reason_codes: reasons
  };
}