NODEDC_1C/llm_normalizer/backend/dist/services/assistantMcpDiscoveryDataNe...

385 lines
17 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"use strict";
// Flag the CommonJS exports object with `__esModule`.
Object.defineProperty(exports, "__esModule", { value: true });
// Pre-declare the schema-version export; its real value is assigned two statements below.
exports.ASSISTANT_MCP_DISCOVERY_DATA_NEED_GRAPH_SCHEMA_VERSION = void 0;
// Export the graph builder. It is a function declaration later in this file, so it is hoisted.
exports.buildAssistantMcpDiscoveryDataNeedGraph = buildAssistantMcpDiscoveryDataNeedGraph;
// Schema version string; the builder copies it into the `schema_version` field of its result.
exports.ASSISTANT_MCP_DISCOVERY_DATA_NEED_GRAPH_SCHEMA_VERSION = "assistant_data_need_graph_v1";
/**
 * Converts a value to a trimmed string, treating missing or blank values as absent.
 *
 * @param {*} value - Any value; `null` and `undefined` are treated as absent.
 * @returns {string|null} The trimmed string form, or `null` when the value is
 *   nullish or trims down to an empty string.
 */
function toNonEmptyString(value) {
    const isMissing = value === undefined || value === null;
    if (isMissing) {
        return null;
    }
    const trimmed = String(value).trim();
    return trimmed === "" ? null : trimmed;
}
/**
 * Normalizes a value for case-insensitive matching.
 *
 * @param {*} value - Any value; `null`/`undefined` become the empty string.
 * @returns {string} The string form of the value, trimmed and lower-cased.
 */
function lower(value) {
    const text = String(value ?? "");
    return text.trim().toLowerCase();
}
/**
 * Turns free text into a compact reason-code token.
 *
 * Runs of characters other than Unicode letters, Unicode digits, `_`, `.`,
 * `:` and `-` collapse into a single underscore. Leading and trailing
 * underscores are dropped, the text is lower-cased, and the result is capped
 * at 120 characters.
 *
 * @param {string} value - Raw reason text; must be a string (`trim` is called on it).
 * @returns {string|null} The normalized code, or `null` when nothing remains.
 */
function normalizeReasonCode(value) {
    const collapsed = value.trim().replace(/[^\p{L}\p{N}_.:-]+/gu, "_");
    const code = collapsed.replace(/^_+|_+$/g, "").toLowerCase();
    if (code.length === 0) {
        return null;
    }
    return code.slice(0, 120);
}
/**
 * Appends a normalized reason code to a list unless it is empty or already present.
 *
 * @param {string[]} target - List of reason codes; mutated in place.
 * @param {string} value - Raw reason text, normalized via `normalizeReasonCode`.
 * @returns {void}
 */
function pushReason(target, value) {
    const code = normalizeReasonCode(value);
    if (!code || target.includes(code)) {
        return;
    }
    target.push(code);
}
/**
 * Appends the trimmed string form of a value to a list, skipping blanks and duplicates.
 *
 * @param {string[]} target - List to extend; mutated in place.
 * @param {*} value - Candidate value, coerced via `toNonEmptyString`.
 * @returns {void}
 */
function pushUnique(target, value) {
    const entry = toNonEmptyString(value);
    if (!entry || target.includes(entry)) {
        return;
    }
    target.push(entry);
}
/**
 * Picks the business-fact family by substring matching over the request descriptors.
 *
 * The four input fields are joined with single spaces and searched for marker
 * substrings. Rules are evaluated top to bottom and the first hit wins, so the
 * rule order below defines precedence.
 *
 * @param {{semanticDataNeed: string, domain: string, action: string, unsupported: string}} input
 *   Descriptor strings; matching is case-sensitive, so callers pass them lower-cased.
 * @returns {string|null} The matched family name, or `null` when no marker is found.
 */
function businessFactFamilyFor(input) {
    const haystack = `${input.semanticDataNeed} ${input.domain} ${input.action} ${input.unsupported}`.trim();
    // Ordered [family, markers] pairs; earlier rows take precedence.
    const rules = [
        ["schema_surface", ["metadata lane clarification", "metadata"]],
        ["entity_grounding", ["entity discovery", "entity_resolution"]],
        ["activity_lifecycle", ["lifecycle", "activity"]],
        ["movement_evidence", ["movement"]],
        ["document_evidence", ["document"]],
        ["value_flow", ["value-flow", "turnover", "payout", "net"]],
    ];
    for (const [family, markers] of rules) {
        if (markers.some((marker) => haystack.includes(marker))) {
            return family;
        }
    }
    return null;
}
/**
 * Maps an aggregation axis name to its aggregation-need label.
 *
 * @param {string|null|undefined} axis - Axis name such as `"month"`.
 * @returns {string|null} `"by_<axis>"`, or `null` when the axis is empty or missing.
 */
function aggregationNeedFor(axis) {
    if (!axis) {
        return null;
    }
    return axis === "month" ? "by_month" : `by_${axis}`;
}
/**
 * Detects phrases asking for an unbounded ("all time") period.
 *
 * The pattern covers Russian phrasings (written as `\u` escapes) and the
 * English phrases "for all time", "all time", "entire period",
 * "full history" and "any period". Matching is case-insensitive.
 *
 * @param {string|null|undefined} rawUtterance - User utterance text.
 * @returns {boolean} `true` when an all-time phrase occurs; `false` for empty input.
 */
function hasAllTimeScopeHint(rawUtterance) {
    const allTimePattern = /(?:\u0437\u0430\s+\u0432\u0441[\u0435\u0451]\s+\u0432\u0440\u0435\u043c\u044f|\u0437\u0430\s+\u0432\u0435\u0441\u044c\s+\u043f\u0435\u0440\u0438\u043e\u0434|\u0437\u0430\s+\u0432\u0441\u044e\s+\u0438\u0441\u0442\u043e\u0440\u0438(?:\u044e|\u0438)|\u0437\u0430\s+\u043b\u044e\u0431\u043e\u0439\s+\u043f\u0435\u0440\u0438\u043e\u0434|for\s+all\s+time|all\s+time|entire\s+period|full\s+history|any\s+period)/iu;
    return rawUtterance ? allTimePattern.test(rawUtterance) : false;
}
/**
 * Decides what kind of time scope the request needs.
 *
 * An explicit date scope always wins. Otherwise the value-flow, movement and
 * document families need either an all-time scope (when hinted) or a period.
 * The activity-lifecycle family uses an open activity window.
 *
 * @param {{family: (string|null), explicitDateScope: *, allTimeScopeHint: *}} input
 * @returns {string|null} `"explicit_period"`, `"all_time_scope"`,
 *   `"period_required"`, `"open_activity_window"`, or `null`.
 */
function timeScopeNeedFor(input) {
    if (input.explicitDateScope) {
        return "explicit_period";
    }
    const { family } = input;
    const isPeriodBound = family === "value_flow" || family === "movement_evidence" || family === "document_evidence";
    if (isPeriodBound) {
        return input.allTimeScopeHint ? "all_time_scope" : "period_required";
    }
    return family === "activity_lifecycle" ? "open_activity_window" : null;
}
/**
 * Derives the comparison need from the action family.
 *
 * @param {string} action - Action family name.
 * @returns {string|null} `"incoming_vs_outgoing"` for `"net_value_flow"`, otherwise `null`.
 */
function comparisonNeedFor(action) {
    return action === "net_value_flow" ? "incoming_vs_outgoing" : null;
}
/**
 * Detects Russian "how much did we receive / pay in total" phrasings that ask
 * for a one-sided total (incoming for `turnover`, outgoing for `payout`)
 * without naming a subject.
 *
 * The previous patterns used `\b` around Cyrillic words. In JavaScript `\b`
 * is defined via `\w`, which stays ASCII-only even with the `u` flag, so
 * those boundaries could never match between Cyrillic letters and
 * whitespace/string edges, and the "сколько …" and "… за" alternatives were
 * effectively dead. Word edges are now expressed with Unicode-aware
 * lookarounds over letters, digits and underscore.
 *
 * @param {string|null|undefined} rawUtterance - User utterance text (matched case-insensitively).
 * @param {string} action - Action family; only `"turnover"` and `"payout"` are recognized.
 * @returns {boolean} `true` when a one-sided total phrasing for the action is found.
 */
function hasOpenScopeOneSidedValueTotalHint(rawUtterance, action) {
    if (!rawUtterance) {
        return false;
    }
    if (action === "turnover") {
        // (?<![\p{L}\p{N}_]) / (?![\p{L}\p{N}_]) act as Unicode-aware word boundaries.
        return /(?:(?<![\p{L}\p{N}_])сколько\s+(?:(?:вообще|всего|реально)\s+){0,2}(?:мы\s+)?(?:получили|получено|входящих(?:\s+денег)?(?:\s+было)?|поступлений|денег\s+пришло)(?![\p{L}\p{N}_])|(?:сумма|объем)\s+(?:входящих|поступлений)|поступлений\s+за(?![\p{L}\p{N}_]))/iu.test(rawUtterance);
    }
    if (action === "payout") {
        return /(?:(?<![\p{L}\p{N}_])сколько\s+(?:(?:вообще|всего|реально)\s+){0,2}(?:мы\s+)?(?:заплатили|выплатили|потратили|исходящих(?:\s+денег)?(?:\s+было)?|платежей(?:\s+было)?|списаний(?:\s+было)?)(?![\p{L}\p{N}_])|(?:сумма|объем)\s+(?:исходящих|платежей|списаний)|(?:платежей|списаний)\s+за(?![\p{L}\p{N}_]))/iu.test(rawUtterance);
    }
    return false;
}
/**
 * Escape-encoded variant of the one-sided total detector.
 *
 * The Russian phrasings are written as `\u` escapes and carry no word-boundary
 * assertions. The patterns are case-sensitive (no `i` flag); the call site in
 * this file passes an utterance that has already been lower-cased.
 *
 * @param {string|null|undefined} rawUtterance - Utterance text.
 * @param {string} action - Action family; only `"turnover"` and `"payout"` are recognized.
 * @returns {boolean} `true` when a one-sided total phrasing for the action is found.
 */
function hasOpenScopeOneSidedValueTotalHintUtf8Safe(rawUtterance, action) {
    if (!rawUtterance) {
        return false;
    }
    switch (action) {
        case "turnover": {
            // Incoming-side phrasings ("how much did we receive", "sum of incoming", ...).
            const incomingTotal = /(?:\u0441\u043a\u043e\u043b\u044c\u043a\u043e\s+(?:(?:\u0432\u043e\u043e\u0431\u0449\u0435|\u0432\u0441\u0435\u0433\u043e|\u0440\u0435\u0430\u043b\u044c\u043d\u043e)\s+){0,2}(?:\u043c\u044b\s+)?(?:\u043f\u043e\u043b\u0443\u0447\u0438\u043b\u0438|\u043f\u043e\u043b\u0443\u0447\u0435\u043d\u043e|\u0432\u0445\u043e\u0434\u044f\u0449\u0438\u0445(?:\s+\u0434\u0435\u043d\u0435\u0433)?(?:\s+\u0431\u044b\u043b\u043e)?|\u043f\u043e\u0441\u0442\u0443\u043f\u043b\u0435\u043d\u0438\u0439|\u0434\u0435\u043d\u0435\u0433\s+\u043f\u0440\u0438\u0448\u043b\u043e)|(?:\u0441\u0443\u043c\u043c\u0430|\u043e\u0431\u044a\u0435\u043c)\s+(?:\u0432\u0445\u043e\u0434\u044f\u0449\u0438\u0445|\u043f\u043e\u0441\u0442\u0443\u043f\u043b\u0435\u043d\u0438\u0439)|\u043f\u043e\u0441\u0442\u0443\u043f\u043b\u0435\u043d\u0438\u0439\s+\u0437\u0430)/u;
            return incomingTotal.test(rawUtterance);
        }
        case "payout": {
            // Outgoing-side phrasings ("how much did we pay", "sum of payments", ...).
            const outgoingTotal = /(?:\u0441\u043a\u043e\u043b\u044c\u043a\u043e\s+(?:(?:\u0432\u043e\u043e\u0431\u0449\u0435|\u0432\u0441\u0435\u0433\u043e|\u0440\u0435\u0430\u043b\u044c\u043d\u043e)\s+){0,2}(?:\u043c\u044b\s+)?(?:\u0437\u0430\u043f\u043b\u0430\u0442\u0438\u043b\u0438|\u0432\u044b\u043f\u043b\u0430\u0442\u0438\u043b\u0438|\u043f\u043e\u0442\u0440\u0430\u0442\u0438\u043b\u0438|\u0438\u0441\u0445\u043e\u0434\u044f\u0449\u0438\u0445(?:\s+\u0434\u0435\u043d\u0435\u0433)?(?:\s+\u0431\u044b\u043b\u043e)?|\u043f\u043b\u0430\u0442\u0435\u0436\u0435\u0439(?:\s+\u0431\u044b\u043b\u043e)?|\u0441\u043f\u0438\u0441\u0430\u043d\u0438\u0439(?:\s+\u0431\u044b\u043b\u043e)?)|(?:\u0441\u0443\u043c\u043c\u0430|\u043e\u0431\u044a\u0435\u043c)\s+(?:\u0438\u0441\u0445\u043e\u0434\u044f\u0449\u0438\u0445|\u043f\u043b\u0430\u0442\u0435\u0436\u0435\u0439|\u0441\u043f\u0438\u0441\u0430\u043d\u0438\u0439)|(?:\u043f\u043b\u0430\u0442\u0435\u0436\u0435\u0439|\u0441\u043f\u0438\u0441\u0430\u043d\u0438\u0439)\s+\u0437\u0430)/u;
            return outgoingTotal.test(rawUtterance);
        }
        default:
            return false;
    }
}
/**
 * Tells whether an action family is one of the two one-sided totals.
 *
 * @param {string} action - Action family name.
 * @returns {boolean} `true` for `"turnover"` or `"payout"`.
 */
function supportsOrganizationScopedOpenTotal(action) {
    switch (action) {
        case "turnover":
        case "payout":
            return true;
        default:
            return false;
    }
}
/**
 * Decides whether a value-flow request may proceed without a named subject.
 *
 * Only the `value_flow` family qualifies. Within it, a ranking need or an
 * incoming-vs-outgoing comparison is enough. Otherwise the action must be a
 * supported one-sided total and either an organization scope or a one-sided
 * total hint must be present.
 *
 * @param {{family: (string|null), action: string, organizationScope: *,
 *   comparisonNeed: (string|null), rankingNeed: *, oneSidedOpenScopeTotalHint: *}} input
 * @returns {boolean}
 */
function allowsOpenScopeWithoutSubject(input) {
    const isValueFlow = input.family === "value_flow";
    if (!isValueFlow) {
        return false;
    }
    const rankedOrCompared = Boolean(input.rankingNeed) || input.comparisonNeed === "incoming_vs_outgoing";
    if (rankedOrCompared) {
        return true;
    }
    if (!supportsOrganizationScopedOpenTotal(input.action)) {
        return false;
    }
    return Boolean(input.organizationScope || input.oneSidedOpenScopeTotalHint);
}
/**
 * Decides whether a movement/document request may skip subject resolution.
 *
 * @param {{family: (string|null), subjectResolutionOptional: *}} input
 * @returns {boolean} `true` only when subject resolution is optional and the
 *   family is `movement_evidence` or `document_evidence`.
 */
function allowsMetadataScopedOpenLaneWithoutSubject(input) {
    if (!input.subjectResolutionOptional) {
        return false;
    }
    return input.family === "movement_evidence" || input.family === "document_evidence";
}
/**
 * Infers a ranking direction from ranking words in the utterance.
 *
 * Fixes over the previous patterns:
 * - `\b` after Cyrillic words (`топ\b`, `сам(?:ый|ая|ое|ые)\b`) could not
 *   match in normal text, because `\b` is defined via ASCII-only `\w` even
 *   with the `u` flag. Those edges now use Unicode-aware lookarounds.
 * - `наимен[ьш]е` matched no real word form; the bottom pattern now also
 *   accepts "наименее" and the "наименьш…" stem (a superset of the old pattern).
 *
 * "Top" hints are checked before "bottom" hints, as before.
 *
 * NOTE(review): now that "самый/самая/самое/самые" actually matches, phrases
 * like "самый маленький" resolve to `top_desc`; confirm that is acceptable upstream.
 *
 * @param {*} value - Raw utterance; coerced to string, trimmed and lower-cased
 *   (same normalization as `lower`).
 * @returns {"top_desc"|"bottom_asc"|null} Ranking direction, or `null` when no hint is found.
 */
function rankingNeedFromRawUtterance(value) {
    const text = String(value ?? "").trim().toLowerCase();
    if (!text) {
        return null;
    }
    // ASCII alternatives keep \b; Cyrillic ones use (?<![\p{L}\p{N}_]) / (?![\p{L}\p{N}_]).
    const topPattern = /(?:\btop[-\s]?\d+\b|\btop\b|топ[-\s]?\d+|(?<![\p{L}\p{N}_])топ(?![\p{L}\p{N}_])|(?<![\p{L}\p{N}_])сам(?:ый|ая|ое|ые)(?![\p{L}\p{N}_])|больше\s+всего|наибол[её]е|highest|largest|most)/iu;
    if (topPattern.test(text)) {
        return "top_desc";
    }
    const bottomPattern = /(?:меньше\s+всего|наимен(?:[ьш]е|ее|ьш)|lowest|smallest|least)/iu;
    if (bottomPattern.test(text)) {
        return "bottom_asc";
    }
    return null;
}
/**
 * Chooses the proof expectation for the graph.
 *
 * Any outstanding clarification gap overrides the family-based choice.
 *
 * @param {{family: (string|null), clarificationGaps: string[]}} input
 * @returns {string} `"clarification_required"`, `"schema_surface"`,
 *   `"entity_grounding"`, `"bounded_inference"`, or `"coverage_checked_fact"`.
 */
function proofExpectationFor(input) {
    const hasGaps = input.clarificationGaps.length > 0;
    if (hasGaps) {
        return "clarification_required";
    }
    switch (input.family) {
        case "schema_surface":
            return "schema_surface";
        case "entity_grounding":
            return "entity_grounding";
        case "activity_lifecycle":
            return "bounded_inference";
        default:
            return "coverage_checked_fact";
    }
}
/**
 * Lists the ordered decomposition steps for a data need.
 *
 * Steps depend on the business-fact family and, for value flows, on whether
 * the request is open-scope (no subject), ranked, a comparison, or monthly.
 * Unknown families yield an empty list.
 *
 * @param {{family: (string|null), action: string, aggregationNeed: (string|null),
 *   comparisonNeed: (string|null), rankingNeed: *, openScopeWithoutSubject: *,
 *   metadataScopedOpenLaneWithoutSubject: *}} input
 * @returns {string[]} Step names in execution order, without duplicates.
 */
function decompositionCandidatesFor(input) {
    const plan = [];
    // Appends steps in order through pushUnique and hands the plan back.
    const add = (...steps) => {
        for (const step of steps) {
            pushUnique(plan, step);
        }
        return plan;
    };
    const isMonthly = input.aggregationNeed === "by_month";
    const amountStep = isMonthly ? "aggregate_by_month" : "aggregate_checked_amounts";
    switch (input.family) {
        case "schema_surface":
            return add("inspect_metadata_surface");
        case "entity_grounding":
            return add("search_business_entity", "resolve_entity_reference", "probe_coverage");
        case "value_flow": {
            if (input.openScopeWithoutSubject) {
                // Open-scope requests never resolve an entity first.
                if (input.rankingNeed) {
                    return add("collect_scoped_movements", "aggregate_ranked_axis_values", "probe_coverage");
                }
                if (input.comparisonNeed === "incoming_vs_outgoing") {
                    add("collect_incoming_movements", "collect_outgoing_movements");
                    if (isMonthly) {
                        add("aggregate_by_month");
                    }
                    return add("probe_coverage");
                }
                return add("collect_scoped_movements", amountStep, "probe_coverage");
            }
            add("resolve_entity_reference");
            if (input.action === "net_value_flow") {
                add("collect_incoming_movements", "collect_outgoing_movements");
            }
            else {
                add("collect_scoped_movements");
            }
            return add(amountStep, "probe_coverage");
        }
        case "movement_evidence":
        case "document_evidence": {
            if (!input.metadataScopedOpenLaneWithoutSubject) {
                add("resolve_entity_reference");
            }
            const fetchStep = input.family === "movement_evidence" ? "fetch_scoped_movements" : "fetch_scoped_documents";
            return add(fetchStep, "probe_coverage");
        }
        case "activity_lifecycle":
            return add("resolve_entity_reference", "fetch_supporting_documents", "probe_coverage", "explain_evidence_basis");
        default:
            return plan;
    }
}
/**
 * Lists the overclaim guard flags for a business-fact family.
 *
 * `"no_raw_model_claims"` is always present; at most one family-specific flag follows.
 *
 * @param {string|null} family - Business-fact family name.
 * @returns {string[]} Guard flag names.
 */
function forbiddenOverclaimFlagsFor(family) {
    const flags = ["no_raw_model_claims"];
    switch (family) {
        case "schema_surface":
            pushUnique(flags, "no_fake_schema_surface");
            break;
        case "entity_grounding":
            pushUnique(flags, "no_unresolved_entity_claim");
            break;
        case "activity_lifecycle":
            pushUnique(flags, "no_legal_age_claim_without_evidence");
            break;
        case "value_flow":
        case "movement_evidence":
        case "document_evidence":
            pushUnique(flags, "no_unchecked_fact_totals");
            break;
        default:
            break;
    }
    return flags;
}
/**
 * Builds the data-need graph for one assistant turn.
 *
 * Reads `input.semanticDataNeed`, `input.rawUtterance` and the optional
 * `input.turnMeaning` record. From them it derives the business-fact family,
 * the aggregation/time/comparison/ranking needs, the clarification gaps, the
 * decomposition steps, the overclaim guard flags and a list of reason codes.
 *
 * @param {{semanticDataNeed: *, rawUtterance: *, turnMeaning: (Object|null|undefined)}} input
 *   Turn description. `turnMeaning.explicit_entity_candidates`, when present,
 *   must be an array (it is mapped and filtered).
 * @returns {Object} Graph record with `schema_version`, `policy_owner`,
 *   `subject_candidates`, `metadata_scope_hint`, `subject_resolution_optional`,
 *   `business_fact_family`, `action_family`, `aggregation_need`,
 *   `time_scope_need`, `comparison_need`, `ranking_need`, `proof_expectation`,
 *   `clarification_gaps`, `decomposition_candidates`,
 *   `forbidden_overclaim_flags` and `reason_codes`.
 */
function buildAssistantMcpDiscoveryDataNeedGraph(input) {
    // --- Normalized inputs -------------------------------------------------
    const semanticDataNeed = lower(input.semanticDataNeed);
    const meaning = input.turnMeaning ?? null;
    const domain = lower(meaning?.asked_domain_family);
    const action = lower(meaning?.asked_action_family);
    const unsupported = lower(meaning?.unsupported_but_understood_family);
    const utterance = lower(input.rawUtterance);
    const axis = lower(meaning?.asked_aggregation_axis);
    const seededRanking = toNonEmptyString(meaning?.seeded_ranking_need);
    const dateScope = toNonEmptyString(meaning?.explicit_date_scope);
    const organizationScope = toNonEmptyString(meaning?.explicit_organization_scope);
    const metadataScopeHint = toNonEmptyString(meaning?.metadata_scope_hint);
    const subjectOptional = meaning?.subject_resolution_optional === true;
    const subjects = (meaning?.explicit_entity_candidates ?? [])
        .map((candidate) => toNonEmptyString(candidate))
        .filter((candidate) => candidate !== null);
    const noSubjects = subjects.length === 0;

    // --- Derived needs -----------------------------------------------------
    const family = businessFactFamilyFor({ semanticDataNeed, domain, action, unsupported });
    const aggregationNeed = aggregationNeedFor(axis);
    const comparisonNeed = comparisonNeedFor(action);
    // A ranking hint found in the utterance takes priority over the seeded one.
    const rankingNeed = rankingNeedFromRawUtterance(utterance) ?? seededRanking;
    const allTimeScopeHint = hasAllTimeScopeHint(utterance);
    const oneSidedOpenScopeTotalHint = hasOpenScopeOneSidedValueTotalHintUtf8Safe(utterance, action);
    const openScopeWithoutSubject = noSubjects &&
        allowsOpenScopeWithoutSubject({
            family,
            action,
            organizationScope,
            comparisonNeed,
            rankingNeed,
            oneSidedOpenScopeTotalHint
        });
    const metadataScopedOpenLaneWithoutSubject = noSubjects &&
        allowsMetadataScopedOpenLaneWithoutSubject({
            family,
            subjectResolutionOptional: subjectOptional
        });

    // --- Clarification gaps (order: lane choice, organization/subject, period)
    const gaps = [];
    if (unsupported === "metadata_lane_choice_clarification" || action === "resolve_next_lane") {
        pushUnique(gaps, "lane_family_choice");
    }
    if (noSubjects) {
        const openValueFlow = family === "value_flow" && openScopeWithoutSubject;
        if (!organizationScope && (openValueFlow || metadataScopedOpenLaneWithoutSubject)) {
            // Subject-less lanes still need to know which organization to query.
            pushUnique(gaps, "organization");
        }
        else if (family !== "schema_surface" && !openScopeWithoutSubject && !metadataScopedOpenLaneWithoutSubject) {
            pushUnique(gaps, "subject");
        }
    }
    const timeScopeNeed = timeScopeNeedFor({
        family,
        explicitDateScope: dateScope,
        allTimeScopeHint
    });
    if (!dateScope && timeScopeNeed === "period_required") {
        pushUnique(gaps, "period");
    }

    const decompositionCandidates = decompositionCandidatesFor({
        family,
        action,
        aggregationNeed,
        comparisonNeed,
        rankingNeed,
        openScopeWithoutSubject,
        metadataScopedOpenLaneWithoutSubject
    });

    // --- Reason codes, emitted in this fixed order -------------------------
    const reasonCandidates = [
        "data_need_graph_built",
        family ? `data_need_graph_family_${family}` : "data_need_graph_family_unknown",
        aggregationNeed ? `data_need_graph_aggregation_${aggregationNeed}` : null,
        rankingNeed ? `data_need_graph_ranking_${rankingNeed}` : null,
        comparisonNeed ? `data_need_graph_comparison_${comparisonNeed}` : null,
        openScopeWithoutSubject && !rankingNeed && !comparisonNeed
            ? "data_need_graph_open_scope_total_without_subject"
            : null,
        metadataScopedOpenLaneWithoutSubject
            ? "data_need_graph_metadata_scoped_open_lane_without_subject"
            : null,
        allTimeScopeHint ? "data_need_graph_all_time_scope_hint" : null,
        gaps.includes("organization") ? "data_need_graph_open_scope_total_needs_organization" : null,
        gaps.length > 0 ? "data_need_graph_has_clarification_gaps" : null
    ];
    const reasonCodes = [];
    for (const reason of reasonCandidates) {
        if (reason !== null) {
            pushReason(reasonCodes, reason);
        }
    }

    const proofExpectation = proofExpectationFor({
        family,
        clarificationGaps: gaps
    });
    return {
        schema_version: exports.ASSISTANT_MCP_DISCOVERY_DATA_NEED_GRAPH_SCHEMA_VERSION,
        policy_owner: "assistantMcpDiscoveryDataNeedGraph",
        subject_candidates: subjects,
        metadata_scope_hint: metadataScopeHint,
        // `undefined` rather than `false` when the flag is not set.
        subject_resolution_optional: subjectOptional ? true : undefined,
        business_fact_family: family,
        action_family: toNonEmptyString(meaning?.asked_action_family),
        aggregation_need: aggregationNeed,
        time_scope_need: timeScopeNeed,
        comparison_need: comparisonNeed,
        ranking_need: rankingNeed,
        proof_expectation: proofExpectation,
        clarification_gaps: gaps,
        decomposition_candidates: decompositionCandidates,
        forbidden_overclaim_flags: forbiddenOverclaimFlagsFor(family),
        reason_codes: reasonCodes
    };
}