NODEDC_1C/llm_normalizer/backend/dist/services/assistantMcpDiscoveryDataNe...

493 lines
22 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"use strict";
// Mark this CommonJS module as an ES-module interop target.
Object.defineProperty(exports, "__esModule", { value: true });
// Declare the constant's export slot up front; the real value is assigned two lines below.
exports.ASSISTANT_MCP_DISCOVERY_DATA_NEED_GRAPH_SCHEMA_VERSION = void 0;
// The function declaration further down is hoisted, so it can be exported before its definition.
exports.buildAssistantMcpDiscoveryDataNeedGraph = buildAssistantMcpDiscoveryDataNeedGraph;
// Version tag written into the `schema_version` field of every graph returned by
// buildAssistantMcpDiscoveryDataNeedGraph.
exports.ASSISTANT_MCP_DISCOVERY_DATA_NEED_GRAPH_SCHEMA_VERSION = "assistant_data_need_graph_v1";
/**
 * Converts a value to its trimmed string form.
 *
 * @param {*} value - Any value; non-strings are coerced with `String()`.
 * @returns {string|null} The trimmed text, or `null` when the value is
 *   `null`/`undefined` or trims down to an empty string.
 */
function toNonEmptyString(value) {
    if (value == null) {
        return null;
    }
    const trimmed = String(value).trim();
    if (trimmed === "") {
        return null;
    }
    return trimmed;
}
/**
 * Normalizes a value for keyword comparison: coerce to string, trim, lowercase.
 *
 * @param {*} value - Any value; `null` and `undefined` are treated as "".
 * @returns {string} The trimmed, lower-cased text (possibly empty).
 */
function lower(value) {
    const source = value === null || value === undefined ? "" : value;
    const trimmed = String(source).trim();
    return trimmed.toLowerCase();
}
/**
 * Turns free text into a reason-code token.
 *
 * Runs of characters other than Unicode letters, Unicode digits, `_`, `.`, `:`
 * and `-` collapse into a single `_`; leading and trailing underscores are
 * stripped; the result is lower-cased and cut to at most 120 UTF-16 code units.
 *
 * @param {string} value - Raw reason text (must be a string: `.trim()` is called on it).
 * @returns {string|null} The normalized code, or `null` when nothing is left.
 */
function normalizeReasonCode(value) {
    const maxLength = 120;
    const underscored = value.trim().replace(/[^\p{L}\p{N}_.:-]+/gu, "_");
    const stripped = underscored.replace(/^_+|_+$/g, "");
    const code = stripped.toLowerCase();
    if (code.length === 0) {
        return null;
    }
    return code.slice(0, maxLength);
}
/**
 * Appends a normalized reason code to `target` unless it is empty or already present.
 *
 * @param {string[]} target - Array mutated in place.
 * @param {string} value - Raw reason text, normalized via `normalizeReasonCode`.
 * @returns {void}
 */
function pushReason(target, value) {
    const code = normalizeReasonCode(value);
    if (!code) {
        return;
    }
    if (target.includes(code)) {
        return;
    }
    target.push(code);
}
/**
 * Appends the trimmed string form of `value` to `target` unless it is empty
 * or already present.
 *
 * @param {string[]} target - Array mutated in place.
 * @param {*} value - Candidate entry, converted via `toNonEmptyString`.
 * @returns {void}
 */
function pushUnique(target, value) {
    const entry = toNonEmptyString(value);
    if (!entry) {
        return;
    }
    if (target.includes(entry)) {
        return;
    }
    target.push(entry);
}
/**
 * Classifies a data need into a business fact family by keyword lookup.
 *
 * The four input fields are joined with spaces into one string and probed with
 * plain substring checks. Rules are evaluated top to bottom and the first hit
 * wins, so rule order is part of the contract. No case folding happens here;
 * matching is case-sensitive on whatever the caller passes in.
 *
 * @param {{semanticDataNeed: *, domain: *, action: *, unsupported: *}} input
 * @returns {string|null} The family identifier, or `null` when no keyword matches.
 */
function businessFactFamilyFor(input) {
    const haystack = `${input.semanticDataNeed} ${input.domain} ${input.action} ${input.unsupported}`.trim();
    const mentionsAny = (needles) => needles.some((needle) => haystack.includes(needle));

    if (mentionsAny(["metadata lane clarification", "metadata"])) {
        return "schema_surface";
    }
    if (mentionsAny(["entity discovery", "entity_resolution"])) {
        return "entity_grounding";
    }
    if (mentionsAny([
        "broad_business_evaluation",
        "broad_evaluation",
        "business overview",
        "business_overview",
        "company analysis",
        "business audit"
    ])) {
        return "business_overview";
    }

    // Inventory-flavoured needs are refined into one of four sub-families.
    if (mentionsAny(["inventory", "stock", "warehouse", "item provenance", "purchase provenance"])) {
        if (mentionsAny(["sale_trace", "sale trace", "buyer", "purchase_to_sale", "purchase-to-sale"])) {
            return "inventory_sale_trace";
        }
        if (mentionsAny(["supplier_overlap", "supplier overlap", "supplier stock", "stock by supplier"])) {
            return "inventory_supplier_overlap";
        }
        if (mentionsAny(["purchase_provenance", "purchase provenance", "purchase document", "supplier provenance"])) {
            return "inventory_purchase_provenance";
        }
        return "inventory_stock_snapshot";
    }

    if (mentionsAny(["lifecycle", "activity"])) {
        return "activity_lifecycle";
    }
    if (mentionsAny(["movement"])) {
        return "movement_evidence";
    }
    if (mentionsAny(["document"])) {
        return "document_evidence";
    }
    // "net" is a bare substring check, so it also fires inside longer tokens
    // such as "net_value_flow".
    if (mentionsAny(["value-flow", "turnover", "payout", "net"])) {
        return "value_flow";
    }
    return null;
}
/**
 * Maps an aggregation axis to its `by_<axis>` need identifier.
 *
 * @param {string|null|undefined} axis - Axis name, e.g. "month".
 * @returns {string|null} `by_<axis>`, or `null` when the axis is empty/falsy.
 */
function aggregationNeedFor(axis) {
    // "month" needs no special case: the template already yields "by_month".
    return axis ? `by_${axis}` : null;
}
/**
 * Detects an "all time / whole period" phrase in the utterance.
 *
 * The pattern is case-insensitive and covers the Russian phrasings spelled
 * with \u escapes ("за всё/все время", "за весь период", "за всю историю/истории",
 * "за любой период") plus English "for all time", "all time",
 * "entire period", "full history" and "any period".
 *
 * @param {string|null|undefined} rawUtterance - Utterance text.
 * @returns {boolean} `true` when one of the phrases occurs; `false` for empty input.
 */
function hasAllTimeScopeHint(rawUtterance) {
    const allTimePattern = /(?:\u0437\u0430\s+\u0432\u0441[\u0435\u0451]\s+\u0432\u0440\u0435\u043c\u044f|\u0437\u0430\s+\u0432\u0435\u0441\u044c\s+\u043f\u0435\u0440\u0438\u043e\u0434|\u0437\u0430\s+\u0432\u0441\u044e\s+\u0438\u0441\u0442\u043e\u0440\u0438(?:\u044e|\u0438)|\u0437\u0430\s+\u043b\u044e\u0431\u043e\u0439\s+\u043f\u0435\u0440\u0438\u043e\u0434|for\s+all\s+time|all\s+time|entire\s+period|full\s+history|any\s+period)/iu;
    return rawUtterance ? allTimePattern.test(rawUtterance) : false;
}
/**
 * Decides which kind of time scope a fact family needs.
 *
 * An explicit date scope always wins. Otherwise the answer depends on the
 * family alone, except that value-flow, movement and document families honour
 * the all-time hint instead of demanding a period.
 *
 * @param {{family: string|null, explicitDateScope: *, allTimeScopeHint: *}} input
 * @returns {string|null} One of "explicit_period", "all_time_scope",
 *   "period_required", "open_activity_window", "as_of_date_required", or `null`.
 */
function timeScopeNeedFor(input) {
    const { family, explicitDateScope, allTimeScopeHint } = input;
    if (explicitDateScope) {
        return "explicit_period";
    }
    // From here on no explicit date scope exists.
    switch (family) {
        case "value_flow":
        case "movement_evidence":
        case "document_evidence":
            return allTimeScopeHint ? "all_time_scope" : "period_required";
        case "activity_lifecycle":
            return "open_activity_window";
        case "business_overview":
            return "all_time_scope";
        case "inventory_stock_snapshot":
        case "inventory_supplier_overlap":
            return "as_of_date_required";
        default:
            // Includes inventory_purchase_provenance / inventory_sale_trace,
            // which carry no time requirement without an explicit scope.
            return null;
    }
}
/**
 * Derives the comparison need from the action family.
 *
 * @param {string} action - Action family identifier.
 * @returns {string|null} "incoming_vs_outgoing" for "net_value_flow", otherwise `null`.
 */
function comparisonNeedFor(action) {
    return action === "net_value_flow" ? "incoming_vs_outgoing" : null;
}
/**
 * Detects a one-sided "how much did we receive / pay in total" phrasing in a
 * Russian utterance, for the given action family.
 *
 * The earlier version guarded the Cyrillic keywords with `\b`. In JavaScript
 * `\b` is defined over ASCII word characters only (even with the `u` flag), so
 * a boundary between whitespace and a Cyrillic letter is never recognised and
 * those alternatives could not match ordinary text. The boundaries are now
 * expressed with Unicode-aware lookarounds on letters, digits and underscore.
 *
 * Within this file only the `...Utf8Safe` sibling is called by the graph
 * builder; this function is kept for interface compatibility.
 *
 * @param {string|null|undefined} rawUtterance - Utterance text (matched case-insensitively).
 * @param {string} action - "turnover" (incoming side) or "payout" (outgoing side).
 * @returns {boolean} `true` when the utterance asks for such a total; `false`
 *   for empty input or any other action.
 */
function hasOpenScopeOneSidedValueTotalHint(rawUtterance, action) {
    if (!rawUtterance) {
        return false;
    }
    if (action === "turnover") {
        return /(?:(?<![\p{L}\p{N}_])сколько\s+(?:(?:вообще|всего|реально)\s+){0,2}(?:мы\s+)?(?:получили|получено|входящих(?:\s+денег)?(?:\s+было)?|поступлений|денег\s+пришло)(?![\p{L}\p{N}_])|(?:сумма|объем)\s+(?:входящих|поступлений)|поступлений\s+за(?![\p{L}\p{N}_]))/iu.test(rawUtterance);
    }
    if (action === "payout") {
        return /(?:(?<![\p{L}\p{N}_])сколько\s+(?:(?:вообще|всего|реально)\s+){0,2}(?:мы\s+)?(?:заплатили|выплатили|потратили|исходящих(?:\s+денег)?(?:\s+было)?|платежей(?:\s+было)?|списаний(?:\s+было)?)(?![\p{L}\p{N}_])|(?:сумма|объем)\s+(?:исходящих|платежей|списаний)|(?:платежей|списаний)\s+за(?![\p{L}\p{N}_]))/iu.test(rawUtterance);
    }
    return false;
}
/**
 * Detects a one-sided "how much did we receive / pay" total phrasing, using
 * patterns written entirely with \u escapes and without `\b` anchors.
 *
 * The patterns carry only the `u` flag, so matching is case-sensitive; the
 * graph builder in this file passes an already lower-cased utterance.
 *
 * @param {string|null|undefined} rawUtterance - Utterance text.
 * @param {string} action - "turnover" or "payout"; any other action yields `false`.
 * @returns {boolean} Whether the utterance contains one of the phrasings.
 */
function hasOpenScopeOneSidedValueTotalHintUtf8Safe(rawUtterance, action) {
    let pattern;
    switch (action) {
        case "turnover":
            pattern = /(?:\u0441\u043a\u043e\u043b\u044c\u043a\u043e\s+(?:(?:\u0432\u043e\u043e\u0431\u0449\u0435|\u0432\u0441\u0435\u0433\u043e|\u0440\u0435\u0430\u043b\u044c\u043d\u043e)\s+){0,2}(?:\u043c\u044b\s+)?(?:\u043f\u043e\u043b\u0443\u0447\u0438\u043b\u0438|\u043f\u043e\u043b\u0443\u0447\u0435\u043d\u043e|\u0432\u0445\u043e\u0434\u044f\u0449\u0438\u0445(?:\s+\u0434\u0435\u043d\u0435\u0433)?(?:\s+\u0431\u044b\u043b\u043e)?|\u043f\u043e\u0441\u0442\u0443\u043f\u043b\u0435\u043d\u0438\u0439|\u0434\u0435\u043d\u0435\u0433\s+\u043f\u0440\u0438\u0448\u043b\u043e)|(?:\u0441\u0443\u043c\u043c\u0430|\u043e\u0431\u044a\u0435\u043c)\s+(?:\u0432\u0445\u043e\u0434\u044f\u0449\u0438\u0445|\u043f\u043e\u0441\u0442\u0443\u043f\u043b\u0435\u043d\u0438\u0439)|\u043f\u043e\u0441\u0442\u0443\u043f\u043b\u0435\u043d\u0438\u0439\s+\u0437\u0430)/u;
            break;
        case "payout":
            pattern = /(?:\u0441\u043a\u043e\u043b\u044c\u043a\u043e\s+(?:(?:\u0432\u043e\u043e\u0431\u0449\u0435|\u0432\u0441\u0435\u0433\u043e|\u0440\u0435\u0430\u043b\u044c\u043d\u043e)\s+){0,2}(?:\u043c\u044b\s+)?(?:\u0437\u0430\u043f\u043b\u0430\u0442\u0438\u043b\u0438|\u0432\u044b\u043f\u043b\u0430\u0442\u0438\u043b\u0438|\u043f\u043e\u0442\u0440\u0430\u0442\u0438\u043b\u0438|\u0438\u0441\u0445\u043e\u0434\u044f\u0449\u0438\u0445(?:\s+\u0434\u0435\u043d\u0435\u0433)?(?:\s+\u0431\u044b\u043b\u043e)?|\u043f\u043b\u0430\u0442\u0435\u0436\u0435\u0439(?:\s+\u0431\u044b\u043b\u043e)?|\u0441\u043f\u0438\u0441\u0430\u043d\u0438\u0439(?:\s+\u0431\u044b\u043b\u043e)?)|(?:\u0441\u0443\u043c\u043c\u0430|\u043e\u0431\u044a\u0435\u043c)\s+(?:\u0438\u0441\u0445\u043e\u0434\u044f\u0449\u0438\u0445|\u043f\u043b\u0430\u0442\u0435\u0436\u0435\u0439|\u0441\u043f\u0438\u0441\u0430\u043d\u0438\u0439)|(?:\u043f\u043b\u0430\u0442\u0435\u0436\u0435\u0439|\u0441\u043f\u0438\u0441\u0430\u043d\u0438\u0439)\s+\u0437\u0430)/u;
            break;
        default:
            return false;
    }
    return rawUtterance ? pattern.test(rawUtterance) : false;
}
/**
 * Tells whether an action family may be totalled at organization scope
 * without a named subject.
 *
 * @param {string} action - Action family identifier.
 * @returns {boolean} `true` only for "turnover" and "payout".
 */
function supportsOrganizationScopedOpenTotal(action) {
    switch (action) {
        case "turnover":
        case "payout":
            return true;
        default:
            return false;
    }
}
/**
 * Decides whether a value-flow need may proceed without a named subject.
 *
 * Allowed when the family is "value_flow" and either a ranking is requested,
 * the comparison is incoming-vs-outgoing, or the action supports
 * organization-scoped totals and an organization scope or a one-sided
 * open-total hint is present.
 *
 * @param {{family: string|null, action: string, organizationScope: *,
 *   comparisonNeed: string|null, rankingNeed: *, oneSidedOpenScopeTotalHint: *}} input
 * @returns {boolean}
 */
function allowsOpenScopeWithoutSubject(input) {
    const { family, action, organizationScope, comparisonNeed, rankingNeed, oneSidedOpenScopeTotalHint } = input;
    if (family !== "value_flow") {
        return false;
    }
    const isRankedOrNet = Boolean(rankingNeed) || comparisonNeed === "incoming_vs_outgoing";
    if (isRankedOrNet) {
        return true;
    }
    if (!supportsOrganizationScopedOpenTotal(action)) {
        return false;
    }
    return Boolean(organizationScope || oneSidedOpenScopeTotalHint);
}
/**
 * Decides whether a movement/document lane may run without a named subject.
 *
 * @param {{family: string|null, subjectResolutionOptional: *}} input
 * @returns {boolean} `true` when subject resolution is optional and the family
 *   is "movement_evidence" or "document_evidence".
 */
function allowsMetadataScopedOpenLaneWithoutSubject(input) {
    if (!input.subjectResolutionOptional) {
        return false;
    }
    return input.family === "movement_evidence" || input.family === "document_evidence";
}
/**
 * Infers a ranking direction from the wording of the utterance.
 *
 * Fixes over the earlier patterns:
 * - `\b` in JavaScript only knows ASCII word characters, so `топ\b` and
 *   `сам(?:ый|ая|ое|ые)\b` could not match when followed by a space or the end
 *   of the text. Cyrillic keywords now use Unicode-aware lookarounds on
 *   letters, digits and underscore instead.
 * - "топ" additionally gets a left boundary so that words merely ending in
 *   "топ" (e.g. "стоп") do not count as a ranking request.
 * - `наимен[ьш]е` matched neither "наименее" nor "наименьш…"; both forms are
 *   now recognised.
 *
 * @param {*} value - Raw utterance; normalized with `lower` before matching.
 * @returns {"top_desc"|"bottom_asc"|null} Ranking need, or `null` when the
 *   text is empty or carries no ranking keyword. "top" keywords are checked first.
 */
function rankingNeedFromRawUtterance(value) {
    const text = lower(value);
    if (!text) {
        return null;
    }
    if (/(?:\btop[-\s]?\d+\b|\btop\b|(?<![\p{L}\p{N}_])топ(?:[-\s]?\d+|(?![\p{L}\p{N}_]))|сам(?:ый|ая|ое|ые)(?![\p{L}\p{N}_])|больше\s+всего|наибол[её]е|highest|largest|most)/iu.test(text)) {
        return "top_desc";
    }
    if (/(?:меньше\s+всего|наимен(?:[её]е|ь?ш)|lowest|smallest|least)/iu.test(text)) {
        return "bottom_asc";
    }
    return null;
}
/**
 * Chooses the proof expectation label for a data need.
 *
 * Any open clarification gap overrides the family-based answer.
 *
 * @param {{family: string|null, clarificationGaps: string[]}} input
 * @returns {string} "clarification_required", "schema_surface",
 *   "entity_grounding", "bounded_inference" or "coverage_checked_fact".
 */
function proofExpectationFor(input) {
    if (input.clarificationGaps.length > 0) {
        return "clarification_required";
    }
    switch (input.family) {
        case "schema_surface":
            return "schema_surface";
        case "entity_grounding":
            return "entity_grounding";
        case "activity_lifecycle":
        case "business_overview":
            return "bounded_inference";
        default:
            return "coverage_checked_fact";
    }
}
/**
 * Lists the ordered decomposition steps suggested for a fact family.
 *
 * Each call returns a fresh array. Step order is significant and mirrors the
 * order in which the steps are meant to be listed for the family.
 *
 * @param {{family: string|null, action: string, aggregationNeed: string|null,
 *   comparisonNeed: string|null, rankingNeed: *, openScopeWithoutSubject: *,
 *   metadataScopedOpenLaneWithoutSubject: *}} input
 * @returns {string[]} Step identifiers; empty for an unknown family.
 */
function decompositionCandidatesFor(input) {
    const wantsMonthly = input.aggregationNeed === "by_month";
    const aggregationStep = wantsMonthly ? "aggregate_by_month" : "aggregate_checked_amounts";
    // Movement/document lanes skip entity resolution when they run metadata-scoped.
    const laneResolutionSteps = input.metadataScopedOpenLaneWithoutSubject ? [] : ["resolve_entity_reference"];

    switch (input.family) {
        case "schema_surface":
            return ["inspect_metadata_surface"];
        case "entity_grounding":
            return ["search_business_entity", "resolve_entity_reference", "probe_coverage"];
        case "value_flow": {
            if (input.openScopeWithoutSubject) {
                // No subject to resolve: ranking takes precedence, then net comparison.
                if (input.rankingNeed) {
                    return ["collect_scoped_movements", "aggregate_ranked_axis_values", "probe_coverage"];
                }
                if (input.comparisonNeed === "incoming_vs_outgoing") {
                    const monthlyStep = wantsMonthly ? ["aggregate_by_month"] : [];
                    return ["collect_incoming_movements", "collect_outgoing_movements", ...monthlyStep, "probe_coverage"];
                }
                return ["collect_scoped_movements", aggregationStep, "probe_coverage"];
            }
            const collectSteps = input.action === "net_value_flow"
                ? ["collect_incoming_movements", "collect_outgoing_movements"]
                : ["collect_scoped_movements"];
            return ["resolve_entity_reference", ...collectSteps, aggregationStep, "probe_coverage"];
        }
        case "movement_evidence":
            return [...laneResolutionSteps, "fetch_scoped_movements", "probe_coverage"];
        case "document_evidence":
            return [...laneResolutionSteps, "fetch_scoped_documents", "probe_coverage"];
        case "activity_lifecycle":
            return ["resolve_entity_reference", "fetch_supporting_documents", "probe_coverage", "explain_evidence_basis"];
        case "business_overview":
            return [
                "collect_scoped_movements",
                "aggregate_checked_amounts",
                "aggregate_ranked_axis_values",
                "fetch_supporting_documents",
                "probe_coverage",
                "explain_evidence_basis"
            ];
        case "inventory_stock_snapshot":
            return ["fetch_scoped_movements", "aggregate_checked_amounts", "probe_coverage", "explain_evidence_basis"];
        case "inventory_supplier_overlap":
            return [
                "resolve_entity_reference",
                "fetch_scoped_movements",
                "fetch_scoped_documents",
                "aggregate_checked_amounts",
                "probe_coverage",
                "explain_evidence_basis"
            ];
        case "inventory_purchase_provenance":
        case "inventory_sale_trace":
            return [
                "resolve_entity_reference",
                "fetch_scoped_documents",
                "drilldown_related_objects",
                "probe_coverage",
                "explain_evidence_basis"
            ];
        default:
            return [];
    }
}
/**
 * Lists the overclaim guard flags that apply to a fact family.
 *
 * "no_raw_model_claims" is always first; family-specific flags follow.
 *
 * @param {string|null} family - Business fact family identifier.
 * @returns {string[]} A fresh array of flag identifiers.
 */
function forbiddenOverclaimFlagsFor(family) {
    let familyFlags;
    switch (family) {
        case "schema_surface":
            familyFlags = ["no_fake_schema_surface"];
            break;
        case "entity_grounding":
            familyFlags = ["no_unresolved_entity_claim"];
            break;
        case "activity_lifecycle":
            familyFlags = ["no_legal_age_claim_without_evidence"];
            break;
        case "business_overview":
            familyFlags = [
                "no_unchecked_fact_totals",
                "no_unchecked_business_health_claim",
                "no_profit_or_margin_claim_without_evidence"
            ];
            break;
        case "value_flow":
        case "movement_evidence":
        case "document_evidence":
            familyFlags = ["no_unchecked_fact_totals"];
            break;
        case "inventory_stock_snapshot":
            familyFlags = ["no_unchecked_stock_snapshot"];
            break;
        case "inventory_purchase_provenance":
        case "inventory_supplier_overlap":
            familyFlags = ["no_unproven_supplier_attribution"];
            break;
        case "inventory_sale_trace":
            familyFlags = ["no_unproven_buyer_or_sale_trace"];
            break;
        default:
            familyFlags = [];
    }
    return ["no_raw_model_claims", ...familyFlags];
}
/**
 * Builds the data-need graph for one assistant turn.
 *
 * Reads `input.semanticDataNeed`, `input.rawUtterance` and the optional
 * `input.turnMeaning` record, then derives: the business fact family, the
 * aggregation / comparison / ranking / time-scope needs, the clarification
 * gaps still open, the suggested decomposition steps, the overclaim guard
 * flags and a list of reason codes explaining the decisions.
 *
 * Fields read from `turnMeaning`: `asked_domain_family`, `asked_action_family`,
 * `unsupported_but_understood_family`, `asked_aggregation_axis`,
 * `seeded_ranking_need`, `explicit_date_scope`, `explicit_organization_scope`,
 * `metadata_scope_hint`, `subject_resolution_optional`,
 * `explicit_entity_candidates`.
 *
 * @param {{semanticDataNeed?: *, rawUtterance?: *, turnMeaning?: object|null}} input
 * @returns {object} Plain object with snake_case keys (`schema_version`,
 *   `business_fact_family`, `clarification_gaps`, `reason_codes`, ...).
 */
function buildAssistantMcpDiscoveryDataNeedGraph(input) {
    // ---- Normalize raw inputs -------------------------------------------
    const semanticDataNeed = lower(input.semanticDataNeed);
    const meaning = input.turnMeaning ?? null;
    const domain = lower(meaning?.asked_domain_family);
    const action = lower(meaning?.asked_action_family);
    const unsupported = lower(meaning?.unsupported_but_understood_family);
    const utterance = lower(input.rawUtterance);
    const aggregationAxis = lower(meaning?.asked_aggregation_axis);
    const seededRankingNeed = toNonEmptyString(meaning?.seeded_ranking_need);
    const dateScope = toNonEmptyString(meaning?.explicit_date_scope);
    const organizationScope = toNonEmptyString(meaning?.explicit_organization_scope);
    const metadataScopeHint = toNonEmptyString(meaning?.metadata_scope_hint);
    const subjectResolutionOptional = meaning?.subject_resolution_optional === true;
    const subjectCandidates = (meaning?.explicit_entity_candidates ?? [])
        .map((candidate) => toNonEmptyString(candidate))
        .filter((candidate) => candidate !== null);
    const hasNoSubject = subjectCandidates.length === 0;

    // ---- Derive the individual needs ------------------------------------
    const family = businessFactFamilyFor({ semanticDataNeed, domain, action, unsupported });
    const aggregationNeed = aggregationNeedFor(aggregationAxis);
    const comparisonNeed = comparisonNeedFor(action);
    // Wording in the utterance outranks a ranking need seeded upstream.
    const rankingNeed = rankingNeedFromRawUtterance(utterance) ?? seededRankingNeed;
    const allTimeScopeHint = hasAllTimeScopeHint(utterance);
    const oneSidedOpenScopeTotalHint = hasOpenScopeOneSidedValueTotalHintUtf8Safe(utterance, action);

    // ---- Ways a turn may legitimately have no named subject -------------
    const openScopeWithoutSubject = hasNoSubject &&
        allowsOpenScopeWithoutSubject({
            family,
            action,
            organizationScope,
            comparisonNeed,
            rankingNeed,
            oneSidedOpenScopeTotalHint
        });
    const metadataScopedOpenLaneWithoutSubject = hasNoSubject &&
        allowsMetadataScopedOpenLaneWithoutSubject({ family, subjectResolutionOptional });
    const inventoryStockSnapshotWithoutSubject = hasNoSubject && family === "inventory_stock_snapshot";

    // ---- Clarification gaps (order: lane, organization|subject, time) ---
    const clarificationGaps = [];
    if (unsupported === "metadata_lane_choice_clarification" || action === "resolve_next_lane") {
        pushUnique(clarificationGaps, "lane_family_choice");
    }
    if (hasNoSubject) {
        const organizationScopedRoute = (family === "value_flow" && openScopeWithoutSubject) ||
            metadataScopedOpenLaneWithoutSubject ||
            family === "business_overview";
        const subjectIsMandatory = family !== "schema_surface" &&
            family !== "business_overview" &&
            !openScopeWithoutSubject &&
            !metadataScopedOpenLaneWithoutSubject &&
            !inventoryStockSnapshotWithoutSubject;
        if (organizationScopedRoute && !organizationScope) {
            pushUnique(clarificationGaps, "organization");
        }
        else if (subjectIsMandatory) {
            pushUnique(clarificationGaps, "subject");
        }
    }
    const timeScopeNeed = timeScopeNeedFor({
        family,
        explicitDateScope: dateScope,
        allTimeScopeHint
    });
    if (!dateScope) {
        if (timeScopeNeed === "period_required") {
            pushUnique(clarificationGaps, "period");
        }
        if (timeScopeNeed === "as_of_date_required") {
            pushUnique(clarificationGaps, "as_of_date");
        }
    }

    const decompositionCandidates = decompositionCandidatesFor({
        family,
        action,
        aggregationNeed,
        comparisonNeed,
        rankingNeed,
        openScopeWithoutSubject,
        metadataScopedOpenLaneWithoutSubject
    });

    // ---- Reason codes: [applies, code] pairs, emitted in this order -----
    const reasonCandidates = [
        [true, "data_need_graph_built"],
        [true, family ? `data_need_graph_family_${family}` : "data_need_graph_family_unknown"],
        [Boolean(aggregationNeed), `data_need_graph_aggregation_${aggregationNeed}`],
        [Boolean(rankingNeed), `data_need_graph_ranking_${rankingNeed}`],
        [Boolean(comparisonNeed), `data_need_graph_comparison_${comparisonNeed}`],
        [openScopeWithoutSubject && !rankingNeed && !comparisonNeed, "data_need_graph_open_scope_total_without_subject"],
        [metadataScopedOpenLaneWithoutSubject, "data_need_graph_metadata_scoped_open_lane_without_subject"],
        [allTimeScopeHint, "data_need_graph_all_time_scope_hint"],
        [family === "business_overview" && !dateScope, "data_need_graph_business_overview_defaults_to_all_time_scope"],
        [clarificationGaps.includes("organization"), "data_need_graph_open_scope_total_needs_organization"],
        [clarificationGaps.length > 0, "data_need_graph_has_clarification_gaps"]
    ];
    const reasonCodes = [];
    for (const [applies, code] of reasonCandidates) {
        if (applies) {
            pushReason(reasonCodes, code);
        }
    }

    const proofExpectation = proofExpectationFor({ family, clarificationGaps });
    const forbiddenOverclaimFlags = forbiddenOverclaimFlagsFor(family);

    return {
        schema_version: exports.ASSISTANT_MCP_DISCOVERY_DATA_NEED_GRAPH_SCHEMA_VERSION,
        policy_owner: "assistantMcpDiscoveryDataNeedGraph",
        subject_candidates: subjectCandidates,
        metadata_scope_hint: metadataScopeHint,
        // The key is always present; its value is `undefined` unless the flag is true.
        subject_resolution_optional: subjectResolutionOptional ? true : undefined,
        business_fact_family: family,
        // Reported as given (trimmed only), not lower-cased.
        action_family: toNonEmptyString(meaning?.asked_action_family),
        aggregation_need: aggregationNeed,
        time_scope_need: timeScopeNeed,
        comparison_need: comparisonNeed,
        ranking_need: rankingNeed,
        proof_expectation: proofExpectation,
        clarification_gaps: clarificationGaps,
        decomposition_candidates: decompositionCandidates,
        forbidden_overclaim_flags: forbiddenOverclaimFlags,
        reason_codes: reasonCodes
    };
}