451 lines
17 KiB
TypeScript
451 lines
17 KiB
TypeScript
import type { AssistantMcpDiscoveryTurnMeaningRef } from "./assistantMcpDiscoveryPolicy";
|
||
|
||
/** Schema version literal written into the `schema_version` field of every data-need graph contract. */
export const ASSISTANT_MCP_DISCOVERY_DATA_NEED_GRAPH_SCHEMA_VERSION = "assistant_data_need_graph_v1" as const;
|
||
|
||
/**
 * Closed set of proof-expectation labels a data-need graph can carry in its
 * `proof_expectation` field.
 */
export type AssistantMcpDiscoveryDataNeedProofExpectation =
  | "schema_surface"
  | "entity_grounding"
  | "coverage_checked_fact"
  | "bounded_inference"
  | "clarification_required";
|
||
|
||
/**
 * Shape of the data-need graph returned by `buildAssistantMcpDiscoveryDataNeedGraph`.
 * Nullable fields are `null` when the corresponding need was not detected.
 */
export interface AssistantMcpDiscoveryDataNeedGraphContract {
  /** Always the module's schema version constant. */
  schema_version: typeof ASSISTANT_MCP_DISCOVERY_DATA_NEED_GRAPH_SCHEMA_VERSION;
  /** Fixed owner tag identifying this module as the producer. */
  policy_owner: "assistantMcpDiscoveryDataNeedGraph";
  /** Non-empty, trimmed entity candidates taken from the turn meaning. */
  subject_candidates: string[];
  /** Classified fact family (e.g. "value_flow", "schema_surface"), or null when unclassified. */
  business_fact_family: string | null;
  /** The asked action family from the turn meaning, trimmed but not lower-cased. */
  action_family: string | null;
  /** Aggregation label of the form `by_<axis>`, or null when no axis was asked. */
  aggregation_need: string | null;
  /** Time-scope label such as "explicit_period" or "period_required", or null. */
  time_scope_need: string | null;
  /** Comparison label (currently only "incoming_vs_outgoing"), or null. */
  comparison_need: string | null;
  /** Ranking label detected from the utterance, or the seeded value from the turn meaning, or null. */
  ranking_need: string | null;
  /** Proof expectation derived from the family and the clarification gaps. */
  proof_expectation: AssistantMcpDiscoveryDataNeedProofExpectation;
  /** Names of missing pieces (e.g. "subject", "organization", "period", "lane_family_choice"). */
  clarification_gaps: string[];
  /** Ordered step names proposed for satisfying the data need. */
  decomposition_candidates: string[];
  /** Guard flags naming claims that must not be made for this family. */
  forbidden_overclaim_flags: string[];
  /** Normalized, de-duplicated reason codes describing how the graph was built. */
  reason_codes: string[];
}
|
||
|
||
/**
 * Input accepted by `buildAssistantMcpDiscoveryDataNeedGraph`.
 * Every field is optional and may be null; missing values are treated as empty.
 */
export interface BuildAssistantMcpDiscoveryDataNeedGraphInput {
  /** Free-text description of the data need; lower-cased and keyword-matched to pick a fact family. */
  semanticDataNeed?: string | null;
  /** The user's utterance; lower-cased and scanned for ranking, all-time and one-sided-total hints. */
  rawUtterance?: string | null;
  /** Structured turn meaning supplying domain, action, scopes and entity candidates. */
  turnMeaning?: AssistantMcpDiscoveryTurnMeaningRef | null;
}
|
||
|
||
/**
 * Stringifies a value and trims it.
 *
 * @param value Any value; `null` and `undefined` are treated as absent.
 * @returns The trimmed string, or `null` when the value is absent or trims to nothing.
 */
function toNonEmptyString(value: unknown): string | null {
  if (value == null) {
    return null;
  }
  const trimmed = String(value).trim();
  return trimmed === "" ? null : trimmed;
}
|
||
|
||
/**
 * Normalizes a value for case-insensitive matching.
 *
 * @param value Any value; `null` and `undefined` become the empty string.
 * @returns The stringified value, trimmed and lower-cased.
 */
function lower(value: unknown): string {
  const text = value === null || value === undefined ? "" : String(value);
  return text.trim().toLowerCase();
}
|
||
|
||
/**
 * Turns arbitrary text into a reason-code token.
 *
 * Runs of characters other than letters, digits, `_`, `.`, `:` and `-` collapse
 * into a single underscore; leading and trailing underscores are stripped; the
 * result is lower-cased and capped at 120 characters.
 *
 * @param value Raw reason text.
 * @returns The normalized code, or `null` when nothing is left after normalization.
 */
function normalizeReasonCode(value: string): string | null {
  const collapsed = value.trim().replace(/[^\p{L}\p{N}_.:-]+/gu, "_");
  const stripped = collapsed.replace(/^_+|_+$/g, "").toLowerCase();
  if (stripped.length === 0) {
    return null;
  }
  // The cap is applied after stripping, so a truncated code may still end in "_".
  return stripped.slice(0, 120);
}
|
||
|
||
/**
 * Appends a reason code to `target` after normalizing it.
 * Nothing is added when normalization yields no code or the code is already present.
 *
 * @param target Array mutated in place.
 * @param value Raw reason text to normalize and append.
 */
function pushReason(target: string[], value: string): void {
  const code = normalizeReasonCode(value);
  if (!code || target.includes(code)) {
    return;
  }
  target.push(code);
}
|
||
|
||
/**
 * Appends a trimmed, non-empty string to `target` unless it is already there.
 *
 * @param target Array mutated in place.
 * @param value Candidate text; absent or blank values are ignored.
 */
function pushUnique(target: string[], value: string | null | undefined): void {
  const candidate = toNonEmptyString(value);
  if (!candidate || target.includes(candidate)) {
    return;
  }
  target.push(candidate);
}
|
||
|
||
/**
 * Classifies a data need into a business fact family by keyword lookup.
 *
 * The four inputs are joined with spaces and searched for plain substrings.
 * Rules are tried in order and the first hit wins, so "metadata" outranks
 * "movement", which outranks "document", and so on.
 *
 * @param input Text fields to search; the match is case-sensitive, so callers
 *   are expected to pass already lower-cased text.
 * @returns The family name, or `null` when no keyword is found.
 */
function businessFactFamilyFor(input: {
  semanticDataNeed: string;
  domain: string;
  action: string;
  unsupported: string;
}): string | null {
  const haystack = `${input.semanticDataNeed} ${input.domain} ${input.action} ${input.unsupported}`.trim();

  // Order matters: earlier rules shadow later ones.
  const rules: ReadonlyArray<{ family: string; keywords: readonly string[] }> = [
    { family: "schema_surface", keywords: ["metadata lane clarification", "metadata"] },
    { family: "entity_grounding", keywords: ["entity discovery", "entity_resolution"] },
    { family: "activity_lifecycle", keywords: ["lifecycle", "activity"] },
    { family: "movement_evidence", keywords: ["movement"] },
    { family: "document_evidence", keywords: ["document"] },
    // "net" is a bare substring, so it also matches inside longer words.
    { family: "value_flow", keywords: ["value-flow", "turnover", "payout", "net"] }
  ];

  for (const rule of rules) {
    if (rule.keywords.some((keyword) => haystack.includes(keyword))) {
      return rule.family;
    }
  }
  return null;
}
|
||
|
||
/**
 * Maps an aggregation axis to its `by_<axis>` label.
 *
 * @param axis Axis name; an empty string means no aggregation was asked.
 * @returns `by_<axis>` (e.g. "by_month"), or `null` for an empty axis.
 */
function aggregationNeedFor(axis: string): string | null {
  return axis ? `by_${axis}` : null;
}
|
||
|
||
/**
 * Detects phrases that ask for an unbounded period.
 *
 * The pattern lists Russian phrases (written as `\u` escapes) alongside the
 * English "for all time", "all time", "entire period", "full history" and
 * "any period". Matching is case-insensitive and not anchored to word edges.
 *
 * @param rawUtterance Utterance text; an empty string never matches.
 * @returns `true` when any of the phrases occurs in the text.
 */
function hasAllTimeScopeHint(rawUtterance: string): boolean {
  const allTimePattern =
    /(?:\u0437\u0430\s+\u0432\u0441[\u0435\u0451]\s+\u0432\u0440\u0435\u043c\u044f|\u0437\u0430\s+\u0432\u0435\u0441\u044c\s+\u043f\u0435\u0440\u0438\u043e\u0434|\u0437\u0430\s+\u0432\u0441\u044e\s+\u0438\u0441\u0442\u043e\u0440\u0438(?:\u044e|\u0438)|\u0437\u0430\s+\u043b\u044e\u0431\u043e\u0439\s+\u043f\u0435\u0440\u0438\u043e\u0434|for\s+all\s+time|all\s+time|entire\s+period|full\s+history|any\s+period)/iu;
  return Boolean(rawUtterance) && allTimePattern.test(rawUtterance);
}
|
||
|
||
/**
 * Decides which time-scope label applies to a data need.
 *
 * An explicit date scope always wins. Otherwise the value-flow, movement and
 * document families yield "all_time_scope" when an all-time hint was seen and
 * "period_required" when not; the activity-lifecycle family yields
 * "open_activity_window".
 *
 * @param input Fact family, explicit date scope (if any) and the all-time hint flag.
 * @returns The time-scope label, or `null` for families with no time-scope rule.
 */
function timeScopeNeedFor(input: {
  family: string | null;
  explicitDateScope: string | null;
  allTimeScopeHint: boolean;
}): string | null {
  const { family, explicitDateScope, allTimeScopeHint } = input;
  if (explicitDateScope) {
    return "explicit_period";
  }

  const periodBoundFamilies = ["value_flow", "movement_evidence", "document_evidence"];
  if (family !== null && periodBoundFamilies.includes(family)) {
    return allTimeScopeHint ? "all_time_scope" : "period_required";
  }

  return family === "activity_lifecycle" ? "open_activity_window" : null;
}
|
||
|
||
/**
 * Maps an action family to the comparison it implies.
 *
 * @param action Action family name, compared exactly.
 * @returns "incoming_vs_outgoing" for "net_value_flow", otherwise `null`.
 */
function comparisonNeedFor(action: string): string | null {
  return action === "net_value_flow" ? "incoming_vs_outgoing" : null;
}
|
||
|
||
/**
 * Detects Russian phrasings that ask for a one-sided money total
 * ("how much did we receive / pay", "sum of incoming / outgoing", "... for <period>").
 *
 * The earlier patterns relied on `\b`, which in JavaScript only recognizes
 * ASCII word characters even with the `u` flag. Cyrillic letters therefore
 * never formed a boundary against whitespace or the string edges, and the
 * "сколько ..." and "... за" alternatives could not match ordinary text.
 * Word edges are now expressed with Unicode-aware lookarounds instead.
 *
 * @param rawUtterance Utterance text; an empty string never matches.
 * @param action Action family; only "turnover" (incoming) and "payout" (outgoing) are handled.
 * @returns `true` when the utterance contains a matching phrase for the given action.
 */
function hasOpenScopeOneSidedValueTotalHint(rawUtterance: string, action: string): boolean {
  if (!rawUtterance) {
    return false;
  }
  // (?<![\p{L}\p{N}_]) / (?![\p{L}\p{N}_]) act as word boundaries for any script.
  if (action === "turnover") {
    return /(?:(?<![\p{L}\p{N}_])сколько\s+(?:(?:вообще|всего|реально)\s+){0,2}(?:мы\s+)?(?:получили|получено|входящих(?:\s+денег)?(?:\s+было)?|поступлений|денег\s+пришло)(?![\p{L}\p{N}_])|(?:сумма|объем)\s+(?:входящих|поступлений)|поступлений\s+за(?![\p{L}\p{N}_]))/iu.test(
      rawUtterance
    );
  }
  if (action === "payout") {
    return /(?:(?<![\p{L}\p{N}_])сколько\s+(?:(?:вообще|всего|реально)\s+){0,2}(?:мы\s+)?(?:заплатили|выплатили|потратили|исходящих(?:\s+денег)?(?:\s+было)?|платежей(?:\s+было)?|списаний(?:\s+было)?)(?![\p{L}\p{N}_])|(?:сумма|объем)\s+(?:исходящих|платежей|списаний)|(?:платежей|списаний)\s+за(?![\p{L}\p{N}_]))/iu.test(
      rawUtterance
    );
  }
  return false;
}
|
||
|
||
/**
 * Detects Russian phrasings that ask for a one-sided money total, using
 * patterns written entirely with `\u` escapes.
 *
 * Matching is case-sensitive (no `i` flag) and has no word-edge anchoring, so
 * the utterance should already be lower-cased by the caller.
 *
 * @param rawUtterance Utterance text; an empty string never matches.
 * @param action Action family; only "turnover" and "payout" have a pattern.
 * @returns `true` when the pattern for the given action occurs in the text.
 */
function hasOpenScopeOneSidedValueTotalHintUtf8Safe(rawUtterance: string, action: string): boolean {
  if (!rawUtterance) {
    return false;
  }

  let pattern: RegExp;
  switch (action) {
    case "turnover":
      pattern =
        /(?:\u0441\u043a\u043e\u043b\u044c\u043a\u043e\s+(?:(?:\u0432\u043e\u043e\u0431\u0449\u0435|\u0432\u0441\u0435\u0433\u043e|\u0440\u0435\u0430\u043b\u044c\u043d\u043e)\s+){0,2}(?:\u043c\u044b\s+)?(?:\u043f\u043e\u043b\u0443\u0447\u0438\u043b\u0438|\u043f\u043e\u043b\u0443\u0447\u0435\u043d\u043e|\u0432\u0445\u043e\u0434\u044f\u0449\u0438\u0445(?:\s+\u0434\u0435\u043d\u0435\u0433)?(?:\s+\u0431\u044b\u043b\u043e)?|\u043f\u043e\u0441\u0442\u0443\u043f\u043b\u0435\u043d\u0438\u0439|\u0434\u0435\u043d\u0435\u0433\s+\u043f\u0440\u0438\u0448\u043b\u043e)|(?:\u0441\u0443\u043c\u043c\u0430|\u043e\u0431\u044a\u0435\u043c)\s+(?:\u0432\u0445\u043e\u0434\u044f\u0449\u0438\u0445|\u043f\u043e\u0441\u0442\u0443\u043f\u043b\u0435\u043d\u0438\u0439)|\u043f\u043e\u0441\u0442\u0443\u043f\u043b\u0435\u043d\u0438\u0439\s+\u0437\u0430)/u;
      break;
    case "payout":
      pattern =
        /(?:\u0441\u043a\u043e\u043b\u044c\u043a\u043e\s+(?:(?:\u0432\u043e\u043e\u0431\u0449\u0435|\u0432\u0441\u0435\u0433\u043e|\u0440\u0435\u0430\u043b\u044c\u043d\u043e)\s+){0,2}(?:\u043c\u044b\s+)?(?:\u0437\u0430\u043f\u043b\u0430\u0442\u0438\u043b\u0438|\u0432\u044b\u043f\u043b\u0430\u0442\u0438\u043b\u0438|\u043f\u043e\u0442\u0440\u0430\u0442\u0438\u043b\u0438|\u0438\u0441\u0445\u043e\u0434\u044f\u0449\u0438\u0445(?:\s+\u0434\u0435\u043d\u0435\u0433)?(?:\s+\u0431\u044b\u043b\u043e)?|\u043f\u043b\u0430\u0442\u0435\u0436\u0435\u0439(?:\s+\u0431\u044b\u043b\u043e)?|\u0441\u043f\u0438\u0441\u0430\u043d\u0438\u0439(?:\s+\u0431\u044b\u043b\u043e)?)|(?:\u0441\u0443\u043c\u043c\u0430|\u043e\u0431\u044a\u0435\u043c)\s+(?:\u0438\u0441\u0445\u043e\u0434\u044f\u0449\u0438\u0445|\u043f\u043b\u0430\u0442\u0435\u0436\u0435\u0439|\u0441\u043f\u0438\u0441\u0430\u043d\u0438\u0439)|(?:\u043f\u043b\u0430\u0442\u0435\u0436\u0435\u0439|\u0441\u043f\u0438\u0441\u0430\u043d\u0438\u0439)\s+\u0437\u0430)/u;
      break;
    default:
      return false;
  }
  return pattern.test(rawUtterance);
}
|
||
|
||
/**
 * Tells whether an action family can be totalled without a subject.
 *
 * @param action Action family name, compared exactly.
 * @returns `true` only for "turnover" and "payout".
 */
function supportsOrganizationScopedOpenTotal(action: string): boolean {
  switch (action) {
    case "turnover":
    case "payout":
      return true;
    default:
      return false;
  }
}
|
||
|
||
/**
 * Decides whether a request may proceed without a named subject.
 *
 * Only the "value_flow" family qualifies. Within it, a ranking need or an
 * incoming-vs-outgoing comparison is enough on its own; otherwise the action
 * must support open totals and either an organization scope or a one-sided
 * total hint must be present.
 *
 * @param input Family, action, scopes and hints gathered for the current turn.
 * @returns `true` when the open-scope path is allowed.
 */
function allowsOpenScopeWithoutSubject(input: {
  family: string | null;
  action: string;
  organizationScope: string | null;
  comparisonNeed: string | null;
  rankingNeed: string | null;
  oneSidedOpenScopeTotalHint: boolean;
}): boolean {
  const { family, action, organizationScope, comparisonNeed, rankingNeed, oneSidedOpenScopeTotalHint } = input;
  if (family !== "value_flow") {
    return false;
  }

  const rankedOrCompared = Boolean(rankingNeed) || comparisonNeed === "incoming_vs_outgoing";
  if (rankedOrCompared) {
    return true;
  }

  if (!supportsOrganizationScopedOpenTotal(action)) {
    return false;
  }
  return Boolean(organizationScope) || Boolean(oneSidedOpenScopeTotalHint);
}
|
||
|
||
/**
 * Detects a ranking request ("top N", "самый ...", "lowest", ...) in an utterance.
 *
 * Fixes over the earlier patterns:
 * - `\b` only understands ASCII word characters in JavaScript, so `топ\b` and
 *   `сам(?:ый|...)\b` could not match before a space or at the end of the text.
 *   Word edges are now Unicode-aware lookarounds.
 * - `наимен[ьш]е` matched a single character between "наимен" and "е", so it
 *   missed both "наименьше" and "наименее". It now accepts "наименьш..." forms
 *   and "наименее".
 * - English keywords are anchored to word edges so that e.g. "almost" no
 *   longer counts as "most".
 *
 * Descending ("top") cues are checked before ascending ("bottom") cues.
 *
 * @param value Utterance text; it is trimmed and lower-cased before matching.
 * @returns "top_desc", "bottom_asc", or `null` when no ranking cue is found.
 */
function rankingNeedFromRawUtterance(value: string): string | null {
  // Normalized inline so the block does not depend on module helpers.
  const text = String(value ?? "").trim().toLowerCase();
  if (!text) {
    return null;
  }

  // (?<![\p{L}\p{N}_]) and (?![\p{L}\p{N}_]) are word boundaries that work for Cyrillic too.
  const topPattern =
    /(?<![\p{L}\p{N}_])(?:top[-\s]?\d*|топ[-\s]?\d*|сам(?:ый|ая|ое|ые)|больше\s+всего|наибол[её]е|highest|largest|most)(?![\p{L}\p{N}_])/iu;
  if (topPattern.test(text)) {
    return "top_desc";
  }

  const bottomPattern =
    /(?<![\p{L}\p{N}_])(?:меньше\s+всего|наимен(?:ьш\p{L}*|ее)|lowest|smallest|least)(?![\p{L}\p{N}_])/iu;
  if (bottomPattern.test(text)) {
    return "bottom_asc";
  }

  return null;
}
|
||
|
||
/**
 * Chooses the proof expectation for a data need.
 *
 * Any clarification gap forces "clarification_required". Otherwise the schema
 * and entity families map to their own label, the activity-lifecycle family
 * maps to "bounded_inference", and everything else (including an unknown
 * family) maps to "coverage_checked_fact".
 *
 * @param input Fact family and the clarification gaps collected so far.
 * @returns The proof-expectation label.
 */
function proofExpectationFor(input: {
  family: string | null;
  clarificationGaps: string[];
}): AssistantMcpDiscoveryDataNeedProofExpectation {
  if (input.clarificationGaps.length > 0) {
    return "clarification_required";
  }
  switch (input.family) {
    case "schema_surface":
      return "schema_surface";
    case "entity_grounding":
      return "entity_grounding";
    case "activity_lifecycle":
      return "bounded_inference";
    default:
      return "coverage_checked_fact";
  }
}
|
||
|
||
/**
 * Lists the ordered decomposition steps proposed for a data need.
 *
 * Non-value-flow families map to a fixed step list. For "value_flow" the list
 * depends on whether the request runs without a subject (ranking, comparison
 * or plain total) or is scoped to a resolved entity.
 *
 * @param input Family, action and the needs/flags derived for the current turn.
 * @returns Step names in execution order; empty when the family is unknown.
 */
function decompositionCandidatesFor(input: {
  family: string | null;
  action: string;
  aggregationNeed: string | null;
  comparisonNeed: string | null;
  rankingNeed: string | null;
  openScopeWithoutSubject: boolean;
}): string[] {
  const { family, action, aggregationNeed, comparisonNeed, rankingNeed, openScopeWithoutSubject } = input;

  switch (family) {
    case "schema_surface":
      return ["inspect_metadata_surface"];
    case "entity_grounding":
      return ["search_business_entity", "resolve_entity_reference", "probe_coverage"];
    case "movement_evidence":
      return ["resolve_entity_reference", "fetch_scoped_movements", "probe_coverage"];
    case "document_evidence":
      return ["resolve_entity_reference", "fetch_scoped_documents", "probe_coverage"];
    case "activity_lifecycle":
      return ["resolve_entity_reference", "fetch_supporting_documents", "probe_coverage", "explain_evidence_basis"];
    case "value_flow":
      break;
    default:
      return [];
  }

  // From here on the family is "value_flow".
  const monthly = aggregationNeed === "by_month";
  const aggregateStep = monthly ? "aggregate_by_month" : "aggregate_checked_amounts";
  const bothDirections = ["collect_incoming_movements", "collect_outgoing_movements"];

  if (openScopeWithoutSubject) {
    if (rankingNeed) {
      return ["collect_scoped_movements", "aggregate_ranked_axis_values", "probe_coverage"];
    }
    if (comparisonNeed === "incoming_vs_outgoing") {
      // The open-scope comparison only aggregates when a monthly axis was asked.
      return monthly
        ? [...bothDirections, "aggregate_by_month", "probe_coverage"]
        : [...bothDirections, "probe_coverage"];
    }
    return ["collect_scoped_movements", aggregateStep, "probe_coverage"];
  }

  const collectSteps = action === "net_value_flow" ? bothDirections : ["collect_scoped_movements"];
  return ["resolve_entity_reference", ...collectSteps, aggregateStep, "probe_coverage"];
}
|
||
|
||
/**
 * Lists the overclaim guard flags that apply to a fact family.
 *
 * "no_raw_model_claims" is always first; at most one family-specific flag follows.
 *
 * @param family Fact family, or `null` when unclassified.
 * @returns A fresh array of flag names.
 */
function forbiddenOverclaimFlagsFor(family: string | null): string[] {
  const flags = ["no_raw_model_claims"];
  switch (family) {
    case "schema_surface":
      flags.push("no_fake_schema_surface");
      break;
    case "entity_grounding":
      flags.push("no_unresolved_entity_claim");
      break;
    case "activity_lifecycle":
      flags.push("no_legal_age_claim_without_evidence");
      break;
    case "value_flow":
    case "movement_evidence":
    case "document_evidence":
      flags.push("no_unchecked_fact_totals");
      break;
    default:
      break;
  }
  return flags;
}
|
||
|
||
/**
 * Builds the data-need graph contract for one assistant turn.
 *
 * Steps, in order:
 * 1. Normalize the text inputs and the turn-meaning fields.
 * 2. Classify the fact family and derive aggregation, comparison, ranking and
 *    time-scope needs.
 * 3. Decide whether the request may run without a subject.
 * 4. Collect clarification gaps (lane choice, organization or subject, period).
 * 5. Propose decomposition steps and record reason codes.
 *
 * @param input Semantic need text, raw utterance and turn meaning; all optional.
 * @returns A freshly built contract object.
 */
export function buildAssistantMcpDiscoveryDataNeedGraph(
  input: BuildAssistantMcpDiscoveryDataNeedGraphInput
): AssistantMcpDiscoveryDataNeedGraphContract {
  const meaning = input.turnMeaning ?? null;

  // Lower-cased views used for keyword and pattern matching.
  const normalizedNeed = lower(input.semanticDataNeed);
  const normalizedUtterance = lower(input.rawUtterance);
  const domain = lower(meaning?.asked_domain_family);
  const action = lower(meaning?.asked_action_family);
  const unsupported = lower(meaning?.unsupported_but_understood_family);
  const aggregationAxis = lower(meaning?.asked_aggregation_axis);

  // Trimmed-or-null views that keep the original casing.
  const seededRankingNeed = toNonEmptyString(meaning?.seeded_ranking_need);
  const explicitDateScope = toNonEmptyString(meaning?.explicit_date_scope);
  const explicitOrganizationScope = toNonEmptyString(meaning?.explicit_organization_scope);

  const subjectCandidates: string[] = [];
  for (const candidate of meaning?.explicit_entity_candidates ?? []) {
    const text = toNonEmptyString(candidate);
    if (text) {
      subjectCandidates.push(text);
    }
  }
  const hasSubject = subjectCandidates.length > 0;

  const businessFactFamily = businessFactFamilyFor({
    semanticDataNeed: normalizedNeed,
    domain,
    action,
    unsupported
  });
  const aggregationNeed = aggregationNeedFor(aggregationAxis);
  const comparisonNeed = comparisonNeedFor(action);
  // A ranking cue in the utterance takes precedence over the seeded value.
  const rankingNeed = rankingNeedFromRawUtterance(normalizedUtterance) ?? seededRankingNeed;
  const allTimeScopeHint = hasAllTimeScopeHint(normalizedUtterance);
  const oneSidedOpenScopeTotalHint = hasOpenScopeOneSidedValueTotalHintUtf8Safe(normalizedUtterance, action);

  const openScopeWithoutSubject =
    !hasSubject &&
    allowsOpenScopeWithoutSubject({
      family: businessFactFamily,
      action,
      organizationScope: explicitOrganizationScope,
      comparisonNeed,
      rankingNeed,
      oneSidedOpenScopeTotalHint
    });

  const clarificationGaps: string[] = [];
  const needsLaneChoice = unsupported === "metadata_lane_choice_clarification" || action === "resolve_next_lane";
  if (needsLaneChoice) {
    pushUnique(clarificationGaps, "lane_family_choice");
  }
  if (!hasSubject) {
    if (openScopeWithoutSubject) {
      // Open-scope value-flow requests still need to know which organization to total.
      if (businessFactFamily === "value_flow" && !explicitOrganizationScope) {
        pushUnique(clarificationGaps, "organization");
      }
    } else if (businessFactFamily !== "schema_surface") {
      pushUnique(clarificationGaps, "subject");
    }
  }

  const timeScopeNeed = timeScopeNeedFor({
    family: businessFactFamily,
    explicitDateScope,
    allTimeScopeHint
  });
  if (timeScopeNeed === "period_required" && !explicitDateScope) {
    pushUnique(clarificationGaps, "period");
  }

  const decompositionCandidates = decompositionCandidatesFor({
    family: businessFactFamily,
    action,
    aggregationNeed,
    comparisonNeed,
    rankingNeed,
    openScopeWithoutSubject
  });

  // Reason codes are emitted in a fixed order; null entries are skipped.
  const reasonCandidates: Array<string | null> = [
    "data_need_graph_built",
    `data_need_graph_family_${businessFactFamily || "unknown"}`,
    aggregationNeed ? `data_need_graph_aggregation_${aggregationNeed}` : null,
    rankingNeed ? `data_need_graph_ranking_${rankingNeed}` : null,
    comparisonNeed ? `data_need_graph_comparison_${comparisonNeed}` : null,
    openScopeWithoutSubject && !rankingNeed && !comparisonNeed
      ? "data_need_graph_open_scope_total_without_subject"
      : null,
    allTimeScopeHint ? "data_need_graph_all_time_scope_hint" : null,
    clarificationGaps.includes("organization") ? "data_need_graph_open_scope_total_needs_organization" : null,
    clarificationGaps.length > 0 ? "data_need_graph_has_clarification_gaps" : null
  ];
  const reasonCodes: string[] = [];
  for (const reason of reasonCandidates) {
    if (reason !== null) {
      pushReason(reasonCodes, reason);
    }
  }

  const proofExpectation = proofExpectationFor({
    family: businessFactFamily,
    clarificationGaps
  });

  return {
    schema_version: ASSISTANT_MCP_DISCOVERY_DATA_NEED_GRAPH_SCHEMA_VERSION,
    policy_owner: "assistantMcpDiscoveryDataNeedGraph",
    subject_candidates: subjectCandidates,
    business_fact_family: businessFactFamily,
    action_family: toNonEmptyString(meaning?.asked_action_family),
    aggregation_need: aggregationNeed,
    time_scope_need: timeScopeNeed,
    comparison_need: comparisonNeed,
    ranking_need: rankingNeed,
    proof_expectation: proofExpectation,
    clarification_gaps: clarificationGaps,
    decomposition_candidates: decompositionCandidates,
    forbidden_overclaim_flags: forbiddenOverclaimFlagsFor(businessFactFamily),
    reason_codes: reasonCodes
  };
}
|